diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,290931 +1,3 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 6.691332179234268, - "eval_steps": 500, - "global_step": 41550, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0001610370787873908, - "grad_norm": 9.459293323743623e-06, - "learning_rate": 2e-05, - "loss": 46.0, - "step": 1 - }, - { - "epoch": 0.0003220741575747816, - "grad_norm": 1.8449256458552554e-05, - "learning_rate": 4e-05, - "loss": 46.0, - "step": 2 - }, - { - "epoch": 0.0004831112363621724, - "grad_norm": 1.7989095795201138e-05, - "learning_rate": 6e-05, - "loss": 46.0, - "step": 3 - }, - { - "epoch": 0.0006441483151495632, - "grad_norm": 1.1759363587771077e-05, - "learning_rate": 8e-05, - "loss": 46.0, - "step": 4 - }, - { - "epoch": 0.000805185393936954, - "grad_norm": 1.139960022555897e-05, - "learning_rate": 0.0001, - "loss": 46.0, - "step": 5 - }, - { - "epoch": 0.0009662224727243448, - "grad_norm": 1.4804975762672257e-05, - "learning_rate": 0.00012, - "loss": 46.0, - "step": 6 - }, - { - "epoch": 0.0011272595515117356, - "grad_norm": 2.1032992663094774e-05, - "learning_rate": 0.00014, - "loss": 46.0, - "step": 7 - }, - { - "epoch": 0.0012882966302991264, - "grad_norm": 1.5429051927640103e-05, - "learning_rate": 0.00016, - "loss": 46.0, - "step": 8 - }, - { - "epoch": 0.001449333709086517, - "grad_norm": 1.1477978659968358e-05, - "learning_rate": 0.00018, - "loss": 46.0, - "step": 9 - }, - { - "epoch": 0.001610370787873908, - "grad_norm": 1.7123571524280123e-05, - "learning_rate": 0.0002, - "loss": 46.0, - "step": 10 - }, - { - "epoch": 0.0017714078666612987, - "grad_norm": 1.0772208952403162e-05, - "learning_rate": 0.00019999999999998722, - "loss": 46.0, - "step": 11 - }, - { - "epoch": 0.0019324449454486896, - "grad_norm": 9.808137292566244e-06, - "learning_rate": 0.0001999999999999488, - "loss": 46.0, - "step": 12 - }, - { - "epoch": 0.0020934820242360804, - "grad_norm": 1.8400718545308337e-05, - "learning_rate": 0.00019999999999988484, - "loss": 46.0, - "step": 13 - }, - { - "epoch": 0.0022545191030234712, - "grad_norm": 2.247749398520682e-05, - "learning_rate": 0.00019999999999979523, - "loss": 46.0, - "step": 14 - }, - { - "epoch": 0.002415556181810862, - "grad_norm": 1.1134417945868336e-05, - "learning_rate": 0.0001999999999996801, - "loss": 46.0, - "step": 15 - }, - { - "epoch": 0.002576593260598253, - "grad_norm": 1.4492045920633245e-05, - "learning_rate": 0.0001999999999995393, - "loss": 46.0, - "step": 16 - }, - { - "epoch": 0.0027376303393856437, - "grad_norm": 1.4358376574818976e-05, - "learning_rate": 0.00019999999999937293, - "loss": 46.0, - "step": 17 - }, - { - "epoch": 0.002898667418173034, - "grad_norm": 1.1229038136661984e-05, - "learning_rate": 0.00019999999999918098, - "loss": 46.0, - "step": 18 - }, - { - "epoch": 0.003059704496960425, - "grad_norm": 1.850031731009949e-05, - "learning_rate": 0.0001999999999989634, - "loss": 46.0, - "step": 19 - }, - { - "epoch": 0.003220741575747816, - "grad_norm": 1.7780901544028893e-05, - "learning_rate": 0.00019999999999872027, - "loss": 46.0, - "step": 20 - }, - { - "epoch": 0.0033817786545352066, - "grad_norm": 1.23179188449285e-05, - "learning_rate": 0.00019999999999845152, - "loss": 46.0, - "step": 21 - }, - { - "epoch": 0.0035428157333225975, - "grad_norm": 1.1472806363599375e-05, - "learning_rate": 0.00019999999999815716, - "loss": 46.0, - "step": 22 - }, - { - "epoch": 0.0037038528121099883, - "grad_norm": 9.506068636255804e-06, - "learning_rate": 0.00019999999999783724, - "loss": 46.0, - "step": 23 - }, - { - "epoch": 0.003864889890897379, - "grad_norm": 1.6990292351692915e-05, - "learning_rate": 0.0001999999999974917, - "loss": 46.0, - "step": 24 - }, - { - "epoch": 0.00402592696968477, - "grad_norm": 1.920577051350847e-05, - "learning_rate": 0.00019999999999712059, - "loss": 46.0, - "step": 25 - }, - { - "epoch": 0.004186964048472161, - "grad_norm": 1.57214544742601e-05, - "learning_rate": 0.00019999999999672388, - "loss": 46.0, - "step": 26 - }, - { - "epoch": 0.004348001127259552, - "grad_norm": 2.9738939701928757e-05, - "learning_rate": 0.00019999999999630155, - "loss": 46.0, - "step": 27 - }, - { - "epoch": 0.0045090382060469425, - "grad_norm": 1.2327154763625003e-05, - "learning_rate": 0.00019999999999585364, - "loss": 46.0, - "step": 28 - }, - { - "epoch": 0.004670075284834333, - "grad_norm": 1.3533114724850748e-05, - "learning_rate": 0.00019999999999538014, - "loss": 46.0, - "step": 29 - }, - { - "epoch": 0.004831112363621724, - "grad_norm": 1.5398421965073794e-05, - "learning_rate": 0.00019999999999488103, - "loss": 46.0, - "step": 30 - }, - { - "epoch": 0.004992149442409115, - "grad_norm": 1.6816700735944323e-05, - "learning_rate": 0.00019999999999435636, - "loss": 46.0, - "step": 31 - }, - { - "epoch": 0.005153186521196506, - "grad_norm": 1.3670231965079438e-05, - "learning_rate": 0.00019999999999380604, - "loss": 46.0, - "step": 32 - }, - { - "epoch": 0.005314223599983897, - "grad_norm": 1.3132093954482116e-05, - "learning_rate": 0.00019999999999323017, - "loss": 46.0, - "step": 33 - }, - { - "epoch": 0.0054752606787712875, - "grad_norm": 2.7774303816840984e-05, - "learning_rate": 0.00019999999999262868, - "loss": 46.0, - "step": 34 - }, - { - "epoch": 0.005636297757558678, - "grad_norm": 2.077547469525598e-05, - "learning_rate": 0.0001999999999920016, - "loss": 46.0, - "step": 35 - }, - { - "epoch": 0.005797334836346068, - "grad_norm": 1.980315391847398e-05, - "learning_rate": 0.00019999999999134892, - "loss": 46.0, - "step": 36 - }, - { - "epoch": 0.005958371915133459, - "grad_norm": 2.2662210540147498e-05, - "learning_rate": 0.00019999999999067067, - "loss": 46.0, - "step": 37 - }, - { - "epoch": 0.00611940899392085, - "grad_norm": 2.1207339159445837e-05, - "learning_rate": 0.00019999999998996683, - "loss": 46.0, - "step": 38 - }, - { - "epoch": 0.006280446072708241, - "grad_norm": 1.7205984477186576e-05, - "learning_rate": 0.00019999999998923738, - "loss": 46.0, - "step": 39 - }, - { - "epoch": 0.006441483151495632, - "grad_norm": 2.1629894035868347e-05, - "learning_rate": 0.00019999999998848234, - "loss": 46.0, - "step": 40 - }, - { - "epoch": 0.006602520230283022, - "grad_norm": 2.7965374101768248e-05, - "learning_rate": 0.00019999999998770166, - "loss": 46.0, - "step": 41 - }, - { - "epoch": 0.006763557309070413, - "grad_norm": 2.5096620447584428e-05, - "learning_rate": 0.00019999999998689542, - "loss": 46.0, - "step": 42 - }, - { - "epoch": 0.006924594387857804, - "grad_norm": 3.5594304790720344e-05, - "learning_rate": 0.0001999999999860636, - "loss": 46.0, - "step": 43 - }, - { - "epoch": 0.007085631466645195, - "grad_norm": 3.1788349588168785e-05, - "learning_rate": 0.00019999999998520618, - "loss": 46.0, - "step": 44 - }, - { - "epoch": 0.007246668545432586, - "grad_norm": 2.8515927624539472e-05, - "learning_rate": 0.00019999999998432315, - "loss": 46.0, - "step": 45 - }, - { - "epoch": 0.007407705624219977, - "grad_norm": 2.7847892852150835e-05, - "learning_rate": 0.0001999999999834145, - "loss": 46.0, - "step": 46 - }, - { - "epoch": 0.007568742703007367, - "grad_norm": 3.0626124498667195e-05, - "learning_rate": 0.00019999999998248033, - "loss": 46.0, - "step": 47 - }, - { - "epoch": 0.007729779781794758, - "grad_norm": 2.7933489036513492e-05, - "learning_rate": 0.00019999999998152052, - "loss": 46.0, - "step": 48 - }, - { - "epoch": 0.007890816860582149, - "grad_norm": 3.10891009576153e-05, - "learning_rate": 0.0001999999999805351, - "loss": 46.0, - "step": 49 - }, - { - "epoch": 0.00805185393936954, - "grad_norm": 1.9566661649150774e-05, - "learning_rate": 0.00019999999997952412, - "loss": 46.0, - "step": 50 - }, - { - "epoch": 0.00821289101815693, - "grad_norm": 5.5264958064071834e-05, - "learning_rate": 0.00019999999997848754, - "loss": 46.0, - "step": 51 - }, - { - "epoch": 0.008373928096944322, - "grad_norm": 2.4125354684656486e-05, - "learning_rate": 0.00019999999997742532, - "loss": 46.0, - "step": 52 - }, - { - "epoch": 0.008534965175731712, - "grad_norm": 3.317361915833317e-05, - "learning_rate": 0.00019999999997633757, - "loss": 46.0, - "step": 53 - }, - { - "epoch": 0.008696002254519103, - "grad_norm": 3.8641785067738965e-05, - "learning_rate": 0.00019999999997522417, - "loss": 46.0, - "step": 54 - }, - { - "epoch": 0.008857039333306494, - "grad_norm": 1.588473423907999e-05, - "learning_rate": 0.0001999999999740852, - "loss": 46.0, - "step": 55 - }, - { - "epoch": 0.009018076412093885, - "grad_norm": 3.6028392059961334e-05, - "learning_rate": 0.00019999999997292065, - "loss": 46.0, - "step": 56 - }, - { - "epoch": 0.009179113490881276, - "grad_norm": 3.70649722754024e-05, - "learning_rate": 0.0001999999999717305, - "loss": 46.0, - "step": 57 - }, - { - "epoch": 0.009340150569668667, - "grad_norm": 3.086036304011941e-05, - "learning_rate": 0.0001999999999705147, - "loss": 46.0, - "step": 58 - }, - { - "epoch": 0.009501187648456057, - "grad_norm": 3.073871630476788e-05, - "learning_rate": 0.00019999999996927337, - "loss": 46.0, - "step": 59 - }, - { - "epoch": 0.009662224727243448, - "grad_norm": 5.74203186261002e-05, - "learning_rate": 0.00019999999996800642, - "loss": 46.0, - "step": 60 - }, - { - "epoch": 0.009823261806030839, - "grad_norm": 1.878445800684858e-05, - "learning_rate": 0.0001999999999667139, - "loss": 46.0, - "step": 61 - }, - { - "epoch": 0.00998429888481823, - "grad_norm": 3.903669130522758e-05, - "learning_rate": 0.00019999999996539575, - "loss": 46.0, - "step": 62 - }, - { - "epoch": 0.01014533596360562, - "grad_norm": 6.173171277623624e-05, - "learning_rate": 0.000199999999964052, - "loss": 46.0, - "step": 63 - }, - { - "epoch": 0.010306373042393012, - "grad_norm": 3.220455255359411e-05, - "learning_rate": 0.0001999999999626827, - "loss": 46.0, - "step": 64 - }, - { - "epoch": 0.010467410121180402, - "grad_norm": 6.513421976706013e-05, - "learning_rate": 0.00019999999996128778, - "loss": 46.0, - "step": 65 - }, - { - "epoch": 0.010628447199967793, - "grad_norm": 4.503427044255659e-05, - "learning_rate": 0.00019999999995986726, - "loss": 46.0, - "step": 66 - }, - { - "epoch": 0.010789484278755184, - "grad_norm": 5.680751200998202e-05, - "learning_rate": 0.00019999999995842113, - "loss": 46.0, - "step": 67 - }, - { - "epoch": 0.010950521357542575, - "grad_norm": 5.3842857596464455e-05, - "learning_rate": 0.00019999999995694943, - "loss": 46.0, - "step": 68 - }, - { - "epoch": 0.011111558436329966, - "grad_norm": 3.435022517805919e-05, - "learning_rate": 0.00019999999995545212, - "loss": 46.0, - "step": 69 - }, - { - "epoch": 0.011272595515117357, - "grad_norm": 4.8579728172626346e-05, - "learning_rate": 0.00019999999995392925, - "loss": 46.0, - "step": 70 - }, - { - "epoch": 0.011433632593904747, - "grad_norm": 6.448462227126583e-05, - "learning_rate": 0.00019999999995238077, - "loss": 46.0, - "step": 71 - }, - { - "epoch": 0.011594669672692137, - "grad_norm": 3.0377606890397146e-05, - "learning_rate": 0.00019999999995080667, - "loss": 46.0, - "step": 72 - }, - { - "epoch": 0.011755706751479527, - "grad_norm": 4.2671086703194305e-05, - "learning_rate": 0.000199999999949207, - "loss": 46.0, - "step": 73 - }, - { - "epoch": 0.011916743830266918, - "grad_norm": 5.8799378166440874e-05, - "learning_rate": 0.0001999999999475817, - "loss": 46.0, - "step": 74 - }, - { - "epoch": 0.012077780909054309, - "grad_norm": 3.1305702577810735e-05, - "learning_rate": 0.00019999999994593085, - "loss": 46.0, - "step": 75 - }, - { - "epoch": 0.0122388179878417, - "grad_norm": 4.199046816211194e-05, - "learning_rate": 0.00019999999994425437, - "loss": 46.0, - "step": 76 - }, - { - "epoch": 0.01239985506662909, - "grad_norm": 4.404900028021075e-05, - "learning_rate": 0.0001999999999425523, - "loss": 46.0, - "step": 77 - }, - { - "epoch": 0.012560892145416482, - "grad_norm": 4.4940319639863446e-05, - "learning_rate": 0.0001999999999408247, - "loss": 46.0, - "step": 78 - }, - { - "epoch": 0.012721929224203872, - "grad_norm": 5.616362614091486e-05, - "learning_rate": 0.00019999999993907143, - "loss": 46.0, - "step": 79 - }, - { - "epoch": 0.012882966302991263, - "grad_norm": 4.128489672439173e-05, - "learning_rate": 0.00019999999993729257, - "loss": 46.0, - "step": 80 - }, - { - "epoch": 0.013044003381778654, - "grad_norm": 3.650110375019722e-05, - "learning_rate": 0.00019999999993548814, - "loss": 46.0, - "step": 81 - }, - { - "epoch": 0.013205040460566045, - "grad_norm": 3.355491207912564e-05, - "learning_rate": 0.0001999999999336581, - "loss": 46.0, - "step": 82 - }, - { - "epoch": 0.013366077539353436, - "grad_norm": 3.100166577496566e-05, - "learning_rate": 0.00019999999993180247, - "loss": 46.0, - "step": 83 - }, - { - "epoch": 0.013527114618140827, - "grad_norm": 4.128339787712321e-05, - "learning_rate": 0.00019999999992992127, - "loss": 46.0, - "step": 84 - }, - { - "epoch": 0.013688151696928217, - "grad_norm": 5.978300396236591e-05, - "learning_rate": 0.00019999999992801443, - "loss": 46.0, - "step": 85 - }, - { - "epoch": 0.013849188775715608, - "grad_norm": 6.463456520577893e-05, - "learning_rate": 0.00019999999992608203, - "loss": 46.0, - "step": 86 - }, - { - "epoch": 0.014010225854502999, - "grad_norm": 4.458018156583421e-05, - "learning_rate": 0.00019999999992412402, - "loss": 46.0, - "step": 87 - }, - { - "epoch": 0.01417126293329039, - "grad_norm": 3.9894523069960997e-05, - "learning_rate": 0.00019999999992214042, - "loss": 46.0, - "step": 88 - }, - { - "epoch": 0.01433230001207778, - "grad_norm": 6.879306602058932e-05, - "learning_rate": 0.00019999999992013123, - "loss": 46.0, - "step": 89 - }, - { - "epoch": 0.014493337090865172, - "grad_norm": 7.311927038244903e-05, - "learning_rate": 0.00019999999991809642, - "loss": 46.0, - "step": 90 - }, - { - "epoch": 0.014654374169652562, - "grad_norm": 7.103729149093851e-05, - "learning_rate": 0.00019999999991603603, - "loss": 46.0, - "step": 91 - }, - { - "epoch": 0.014815411248439953, - "grad_norm": 6.073687472962774e-05, - "learning_rate": 0.00019999999991395005, - "loss": 46.0, - "step": 92 - }, - { - "epoch": 0.014976448327227344, - "grad_norm": 8.019880624487996e-05, - "learning_rate": 0.0001999999999118385, - "loss": 46.0, - "step": 93 - }, - { - "epoch": 0.015137485406014735, - "grad_norm": 7.244279549922794e-05, - "learning_rate": 0.0001999999999097013, - "loss": 46.0, - "step": 94 - }, - { - "epoch": 0.015298522484802126, - "grad_norm": 6.675118493149057e-05, - "learning_rate": 0.00019999999990753854, - "loss": 46.0, - "step": 95 - }, - { - "epoch": 0.015459559563589517, - "grad_norm": 8.598087151767686e-05, - "learning_rate": 0.0001999999999053502, - "loss": 46.0, - "step": 96 - }, - { - "epoch": 0.015620596642376907, - "grad_norm": 4.9705315177561715e-05, - "learning_rate": 0.00019999999990313622, - "loss": 46.0, - "step": 97 - }, - { - "epoch": 0.015781633721164298, - "grad_norm": 5.5819342378526926e-05, - "learning_rate": 0.00019999999990089667, - "loss": 46.0, - "step": 98 - }, - { - "epoch": 0.015942670799951687, - "grad_norm": 3.597228351281956e-05, - "learning_rate": 0.00019999999989863155, - "loss": 46.0, - "step": 99 - }, - { - "epoch": 0.01610370787873908, - "grad_norm": 5.974910527584143e-05, - "learning_rate": 0.0001999999998963408, - "loss": 46.0, - "step": 100 - }, - { - "epoch": 0.01626474495752647, - "grad_norm": 5.3069412388140336e-05, - "learning_rate": 0.00019999999989402448, - "loss": 46.0, - "step": 101 - }, - { - "epoch": 0.01642578203631386, - "grad_norm": 5.418806176749058e-05, - "learning_rate": 0.00019999999989168252, - "loss": 46.0, - "step": 102 - }, - { - "epoch": 0.01658681911510125, - "grad_norm": 9.603131184121594e-05, - "learning_rate": 0.000199999999889315, - "loss": 46.0, - "step": 103 - }, - { - "epoch": 0.016747856193888643, - "grad_norm": 5.194047116674483e-05, - "learning_rate": 0.0001999999998869219, - "loss": 46.0, - "step": 104 - }, - { - "epoch": 0.016908893272676032, - "grad_norm": 9.761226829141378e-05, - "learning_rate": 0.00019999999988450315, - "loss": 46.0, - "step": 105 - }, - { - "epoch": 0.017069930351463425, - "grad_norm": 6.508623482659459e-05, - "learning_rate": 0.00019999999988205884, - "loss": 46.0, - "step": 106 - }, - { - "epoch": 0.017230967430250814, - "grad_norm": 5.57337116333656e-05, - "learning_rate": 0.00019999999987958895, - "loss": 46.0, - "step": 107 - }, - { - "epoch": 0.017392004509038207, - "grad_norm": 0.00010947446571663022, - "learning_rate": 0.00019999999987709344, - "loss": 46.0, - "step": 108 - }, - { - "epoch": 0.017553041587825596, - "grad_norm": 5.486145164468326e-05, - "learning_rate": 0.00019999999987457235, - "loss": 46.0, - "step": 109 - }, - { - "epoch": 0.017714078666612988, - "grad_norm": 0.00012351380428299308, - "learning_rate": 0.00019999999987202566, - "loss": 46.0, - "step": 110 - }, - { - "epoch": 0.017875115745400377, - "grad_norm": 0.00010108689457410946, - "learning_rate": 0.00019999999986945337, - "loss": 46.0, - "step": 111 - }, - { - "epoch": 0.01803615282418777, - "grad_norm": 8.059964602580294e-05, - "learning_rate": 0.00019999999986685548, - "loss": 46.0, - "step": 112 - }, - { - "epoch": 0.01819718990297516, - "grad_norm": 0.00013831921387463808, - "learning_rate": 0.00019999999986423204, - "loss": 46.0, - "step": 113 - }, - { - "epoch": 0.01835822698176255, - "grad_norm": 7.420378824463114e-05, - "learning_rate": 0.00019999999986158295, - "loss": 46.0, - "step": 114 - }, - { - "epoch": 0.01851926406054994, - "grad_norm": 4.6814933739369735e-05, - "learning_rate": 0.00019999999985890828, - "loss": 46.0, - "step": 115 - }, - { - "epoch": 0.018680301139337333, - "grad_norm": 9.741364192450419e-05, - "learning_rate": 0.00019999999985620802, - "loss": 46.0, - "step": 116 - }, - { - "epoch": 0.018841338218124722, - "grad_norm": 6.949760427232832e-05, - "learning_rate": 0.0001999999998534822, - "loss": 46.0, - "step": 117 - }, - { - "epoch": 0.019002375296912115, - "grad_norm": 6.653879972873256e-05, - "learning_rate": 0.00019999999985073074, - "loss": 46.0, - "step": 118 - }, - { - "epoch": 0.019163412375699504, - "grad_norm": 0.0001317582791671157, - "learning_rate": 0.00019999999984795367, - "loss": 46.0, - "step": 119 - }, - { - "epoch": 0.019324449454486897, - "grad_norm": 8.747701940592378e-05, - "learning_rate": 0.00019999999984515106, - "loss": 46.0, - "step": 120 - }, - { - "epoch": 0.019485486533274286, - "grad_norm": 0.00013079583004582673, - "learning_rate": 0.0001999999998423228, - "loss": 46.0, - "step": 121 - }, - { - "epoch": 0.019646523612061678, - "grad_norm": 8.933498611440882e-05, - "learning_rate": 0.000199999999839469, - "loss": 46.0, - "step": 122 - }, - { - "epoch": 0.019807560690849067, - "grad_norm": 8.507933671353385e-05, - "learning_rate": 0.00019999999983658957, - "loss": 46.0, - "step": 123 - }, - { - "epoch": 0.01996859776963646, - "grad_norm": 0.00011648085637716576, - "learning_rate": 0.00019999999983368456, - "loss": 46.0, - "step": 124 - }, - { - "epoch": 0.02012963484842385, - "grad_norm": 0.00012115373829146847, - "learning_rate": 0.00019999999983075394, - "loss": 46.0, - "step": 125 - }, - { - "epoch": 0.02029067192721124, - "grad_norm": 0.00017015583580359817, - "learning_rate": 0.00019999999982779772, - "loss": 46.0, - "step": 126 - }, - { - "epoch": 0.02045170900599863, - "grad_norm": 6.490958912763745e-05, - "learning_rate": 0.00019999999982481592, - "loss": 46.0, - "step": 127 - }, - { - "epoch": 0.020612746084786023, - "grad_norm": 0.00011056676157750189, - "learning_rate": 0.00019999999982180854, - "loss": 46.0, - "step": 128 - }, - { - "epoch": 0.020773783163573412, - "grad_norm": 8.274184801848605e-05, - "learning_rate": 0.00019999999981877556, - "loss": 46.0, - "step": 129 - }, - { - "epoch": 0.020934820242360805, - "grad_norm": 7.937616464914754e-05, - "learning_rate": 0.00019999999981571695, - "loss": 46.0, - "step": 130 - }, - { - "epoch": 0.021095857321148194, - "grad_norm": 0.0001607205340405926, - "learning_rate": 0.00019999999981263277, - "loss": 46.0, - "step": 131 - }, - { - "epoch": 0.021256894399935587, - "grad_norm": 7.551934686489403e-05, - "learning_rate": 0.000199999999809523, - "loss": 46.0, - "step": 132 - }, - { - "epoch": 0.021417931478722976, - "grad_norm": 0.00015242550580296665, - "learning_rate": 0.00019999999980638763, - "loss": 46.0, - "step": 133 - }, - { - "epoch": 0.021578968557510368, - "grad_norm": 6.205275713000447e-05, - "learning_rate": 0.00019999999980322667, - "loss": 46.0, - "step": 134 - }, - { - "epoch": 0.021740005636297757, - "grad_norm": 0.00013843170017935336, - "learning_rate": 0.0001999999998000401, - "loss": 46.0, - "step": 135 - }, - { - "epoch": 0.02190104271508515, - "grad_norm": 8.66490590851754e-05, - "learning_rate": 0.00019999999979682795, - "loss": 46.0, - "step": 136 - }, - { - "epoch": 0.02206207979387254, - "grad_norm": 0.00012498012802097946, - "learning_rate": 0.0001999999997935902, - "loss": 46.0, - "step": 137 - }, - { - "epoch": 0.02222311687265993, - "grad_norm": 0.00010799634037539363, - "learning_rate": 0.00019999999979032683, - "loss": 46.0, - "step": 138 - }, - { - "epoch": 0.02238415395144732, - "grad_norm": 0.00010536867921473458, - "learning_rate": 0.00019999999978703793, - "loss": 46.0, - "step": 139 - }, - { - "epoch": 0.022545191030234713, - "grad_norm": 0.00013301914441399276, - "learning_rate": 0.00019999999978372336, - "loss": 46.0, - "step": 140 - }, - { - "epoch": 0.022706228109022102, - "grad_norm": 0.00018815284420270473, - "learning_rate": 0.00019999999978038323, - "loss": 46.0, - "step": 141 - }, - { - "epoch": 0.022867265187809495, - "grad_norm": 0.00010110333823831752, - "learning_rate": 0.00019999999977701752, - "loss": 46.0, - "step": 142 - }, - { - "epoch": 0.023028302266596884, - "grad_norm": 0.00014375909813679755, - "learning_rate": 0.0001999999997736262, - "loss": 46.0, - "step": 143 - }, - { - "epoch": 0.023189339345384273, - "grad_norm": 9.653297456679866e-05, - "learning_rate": 0.0001999999997702093, - "loss": 46.0, - "step": 144 - }, - { - "epoch": 0.023350376424171666, - "grad_norm": 0.00013295281678438187, - "learning_rate": 0.00019999999976676676, - "loss": 46.0, - "step": 145 - }, - { - "epoch": 0.023511413502959055, - "grad_norm": 0.00010267561447108164, - "learning_rate": 0.00019999999976329867, - "loss": 46.0, - "step": 146 - }, - { - "epoch": 0.023672450581746447, - "grad_norm": 0.00014794350136071444, - "learning_rate": 0.00019999999975980494, - "loss": 46.0, - "step": 147 - }, - { - "epoch": 0.023833487660533836, - "grad_norm": 0.00012542445620056242, - "learning_rate": 0.00019999999975628567, - "loss": 46.0, - "step": 148 - }, - { - "epoch": 0.02399452473932123, - "grad_norm": 0.0002975241222884506, - "learning_rate": 0.00019999999975274077, - "loss": 46.0, - "step": 149 - }, - { - "epoch": 0.024155561818108618, - "grad_norm": 7.225852459669113e-05, - "learning_rate": 0.0001999999997491703, - "loss": 46.0, - "step": 150 - }, - { - "epoch": 0.02431659889689601, - "grad_norm": 0.00010266884055454284, - "learning_rate": 0.00019999999974557422, - "loss": 46.0, - "step": 151 - }, - { - "epoch": 0.0244776359756834, - "grad_norm": 0.00019384155166335404, - "learning_rate": 0.00019999999974195255, - "loss": 46.0, - "step": 152 - }, - { - "epoch": 0.024638673054470792, - "grad_norm": 9.250854782294482e-05, - "learning_rate": 0.00019999999973830526, - "loss": 46.0, - "step": 153 - }, - { - "epoch": 0.02479971013325818, - "grad_norm": 0.00014043178816791624, - "learning_rate": 0.00019999999973463242, - "loss": 46.0, - "step": 154 - }, - { - "epoch": 0.024960747212045574, - "grad_norm": 9.878585115075111e-05, - "learning_rate": 0.00019999999973093396, - "loss": 46.0, - "step": 155 - }, - { - "epoch": 0.025121784290832963, - "grad_norm": 0.00014434207696467638, - "learning_rate": 0.00019999999972720992, - "loss": 46.0, - "step": 156 - }, - { - "epoch": 0.025282821369620356, - "grad_norm": 0.00012252044689375907, - "learning_rate": 0.00019999999972346023, - "loss": 46.0, - "step": 157 - }, - { - "epoch": 0.025443858448407745, - "grad_norm": 0.00013778013817500323, - "learning_rate": 0.00019999999971968499, - "loss": 46.0, - "step": 158 - }, - { - "epoch": 0.025604895527195137, - "grad_norm": 0.00012422144936863333, - "learning_rate": 0.00019999999971588418, - "loss": 46.0, - "step": 159 - }, - { - "epoch": 0.025765932605982526, - "grad_norm": 0.00015130244719330221, - "learning_rate": 0.0001999999997120577, - "loss": 46.0, - "step": 160 - }, - { - "epoch": 0.02592696968476992, - "grad_norm": 0.0001149854579125531, - "learning_rate": 0.0001999999997082057, - "loss": 46.0, - "step": 161 - }, - { - "epoch": 0.026088006763557308, - "grad_norm": 0.0001454713783459738, - "learning_rate": 0.00019999999970432808, - "loss": 46.0, - "step": 162 - }, - { - "epoch": 0.0262490438423447, - "grad_norm": 0.00012921511370223016, - "learning_rate": 0.00019999999970042487, - "loss": 46.0, - "step": 163 - }, - { - "epoch": 0.02641008092113209, - "grad_norm": 0.00023755831352900714, - "learning_rate": 0.00019999999969649604, - "loss": 46.0, - "step": 164 - }, - { - "epoch": 0.026571117999919482, - "grad_norm": 0.00018338421068619937, - "learning_rate": 0.00019999999969254163, - "loss": 46.0, - "step": 165 - }, - { - "epoch": 0.02673215507870687, - "grad_norm": 0.000321844156133011, - "learning_rate": 0.00019999999968856163, - "loss": 46.0, - "step": 166 - }, - { - "epoch": 0.026893192157494264, - "grad_norm": 0.0002377462078584358, - "learning_rate": 0.00019999999968455605, - "loss": 46.0, - "step": 167 - }, - { - "epoch": 0.027054229236281653, - "grad_norm": 0.0003046017372980714, - "learning_rate": 0.00019999999968052485, - "loss": 46.0, - "step": 168 - }, - { - "epoch": 0.027215266315069046, - "grad_norm": 0.00015924294712021947, - "learning_rate": 0.00019999999967646806, - "loss": 46.0, - "step": 169 - }, - { - "epoch": 0.027376303393856435, - "grad_norm": 0.00023570118355564773, - "learning_rate": 0.00019999999967238566, - "loss": 46.0, - "step": 170 - }, - { - "epoch": 0.027537340472643827, - "grad_norm": 0.00018532936519477516, - "learning_rate": 0.0001999999996682777, - "loss": 46.0, - "step": 171 - }, - { - "epoch": 0.027698377551431216, - "grad_norm": 0.00019276805687695742, - "learning_rate": 0.00019999999966414413, - "loss": 46.0, - "step": 172 - }, - { - "epoch": 0.02785941463021861, - "grad_norm": 0.0001177889498649165, - "learning_rate": 0.00019999999965998497, - "loss": 46.0, - "step": 173 - }, - { - "epoch": 0.028020451709005998, - "grad_norm": 0.0001398147433064878, - "learning_rate": 0.00019999999965580022, - "loss": 46.0, - "step": 174 - }, - { - "epoch": 0.02818148878779339, - "grad_norm": 0.00016932954895310104, - "learning_rate": 0.00019999999965158985, - "loss": 46.0, - "step": 175 - }, - { - "epoch": 0.02834252586658078, - "grad_norm": 0.000123539415653795, - "learning_rate": 0.00019999999964735393, - "loss": 46.0, - "step": 176 - }, - { - "epoch": 0.028503562945368172, - "grad_norm": 0.0002068693720502779, - "learning_rate": 0.00019999999964309236, - "loss": 46.0, - "step": 177 - }, - { - "epoch": 0.02866460002415556, - "grad_norm": 0.00022907050151843578, - "learning_rate": 0.0001999999996388052, - "loss": 46.0, - "step": 178 - }, - { - "epoch": 0.028825637102942954, - "grad_norm": 0.00013423155178315938, - "learning_rate": 0.00019999999963449247, - "loss": 46.0, - "step": 179 - }, - { - "epoch": 0.028986674181730343, - "grad_norm": 0.00021165514772292227, - "learning_rate": 0.00019999999963015417, - "loss": 46.0, - "step": 180 - }, - { - "epoch": 0.029147711260517736, - "grad_norm": 0.0002548027550801635, - "learning_rate": 0.00019999999962579023, - "loss": 46.0, - "step": 181 - }, - { - "epoch": 0.029308748339305125, - "grad_norm": 0.00017856726481113583, - "learning_rate": 0.0001999999996214007, - "loss": 46.0, - "step": 182 - }, - { - "epoch": 0.029469785418092517, - "grad_norm": 0.0002464374992996454, - "learning_rate": 0.0001999999996169856, - "loss": 46.0, - "step": 183 - }, - { - "epoch": 0.029630822496879906, - "grad_norm": 0.0002536777756176889, - "learning_rate": 0.00019999999961254489, - "loss": 46.0, - "step": 184 - }, - { - "epoch": 0.0297918595756673, - "grad_norm": 0.00021144087077118456, - "learning_rate": 0.00019999999960807857, - "loss": 46.0, - "step": 185 - }, - { - "epoch": 0.029952896654454688, - "grad_norm": 0.00031986809335649014, - "learning_rate": 0.00019999999960358667, - "loss": 46.0, - "step": 186 - }, - { - "epoch": 0.03011393373324208, - "grad_norm": 0.0001996238570427522, - "learning_rate": 0.00019999999959906917, - "loss": 46.0, - "step": 187 - }, - { - "epoch": 0.03027497081202947, - "grad_norm": 0.0002590011281426996, - "learning_rate": 0.0001999999995945261, - "loss": 46.0, - "step": 188 - }, - { - "epoch": 0.030436007890816862, - "grad_norm": 0.00011825704132206738, - "learning_rate": 0.0001999999995899574, - "loss": 46.0, - "step": 189 - }, - { - "epoch": 0.03059704496960425, - "grad_norm": 0.00025153084425255656, - "learning_rate": 0.00019999999958536312, - "loss": 46.0, - "step": 190 - }, - { - "epoch": 0.03075808204839164, - "grad_norm": 0.0004842008638661355, - "learning_rate": 0.00019999999958074326, - "loss": 46.0, - "step": 191 - }, - { - "epoch": 0.030919119127179033, - "grad_norm": 0.000218238215893507, - "learning_rate": 0.00019999999957609778, - "loss": 46.0, - "step": 192 - }, - { - "epoch": 0.031080156205966422, - "grad_norm": 0.00028900333563797176, - "learning_rate": 0.0001999999995714267, - "loss": 46.0, - "step": 193 - }, - { - "epoch": 0.031241193284753815, - "grad_norm": 0.00026324810460209846, - "learning_rate": 0.00019999999956673005, - "loss": 46.0, - "step": 194 - }, - { - "epoch": 0.03140223036354121, - "grad_norm": 0.0004234739753883332, - "learning_rate": 0.0001999999995620078, - "loss": 46.0, - "step": 195 - }, - { - "epoch": 0.031563267442328596, - "grad_norm": 0.0003671174345072359, - "learning_rate": 0.00019999999955725995, - "loss": 46.0, - "step": 196 - }, - { - "epoch": 0.031724304521115985, - "grad_norm": 0.0003977952292189002, - "learning_rate": 0.0001999999995524865, - "loss": 46.0, - "step": 197 - }, - { - "epoch": 0.031885341599903375, - "grad_norm": 0.0003290062304586172, - "learning_rate": 0.00019999999954768748, - "loss": 46.0, - "step": 198 - }, - { - "epoch": 0.03204637867869077, - "grad_norm": 0.00023715550196357071, - "learning_rate": 0.00019999999954286283, - "loss": 46.0, - "step": 199 - }, - { - "epoch": 0.03220741575747816, - "grad_norm": 0.0002557702246122062, - "learning_rate": 0.00019999999953801263, - "loss": 46.0, - "step": 200 - }, - { - "epoch": 0.03236845283626555, - "grad_norm": 0.00030496579711325467, - "learning_rate": 0.0001999999995331368, - "loss": 46.0, - "step": 201 - }, - { - "epoch": 0.03252948991505294, - "grad_norm": 0.00019574211910367012, - "learning_rate": 0.00019999999952823537, - "loss": 46.0, - "step": 202 - }, - { - "epoch": 0.032690526993840334, - "grad_norm": 0.000399620970711112, - "learning_rate": 0.00019999999952330838, - "loss": 46.0, - "step": 203 - }, - { - "epoch": 0.03285156407262772, - "grad_norm": 0.00023076729848980904, - "learning_rate": 0.00019999999951835577, - "loss": 46.0, - "step": 204 - }, - { - "epoch": 0.03301260115141511, - "grad_norm": 0.0003438382700551301, - "learning_rate": 0.00019999999951337758, - "loss": 46.0, - "step": 205 - }, - { - "epoch": 0.0331736382302025, - "grad_norm": 0.0002556856779847294, - "learning_rate": 0.00019999999950837377, - "loss": 46.0, - "step": 206 - }, - { - "epoch": 0.0333346753089899, - "grad_norm": 0.00024224602384492755, - "learning_rate": 0.00019999999950334437, - "loss": 46.0, - "step": 207 - }, - { - "epoch": 0.033495712387777286, - "grad_norm": 0.0002187650097766891, - "learning_rate": 0.00019999999949828938, - "loss": 46.0, - "step": 208 - }, - { - "epoch": 0.033656749466564675, - "grad_norm": 0.00031273020431399345, - "learning_rate": 0.0001999999994932088, - "loss": 46.0, - "step": 209 - }, - { - "epoch": 0.033817786545352065, - "grad_norm": 0.00029702080064453185, - "learning_rate": 0.00019999999948810263, - "loss": 46.0, - "step": 210 - }, - { - "epoch": 0.03397882362413946, - "grad_norm": 0.00018055284454021603, - "learning_rate": 0.00019999999948297085, - "loss": 46.0, - "step": 211 - }, - { - "epoch": 0.03413986070292685, - "grad_norm": 0.00024684282834641635, - "learning_rate": 0.0001999999994778135, - "loss": 46.0, - "step": 212 - }, - { - "epoch": 0.03430089778171424, - "grad_norm": 0.0003250887675676495, - "learning_rate": 0.00019999999947263052, - "loss": 46.0, - "step": 213 - }, - { - "epoch": 0.03446193486050163, - "grad_norm": 0.00041803737985901535, - "learning_rate": 0.00019999999946742199, - "loss": 46.0, - "step": 214 - }, - { - "epoch": 0.034622971939289024, - "grad_norm": 0.0002714908914640546, - "learning_rate": 0.0001999999994621878, - "loss": 46.0, - "step": 215 - }, - { - "epoch": 0.03478400901807641, - "grad_norm": 0.00026197044644504786, - "learning_rate": 0.00019999999945692807, - "loss": 46.0, - "step": 216 - }, - { - "epoch": 0.0349450460968638, - "grad_norm": 0.0002161135635105893, - "learning_rate": 0.00019999999945164275, - "loss": 46.0, - "step": 217 - }, - { - "epoch": 0.03510608317565119, - "grad_norm": 0.0003438770945649594, - "learning_rate": 0.0001999999994463318, - "loss": 46.0, - "step": 218 - }, - { - "epoch": 0.03526712025443859, - "grad_norm": 0.00020931630569975823, - "learning_rate": 0.0001999999994409953, - "loss": 46.0, - "step": 219 - }, - { - "epoch": 0.035428157333225976, - "grad_norm": 0.00029885736876167357, - "learning_rate": 0.00019999999943563315, - "loss": 46.0, - "step": 220 - }, - { - "epoch": 0.035589194412013365, - "grad_norm": 0.00047396900481544435, - "learning_rate": 0.00019999999943024543, - "loss": 46.0, - "step": 221 - }, - { - "epoch": 0.035750231490800755, - "grad_norm": 0.00031950193806551397, - "learning_rate": 0.0001999999994248321, - "loss": 46.0, - "step": 222 - }, - { - "epoch": 0.03591126856958815, - "grad_norm": 0.00022150597942527384, - "learning_rate": 0.0001999999994193932, - "loss": 46.0, - "step": 223 - }, - { - "epoch": 0.03607230564837554, - "grad_norm": 0.0003227836568839848, - "learning_rate": 0.0001999999994139287, - "loss": 46.0, - "step": 224 - }, - { - "epoch": 0.03623334272716293, - "grad_norm": 0.0002998760901391506, - "learning_rate": 0.0001999999994084386, - "loss": 46.0, - "step": 225 - }, - { - "epoch": 0.03639437980595032, - "grad_norm": 0.0004136904899496585, - "learning_rate": 0.0001999999994029229, - "loss": 46.0, - "step": 226 - }, - { - "epoch": 0.036555416884737714, - "grad_norm": 7.639649993507192e-05, - "learning_rate": 0.0001999999993973816, - "loss": 46.0, - "step": 227 - }, - { - "epoch": 0.0367164539635251, - "grad_norm": 0.00030812519253231585, - "learning_rate": 0.00019999999939181474, - "loss": 46.0, - "step": 228 - }, - { - "epoch": 0.03687749104231249, - "grad_norm": 0.0002964360173791647, - "learning_rate": 0.00019999999938622224, - "loss": 46.0, - "step": 229 - }, - { - "epoch": 0.03703852812109988, - "grad_norm": 0.0004796150024048984, - "learning_rate": 0.0001999999993806042, - "loss": 46.0, - "step": 230 - }, - { - "epoch": 0.03719956519988728, - "grad_norm": 0.0005432307370938361, - "learning_rate": 0.00019999999937496052, - "loss": 46.0, - "step": 231 - }, - { - "epoch": 0.037360602278674666, - "grad_norm": 0.00046832027146592736, - "learning_rate": 0.00019999999936929123, - "loss": 46.0, - "step": 232 - }, - { - "epoch": 0.037521639357462055, - "grad_norm": 0.0006342283450067043, - "learning_rate": 0.0001999999993635964, - "loss": 46.0, - "step": 233 - }, - { - "epoch": 0.037682676436249445, - "grad_norm": 0.0002783329109661281, - "learning_rate": 0.00019999999935787593, - "loss": 46.0, - "step": 234 - }, - { - "epoch": 0.03784371351503684, - "grad_norm": 0.00032400648342445493, - "learning_rate": 0.0001999999993521299, - "loss": 46.0, - "step": 235 - }, - { - "epoch": 0.03800475059382423, - "grad_norm": 0.0004526137199718505, - "learning_rate": 0.00019999999934635824, - "loss": 46.0, - "step": 236 - }, - { - "epoch": 0.03816578767261162, - "grad_norm": 0.0005123643204569817, - "learning_rate": 0.000199999999340561, - "loss": 46.0, - "step": 237 - }, - { - "epoch": 0.03832682475139901, - "grad_norm": 0.00032366730738431215, - "learning_rate": 0.00019999999933473816, - "loss": 46.0, - "step": 238 - }, - { - "epoch": 0.0384878618301864, - "grad_norm": 0.0005605536862276495, - "learning_rate": 0.00019999999932888973, - "loss": 46.0, - "step": 239 - }, - { - "epoch": 0.03864889890897379, - "grad_norm": 0.00045590754598379135, - "learning_rate": 0.00019999999932301572, - "loss": 46.0, - "step": 240 - }, - { - "epoch": 0.03880993598776118, - "grad_norm": 0.0004321521264500916, - "learning_rate": 0.0001999999993171161, - "loss": 46.0, - "step": 241 - }, - { - "epoch": 0.03897097306654857, - "grad_norm": 0.0005606827326118946, - "learning_rate": 0.00019999999931119092, - "loss": 46.0, - "step": 242 - }, - { - "epoch": 0.03913201014533596, - "grad_norm": 0.0004983293474651873, - "learning_rate": 0.00019999999930524006, - "loss": 46.0, - "step": 243 - }, - { - "epoch": 0.039293047224123356, - "grad_norm": 0.00033221906051039696, - "learning_rate": 0.0001999999992992637, - "loss": 46.0, - "step": 244 - }, - { - "epoch": 0.039454084302910745, - "grad_norm": 0.00033477682154625654, - "learning_rate": 0.00019999999929326169, - "loss": 46.0, - "step": 245 - }, - { - "epoch": 0.039615121381698135, - "grad_norm": 0.000464974669739604, - "learning_rate": 0.0001999999992872341, - "loss": 46.0, - "step": 246 - }, - { - "epoch": 0.039776158460485524, - "grad_norm": 0.00038884178502485156, - "learning_rate": 0.0001999999992811809, - "loss": 46.0, - "step": 247 - }, - { - "epoch": 0.03993719553927292, - "grad_norm": 0.0006443243473768234, - "learning_rate": 0.00019999999927510214, - "loss": 46.0, - "step": 248 - }, - { - "epoch": 0.04009823261806031, - "grad_norm": 0.0007490874268114567, - "learning_rate": 0.00019999999926899776, - "loss": 46.0, - "step": 249 - }, - { - "epoch": 0.0402592696968477, - "grad_norm": 0.0005477413069456816, - "learning_rate": 0.00019999999926286778, - "loss": 46.0, - "step": 250 - }, - { - "epoch": 0.04042030677563509, - "grad_norm": 0.0007731926161795855, - "learning_rate": 0.00019999999925671223, - "loss": 46.0, - "step": 251 - }, - { - "epoch": 0.04058134385442248, - "grad_norm": 0.0004357139696367085, - "learning_rate": 0.00019999999925053105, - "loss": 46.0, - "step": 252 - }, - { - "epoch": 0.04074238093320987, - "grad_norm": 0.0008482228149659932, - "learning_rate": 0.00019999999924432432, - "loss": 46.0, - "step": 253 - }, - { - "epoch": 0.04090341801199726, - "grad_norm": 0.000651440757792443, - "learning_rate": 0.00019999999923809195, - "loss": 46.0, - "step": 254 - }, - { - "epoch": 0.04106445509078465, - "grad_norm": 0.0006377010722644627, - "learning_rate": 0.000199999999231834, - "loss": 46.0, - "step": 255 - }, - { - "epoch": 0.041225492169572046, - "grad_norm": 0.0008143018931150436, - "learning_rate": 0.00019999999922555047, - "loss": 46.0, - "step": 256 - }, - { - "epoch": 0.041386529248359435, - "grad_norm": 0.0005103488801978528, - "learning_rate": 0.00019999999921924133, - "loss": 46.0, - "step": 257 - }, - { - "epoch": 0.041547566327146825, - "grad_norm": 0.000732742715626955, - "learning_rate": 0.0001999999992129066, - "loss": 46.0, - "step": 258 - }, - { - "epoch": 0.041708603405934214, - "grad_norm": 0.0006536121945828199, - "learning_rate": 0.00019999999920654627, - "loss": 46.0, - "step": 259 - }, - { - "epoch": 0.04186964048472161, - "grad_norm": 0.00036585298948921263, - "learning_rate": 0.00019999999920016035, - "loss": 46.0, - "step": 260 - }, - { - "epoch": 0.042030677563509, - "grad_norm": 0.0008182922028936446, - "learning_rate": 0.00019999999919374887, - "loss": 46.0, - "step": 261 - }, - { - "epoch": 0.04219171464229639, - "grad_norm": 0.0006231138831935823, - "learning_rate": 0.00019999999918731174, - "loss": 46.0, - "step": 262 - }, - { - "epoch": 0.04235275172108378, - "grad_norm": 0.0006047601345926523, - "learning_rate": 0.00019999999918084903, - "loss": 46.0, - "step": 263 - }, - { - "epoch": 0.04251378879987117, - "grad_norm": 0.0005017635994590819, - "learning_rate": 0.00019999999917436073, - "loss": 46.0, - "step": 264 - }, - { - "epoch": 0.04267482587865856, - "grad_norm": 0.0005903529818169773, - "learning_rate": 0.00019999999916784682, - "loss": 46.0, - "step": 265 - }, - { - "epoch": 0.04283586295744595, - "grad_norm": 0.0010839580791071057, - "learning_rate": 0.00019999999916130734, - "loss": 46.0, - "step": 266 - }, - { - "epoch": 0.04299690003623334, - "grad_norm": 0.0006588880205526948, - "learning_rate": 0.00019999999915474225, - "loss": 46.0, - "step": 267 - }, - { - "epoch": 0.043157937115020736, - "grad_norm": 0.0007730197394266725, - "learning_rate": 0.0001999999991481516, - "loss": 46.0, - "step": 268 - }, - { - "epoch": 0.043318974193808125, - "grad_norm": 0.0005474932258948684, - "learning_rate": 0.00019999999914153532, - "loss": 46.0, - "step": 269 - }, - { - "epoch": 0.043480011272595515, - "grad_norm": 0.0003796566743403673, - "learning_rate": 0.00019999999913489344, - "loss": 46.0, - "step": 270 - }, - { - "epoch": 0.043641048351382904, - "grad_norm": 0.000516220519784838, - "learning_rate": 0.00019999999912822598, - "loss": 46.0, - "step": 271 - }, - { - "epoch": 0.0438020854301703, - "grad_norm": 0.0006308844895102084, - "learning_rate": 0.0001999999991215329, - "loss": 46.0, - "step": 272 - }, - { - "epoch": 0.04396312250895769, - "grad_norm": 0.0006311295437626541, - "learning_rate": 0.00019999999911481426, - "loss": 46.0, - "step": 273 - }, - { - "epoch": 0.04412415958774508, - "grad_norm": 0.0005519656115211546, - "learning_rate": 0.00019999999910807, - "loss": 46.0, - "step": 274 - }, - { - "epoch": 0.04428519666653247, - "grad_norm": 0.0007428042590618134, - "learning_rate": 0.0001999999991013002, - "loss": 46.0, - "step": 275 - }, - { - "epoch": 0.04444623374531986, - "grad_norm": 0.0009881554869934916, - "learning_rate": 0.00019999999909450474, - "loss": 46.0, - "step": 276 - }, - { - "epoch": 0.04460727082410725, - "grad_norm": 0.00044621757115237415, - "learning_rate": 0.0001999999990876837, - "loss": 46.0, - "step": 277 - }, - { - "epoch": 0.04476830790289464, - "grad_norm": 0.0005154189420863986, - "learning_rate": 0.0001999999990808371, - "loss": 46.0, - "step": 278 - }, - { - "epoch": 0.04492934498168203, - "grad_norm": 0.0008768675616011024, - "learning_rate": 0.00019999999907396485, - "loss": 46.0, - "step": 279 - }, - { - "epoch": 0.045090382060469426, - "grad_norm": 0.0010444631334394217, - "learning_rate": 0.00019999999906706703, - "loss": 46.0, - "step": 280 - }, - { - "epoch": 0.045251419139256815, - "grad_norm": 0.000744470045901835, - "learning_rate": 0.0001999999990601436, - "loss": 46.0, - "step": 281 - }, - { - "epoch": 0.045412456218044205, - "grad_norm": 0.001204045256599784, - "learning_rate": 0.00019999999905319463, - "loss": 46.0, - "step": 282 - }, - { - "epoch": 0.045573493296831594, - "grad_norm": 0.0008198407595045865, - "learning_rate": 0.00019999999904622, - "loss": 46.0, - "step": 283 - }, - { - "epoch": 0.04573453037561899, - "grad_norm": 0.0010659419931471348, - "learning_rate": 0.0001999999990392198, - "loss": 46.0, - "step": 284 - }, - { - "epoch": 0.04589556745440638, - "grad_norm": 0.0010541955707594752, - "learning_rate": 0.00019999999903219404, - "loss": 46.0, - "step": 285 - }, - { - "epoch": 0.04605660453319377, - "grad_norm": 0.0006653243908658624, - "learning_rate": 0.00019999999902514263, - "loss": 46.0, - "step": 286 - }, - { - "epoch": 0.04621764161198116, - "grad_norm": 0.0009864730527624488, - "learning_rate": 0.00019999999901806564, - "loss": 46.0, - "step": 287 - }, - { - "epoch": 0.046378678690768546, - "grad_norm": 0.0012773185735568404, - "learning_rate": 0.00019999999901096306, - "loss": 46.0, - "step": 288 - }, - { - "epoch": 0.04653971576955594, - "grad_norm": 0.0012416749959811568, - "learning_rate": 0.0001999999990038349, - "loss": 46.0, - "step": 289 - }, - { - "epoch": 0.04670075284834333, - "grad_norm": 0.001295020105317235, - "learning_rate": 0.00019999999899668116, - "loss": 46.0, - "step": 290 - }, - { - "epoch": 0.04686178992713072, - "grad_norm": 0.0009199907653965056, - "learning_rate": 0.0001999999989895018, - "loss": 46.0, - "step": 291 - }, - { - "epoch": 0.04702282700591811, - "grad_norm": 0.0011596897384151816, - "learning_rate": 0.00019999999898229684, - "loss": 46.0, - "step": 292 - }, - { - "epoch": 0.047183864084705505, - "grad_norm": 0.000495227228384465, - "learning_rate": 0.00019999999897506626, - "loss": 46.0, - "step": 293 - }, - { - "epoch": 0.047344901163492895, - "grad_norm": 0.0010823584161698818, - "learning_rate": 0.00019999999896781013, - "loss": 46.0, - "step": 294 - }, - { - "epoch": 0.047505938242280284, - "grad_norm": 0.0009814115474000573, - "learning_rate": 0.0001999999989605284, - "loss": 46.0, - "step": 295 - }, - { - "epoch": 0.04766697532106767, - "grad_norm": 0.001199794583953917, - "learning_rate": 0.00019999999895322105, - "loss": 46.0, - "step": 296 - }, - { - "epoch": 0.04782801239985507, - "grad_norm": 0.0008057034574449062, - "learning_rate": 0.00019999999894588813, - "loss": 46.0, - "step": 297 - }, - { - "epoch": 0.04798904947864246, - "grad_norm": 0.0009269247530028224, - "learning_rate": 0.0001999999989385296, - "loss": 46.0, - "step": 298 - }, - { - "epoch": 0.04815008655742985, - "grad_norm": 0.0007923051598481834, - "learning_rate": 0.0001999999989311455, - "loss": 46.0, - "step": 299 - }, - { - "epoch": 0.048311123636217236, - "grad_norm": 0.0011324144434183836, - "learning_rate": 0.00019999999892373577, - "loss": 46.0, - "step": 300 - }, - { - "epoch": 0.04847216071500463, - "grad_norm": 0.0009203523513861, - "learning_rate": 0.00019999999891630045, - "loss": 46.0, - "step": 301 - }, - { - "epoch": 0.04863319779379202, - "grad_norm": 0.0010944072855636477, - "learning_rate": 0.00019999999890883956, - "loss": 46.0, - "step": 302 - }, - { - "epoch": 0.04879423487257941, - "grad_norm": 0.0010690286289900541, - "learning_rate": 0.00019999999890135307, - "loss": 46.0, - "step": 303 - }, - { - "epoch": 0.0489552719513668, - "grad_norm": 0.0013335959520190954, - "learning_rate": 0.00019999999889384095, - "loss": 46.0, - "step": 304 - }, - { - "epoch": 0.049116309030154195, - "grad_norm": 0.0011828442802652717, - "learning_rate": 0.00019999999888630328, - "loss": 46.0, - "step": 305 - }, - { - "epoch": 0.049277346108941585, - "grad_norm": 0.0008028900483623147, - "learning_rate": 0.00019999999887874, - "loss": 46.0, - "step": 306 - }, - { - "epoch": 0.049438383187728974, - "grad_norm": 0.0007478521438315511, - "learning_rate": 0.0001999999988711511, - "loss": 46.0, - "step": 307 - }, - { - "epoch": 0.04959942026651636, - "grad_norm": 0.0012638323241844773, - "learning_rate": 0.00019999999886353664, - "loss": 46.0, - "step": 308 - }, - { - "epoch": 0.04976045734530376, - "grad_norm": 0.0005974912201054394, - "learning_rate": 0.0001999999988558966, - "loss": 46.0, - "step": 309 - }, - { - "epoch": 0.04992149442409115, - "grad_norm": 0.000533314305357635, - "learning_rate": 0.0001999999988482309, - "loss": 46.0, - "step": 310 - }, - { - "epoch": 0.05008253150287854, - "grad_norm": 0.0014910665340721607, - "learning_rate": 0.00019999999884053965, - "loss": 46.0, - "step": 311 - }, - { - "epoch": 0.050243568581665926, - "grad_norm": 0.0012139989994466305, - "learning_rate": 0.00019999999883282281, - "loss": 46.0, - "step": 312 - }, - { - "epoch": 0.05040460566045332, - "grad_norm": 0.0012169424444437027, - "learning_rate": 0.00019999999882508034, - "loss": 46.0, - "step": 313 - }, - { - "epoch": 0.05056564273924071, - "grad_norm": 0.0011955752270296216, - "learning_rate": 0.0001999999988173123, - "loss": 46.0, - "step": 314 - }, - { - "epoch": 0.0507266798180281, - "grad_norm": 0.0008034955244511366, - "learning_rate": 0.00019999999880951865, - "loss": 46.0, - "step": 315 - }, - { - "epoch": 0.05088771689681549, - "grad_norm": 0.001763079664669931, - "learning_rate": 0.00019999999880169944, - "loss": 46.0, - "step": 316 - }, - { - "epoch": 0.051048753975602885, - "grad_norm": 0.0016992834862321615, - "learning_rate": 0.0001999999987938546, - "loss": 46.0, - "step": 317 - }, - { - "epoch": 0.051209791054390275, - "grad_norm": 0.0011860596714541316, - "learning_rate": 0.0001999999987859842, - "loss": 46.0, - "step": 318 - }, - { - "epoch": 0.051370828133177664, - "grad_norm": 0.0010058664483949542, - "learning_rate": 0.00019999999877808817, - "loss": 46.0, - "step": 319 - }, - { - "epoch": 0.05153186521196505, - "grad_norm": 0.0011413118336349726, - "learning_rate": 0.00019999999877016655, - "loss": 46.0, - "step": 320 - }, - { - "epoch": 0.05169290229075245, - "grad_norm": 0.0014378430787473917, - "learning_rate": 0.00019999999876221933, - "loss": 46.0, - "step": 321 - }, - { - "epoch": 0.05185393936953984, - "grad_norm": 0.0013385158963501453, - "learning_rate": 0.00019999999875424656, - "loss": 46.0, - "step": 322 - }, - { - "epoch": 0.05201497644832723, - "grad_norm": 0.0008365577086806297, - "learning_rate": 0.00019999999874624816, - "loss": 46.0, - "step": 323 - }, - { - "epoch": 0.052176013527114616, - "grad_norm": 0.0011793512385338545, - "learning_rate": 0.00019999999873822417, - "loss": 46.0, - "step": 324 - }, - { - "epoch": 0.05233705060590201, - "grad_norm": 0.001501126098446548, - "learning_rate": 0.00019999999873017456, - "loss": 46.0, - "step": 325 - }, - { - "epoch": 0.0524980876846894, - "grad_norm": 0.0008644497720524669, - "learning_rate": 0.00019999999872209937, - "loss": 46.0, - "step": 326 - }, - { - "epoch": 0.05265912476347679, - "grad_norm": 0.0007261348073370755, - "learning_rate": 0.00019999999871399862, - "loss": 46.0, - "step": 327 - }, - { - "epoch": 0.05282016184226418, - "grad_norm": 0.0009311382309533656, - "learning_rate": 0.00019999999870587225, - "loss": 46.0, - "step": 328 - }, - { - "epoch": 0.052981198921051575, - "grad_norm": 0.0009469041833654046, - "learning_rate": 0.00019999999869772027, - "loss": 46.0, - "step": 329 - }, - { - "epoch": 0.053142235999838965, - "grad_norm": 0.0008246630313806236, - "learning_rate": 0.0001999999986895427, - "loss": 46.0, - "step": 330 - }, - { - "epoch": 0.053303273078626354, - "grad_norm": 0.001087374985218048, - "learning_rate": 0.00019999999868133955, - "loss": 46.0, - "step": 331 - }, - { - "epoch": 0.05346431015741374, - "grad_norm": 0.0014433509204536676, - "learning_rate": 0.0001999999986731108, - "loss": 46.0, - "step": 332 - }, - { - "epoch": 0.05362534723620113, - "grad_norm": 0.0008655768469907343, - "learning_rate": 0.00019999999866485645, - "loss": 46.0, - "step": 333 - }, - { - "epoch": 0.05378638431498853, - "grad_norm": 0.0005709116230718791, - "learning_rate": 0.00019999999865657654, - "loss": 46.0, - "step": 334 - }, - { - "epoch": 0.05394742139377592, - "grad_norm": 0.0012545662466436625, - "learning_rate": 0.000199999998648271, - "loss": 46.0, - "step": 335 - }, - { - "epoch": 0.054108458472563306, - "grad_norm": 0.0007371182437054813, - "learning_rate": 0.00019999999863993986, - "loss": 46.0, - "step": 336 - }, - { - "epoch": 0.054269495551350695, - "grad_norm": 0.0013507921248674393, - "learning_rate": 0.00019999999863158313, - "loss": 46.0, - "step": 337 - }, - { - "epoch": 0.05443053263013809, - "grad_norm": 0.0010357642313465476, - "learning_rate": 0.0001999999986232008, - "loss": 46.0, - "step": 338 - }, - { - "epoch": 0.05459156970892548, - "grad_norm": 0.0007762422901578248, - "learning_rate": 0.0001999999986147929, - "loss": 46.0, - "step": 339 - }, - { - "epoch": 0.05475260678771287, - "grad_norm": 0.001174192875623703, - "learning_rate": 0.00019999999860635938, - "loss": 46.0, - "step": 340 - }, - { - "epoch": 0.05491364386650026, - "grad_norm": 0.0011004736879840493, - "learning_rate": 0.00019999999859790027, - "loss": 46.0, - "step": 341 - }, - { - "epoch": 0.055074680945287655, - "grad_norm": 0.001205814303830266, - "learning_rate": 0.0001999999985894156, - "loss": 46.0, - "step": 342 - }, - { - "epoch": 0.055235718024075044, - "grad_norm": 0.0008946635643951595, - "learning_rate": 0.0001999999985809053, - "loss": 46.0, - "step": 343 - }, - { - "epoch": 0.05539675510286243, - "grad_norm": 0.0007763797184452415, - "learning_rate": 0.00019999999857236943, - "loss": 46.0, - "step": 344 - }, - { - "epoch": 0.05555779218164982, - "grad_norm": 0.0015236377948895097, - "learning_rate": 0.00019999999856380792, - "loss": 46.0, - "step": 345 - }, - { - "epoch": 0.05571882926043722, - "grad_norm": 0.001082977163605392, - "learning_rate": 0.00019999999855522084, - "loss": 46.0, - "step": 346 - }, - { - "epoch": 0.05587986633922461, - "grad_norm": 0.0005746458773501217, - "learning_rate": 0.00019999999854660816, - "loss": 46.0, - "step": 347 - }, - { - "epoch": 0.056040903418011996, - "grad_norm": 0.0004423210339155048, - "learning_rate": 0.00019999999853796991, - "loss": 46.0, - "step": 348 - }, - { - "epoch": 0.056201940496799385, - "grad_norm": 0.0007441586349159479, - "learning_rate": 0.00019999999852930605, - "loss": 46.0, - "step": 349 - }, - { - "epoch": 0.05636297757558678, - "grad_norm": 0.0011474787024781108, - "learning_rate": 0.00019999999852061658, - "loss": 46.0, - "step": 350 - }, - { - "epoch": 0.05652401465437417, - "grad_norm": 0.0006653806194663048, - "learning_rate": 0.00019999999851190152, - "loss": 46.0, - "step": 351 - }, - { - "epoch": 0.05668505173316156, - "grad_norm": 0.0015253883320838213, - "learning_rate": 0.00019999999850316087, - "loss": 46.0, - "step": 352 - }, - { - "epoch": 0.05684608881194895, - "grad_norm": 0.0003858486597891897, - "learning_rate": 0.00019999999849439464, - "loss": 46.0, - "step": 353 - }, - { - "epoch": 0.057007125890736345, - "grad_norm": 0.0011664539342746139, - "learning_rate": 0.00019999999848560281, - "loss": 46.0, - "step": 354 - }, - { - "epoch": 0.057168162969523734, - "grad_norm": 0.0007604304701089859, - "learning_rate": 0.00019999999847678538, - "loss": 46.0, - "step": 355 - }, - { - "epoch": 0.05732920004831112, - "grad_norm": 0.0009285574778914452, - "learning_rate": 0.00019999999846794233, - "loss": 46.0, - "step": 356 - }, - { - "epoch": 0.05749023712709851, - "grad_norm": 0.0010224482975900173, - "learning_rate": 0.00019999999845907374, - "loss": 46.0, - "step": 357 - }, - { - "epoch": 0.05765127420588591, - "grad_norm": 0.0008152456721290946, - "learning_rate": 0.00019999999845017952, - "loss": 46.0, - "step": 358 - }, - { - "epoch": 0.0578123112846733, - "grad_norm": 0.0008902782574295998, - "learning_rate": 0.0001999999984412597, - "loss": 46.0, - "step": 359 - }, - { - "epoch": 0.057973348363460686, - "grad_norm": 0.0013972967863082886, - "learning_rate": 0.0001999999984323143, - "loss": 46.0, - "step": 360 - }, - { - "epoch": 0.058134385442248075, - "grad_norm": 0.0006916502024978399, - "learning_rate": 0.0001999999984233433, - "loss": 46.0, - "step": 361 - }, - { - "epoch": 0.05829542252103547, - "grad_norm": 0.0008311839774250984, - "learning_rate": 0.0001999999984143467, - "loss": 46.0, - "step": 362 - }, - { - "epoch": 0.05845645959982286, - "grad_norm": 0.0012427597539499402, - "learning_rate": 0.0001999999984053245, - "loss": 46.0, - "step": 363 - }, - { - "epoch": 0.05861749667861025, - "grad_norm": 0.0006254159961827099, - "learning_rate": 0.00019999999839627672, - "loss": 46.0, - "step": 364 - }, - { - "epoch": 0.05877853375739764, - "grad_norm": 0.00047978339716792107, - "learning_rate": 0.0001999999983872033, - "loss": 46.0, - "step": 365 - }, - { - "epoch": 0.058939570836185035, - "grad_norm": 0.0009679915965534747, - "learning_rate": 0.00019999999837810436, - "loss": 46.0, - "step": 366 - }, - { - "epoch": 0.059100607914972424, - "grad_norm": 0.0010024794610217214, - "learning_rate": 0.00019999999836897977, - "loss": 46.0, - "step": 367 - }, - { - "epoch": 0.05926164499375981, - "grad_norm": 0.0008527908357791603, - "learning_rate": 0.00019999999835982962, - "loss": 46.0, - "step": 368 - }, - { - "epoch": 0.0594226820725472, - "grad_norm": 0.000915585202164948, - "learning_rate": 0.00019999999835065385, - "loss": 46.0, - "step": 369 - }, - { - "epoch": 0.0595837191513346, - "grad_norm": 0.000650250818580389, - "learning_rate": 0.0001999999983414525, - "loss": 46.0, - "step": 370 - }, - { - "epoch": 0.05974475623012199, - "grad_norm": 0.0009959996677935123, - "learning_rate": 0.00019999999833222556, - "loss": 46.0, - "step": 371 - }, - { - "epoch": 0.059905793308909376, - "grad_norm": 0.001148334820754826, - "learning_rate": 0.000199999998322973, - "loss": 46.0, - "step": 372 - }, - { - "epoch": 0.060066830387696765, - "grad_norm": 0.000527723110280931, - "learning_rate": 0.00019999999831369486, - "loss": 46.0, - "step": 373 - }, - { - "epoch": 0.06022786746648416, - "grad_norm": 0.0009392919600941241, - "learning_rate": 0.00019999999830439113, - "loss": 46.0, - "step": 374 - }, - { - "epoch": 0.06038890454527155, - "grad_norm": 0.0005747764371335506, - "learning_rate": 0.0001999999982950618, - "loss": 46.0, - "step": 375 - }, - { - "epoch": 0.06054994162405894, - "grad_norm": 0.0005832109018228948, - "learning_rate": 0.0001999999982857069, - "loss": 46.0, - "step": 376 - }, - { - "epoch": 0.06071097870284633, - "grad_norm": 0.0007533972384408116, - "learning_rate": 0.00019999999827632637, - "loss": 46.0, - "step": 377 - }, - { - "epoch": 0.060872015781633725, - "grad_norm": 0.0007178978412412107, - "learning_rate": 0.00019999999826692024, - "loss": 46.0, - "step": 378 - }, - { - "epoch": 0.061033052860421114, - "grad_norm": 0.0006495073321275413, - "learning_rate": 0.00019999999825748855, - "loss": 46.0, - "step": 379 - }, - { - "epoch": 0.0611940899392085, - "grad_norm": 0.0006937084253877401, - "learning_rate": 0.00019999999824803121, - "loss": 46.0, - "step": 380 - }, - { - "epoch": 0.06135512701799589, - "grad_norm": 0.0013413194101303816, - "learning_rate": 0.00019999999823854832, - "loss": 46.0, - "step": 381 - }, - { - "epoch": 0.06151616409678328, - "grad_norm": 0.0008035088540054858, - "learning_rate": 0.00019999999822903984, - "loss": 46.0, - "step": 382 - }, - { - "epoch": 0.06167720117557068, - "grad_norm": 0.0006494863773696125, - "learning_rate": 0.00019999999821950575, - "loss": 46.0, - "step": 383 - }, - { - "epoch": 0.061838238254358066, - "grad_norm": 0.0010893800063058734, - "learning_rate": 0.00019999999820994606, - "loss": 46.0, - "step": 384 - }, - { - "epoch": 0.061999275333145455, - "grad_norm": 0.0008796124602667987, - "learning_rate": 0.00019999999820036077, - "loss": 46.0, - "step": 385 - }, - { - "epoch": 0.062160312411932844, - "grad_norm": 0.0009478065185248852, - "learning_rate": 0.00019999999819074994, - "loss": 46.0, - "step": 386 - }, - { - "epoch": 0.06232134949072024, - "grad_norm": 0.0006546524236910045, - "learning_rate": 0.00019999999818111347, - "loss": 46.0, - "step": 387 - }, - { - "epoch": 0.06248238656950763, - "grad_norm": 0.0006173010915517807, - "learning_rate": 0.00019999999817145139, - "loss": 46.0, - "step": 388 - }, - { - "epoch": 0.06264342364829502, - "grad_norm": 0.0004461584612727165, - "learning_rate": 0.00019999999816176374, - "loss": 46.0, - "step": 389 - }, - { - "epoch": 0.06280446072708241, - "grad_norm": 0.0008034147322177887, - "learning_rate": 0.00019999999815205048, - "loss": 46.0, - "step": 390 - }, - { - "epoch": 0.0629654978058698, - "grad_norm": 0.0011473468039184809, - "learning_rate": 0.0001999999981423116, - "loss": 46.0, - "step": 391 - }, - { - "epoch": 0.06312653488465719, - "grad_norm": 0.000513106700964272, - "learning_rate": 0.0001999999981325472, - "loss": 46.0, - "step": 392 - }, - { - "epoch": 0.06328757196344459, - "grad_norm": 0.0006003491580486298, - "learning_rate": 0.00019999999812275713, - "loss": 46.0, - "step": 393 - }, - { - "epoch": 0.06344860904223197, - "grad_norm": 0.0006133901188150048, - "learning_rate": 0.0001999999981129415, - "loss": 46.0, - "step": 394 - }, - { - "epoch": 0.06360964612101937, - "grad_norm": 0.0014315887819975615, - "learning_rate": 0.00019999999810310027, - "loss": 46.0, - "step": 395 - }, - { - "epoch": 0.06377068319980675, - "grad_norm": 0.0012847103644162416, - "learning_rate": 0.00019999999809323346, - "loss": 46.0, - "step": 396 - }, - { - "epoch": 0.06393172027859415, - "grad_norm": 0.0006227092235349119, - "learning_rate": 0.00019999999808334104, - "loss": 46.0, - "step": 397 - }, - { - "epoch": 0.06409275735738154, - "grad_norm": 0.0009588095708750188, - "learning_rate": 0.00019999999807342303, - "loss": 46.0, - "step": 398 - }, - { - "epoch": 0.06425379443616892, - "grad_norm": 0.00042919005500152707, - "learning_rate": 0.00019999999806347943, - "loss": 46.0, - "step": 399 - }, - { - "epoch": 0.06441483151495632, - "grad_norm": 0.0005253739072941244, - "learning_rate": 0.00019999999805351025, - "loss": 46.0, - "step": 400 - }, - { - "epoch": 0.06457586859374372, - "grad_norm": 0.0010851717088371515, - "learning_rate": 0.00019999999804351545, - "loss": 46.0, - "step": 401 - }, - { - "epoch": 0.0647369056725311, - "grad_norm": 0.0006280020461417735, - "learning_rate": 0.00019999999803349506, - "loss": 46.0, - "step": 402 - }, - { - "epoch": 0.0648979427513185, - "grad_norm": 0.0008929861360229552, - "learning_rate": 0.00019999999802344903, - "loss": 46.0, - "step": 403 - }, - { - "epoch": 0.06505897983010588, - "grad_norm": 0.001072326092980802, - "learning_rate": 0.00019999999801337745, - "loss": 46.0, - "step": 404 - }, - { - "epoch": 0.06522001690889327, - "grad_norm": 0.0006430553621612489, - "learning_rate": 0.0001999999980032803, - "loss": 46.0, - "step": 405 - }, - { - "epoch": 0.06538105398768067, - "grad_norm": 0.0004205092554911971, - "learning_rate": 0.00019999999799315754, - "loss": 46.0, - "step": 406 - }, - { - "epoch": 0.06554209106646805, - "grad_norm": 0.0009531372343190014, - "learning_rate": 0.00019999999798300916, - "loss": 46.0, - "step": 407 - }, - { - "epoch": 0.06570312814525545, - "grad_norm": 0.000906719418708235, - "learning_rate": 0.0001999999979728352, - "loss": 46.0, - "step": 408 - }, - { - "epoch": 0.06586416522404284, - "grad_norm": 0.0006057941354811192, - "learning_rate": 0.00019999999796263565, - "loss": 46.0, - "step": 409 - }, - { - "epoch": 0.06602520230283022, - "grad_norm": 0.0012407077010720968, - "learning_rate": 0.00019999999795241048, - "loss": 46.0, - "step": 410 - }, - { - "epoch": 0.06618623938161762, - "grad_norm": 0.0007258470286615193, - "learning_rate": 0.00019999999794215976, - "loss": 46.0, - "step": 411 - }, - { - "epoch": 0.066347276460405, - "grad_norm": 0.0007349825464189053, - "learning_rate": 0.0001999999979318834, - "loss": 46.0, - "step": 412 - }, - { - "epoch": 0.0665083135391924, - "grad_norm": 0.0008235921850427985, - "learning_rate": 0.00019999999792158146, - "loss": 46.0, - "step": 413 - }, - { - "epoch": 0.0666693506179798, - "grad_norm": 0.0005888417363166809, - "learning_rate": 0.00019999999791125395, - "loss": 46.0, - "step": 414 - }, - { - "epoch": 0.06683038769676718, - "grad_norm": 0.0008665561908856034, - "learning_rate": 0.0001999999979009008, - "loss": 46.0, - "step": 415 - }, - { - "epoch": 0.06699142477555457, - "grad_norm": 0.0012398657854646444, - "learning_rate": 0.00019999999789052208, - "loss": 46.0, - "step": 416 - }, - { - "epoch": 0.06715246185434197, - "grad_norm": 0.0007625618600286543, - "learning_rate": 0.00019999999788011778, - "loss": 46.0, - "step": 417 - }, - { - "epoch": 0.06731349893312935, - "grad_norm": 0.0009577570017427206, - "learning_rate": 0.00019999999786968786, - "loss": 46.0, - "step": 418 - }, - { - "epoch": 0.06747453601191675, - "grad_norm": 0.0010338622378185391, - "learning_rate": 0.00019999999785923236, - "loss": 46.0, - "step": 419 - }, - { - "epoch": 0.06763557309070413, - "grad_norm": 0.0006397149409167469, - "learning_rate": 0.00019999999784875127, - "loss": 46.0, - "step": 420 - }, - { - "epoch": 0.06779661016949153, - "grad_norm": 0.0005648143123835325, - "learning_rate": 0.00019999999783824456, - "loss": 46.0, - "step": 421 - }, - { - "epoch": 0.06795764724827892, - "grad_norm": 0.0004593702033162117, - "learning_rate": 0.00019999999782771227, - "loss": 46.0, - "step": 422 - }, - { - "epoch": 0.0681186843270663, - "grad_norm": 0.0006899412255734205, - "learning_rate": 0.00019999999781715442, - "loss": 46.0, - "step": 423 - }, - { - "epoch": 0.0682797214058537, - "grad_norm": 0.0006216235924512148, - "learning_rate": 0.00019999999780657092, - "loss": 46.0, - "step": 424 - }, - { - "epoch": 0.0684407584846411, - "grad_norm": 0.0006858006818220019, - "learning_rate": 0.00019999999779596187, - "loss": 46.0, - "step": 425 - }, - { - "epoch": 0.06860179556342848, - "grad_norm": 0.0012070111697539687, - "learning_rate": 0.00019999999778532717, - "loss": 46.0, - "step": 426 - }, - { - "epoch": 0.06876283264221587, - "grad_norm": 0.0007498610648326576, - "learning_rate": 0.00019999999777466695, - "loss": 46.0, - "step": 427 - }, - { - "epoch": 0.06892386972100326, - "grad_norm": 0.0006476924754679203, - "learning_rate": 0.00019999999776398108, - "loss": 46.0, - "step": 428 - }, - { - "epoch": 0.06908490679979065, - "grad_norm": 0.0010762411402538419, - "learning_rate": 0.00019999999775326962, - "loss": 46.0, - "step": 429 - }, - { - "epoch": 0.06924594387857805, - "grad_norm": 0.000566616712603718, - "learning_rate": 0.00019999999774253255, - "loss": 46.0, - "step": 430 - }, - { - "epoch": 0.06940698095736543, - "grad_norm": 0.000477853900520131, - "learning_rate": 0.00019999999773176992, - "loss": 46.0, - "step": 431 - }, - { - "epoch": 0.06956801803615283, - "grad_norm": 0.0006761650438420475, - "learning_rate": 0.0001999999977209817, - "loss": 46.0, - "step": 432 - }, - { - "epoch": 0.06972905511494021, - "grad_norm": 0.0007774282712489367, - "learning_rate": 0.00019999999771016787, - "loss": 46.0, - "step": 433 - }, - { - "epoch": 0.0698900921937276, - "grad_norm": 0.0011541653657332063, - "learning_rate": 0.00019999999769932845, - "loss": 46.0, - "step": 434 - }, - { - "epoch": 0.070051129272515, - "grad_norm": 0.0012715215561911464, - "learning_rate": 0.00019999999768846341, - "loss": 46.0, - "step": 435 - }, - { - "epoch": 0.07021216635130238, - "grad_norm": 0.0007645237492397428, - "learning_rate": 0.0001999999976775728, - "loss": 46.0, - "step": 436 - }, - { - "epoch": 0.07037320343008978, - "grad_norm": 0.001217718468979001, - "learning_rate": 0.00019999999766665658, - "loss": 46.0, - "step": 437 - }, - { - "epoch": 0.07053424050887717, - "grad_norm": 0.0006087617948651314, - "learning_rate": 0.0001999999976557148, - "loss": 46.0, - "step": 438 - }, - { - "epoch": 0.07069527758766456, - "grad_norm": 0.0011646051425486803, - "learning_rate": 0.00019999999764474738, - "loss": 46.0, - "step": 439 - }, - { - "epoch": 0.07085631466645195, - "grad_norm": 0.0016641434049233794, - "learning_rate": 0.00019999999763375436, - "loss": 46.0, - "step": 440 - }, - { - "epoch": 0.07101735174523933, - "grad_norm": 0.0011168629862368107, - "learning_rate": 0.00019999999762273577, - "loss": 46.0, - "step": 441 - }, - { - "epoch": 0.07117838882402673, - "grad_norm": 0.0006337081431411207, - "learning_rate": 0.00019999999761169163, - "loss": 46.0, - "step": 442 - }, - { - "epoch": 0.07133942590281413, - "grad_norm": 0.0006407672772184014, - "learning_rate": 0.00019999999760062181, - "loss": 46.0, - "step": 443 - }, - { - "epoch": 0.07150046298160151, - "grad_norm": 0.0007342903991229832, - "learning_rate": 0.00019999999758952647, - "loss": 46.0, - "step": 444 - }, - { - "epoch": 0.0716615000603889, - "grad_norm": 0.0006470965454354882, - "learning_rate": 0.00019999999757840548, - "loss": 46.0, - "step": 445 - }, - { - "epoch": 0.0718225371391763, - "grad_norm": 0.000804529117885977, - "learning_rate": 0.00019999999756725888, - "loss": 46.0, - "step": 446 - }, - { - "epoch": 0.07198357421796368, - "grad_norm": 0.0006461947341449559, - "learning_rate": 0.00019999999755608675, - "loss": 46.0, - "step": 447 - }, - { - "epoch": 0.07214461129675108, - "grad_norm": 0.0006891589728184044, - "learning_rate": 0.000199999997544889, - "loss": 46.0, - "step": 448 - }, - { - "epoch": 0.07230564837553846, - "grad_norm": 0.0013634568313136697, - "learning_rate": 0.00019999999753366564, - "loss": 46.0, - "step": 449 - }, - { - "epoch": 0.07246668545432586, - "grad_norm": 0.0006031526136212051, - "learning_rate": 0.00019999999752241669, - "loss": 46.0, - "step": 450 - }, - { - "epoch": 0.07262772253311325, - "grad_norm": 0.0006277126958593726, - "learning_rate": 0.00019999999751114215, - "loss": 46.0, - "step": 451 - }, - { - "epoch": 0.07278875961190064, - "grad_norm": 0.001542499870993197, - "learning_rate": 0.00019999999749984202, - "loss": 46.0, - "step": 452 - }, - { - "epoch": 0.07294979669068803, - "grad_norm": 0.0007459594053216279, - "learning_rate": 0.00019999999748851629, - "loss": 46.0, - "step": 453 - }, - { - "epoch": 0.07311083376947543, - "grad_norm": 0.0008035858627408743, - "learning_rate": 0.00019999999747716496, - "loss": 46.0, - "step": 454 - }, - { - "epoch": 0.07327187084826281, - "grad_norm": 0.0007284557796083391, - "learning_rate": 0.00019999999746578805, - "loss": 46.0, - "step": 455 - }, - { - "epoch": 0.0734329079270502, - "grad_norm": 0.0009163875947706401, - "learning_rate": 0.00019999999745438555, - "loss": 46.0, - "step": 456 - }, - { - "epoch": 0.07359394500583759, - "grad_norm": 0.0004265153256710619, - "learning_rate": 0.00019999999744295743, - "loss": 46.0, - "step": 457 - }, - { - "epoch": 0.07375498208462498, - "grad_norm": 0.00032776183797977865, - "learning_rate": 0.00019999999743150373, - "loss": 46.0, - "step": 458 - }, - { - "epoch": 0.07391601916341238, - "grad_norm": 0.0007664909935556352, - "learning_rate": 0.00019999999742002442, - "loss": 46.0, - "step": 459 - }, - { - "epoch": 0.07407705624219976, - "grad_norm": 0.000509080127812922, - "learning_rate": 0.00019999999740851954, - "loss": 46.0, - "step": 460 - }, - { - "epoch": 0.07423809332098716, - "grad_norm": 0.0005612930981442332, - "learning_rate": 0.00019999999739698905, - "loss": 46.0, - "step": 461 - }, - { - "epoch": 0.07439913039977455, - "grad_norm": 0.0009621938224881887, - "learning_rate": 0.00019999999738543295, - "loss": 46.0, - "step": 462 - }, - { - "epoch": 0.07456016747856194, - "grad_norm": 0.0008162377635017037, - "learning_rate": 0.00019999999737385128, - "loss": 46.0, - "step": 463 - }, - { - "epoch": 0.07472120455734933, - "grad_norm": 0.0004411375557538122, - "learning_rate": 0.000199999997362244, - "loss": 46.0, - "step": 464 - }, - { - "epoch": 0.07488224163613671, - "grad_norm": 0.00037210877053439617, - "learning_rate": 0.00019999999735061114, - "loss": 46.0, - "step": 465 - }, - { - "epoch": 0.07504327871492411, - "grad_norm": 0.0011236772406846285, - "learning_rate": 0.00019999999733895268, - "loss": 46.0, - "step": 466 - }, - { - "epoch": 0.07520431579371151, - "grad_norm": 0.0008054023492150009, - "learning_rate": 0.00019999999732726862, - "loss": 46.0, - "step": 467 - }, - { - "epoch": 0.07536535287249889, - "grad_norm": 0.0004958999343216419, - "learning_rate": 0.00019999999731555896, - "loss": 46.0, - "step": 468 - }, - { - "epoch": 0.07552638995128629, - "grad_norm": 0.0007969607831910253, - "learning_rate": 0.0001999999973038237, - "loss": 46.0, - "step": 469 - }, - { - "epoch": 0.07568742703007368, - "grad_norm": 0.0011302282800897956, - "learning_rate": 0.0001999999972920629, - "loss": 46.0, - "step": 470 - }, - { - "epoch": 0.07584846410886106, - "grad_norm": 0.0009448499768041074, - "learning_rate": 0.00019999999728027645, - "loss": 46.0, - "step": 471 - }, - { - "epoch": 0.07600950118764846, - "grad_norm": 0.0007362283649854362, - "learning_rate": 0.00019999999726846442, - "loss": 46.0, - "step": 472 - }, - { - "epoch": 0.07617053826643584, - "grad_norm": 0.0009826513705775142, - "learning_rate": 0.0001999999972566268, - "loss": 46.0, - "step": 473 - }, - { - "epoch": 0.07633157534522324, - "grad_norm": 0.0010123316897079349, - "learning_rate": 0.00019999999724476357, - "loss": 46.0, - "step": 474 - }, - { - "epoch": 0.07649261242401063, - "grad_norm": 0.0007657674723304808, - "learning_rate": 0.00019999999723287475, - "loss": 46.0, - "step": 475 - }, - { - "epoch": 0.07665364950279802, - "grad_norm": 0.0010322603629902005, - "learning_rate": 0.00019999999722096034, - "loss": 46.0, - "step": 476 - }, - { - "epoch": 0.07681468658158541, - "grad_norm": 0.0012446970213204622, - "learning_rate": 0.00019999999720902035, - "loss": 46.0, - "step": 477 - }, - { - "epoch": 0.0769757236603728, - "grad_norm": 0.0008477270603179932, - "learning_rate": 0.00019999999719705472, - "loss": 46.0, - "step": 478 - }, - { - "epoch": 0.07713676073916019, - "grad_norm": 0.0005673026898875833, - "learning_rate": 0.00019999999718506352, - "loss": 46.0, - "step": 479 - }, - { - "epoch": 0.07729779781794759, - "grad_norm": 0.0006731751491315663, - "learning_rate": 0.00019999999717304674, - "loss": 46.0, - "step": 480 - }, - { - "epoch": 0.07745883489673497, - "grad_norm": 0.0004351187963038683, - "learning_rate": 0.00019999999716100434, - "loss": 46.0, - "step": 481 - }, - { - "epoch": 0.07761987197552236, - "grad_norm": 0.0007636580266989768, - "learning_rate": 0.00019999999714893636, - "loss": 46.0, - "step": 482 - }, - { - "epoch": 0.07778090905430976, - "grad_norm": 0.000572064018342644, - "learning_rate": 0.0001999999971368428, - "loss": 46.0, - "step": 483 - }, - { - "epoch": 0.07794194613309714, - "grad_norm": 0.0005930215120315552, - "learning_rate": 0.00019999999712472363, - "loss": 46.0, - "step": 484 - }, - { - "epoch": 0.07810298321188454, - "grad_norm": 0.0006548817036673427, - "learning_rate": 0.00019999999711257886, - "loss": 46.0, - "step": 485 - }, - { - "epoch": 0.07826402029067192, - "grad_norm": 0.000415973539929837, - "learning_rate": 0.0001999999971004085, - "loss": 46.0, - "step": 486 - }, - { - "epoch": 0.07842505736945932, - "grad_norm": 0.0005942594725638628, - "learning_rate": 0.00019999999708821253, - "loss": 46.0, - "step": 487 - }, - { - "epoch": 0.07858609444824671, - "grad_norm": 0.0005471553304232657, - "learning_rate": 0.000199999997075991, - "loss": 46.0, - "step": 488 - }, - { - "epoch": 0.0787471315270341, - "grad_norm": 0.0005777325131930411, - "learning_rate": 0.00019999999706374385, - "loss": 46.0, - "step": 489 - }, - { - "epoch": 0.07890816860582149, - "grad_norm": 0.000728434999473393, - "learning_rate": 0.0001999999970514711, - "loss": 46.0, - "step": 490 - }, - { - "epoch": 0.07906920568460889, - "grad_norm": 0.0009583527571521699, - "learning_rate": 0.0001999999970391728, - "loss": 46.0, - "step": 491 - }, - { - "epoch": 0.07923024276339627, - "grad_norm": 0.0007965164259076118, - "learning_rate": 0.00019999999702684885, - "loss": 46.0, - "step": 492 - }, - { - "epoch": 0.07939127984218367, - "grad_norm": 0.0005470888572745025, - "learning_rate": 0.00019999999701449933, - "loss": 46.0, - "step": 493 - }, - { - "epoch": 0.07955231692097105, - "grad_norm": 0.0009533630218356848, - "learning_rate": 0.0001999999970021242, - "loss": 46.0, - "step": 494 - }, - { - "epoch": 0.07971335399975844, - "grad_norm": 0.0010155423078686, - "learning_rate": 0.0001999999969897235, - "loss": 46.0, - "step": 495 - }, - { - "epoch": 0.07987439107854584, - "grad_norm": 0.0009387807222083211, - "learning_rate": 0.00019999999697729715, - "loss": 46.0, - "step": 496 - }, - { - "epoch": 0.08003542815733322, - "grad_norm": 0.0007800189196132123, - "learning_rate": 0.00019999999696484528, - "loss": 46.0, - "step": 497 - }, - { - "epoch": 0.08019646523612062, - "grad_norm": 0.001141066080890596, - "learning_rate": 0.00019999999695236777, - "loss": 46.0, - "step": 498 - }, - { - "epoch": 0.08035750231490801, - "grad_norm": 0.0007430302212014794, - "learning_rate": 0.0001999999969398647, - "loss": 46.0, - "step": 499 - }, - { - "epoch": 0.0805185393936954, - "grad_norm": 0.0011654486879706383, - "learning_rate": 0.000199999996927336, - "loss": 46.0, - "step": 500 - }, - { - "epoch": 0.08067957647248279, - "grad_norm": 0.001330543658696115, - "learning_rate": 0.0001999999969147817, - "loss": 46.0, - "step": 501 - }, - { - "epoch": 0.08084061355127017, - "grad_norm": 0.0007218665559776127, - "learning_rate": 0.00019999999690220184, - "loss": 46.0, - "step": 502 - }, - { - "epoch": 0.08100165063005757, - "grad_norm": 0.000574540754314512, - "learning_rate": 0.00019999999688959636, - "loss": 46.0, - "step": 503 - }, - { - "epoch": 0.08116268770884497, - "grad_norm": 0.0006847270997241139, - "learning_rate": 0.0001999999968769653, - "loss": 46.0, - "step": 504 - }, - { - "epoch": 0.08132372478763235, - "grad_norm": 0.0005952648934908211, - "learning_rate": 0.00019999999686430862, - "loss": 46.0, - "step": 505 - }, - { - "epoch": 0.08148476186641974, - "grad_norm": 0.0008884788258001208, - "learning_rate": 0.00019999999685162636, - "loss": 46.0, - "step": 506 - }, - { - "epoch": 0.08164579894520714, - "grad_norm": 0.000789961835835129, - "learning_rate": 0.00019999999683891853, - "loss": 46.0, - "step": 507 - }, - { - "epoch": 0.08180683602399452, - "grad_norm": 0.000833467289339751, - "learning_rate": 0.00019999999682618507, - "loss": 46.0, - "step": 508 - }, - { - "epoch": 0.08196787310278192, - "grad_norm": 0.0014145354507490993, - "learning_rate": 0.00019999999681342604, - "loss": 46.0, - "step": 509 - }, - { - "epoch": 0.0821289101815693, - "grad_norm": 0.0005128522752784193, - "learning_rate": 0.0001999999968006414, - "loss": 46.0, - "step": 510 - }, - { - "epoch": 0.0822899472603567, - "grad_norm": 0.0005069162580184639, - "learning_rate": 0.00019999999678783117, - "loss": 46.0, - "step": 511 - }, - { - "epoch": 0.08245098433914409, - "grad_norm": 0.0009712486644275486, - "learning_rate": 0.00019999999677499535, - "loss": 46.0, - "step": 512 - }, - { - "epoch": 0.08261202141793147, - "grad_norm": 0.0004906330723315477, - "learning_rate": 0.0001999999967621339, - "loss": 46.0, - "step": 513 - }, - { - "epoch": 0.08277305849671887, - "grad_norm": 0.0009142711060121655, - "learning_rate": 0.0001999999967492469, - "loss": 46.0, - "step": 514 - }, - { - "epoch": 0.08293409557550627, - "grad_norm": 0.0008989947964437306, - "learning_rate": 0.00019999999673633428, - "loss": 46.0, - "step": 515 - }, - { - "epoch": 0.08309513265429365, - "grad_norm": 0.0005218035075813532, - "learning_rate": 0.0001999999967233961, - "loss": 46.0, - "step": 516 - }, - { - "epoch": 0.08325616973308105, - "grad_norm": 0.0009906719205901027, - "learning_rate": 0.00019999999671043228, - "loss": 46.0, - "step": 517 - }, - { - "epoch": 0.08341720681186843, - "grad_norm": 0.000553377321921289, - "learning_rate": 0.0001999999966974429, - "loss": 46.0, - "step": 518 - }, - { - "epoch": 0.08357824389065582, - "grad_norm": 0.0003130970289930701, - "learning_rate": 0.00019999999668442788, - "loss": 46.0, - "step": 519 - }, - { - "epoch": 0.08373928096944322, - "grad_norm": 0.0007157115614973009, - "learning_rate": 0.00019999999667138731, - "loss": 46.0, - "step": 520 - }, - { - "epoch": 0.0839003180482306, - "grad_norm": 0.0005627658101730049, - "learning_rate": 0.00019999999665832113, - "loss": 46.0, - "step": 521 - }, - { - "epoch": 0.084061355127018, - "grad_norm": 0.0005468367016874254, - "learning_rate": 0.00019999999664522934, - "loss": 46.0, - "step": 522 - }, - { - "epoch": 0.08422239220580538, - "grad_norm": 0.0007199125830084085, - "learning_rate": 0.000199999996632112, - "loss": 46.0, - "step": 523 - }, - { - "epoch": 0.08438342928459278, - "grad_norm": 0.0007692417129874229, - "learning_rate": 0.000199999996618969, - "loss": 46.0, - "step": 524 - }, - { - "epoch": 0.08454446636338017, - "grad_norm": 0.000630500609986484, - "learning_rate": 0.00019999999660580046, - "loss": 46.0, - "step": 525 - }, - { - "epoch": 0.08470550344216755, - "grad_norm": 0.0014079947723075747, - "learning_rate": 0.0001999999965926063, - "loss": 46.0, - "step": 526 - }, - { - "epoch": 0.08486654052095495, - "grad_norm": 0.0006229651044122875, - "learning_rate": 0.00019999999657938655, - "loss": 46.0, - "step": 527 - }, - { - "epoch": 0.08502757759974235, - "grad_norm": 0.000505558040458709, - "learning_rate": 0.0001999999965661412, - "loss": 46.0, - "step": 528 - }, - { - "epoch": 0.08518861467852973, - "grad_norm": 0.0007336870185099542, - "learning_rate": 0.00019999999655287028, - "loss": 46.0, - "step": 529 - }, - { - "epoch": 0.08534965175731712, - "grad_norm": 0.0004319110594224185, - "learning_rate": 0.00019999999653957373, - "loss": 46.0, - "step": 530 - }, - { - "epoch": 0.0855106888361045, - "grad_norm": 0.0007156056235544384, - "learning_rate": 0.00019999999652625162, - "loss": 46.0, - "step": 531 - }, - { - "epoch": 0.0856717259148919, - "grad_norm": 0.0006044222391210496, - "learning_rate": 0.0001999999965129039, - "loss": 46.0, - "step": 532 - }, - { - "epoch": 0.0858327629936793, - "grad_norm": 0.0004857086460106075, - "learning_rate": 0.00019999999649953056, - "loss": 46.0, - "step": 533 - }, - { - "epoch": 0.08599380007246668, - "grad_norm": 0.0006408842164091766, - "learning_rate": 0.00019999999648613167, - "loss": 46.0, - "step": 534 - }, - { - "epoch": 0.08615483715125408, - "grad_norm": 0.000600326107814908, - "learning_rate": 0.00019999999647270716, - "loss": 46.0, - "step": 535 - }, - { - "epoch": 0.08631587423004147, - "grad_norm": 0.0006083932239562273, - "learning_rate": 0.00019999999645925703, - "loss": 46.0, - "step": 536 - }, - { - "epoch": 0.08647691130882885, - "grad_norm": 0.00048264962970279157, - "learning_rate": 0.00019999999644578132, - "loss": 46.0, - "step": 537 - }, - { - "epoch": 0.08663794838761625, - "grad_norm": 0.0007165013230405748, - "learning_rate": 0.00019999999643228005, - "loss": 46.0, - "step": 538 - }, - { - "epoch": 0.08679898546640363, - "grad_norm": 0.000687831430695951, - "learning_rate": 0.00019999999641875317, - "loss": 46.0, - "step": 539 - }, - { - "epoch": 0.08696002254519103, - "grad_norm": 0.0006070000817999244, - "learning_rate": 0.00019999999640520067, - "loss": 46.0, - "step": 540 - }, - { - "epoch": 0.08712105962397843, - "grad_norm": 0.0006539839087054133, - "learning_rate": 0.00019999999639162258, - "loss": 46.0, - "step": 541 - }, - { - "epoch": 0.08728209670276581, - "grad_norm": 0.0005195296835154295, - "learning_rate": 0.00019999999637801893, - "loss": 46.0, - "step": 542 - }, - { - "epoch": 0.0874431337815532, - "grad_norm": 0.0006608666735701263, - "learning_rate": 0.00019999999636438967, - "loss": 46.0, - "step": 543 - }, - { - "epoch": 0.0876041708603406, - "grad_norm": 0.001367369550280273, - "learning_rate": 0.00019999999635073482, - "loss": 46.0, - "step": 544 - }, - { - "epoch": 0.08776520793912798, - "grad_norm": 0.000743956072255969, - "learning_rate": 0.00019999999633705433, - "loss": 46.0, - "step": 545 - }, - { - "epoch": 0.08792624501791538, - "grad_norm": 0.0008423933759331703, - "learning_rate": 0.00019999999632334828, - "loss": 46.0, - "step": 546 - }, - { - "epoch": 0.08808728209670276, - "grad_norm": 0.0011517110979184508, - "learning_rate": 0.00019999999630961664, - "loss": 46.0, - "step": 547 - }, - { - "epoch": 0.08824831917549016, - "grad_norm": 0.0010252471547573805, - "learning_rate": 0.0001999999962958594, - "loss": 46.0, - "step": 548 - }, - { - "epoch": 0.08840935625427755, - "grad_norm": 0.00048051943304017186, - "learning_rate": 0.00019999999628207658, - "loss": 46.0, - "step": 549 - }, - { - "epoch": 0.08857039333306493, - "grad_norm": 0.0007046059472486377, - "learning_rate": 0.00019999999626826813, - "loss": 46.0, - "step": 550 - }, - { - "epoch": 0.08873143041185233, - "grad_norm": 0.0010297683766111732, - "learning_rate": 0.0001999999962544341, - "loss": 46.0, - "step": 551 - }, - { - "epoch": 0.08889246749063973, - "grad_norm": 0.0005623582983389497, - "learning_rate": 0.00019999999624057449, - "loss": 46.0, - "step": 552 - }, - { - "epoch": 0.08905350456942711, - "grad_norm": 0.0010210892651230097, - "learning_rate": 0.00019999999622668924, - "loss": 46.0, - "step": 553 - }, - { - "epoch": 0.0892145416482145, - "grad_norm": 0.0010434873402118683, - "learning_rate": 0.00019999999621277844, - "loss": 46.0, - "step": 554 - }, - { - "epoch": 0.08937557872700189, - "grad_norm": 0.0007033632718957961, - "learning_rate": 0.00019999999619884205, - "loss": 46.0, - "step": 555 - }, - { - "epoch": 0.08953661580578928, - "grad_norm": 0.0005892971530556679, - "learning_rate": 0.00019999999618488005, - "loss": 46.0, - "step": 556 - }, - { - "epoch": 0.08969765288457668, - "grad_norm": 0.0006412552902474999, - "learning_rate": 0.00019999999617089243, - "loss": 46.0, - "step": 557 - }, - { - "epoch": 0.08985868996336406, - "grad_norm": 0.0007516163750551641, - "learning_rate": 0.00019999999615687926, - "loss": 46.0, - "step": 558 - }, - { - "epoch": 0.09001972704215146, - "grad_norm": 0.0005776139441877604, - "learning_rate": 0.0001999999961428405, - "loss": 46.0, - "step": 559 - }, - { - "epoch": 0.09018076412093885, - "grad_norm": 0.0007281535072252154, - "learning_rate": 0.00019999999612877608, - "loss": 46.0, - "step": 560 - }, - { - "epoch": 0.09034180119972623, - "grad_norm": 0.0009508816292509437, - "learning_rate": 0.00019999999611468612, - "loss": 46.0, - "step": 561 - }, - { - "epoch": 0.09050283827851363, - "grad_norm": 0.0005823606043122709, - "learning_rate": 0.00019999999610057054, - "loss": 46.0, - "step": 562 - }, - { - "epoch": 0.09066387535730101, - "grad_norm": 0.000553961144760251, - "learning_rate": 0.0001999999960864294, - "loss": 46.0, - "step": 563 - }, - { - "epoch": 0.09082491243608841, - "grad_norm": 0.0005303358775563538, - "learning_rate": 0.00019999999607226264, - "loss": 46.0, - "step": 564 - }, - { - "epoch": 0.0909859495148758, - "grad_norm": 0.0010254705557599664, - "learning_rate": 0.00019999999605807027, - "loss": 46.0, - "step": 565 - }, - { - "epoch": 0.09114698659366319, - "grad_norm": 0.001085357740521431, - "learning_rate": 0.00019999999604385234, - "loss": 46.0, - "step": 566 - }, - { - "epoch": 0.09130802367245058, - "grad_norm": 0.0015264761168509722, - "learning_rate": 0.00019999999602960877, - "loss": 46.0, - "step": 567 - }, - { - "epoch": 0.09146906075123798, - "grad_norm": 0.0005250285030342638, - "learning_rate": 0.00019999999601533964, - "loss": 46.0, - "step": 568 - }, - { - "epoch": 0.09163009783002536, - "grad_norm": 0.0004584493872243911, - "learning_rate": 0.0001999999960010449, - "loss": 46.0, - "step": 569 - }, - { - "epoch": 0.09179113490881276, - "grad_norm": 0.0019098113989457488, - "learning_rate": 0.00019999999598672457, - "loss": 46.0, - "step": 570 - }, - { - "epoch": 0.09195217198760014, - "grad_norm": 0.0005570544744841754, - "learning_rate": 0.00019999999597237867, - "loss": 46.0, - "step": 571 - }, - { - "epoch": 0.09211320906638754, - "grad_norm": 0.0012722942046821117, - "learning_rate": 0.00019999999595800714, - "loss": 46.0, - "step": 572 - }, - { - "epoch": 0.09227424614517493, - "grad_norm": 0.0006153949652798474, - "learning_rate": 0.00019999999594361002, - "loss": 46.0, - "step": 573 - }, - { - "epoch": 0.09243528322396231, - "grad_norm": 0.0004970795125700533, - "learning_rate": 0.0001999999959291873, - "loss": 46.0, - "step": 574 - }, - { - "epoch": 0.09259632030274971, - "grad_norm": 0.0006614192971028388, - "learning_rate": 0.000199999995914739, - "loss": 46.0, - "step": 575 - }, - { - "epoch": 0.09275735738153709, - "grad_norm": 0.0006567550590261817, - "learning_rate": 0.0001999999959002651, - "loss": 46.0, - "step": 576 - }, - { - "epoch": 0.09291839446032449, - "grad_norm": 0.0005471435142681003, - "learning_rate": 0.0001999999958857656, - "loss": 46.0, - "step": 577 - }, - { - "epoch": 0.09307943153911188, - "grad_norm": 0.0007232374045997858, - "learning_rate": 0.00019999999587124053, - "loss": 46.0, - "step": 578 - }, - { - "epoch": 0.09324046861789927, - "grad_norm": 0.0005138620617799461, - "learning_rate": 0.00019999999585668983, - "loss": 46.0, - "step": 579 - }, - { - "epoch": 0.09340150569668666, - "grad_norm": 0.0010566923301666975, - "learning_rate": 0.00019999999584211357, - "loss": 46.0, - "step": 580 - }, - { - "epoch": 0.09356254277547406, - "grad_norm": 0.0005584961618296802, - "learning_rate": 0.0001999999958275117, - "loss": 46.0, - "step": 581 - }, - { - "epoch": 0.09372357985426144, - "grad_norm": 0.00048247986705973744, - "learning_rate": 0.0001999999958128842, - "loss": 46.0, - "step": 582 - }, - { - "epoch": 0.09388461693304884, - "grad_norm": 0.0009872997179627419, - "learning_rate": 0.00019999999579823117, - "loss": 46.0, - "step": 583 - }, - { - "epoch": 0.09404565401183622, - "grad_norm": 0.0004425085207913071, - "learning_rate": 0.0001999999957835525, - "loss": 46.0, - "step": 584 - }, - { - "epoch": 0.09420669109062361, - "grad_norm": 0.00032448922866024077, - "learning_rate": 0.00019999999576884826, - "loss": 46.0, - "step": 585 - }, - { - "epoch": 0.09436772816941101, - "grad_norm": 0.0007217460079118609, - "learning_rate": 0.0001999999957541184, - "loss": 46.0, - "step": 586 - }, - { - "epoch": 0.0945287652481984, - "grad_norm": 0.0011977762915194035, - "learning_rate": 0.00019999999573936297, - "loss": 46.0, - "step": 587 - }, - { - "epoch": 0.09468980232698579, - "grad_norm": 0.0009963751072064042, - "learning_rate": 0.00019999999572458194, - "loss": 46.0, - "step": 588 - }, - { - "epoch": 0.09485083940577319, - "grad_norm": 0.0005563003360293806, - "learning_rate": 0.0001999999957097753, - "loss": 46.0, - "step": 589 - }, - { - "epoch": 0.09501187648456057, - "grad_norm": 0.00044900630018673837, - "learning_rate": 0.00019999999569494307, - "loss": 46.0, - "step": 590 - }, - { - "epoch": 0.09517291356334796, - "grad_norm": 0.0005432058242149651, - "learning_rate": 0.00019999999568008525, - "loss": 46.0, - "step": 591 - }, - { - "epoch": 0.09533395064213535, - "grad_norm": 0.0005411191377788782, - "learning_rate": 0.00019999999566520184, - "loss": 46.0, - "step": 592 - }, - { - "epoch": 0.09549498772092274, - "grad_norm": 0.0011375902686268091, - "learning_rate": 0.0001999999956502928, - "loss": 46.0, - "step": 593 - }, - { - "epoch": 0.09565602479971014, - "grad_norm": 0.0010755470721051097, - "learning_rate": 0.0001999999956353582, - "loss": 46.0, - "step": 594 - }, - { - "epoch": 0.09581706187849752, - "grad_norm": 0.0007898997864685953, - "learning_rate": 0.00019999999562039802, - "loss": 46.0, - "step": 595 - }, - { - "epoch": 0.09597809895728492, - "grad_norm": 0.0004766711499541998, - "learning_rate": 0.0001999999956054122, - "loss": 46.0, - "step": 596 - }, - { - "epoch": 0.09613913603607231, - "grad_norm": 0.000768249505199492, - "learning_rate": 0.00019999999559040083, - "loss": 46.0, - "step": 597 - }, - { - "epoch": 0.0963001731148597, - "grad_norm": 0.0007913981680758297, - "learning_rate": 0.00019999999557536384, - "loss": 46.0, - "step": 598 - }, - { - "epoch": 0.09646121019364709, - "grad_norm": 0.0006753549678251147, - "learning_rate": 0.00019999999556030127, - "loss": 46.0, - "step": 599 - }, - { - "epoch": 0.09662224727243447, - "grad_norm": 0.0004898831248283386, - "learning_rate": 0.00019999999554521307, - "loss": 46.0, - "step": 600 - }, - { - "epoch": 0.09678328435122187, - "grad_norm": 0.0013990583829581738, - "learning_rate": 0.00019999999553009932, - "loss": 46.0, - "step": 601 - }, - { - "epoch": 0.09694432143000926, - "grad_norm": 0.0006502924952656031, - "learning_rate": 0.00019999999551495993, - "loss": 46.0, - "step": 602 - }, - { - "epoch": 0.09710535850879665, - "grad_norm": 0.0009294247720390558, - "learning_rate": 0.000199999995499795, - "loss": 46.0, - "step": 603 - }, - { - "epoch": 0.09726639558758404, - "grad_norm": 0.0005790565046481788, - "learning_rate": 0.00019999999548460444, - "loss": 46.0, - "step": 604 - }, - { - "epoch": 0.09742743266637144, - "grad_norm": 0.0005814313190057874, - "learning_rate": 0.0001999999954693883, - "loss": 46.0, - "step": 605 - }, - { - "epoch": 0.09758846974515882, - "grad_norm": 0.0006459489814005792, - "learning_rate": 0.00019999999545414654, - "loss": 46.0, - "step": 606 - }, - { - "epoch": 0.09774950682394622, - "grad_norm": 0.0006937396829016507, - "learning_rate": 0.0001999999954388792, - "loss": 46.0, - "step": 607 - }, - { - "epoch": 0.0979105439027336, - "grad_norm": 0.0005056326044723392, - "learning_rate": 0.00019999999542358627, - "loss": 46.0, - "step": 608 - }, - { - "epoch": 0.098071580981521, - "grad_norm": 0.00040239939698949456, - "learning_rate": 0.00019999999540826771, - "loss": 46.0, - "step": 609 - }, - { - "epoch": 0.09823261806030839, - "grad_norm": 0.0016964153619483113, - "learning_rate": 0.0001999999953929236, - "loss": 46.0, - "step": 610 - }, - { - "epoch": 0.09839365513909577, - "grad_norm": 0.000712358858436346, - "learning_rate": 0.0001999999953775539, - "loss": 46.0, - "step": 611 - }, - { - "epoch": 0.09855469221788317, - "grad_norm": 0.0005845819250680506, - "learning_rate": 0.00019999999536215857, - "loss": 46.0, - "step": 612 - }, - { - "epoch": 0.09871572929667057, - "grad_norm": 0.0007552969036623836, - "learning_rate": 0.00019999999534673767, - "loss": 46.0, - "step": 613 - }, - { - "epoch": 0.09887676637545795, - "grad_norm": 0.0006136150914244354, - "learning_rate": 0.00019999999533129118, - "loss": 46.0, - "step": 614 - }, - { - "epoch": 0.09903780345424534, - "grad_norm": 0.0009305122075602412, - "learning_rate": 0.00019999999531581907, - "loss": 46.0, - "step": 615 - }, - { - "epoch": 0.09919884053303273, - "grad_norm": 0.0005232894327491522, - "learning_rate": 0.00019999999530032138, - "loss": 46.0, - "step": 616 - }, - { - "epoch": 0.09935987761182012, - "grad_norm": 0.0003367464814800769, - "learning_rate": 0.0001999999952847981, - "loss": 46.0, - "step": 617 - }, - { - "epoch": 0.09952091469060752, - "grad_norm": 0.00024847566965036094, - "learning_rate": 0.0001999999952692492, - "loss": 46.0, - "step": 618 - }, - { - "epoch": 0.0996819517693949, - "grad_norm": 0.0008768857805989683, - "learning_rate": 0.00019999999525367471, - "loss": 46.0, - "step": 619 - }, - { - "epoch": 0.0998429888481823, - "grad_norm": 0.0006605791859328747, - "learning_rate": 0.00019999999523807467, - "loss": 46.0, - "step": 620 - }, - { - "epoch": 0.10000402592696968, - "grad_norm": 0.0005555636016651988, - "learning_rate": 0.000199999995222449, - "loss": 46.0, - "step": 621 - }, - { - "epoch": 0.10016506300575707, - "grad_norm": 0.0007445701630786061, - "learning_rate": 0.00019999999520679774, - "loss": 46.0, - "step": 622 - }, - { - "epoch": 0.10032610008454447, - "grad_norm": 0.0008318775217048824, - "learning_rate": 0.00019999999519112086, - "loss": 46.0, - "step": 623 - }, - { - "epoch": 0.10048713716333185, - "grad_norm": 0.0015548253431916237, - "learning_rate": 0.00019999999517541844, - "loss": 46.0, - "step": 624 - }, - { - "epoch": 0.10064817424211925, - "grad_norm": 0.0005522758001461625, - "learning_rate": 0.00019999999515969038, - "loss": 46.0, - "step": 625 - }, - { - "epoch": 0.10080921132090664, - "grad_norm": 0.0005080297123640776, - "learning_rate": 0.00019999999514393673, - "loss": 46.0, - "step": 626 - }, - { - "epoch": 0.10097024839969403, - "grad_norm": 0.0008117557154037058, - "learning_rate": 0.0001999999951281575, - "loss": 46.0, - "step": 627 - }, - { - "epoch": 0.10113128547848142, - "grad_norm": 0.0007852627313695848, - "learning_rate": 0.00019999999511235265, - "loss": 46.0, - "step": 628 - }, - { - "epoch": 0.1012923225572688, - "grad_norm": 0.0006700888625346124, - "learning_rate": 0.00019999999509652224, - "loss": 46.0, - "step": 629 - }, - { - "epoch": 0.1014533596360562, - "grad_norm": 0.0005911216721870005, - "learning_rate": 0.00019999999508066622, - "loss": 46.0, - "step": 630 - }, - { - "epoch": 0.1016143967148436, - "grad_norm": 0.0007278225966729224, - "learning_rate": 0.0001999999950647846, - "loss": 46.0, - "step": 631 - }, - { - "epoch": 0.10177543379363098, - "grad_norm": 0.00048417862853966653, - "learning_rate": 0.00019999999504887738, - "loss": 46.0, - "step": 632 - }, - { - "epoch": 0.10193647087241837, - "grad_norm": 0.0011535398662090302, - "learning_rate": 0.00019999999503294457, - "loss": 46.0, - "step": 633 - }, - { - "epoch": 0.10209750795120577, - "grad_norm": 0.0007467381074093282, - "learning_rate": 0.0001999999950169862, - "loss": 46.0, - "step": 634 - }, - { - "epoch": 0.10225854502999315, - "grad_norm": 0.0016067279502749443, - "learning_rate": 0.00019999999500100218, - "loss": 46.0, - "step": 635 - }, - { - "epoch": 0.10241958210878055, - "grad_norm": 0.0008178745047189295, - "learning_rate": 0.0001999999949849926, - "loss": 46.0, - "step": 636 - }, - { - "epoch": 0.10258061918756793, - "grad_norm": 0.0019085468957200646, - "learning_rate": 0.0001999999949689574, - "loss": 46.0, - "step": 637 - }, - { - "epoch": 0.10274165626635533, - "grad_norm": 0.0012237601913511753, - "learning_rate": 0.00019999999495289662, - "loss": 46.0, - "step": 638 - }, - { - "epoch": 0.10290269334514272, - "grad_norm": 0.0004765917547047138, - "learning_rate": 0.0001999999949368103, - "loss": 46.0, - "step": 639 - }, - { - "epoch": 0.1030637304239301, - "grad_norm": 0.0006971553666517138, - "learning_rate": 0.00019999999492069828, - "loss": 46.0, - "step": 640 - }, - { - "epoch": 0.1032247675027175, - "grad_norm": 0.0005272507551126182, - "learning_rate": 0.00019999999490456075, - "loss": 46.0, - "step": 641 - }, - { - "epoch": 0.1033858045815049, - "grad_norm": 0.000627867178991437, - "learning_rate": 0.00019999999488839757, - "loss": 46.0, - "step": 642 - }, - { - "epoch": 0.10354684166029228, - "grad_norm": 0.000540842127520591, - "learning_rate": 0.0001999999948722088, - "loss": 46.0, - "step": 643 - }, - { - "epoch": 0.10370787873907968, - "grad_norm": 0.0008780147763900459, - "learning_rate": 0.00019999999485599448, - "loss": 46.0, - "step": 644 - }, - { - "epoch": 0.10386891581786706, - "grad_norm": 0.0004521933733485639, - "learning_rate": 0.00019999999483975454, - "loss": 46.0, - "step": 645 - }, - { - "epoch": 0.10402995289665445, - "grad_norm": 0.0009476662380620837, - "learning_rate": 0.00019999999482348896, - "loss": 46.0, - "step": 646 - }, - { - "epoch": 0.10419098997544185, - "grad_norm": 0.000842555717099458, - "learning_rate": 0.00019999999480719785, - "loss": 46.0, - "step": 647 - }, - { - "epoch": 0.10435202705422923, - "grad_norm": 0.0008570015779696405, - "learning_rate": 0.00019999999479088112, - "loss": 46.0, - "step": 648 - }, - { - "epoch": 0.10451306413301663, - "grad_norm": 0.0007610553875565529, - "learning_rate": 0.00019999999477453878, - "loss": 46.0, - "step": 649 - }, - { - "epoch": 0.10467410121180402, - "grad_norm": 0.0007040970376692712, - "learning_rate": 0.00019999999475817085, - "loss": 46.0, - "step": 650 - }, - { - "epoch": 0.1048351382905914, - "grad_norm": 0.0014141713036224246, - "learning_rate": 0.00019999999474177736, - "loss": 46.0, - "step": 651 - }, - { - "epoch": 0.1049961753693788, - "grad_norm": 0.0005423424299806356, - "learning_rate": 0.00019999999472535826, - "loss": 46.0, - "step": 652 - }, - { - "epoch": 0.10515721244816618, - "grad_norm": 0.0005851828027516603, - "learning_rate": 0.00019999999470891354, - "loss": 46.0, - "step": 653 - }, - { - "epoch": 0.10531824952695358, - "grad_norm": 0.0004984669503755867, - "learning_rate": 0.00019999999469244326, - "loss": 46.0, - "step": 654 - }, - { - "epoch": 0.10547928660574098, - "grad_norm": 0.0010419270256534219, - "learning_rate": 0.00019999999467594734, - "loss": 46.0, - "step": 655 - }, - { - "epoch": 0.10564032368452836, - "grad_norm": 0.0008180539007298648, - "learning_rate": 0.00019999999465942586, - "loss": 46.0, - "step": 656 - }, - { - "epoch": 0.10580136076331575, - "grad_norm": 0.001139450934715569, - "learning_rate": 0.00019999999464287877, - "loss": 46.0, - "step": 657 - }, - { - "epoch": 0.10596239784210315, - "grad_norm": 0.0004230669583193958, - "learning_rate": 0.00019999999462630612, - "loss": 46.0, - "step": 658 - }, - { - "epoch": 0.10612343492089053, - "grad_norm": 0.0006402652943506837, - "learning_rate": 0.00019999999460970785, - "loss": 46.0, - "step": 659 - }, - { - "epoch": 0.10628447199967793, - "grad_norm": 0.0006499659502878785, - "learning_rate": 0.000199999994593084, - "loss": 46.0, - "step": 660 - }, - { - "epoch": 0.10644550907846531, - "grad_norm": 0.0007256578537635505, - "learning_rate": 0.0001999999945764345, - "loss": 46.0, - "step": 661 - }, - { - "epoch": 0.10660654615725271, - "grad_norm": 0.0008143671439029276, - "learning_rate": 0.00019999999455975944, - "loss": 46.0, - "step": 662 - }, - { - "epoch": 0.1067675832360401, - "grad_norm": 0.0008687827503308654, - "learning_rate": 0.0001999999945430588, - "loss": 46.0, - "step": 663 - }, - { - "epoch": 0.10692862031482749, - "grad_norm": 0.0007232081843540072, - "learning_rate": 0.00019999999452633256, - "loss": 46.0, - "step": 664 - }, - { - "epoch": 0.10708965739361488, - "grad_norm": 0.0009402253199368715, - "learning_rate": 0.00019999999450958072, - "loss": 46.0, - "step": 665 - }, - { - "epoch": 0.10725069447240226, - "grad_norm": 0.0006775706424377859, - "learning_rate": 0.00019999999449280326, - "loss": 46.0, - "step": 666 - }, - { - "epoch": 0.10741173155118966, - "grad_norm": 0.00037028861697763205, - "learning_rate": 0.00019999999447600024, - "loss": 46.0, - "step": 667 - }, - { - "epoch": 0.10757276862997706, - "grad_norm": 0.0005190723459236324, - "learning_rate": 0.00019999999445917163, - "loss": 46.0, - "step": 668 - }, - { - "epoch": 0.10773380570876444, - "grad_norm": 0.000885549234226346, - "learning_rate": 0.00019999999444231739, - "loss": 46.0, - "step": 669 - }, - { - "epoch": 0.10789484278755183, - "grad_norm": 0.0013564539840444922, - "learning_rate": 0.0001999999944254376, - "loss": 46.0, - "step": 670 - }, - { - "epoch": 0.10805587986633923, - "grad_norm": 0.00039320086943916976, - "learning_rate": 0.00019999999440853218, - "loss": 46.0, - "step": 671 - }, - { - "epoch": 0.10821691694512661, - "grad_norm": 0.0006643789820373058, - "learning_rate": 0.00019999999439160115, - "loss": 46.0, - "step": 672 - }, - { - "epoch": 0.10837795402391401, - "grad_norm": 0.0006942615727894008, - "learning_rate": 0.00019999999437464455, - "loss": 46.0, - "step": 673 - }, - { - "epoch": 0.10853899110270139, - "grad_norm": 0.001256827381439507, - "learning_rate": 0.00019999999435766237, - "loss": 46.0, - "step": 674 - }, - { - "epoch": 0.10870002818148879, - "grad_norm": 0.0006793045904487371, - "learning_rate": 0.00019999999434065457, - "loss": 46.0, - "step": 675 - }, - { - "epoch": 0.10886106526027618, - "grad_norm": 0.0003982323396485299, - "learning_rate": 0.0001999999943236212, - "loss": 46.0, - "step": 676 - }, - { - "epoch": 0.10902210233906356, - "grad_norm": 0.0004949101130478084, - "learning_rate": 0.0001999999943065622, - "loss": 46.0, - "step": 677 - }, - { - "epoch": 0.10918313941785096, - "grad_norm": 0.00018608546815812588, - "learning_rate": 0.00019999999428947765, - "loss": 46.0, - "step": 678 - }, - { - "epoch": 0.10934417649663836, - "grad_norm": 0.000787493190728128, - "learning_rate": 0.00019999999427236748, - "loss": 46.0, - "step": 679 - }, - { - "epoch": 0.10950521357542574, - "grad_norm": 0.0006306565483100712, - "learning_rate": 0.00019999999425523171, - "loss": 46.0, - "step": 680 - }, - { - "epoch": 0.10966625065421313, - "grad_norm": 0.000702961056958884, - "learning_rate": 0.00019999999423807034, - "loss": 46.0, - "step": 681 - }, - { - "epoch": 0.10982728773300052, - "grad_norm": 0.0007224611472338438, - "learning_rate": 0.0001999999942208834, - "loss": 46.0, - "step": 682 - }, - { - "epoch": 0.10998832481178791, - "grad_norm": 0.0010548344580456614, - "learning_rate": 0.00019999999420367085, - "loss": 46.0, - "step": 683 - }, - { - "epoch": 0.11014936189057531, - "grad_norm": 0.00038637136458419263, - "learning_rate": 0.0001999999941864327, - "loss": 46.0, - "step": 684 - }, - { - "epoch": 0.11031039896936269, - "grad_norm": 0.0010405262000858784, - "learning_rate": 0.00019999999416916894, - "loss": 46.0, - "step": 685 - }, - { - "epoch": 0.11047143604815009, - "grad_norm": 0.000544868700671941, - "learning_rate": 0.00019999999415187963, - "loss": 46.0, - "step": 686 - }, - { - "epoch": 0.11063247312693748, - "grad_norm": 0.0003214654861949384, - "learning_rate": 0.0001999999941345647, - "loss": 46.0, - "step": 687 - }, - { - "epoch": 0.11079351020572487, - "grad_norm": 0.0006614724989049137, - "learning_rate": 0.0001999999941172242, - "loss": 46.0, - "step": 688 - }, - { - "epoch": 0.11095454728451226, - "grad_norm": 0.0009841398568823934, - "learning_rate": 0.00019999999409985806, - "loss": 46.0, - "step": 689 - }, - { - "epoch": 0.11111558436329964, - "grad_norm": 0.0010768999345600605, - "learning_rate": 0.00019999999408246635, - "loss": 46.0, - "step": 690 - }, - { - "epoch": 0.11127662144208704, - "grad_norm": 0.0007441352354362607, - "learning_rate": 0.00019999999406504905, - "loss": 46.0, - "step": 691 - }, - { - "epoch": 0.11143765852087444, - "grad_norm": 0.0003263895632699132, - "learning_rate": 0.00019999999404760613, - "loss": 46.0, - "step": 692 - }, - { - "epoch": 0.11159869559966182, - "grad_norm": 0.0004391400143504143, - "learning_rate": 0.00019999999403013763, - "loss": 46.0, - "step": 693 - }, - { - "epoch": 0.11175973267844921, - "grad_norm": 0.00041094841435551643, - "learning_rate": 0.00019999999401264354, - "loss": 46.0, - "step": 694 - }, - { - "epoch": 0.11192076975723661, - "grad_norm": 0.0005844959523528814, - "learning_rate": 0.00019999999399512386, - "loss": 46.0, - "step": 695 - }, - { - "epoch": 0.11208180683602399, - "grad_norm": 0.0009718054789118469, - "learning_rate": 0.00019999999397757857, - "loss": 46.0, - "step": 696 - }, - { - "epoch": 0.11224284391481139, - "grad_norm": 0.00024566747015342116, - "learning_rate": 0.0001999999939600077, - "loss": 46.0, - "step": 697 - }, - { - "epoch": 0.11240388099359877, - "grad_norm": 0.0004258676781319082, - "learning_rate": 0.00019999999394241123, - "loss": 46.0, - "step": 698 - }, - { - "epoch": 0.11256491807238617, - "grad_norm": 0.0005756361060775816, - "learning_rate": 0.00019999999392478915, - "loss": 46.0, - "step": 699 - }, - { - "epoch": 0.11272595515117356, - "grad_norm": 0.0007779954466968775, - "learning_rate": 0.00019999999390714148, - "loss": 46.0, - "step": 700 - }, - { - "epoch": 0.11288699222996094, - "grad_norm": 0.00047630540211685, - "learning_rate": 0.00019999999388946825, - "loss": 46.0, - "step": 701 - }, - { - "epoch": 0.11304802930874834, - "grad_norm": 0.0009862382430583239, - "learning_rate": 0.00019999999387176938, - "loss": 46.0, - "step": 702 - }, - { - "epoch": 0.11320906638753574, - "grad_norm": 0.00047935088514350355, - "learning_rate": 0.00019999999385404493, - "loss": 46.0, - "step": 703 - }, - { - "epoch": 0.11337010346632312, - "grad_norm": 0.0006019410793669522, - "learning_rate": 0.0001999999938362949, - "loss": 46.0, - "step": 704 - }, - { - "epoch": 0.11353114054511051, - "grad_norm": 0.0003566310042515397, - "learning_rate": 0.00019999999381851925, - "loss": 46.0, - "step": 705 - }, - { - "epoch": 0.1136921776238979, - "grad_norm": 0.0008112428477033973, - "learning_rate": 0.00019999999380071804, - "loss": 46.0, - "step": 706 - }, - { - "epoch": 0.1138532147026853, - "grad_norm": 0.0008270882535725832, - "learning_rate": 0.0001999999937828912, - "loss": 46.0, - "step": 707 - }, - { - "epoch": 0.11401425178147269, - "grad_norm": 0.0005021857796236873, - "learning_rate": 0.00019999999376503879, - "loss": 46.0, - "step": 708 - }, - { - "epoch": 0.11417528886026007, - "grad_norm": 0.0010833428241312504, - "learning_rate": 0.00019999999374716078, - "loss": 46.0, - "step": 709 - }, - { - "epoch": 0.11433632593904747, - "grad_norm": 0.0007432840066030622, - "learning_rate": 0.00019999999372925716, - "loss": 46.0, - "step": 710 - }, - { - "epoch": 0.11449736301783486, - "grad_norm": 0.0003462954191491008, - "learning_rate": 0.00019999999371132793, - "loss": 46.0, - "step": 711 - }, - { - "epoch": 0.11465840009662225, - "grad_norm": 0.0006248669815249741, - "learning_rate": 0.00019999999369337313, - "loss": 46.0, - "step": 712 - }, - { - "epoch": 0.11481943717540964, - "grad_norm": 0.0008630806696601212, - "learning_rate": 0.00019999999367539278, - "loss": 46.0, - "step": 713 - }, - { - "epoch": 0.11498047425419702, - "grad_norm": 0.0026565478183329105, - "learning_rate": 0.00019999999365738678, - "loss": 46.0, - "step": 714 - }, - { - "epoch": 0.11514151133298442, - "grad_norm": 0.0003844198363367468, - "learning_rate": 0.0001999999936393552, - "loss": 46.0, - "step": 715 - }, - { - "epoch": 0.11530254841177182, - "grad_norm": 0.0009668955462984741, - "learning_rate": 0.000199999993621298, - "loss": 46.0, - "step": 716 - }, - { - "epoch": 0.1154635854905592, - "grad_norm": 0.0006512926192954183, - "learning_rate": 0.00019999999360321522, - "loss": 46.0, - "step": 717 - }, - { - "epoch": 0.1156246225693466, - "grad_norm": 0.0004899580380879343, - "learning_rate": 0.00019999999358510687, - "loss": 46.0, - "step": 718 - }, - { - "epoch": 0.11578565964813398, - "grad_norm": 0.0007209365139715374, - "learning_rate": 0.0001999999935669729, - "loss": 46.0, - "step": 719 - }, - { - "epoch": 0.11594669672692137, - "grad_norm": 0.0008001966052688658, - "learning_rate": 0.00019999999354881334, - "loss": 46.0, - "step": 720 - }, - { - "epoch": 0.11610773380570877, - "grad_norm": 0.00044169294415041804, - "learning_rate": 0.00019999999353062818, - "loss": 46.0, - "step": 721 - }, - { - "epoch": 0.11626877088449615, - "grad_norm": 0.00034914870047941804, - "learning_rate": 0.00019999999351241746, - "loss": 46.0, - "step": 722 - }, - { - "epoch": 0.11642980796328355, - "grad_norm": 0.0007880452321842313, - "learning_rate": 0.0001999999934941811, - "loss": 46.0, - "step": 723 - }, - { - "epoch": 0.11659084504207094, - "grad_norm": 0.0006567139644175768, - "learning_rate": 0.00019999999347591915, - "loss": 46.0, - "step": 724 - }, - { - "epoch": 0.11675188212085832, - "grad_norm": 0.0012865960597991943, - "learning_rate": 0.00019999999345763164, - "loss": 46.0, - "step": 725 - }, - { - "epoch": 0.11691291919964572, - "grad_norm": 0.00031495318398810923, - "learning_rate": 0.00019999999343931851, - "loss": 46.0, - "step": 726 - }, - { - "epoch": 0.1170739562784331, - "grad_norm": 0.0009946618229150772, - "learning_rate": 0.00019999999342097978, - "loss": 46.0, - "step": 727 - }, - { - "epoch": 0.1172349933572205, - "grad_norm": 0.0006088796071708202, - "learning_rate": 0.00019999999340261545, - "loss": 46.0, - "step": 728 - }, - { - "epoch": 0.1173960304360079, - "grad_norm": 0.0006799606489948928, - "learning_rate": 0.00019999999338422554, - "loss": 46.0, - "step": 729 - }, - { - "epoch": 0.11755706751479528, - "grad_norm": 0.0011438957881182432, - "learning_rate": 0.00019999999336581, - "loss": 46.0, - "step": 730 - }, - { - "epoch": 0.11771810459358267, - "grad_norm": 0.0005068652098998427, - "learning_rate": 0.00019999999334736893, - "loss": 46.0, - "step": 731 - }, - { - "epoch": 0.11787914167237007, - "grad_norm": 0.0005817121127620339, - "learning_rate": 0.00019999999332890223, - "loss": 46.0, - "step": 732 - }, - { - "epoch": 0.11804017875115745, - "grad_norm": 0.0015141000039875507, - "learning_rate": 0.00019999999331040994, - "loss": 46.0, - "step": 733 - }, - { - "epoch": 0.11820121582994485, - "grad_norm": 0.0008203965262509882, - "learning_rate": 0.00019999999329189206, - "loss": 46.0, - "step": 734 - }, - { - "epoch": 0.11836225290873223, - "grad_norm": 0.0004771222302224487, - "learning_rate": 0.00019999999327334855, - "loss": 46.0, - "step": 735 - }, - { - "epoch": 0.11852328998751963, - "grad_norm": 0.0003979514294769615, - "learning_rate": 0.0001999999932547795, - "loss": 46.0, - "step": 736 - }, - { - "epoch": 0.11868432706630702, - "grad_norm": 0.0005276133888401091, - "learning_rate": 0.0001999999932361848, - "loss": 46.0, - "step": 737 - }, - { - "epoch": 0.1188453641450944, - "grad_norm": 0.0005074460059404373, - "learning_rate": 0.00019999999321756456, - "loss": 46.0, - "step": 738 - }, - { - "epoch": 0.1190064012238818, - "grad_norm": 0.0010084678651764989, - "learning_rate": 0.0001999999931989187, - "loss": 46.0, - "step": 739 - }, - { - "epoch": 0.1191674383026692, - "grad_norm": 0.0007157850777730346, - "learning_rate": 0.0001999999931802472, - "loss": 46.0, - "step": 740 - }, - { - "epoch": 0.11932847538145658, - "grad_norm": 0.00042946788016706705, - "learning_rate": 0.00019999999316155017, - "loss": 46.0, - "step": 741 - }, - { - "epoch": 0.11948951246024397, - "grad_norm": 0.00038639921694993973, - "learning_rate": 0.00019999999314282755, - "loss": 46.0, - "step": 742 - }, - { - "epoch": 0.11965054953903136, - "grad_norm": 0.000835128768812865, - "learning_rate": 0.00019999999312407928, - "loss": 46.0, - "step": 743 - }, - { - "epoch": 0.11981158661781875, - "grad_norm": 0.000891783507540822, - "learning_rate": 0.00019999999310530545, - "loss": 46.0, - "step": 744 - }, - { - "epoch": 0.11997262369660615, - "grad_norm": 0.0004395773576106876, - "learning_rate": 0.000199999993086506, - "loss": 46.0, - "step": 745 - }, - { - "epoch": 0.12013366077539353, - "grad_norm": 0.0005717150052078068, - "learning_rate": 0.000199999993067681, - "loss": 46.0, - "step": 746 - }, - { - "epoch": 0.12029469785418093, - "grad_norm": 0.0004644012951757759, - "learning_rate": 0.00019999999304883036, - "loss": 46.0, - "step": 747 - }, - { - "epoch": 0.12045573493296832, - "grad_norm": 0.0005582737503573298, - "learning_rate": 0.00019999999302995416, - "loss": 46.0, - "step": 748 - }, - { - "epoch": 0.1206167720117557, - "grad_norm": 0.00038077603676356375, - "learning_rate": 0.00019999999301105237, - "loss": 46.0, - "step": 749 - }, - { - "epoch": 0.1207778090905431, - "grad_norm": 0.0012594073778018355, - "learning_rate": 0.00019999999299212496, - "loss": 46.0, - "step": 750 - }, - { - "epoch": 0.12093884616933048, - "grad_norm": 0.0008036632789298892, - "learning_rate": 0.00019999999297317197, - "loss": 46.0, - "step": 751 - }, - { - "epoch": 0.12109988324811788, - "grad_norm": 0.0005055126966908574, - "learning_rate": 0.00019999999295419336, - "loss": 46.0, - "step": 752 - }, - { - "epoch": 0.12126092032690527, - "grad_norm": 0.0005579818389378488, - "learning_rate": 0.00019999999293518917, - "loss": 46.0, - "step": 753 - }, - { - "epoch": 0.12142195740569266, - "grad_norm": 0.0011566895991563797, - "learning_rate": 0.0001999999929161594, - "loss": 46.0, - "step": 754 - }, - { - "epoch": 0.12158299448448005, - "grad_norm": 0.0005442249239422381, - "learning_rate": 0.000199999992897104, - "loss": 46.0, - "step": 755 - }, - { - "epoch": 0.12174403156326745, - "grad_norm": 0.0005158275598660111, - "learning_rate": 0.000199999992878023, - "loss": 46.0, - "step": 756 - }, - { - "epoch": 0.12190506864205483, - "grad_norm": 0.0016388717340305448, - "learning_rate": 0.00019999999285891647, - "loss": 46.0, - "step": 757 - }, - { - "epoch": 0.12206610572084223, - "grad_norm": 0.0006762741249985993, - "learning_rate": 0.0001999999928397843, - "loss": 46.0, - "step": 758 - }, - { - "epoch": 0.12222714279962961, - "grad_norm": 0.0009991052793338895, - "learning_rate": 0.00019999999282062654, - "loss": 46.0, - "step": 759 - }, - { - "epoch": 0.122388179878417, - "grad_norm": 0.001074435655027628, - "learning_rate": 0.00019999999280144318, - "loss": 46.0, - "step": 760 - }, - { - "epoch": 0.1225492169572044, - "grad_norm": 0.00041439803317189217, - "learning_rate": 0.00019999999278223424, - "loss": 46.0, - "step": 761 - }, - { - "epoch": 0.12271025403599178, - "grad_norm": 0.000713004672434181, - "learning_rate": 0.00019999999276299968, - "loss": 46.0, - "step": 762 - }, - { - "epoch": 0.12287129111477918, - "grad_norm": 0.000530066667124629, - "learning_rate": 0.00019999999274373956, - "loss": 46.0, - "step": 763 - }, - { - "epoch": 0.12303232819356656, - "grad_norm": 0.000889762828592211, - "learning_rate": 0.0001999999927244538, - "loss": 46.0, - "step": 764 - }, - { - "epoch": 0.12319336527235396, - "grad_norm": 0.00025027673109434545, - "learning_rate": 0.0001999999927051425, - "loss": 46.0, - "step": 765 - }, - { - "epoch": 0.12335440235114135, - "grad_norm": 0.0018331586616113782, - "learning_rate": 0.00019999999268580557, - "loss": 46.0, - "step": 766 - }, - { - "epoch": 0.12351543942992874, - "grad_norm": 0.0006177728064358234, - "learning_rate": 0.00019999999266644304, - "loss": 46.0, - "step": 767 - }, - { - "epoch": 0.12367647650871613, - "grad_norm": 0.0011881053214892745, - "learning_rate": 0.00019999999264705493, - "loss": 46.0, - "step": 768 - }, - { - "epoch": 0.12383751358750353, - "grad_norm": 0.001661331276409328, - "learning_rate": 0.0001999999926276412, - "loss": 46.0, - "step": 769 - }, - { - "epoch": 0.12399855066629091, - "grad_norm": 0.0010164374252781272, - "learning_rate": 0.00019999999260820193, - "loss": 46.0, - "step": 770 - }, - { - "epoch": 0.1241595877450783, - "grad_norm": 0.000998982461169362, - "learning_rate": 0.00019999999258873703, - "loss": 46.0, - "step": 771 - }, - { - "epoch": 0.12432062482386569, - "grad_norm": 0.000523116672411561, - "learning_rate": 0.00019999999256924654, - "loss": 46.0, - "step": 772 - }, - { - "epoch": 0.12448166190265308, - "grad_norm": 0.001023276592604816, - "learning_rate": 0.00019999999254973044, - "loss": 46.0, - "step": 773 - }, - { - "epoch": 0.12464269898144048, - "grad_norm": 0.002048916881904006, - "learning_rate": 0.00019999999253018875, - "loss": 46.0, - "step": 774 - }, - { - "epoch": 0.12480373606022786, - "grad_norm": 0.0005882385303266346, - "learning_rate": 0.00019999999251062148, - "loss": 46.0, - "step": 775 - }, - { - "epoch": 0.12496477313901526, - "grad_norm": 0.00048309596604667604, - "learning_rate": 0.00019999999249102862, - "loss": 46.0, - "step": 776 - }, - { - "epoch": 0.12512581021780264, - "grad_norm": 0.0005158385611139238, - "learning_rate": 0.00019999999247141014, - "loss": 46.0, - "step": 777 - }, - { - "epoch": 0.12528684729659004, - "grad_norm": 0.0012636081082746387, - "learning_rate": 0.00019999999245176608, - "loss": 46.0, - "step": 778 - }, - { - "epoch": 0.12544788437537743, - "grad_norm": 0.000460075621958822, - "learning_rate": 0.00019999999243209643, - "loss": 46.0, - "step": 779 - }, - { - "epoch": 0.12560892145416483, - "grad_norm": 0.0013009477406740189, - "learning_rate": 0.0001999999924124012, - "loss": 46.0, - "step": 780 - }, - { - "epoch": 0.12576995853295223, - "grad_norm": 0.0004484928213059902, - "learning_rate": 0.00019999999239268034, - "loss": 46.0, - "step": 781 - }, - { - "epoch": 0.1259309956117396, - "grad_norm": 0.0005908760358579457, - "learning_rate": 0.00019999999237293388, - "loss": 46.0, - "step": 782 - }, - { - "epoch": 0.126092032690527, - "grad_norm": 0.0013063220540061593, - "learning_rate": 0.00019999999235316188, - "loss": 46.0, - "step": 783 - }, - { - "epoch": 0.12625306976931439, - "grad_norm": 0.0008520261035300791, - "learning_rate": 0.00019999999233336424, - "loss": 46.0, - "step": 784 - }, - { - "epoch": 0.12641410684810178, - "grad_norm": 0.0007049093255773187, - "learning_rate": 0.00019999999231354098, - "loss": 46.0, - "step": 785 - }, - { - "epoch": 0.12657514392688918, - "grad_norm": 0.0006392010254785419, - "learning_rate": 0.00019999999229369217, - "loss": 46.0, - "step": 786 - }, - { - "epoch": 0.12673618100567655, - "grad_norm": 0.000791541300714016, - "learning_rate": 0.00019999999227381777, - "loss": 46.0, - "step": 787 - }, - { - "epoch": 0.12689721808446394, - "grad_norm": 0.0006850518984720111, - "learning_rate": 0.00019999999225391775, - "loss": 46.0, - "step": 788 - }, - { - "epoch": 0.12705825516325134, - "grad_norm": 0.0010444442741572857, - "learning_rate": 0.00019999999223399215, - "loss": 46.0, - "step": 789 - }, - { - "epoch": 0.12721929224203873, - "grad_norm": 0.0006679598009213805, - "learning_rate": 0.00019999999221404093, - "loss": 46.0, - "step": 790 - }, - { - "epoch": 0.12738032932082613, - "grad_norm": 0.0009414281230419874, - "learning_rate": 0.00019999999219406415, - "loss": 46.0, - "step": 791 - }, - { - "epoch": 0.1275413663996135, - "grad_norm": 0.0002177072165068239, - "learning_rate": 0.00019999999217406176, - "loss": 46.0, - "step": 792 - }, - { - "epoch": 0.1277024034784009, - "grad_norm": 0.0007464716909453273, - "learning_rate": 0.00019999999215403378, - "loss": 46.0, - "step": 793 - }, - { - "epoch": 0.1278634405571883, - "grad_norm": 0.0006135865696705878, - "learning_rate": 0.0001999999921339802, - "loss": 46.0, - "step": 794 - }, - { - "epoch": 0.1280244776359757, - "grad_norm": 0.001909434562548995, - "learning_rate": 0.00019999999211390104, - "loss": 46.0, - "step": 795 - }, - { - "epoch": 0.12818551471476308, - "grad_norm": 0.0012111793039366603, - "learning_rate": 0.00019999999209379624, - "loss": 46.0, - "step": 796 - }, - { - "epoch": 0.12834655179355048, - "grad_norm": 0.0005831873277202249, - "learning_rate": 0.0001999999920736659, - "loss": 46.0, - "step": 797 - }, - { - "epoch": 0.12850758887233785, - "grad_norm": 0.0005240371683612466, - "learning_rate": 0.00019999999205350992, - "loss": 46.0, - "step": 798 - }, - { - "epoch": 0.12866862595112524, - "grad_norm": 0.0006707817665301263, - "learning_rate": 0.0001999999920333284, - "loss": 46.0, - "step": 799 - }, - { - "epoch": 0.12882966302991264, - "grad_norm": 0.0012076569255441427, - "learning_rate": 0.00019999999201312122, - "loss": 46.0, - "step": 800 - }, - { - "epoch": 0.12899070010870003, - "grad_norm": 0.0012680863728746772, - "learning_rate": 0.00019999999199288852, - "loss": 46.0, - "step": 801 - }, - { - "epoch": 0.12915173718748743, - "grad_norm": 0.0006465308251790702, - "learning_rate": 0.00019999999197263014, - "loss": 46.0, - "step": 802 - }, - { - "epoch": 0.1293127742662748, - "grad_norm": 0.0006390257622115314, - "learning_rate": 0.0001999999919523462, - "loss": 46.0, - "step": 803 - }, - { - "epoch": 0.1294738113450622, - "grad_norm": 0.00044906462426297367, - "learning_rate": 0.0001999999919320367, - "loss": 46.0, - "step": 804 - }, - { - "epoch": 0.1296348484238496, - "grad_norm": 0.001070193131454289, - "learning_rate": 0.0001999999919117016, - "loss": 46.0, - "step": 805 - }, - { - "epoch": 0.129795885502637, - "grad_norm": 0.0004957982455380261, - "learning_rate": 0.00019999999189134084, - "loss": 46.0, - "step": 806 - }, - { - "epoch": 0.12995692258142438, - "grad_norm": 0.00028306854073889554, - "learning_rate": 0.00019999999187095453, - "loss": 46.0, - "step": 807 - }, - { - "epoch": 0.13011795966021175, - "grad_norm": 0.001261686789803207, - "learning_rate": 0.00019999999185054264, - "loss": 46.0, - "step": 808 - }, - { - "epoch": 0.13027899673899915, - "grad_norm": 0.0004846264491789043, - "learning_rate": 0.00019999999183010513, - "loss": 46.0, - "step": 809 - }, - { - "epoch": 0.13044003381778654, - "grad_norm": 0.00039904803270474076, - "learning_rate": 0.00019999999180964203, - "loss": 46.0, - "step": 810 - }, - { - "epoch": 0.13060107089657394, - "grad_norm": 0.0005873935297131538, - "learning_rate": 0.00019999999178915335, - "loss": 46.0, - "step": 811 - }, - { - "epoch": 0.13076210797536134, - "grad_norm": 0.0003998268221039325, - "learning_rate": 0.00019999999176863905, - "loss": 46.0, - "step": 812 - }, - { - "epoch": 0.1309231450541487, - "grad_norm": 0.000923747313208878, - "learning_rate": 0.00019999999174809917, - "loss": 46.0, - "step": 813 - }, - { - "epoch": 0.1310841821329361, - "grad_norm": 0.0010215610964223742, - "learning_rate": 0.0001999999917275337, - "loss": 46.0, - "step": 814 - }, - { - "epoch": 0.1312452192117235, - "grad_norm": 0.00045427761506289244, - "learning_rate": 0.0001999999917069426, - "loss": 46.0, - "step": 815 - }, - { - "epoch": 0.1314062562905109, - "grad_norm": 0.00033573489054106176, - "learning_rate": 0.00019999999168632596, - "loss": 46.0, - "step": 816 - }, - { - "epoch": 0.1315672933692983, - "grad_norm": 0.0006197026814334095, - "learning_rate": 0.0001999999916656837, - "loss": 46.0, - "step": 817 - }, - { - "epoch": 0.13172833044808568, - "grad_norm": 0.000529855489730835, - "learning_rate": 0.00019999999164501585, - "loss": 46.0, - "step": 818 - }, - { - "epoch": 0.13188936752687305, - "grad_norm": 0.001919926144182682, - "learning_rate": 0.0001999999916243224, - "loss": 46.0, - "step": 819 - }, - { - "epoch": 0.13205040460566045, - "grad_norm": 0.00046961347106844187, - "learning_rate": 0.0001999999916036033, - "loss": 46.0, - "step": 820 - }, - { - "epoch": 0.13221144168444784, - "grad_norm": 0.0006622943910770118, - "learning_rate": 0.0001999999915828587, - "loss": 46.0, - "step": 821 - }, - { - "epoch": 0.13237247876323524, - "grad_norm": 0.00031556421890854836, - "learning_rate": 0.00019999999156208845, - "loss": 46.0, - "step": 822 - }, - { - "epoch": 0.13253351584202264, - "grad_norm": 0.000540242122951895, - "learning_rate": 0.00019999999154129264, - "loss": 46.0, - "step": 823 - }, - { - "epoch": 0.13269455292081, - "grad_norm": 0.0003218321071472019, - "learning_rate": 0.0001999999915204712, - "loss": 46.0, - "step": 824 - }, - { - "epoch": 0.1328555899995974, - "grad_norm": 0.000582866370677948, - "learning_rate": 0.00019999999149962418, - "loss": 46.0, - "step": 825 - }, - { - "epoch": 0.1330166270783848, - "grad_norm": 0.0018018016126006842, - "learning_rate": 0.00019999999147875158, - "loss": 46.0, - "step": 826 - }, - { - "epoch": 0.1331776641571722, - "grad_norm": 0.0010835527209565043, - "learning_rate": 0.00019999999145785336, - "loss": 46.0, - "step": 827 - }, - { - "epoch": 0.1333387012359596, - "grad_norm": 0.0008114787633530796, - "learning_rate": 0.00019999999143692956, - "loss": 46.0, - "step": 828 - }, - { - "epoch": 0.13349973831474696, - "grad_norm": 0.0005753064760938287, - "learning_rate": 0.00019999999141598015, - "loss": 46.0, - "step": 829 - }, - { - "epoch": 0.13366077539353435, - "grad_norm": 0.0004097472701687366, - "learning_rate": 0.00019999999139500515, - "loss": 46.0, - "step": 830 - }, - { - "epoch": 0.13382181247232175, - "grad_norm": 0.0005311646382324398, - "learning_rate": 0.00019999999137400458, - "loss": 46.0, - "step": 831 - }, - { - "epoch": 0.13398284955110915, - "grad_norm": 0.0011267259251326323, - "learning_rate": 0.00019999999135297838, - "loss": 46.0, - "step": 832 - }, - { - "epoch": 0.13414388662989654, - "grad_norm": 0.0002796841145027429, - "learning_rate": 0.0001999999913319266, - "loss": 46.0, - "step": 833 - }, - { - "epoch": 0.13430492370868394, - "grad_norm": 0.0008160819415934384, - "learning_rate": 0.00019999999131084924, - "loss": 46.0, - "step": 834 - }, - { - "epoch": 0.1344659607874713, - "grad_norm": 0.0005949228652752936, - "learning_rate": 0.00019999999128974627, - "loss": 46.0, - "step": 835 - }, - { - "epoch": 0.1346269978662587, - "grad_norm": 0.00039833923801779747, - "learning_rate": 0.0001999999912686177, - "loss": 46.0, - "step": 836 - }, - { - "epoch": 0.1347880349450461, - "grad_norm": 0.0007659844122827053, - "learning_rate": 0.00019999999124746353, - "loss": 46.0, - "step": 837 - }, - { - "epoch": 0.1349490720238335, - "grad_norm": 0.0008190494845621288, - "learning_rate": 0.0001999999912262838, - "loss": 46.0, - "step": 838 - }, - { - "epoch": 0.1351101091026209, - "grad_norm": 0.0006771478219889104, - "learning_rate": 0.00019999999120507843, - "loss": 46.0, - "step": 839 - }, - { - "epoch": 0.13527114618140826, - "grad_norm": 0.0004955170443281531, - "learning_rate": 0.0001999999911838475, - "loss": 46.0, - "step": 840 - }, - { - "epoch": 0.13543218326019565, - "grad_norm": 0.0004837427695747465, - "learning_rate": 0.00019999999116259096, - "loss": 46.0, - "step": 841 - }, - { - "epoch": 0.13559322033898305, - "grad_norm": 0.0005543114384636283, - "learning_rate": 0.00019999999114130883, - "loss": 46.0, - "step": 842 - }, - { - "epoch": 0.13575425741777045, - "grad_norm": 0.0011339414631947875, - "learning_rate": 0.0001999999911200011, - "loss": 46.0, - "step": 843 - }, - { - "epoch": 0.13591529449655784, - "grad_norm": 0.0006278124637901783, - "learning_rate": 0.00019999999109866776, - "loss": 46.0, - "step": 844 - }, - { - "epoch": 0.1360763315753452, - "grad_norm": 0.0016938516637310386, - "learning_rate": 0.00019999999107730884, - "loss": 46.0, - "step": 845 - }, - { - "epoch": 0.1362373686541326, - "grad_norm": 0.0019337739795446396, - "learning_rate": 0.00019999999105592436, - "loss": 46.0, - "step": 846 - }, - { - "epoch": 0.13639840573292, - "grad_norm": 0.0006349372561089694, - "learning_rate": 0.00019999999103451424, - "loss": 46.0, - "step": 847 - }, - { - "epoch": 0.1365594428117074, - "grad_norm": 0.0010687074391171336, - "learning_rate": 0.00019999999101307853, - "loss": 46.0, - "step": 848 - }, - { - "epoch": 0.1367204798904948, - "grad_norm": 0.0008426251588389277, - "learning_rate": 0.00019999999099161724, - "loss": 46.0, - "step": 849 - }, - { - "epoch": 0.1368815169692822, - "grad_norm": 0.0006499296869151294, - "learning_rate": 0.00019999999097013036, - "loss": 46.0, - "step": 850 - }, - { - "epoch": 0.13704255404806956, - "grad_norm": 0.0011350644053891301, - "learning_rate": 0.00019999999094861786, - "loss": 46.0, - "step": 851 - }, - { - "epoch": 0.13720359112685696, - "grad_norm": 0.0006878915010020137, - "learning_rate": 0.00019999999092707978, - "loss": 46.0, - "step": 852 - }, - { - "epoch": 0.13736462820564435, - "grad_norm": 0.000902946456335485, - "learning_rate": 0.00019999999090551608, - "loss": 46.0, - "step": 853 - }, - { - "epoch": 0.13752566528443175, - "grad_norm": 0.0003760393592528999, - "learning_rate": 0.00019999999088392682, - "loss": 46.0, - "step": 854 - }, - { - "epoch": 0.13768670236321914, - "grad_norm": 0.0011185002513229847, - "learning_rate": 0.00019999999086231195, - "loss": 46.0, - "step": 855 - }, - { - "epoch": 0.1378477394420065, - "grad_norm": 0.0007393692503683269, - "learning_rate": 0.00019999999084067152, - "loss": 46.0, - "step": 856 - }, - { - "epoch": 0.1380087765207939, - "grad_norm": 0.0006073627737350762, - "learning_rate": 0.00019999999081900545, - "loss": 46.0, - "step": 857 - }, - { - "epoch": 0.1381698135995813, - "grad_norm": 0.0011971439234912395, - "learning_rate": 0.0001999999907973138, - "loss": 46.0, - "step": 858 - }, - { - "epoch": 0.1383308506783687, - "grad_norm": 0.0007797310245223343, - "learning_rate": 0.00019999999077559654, - "loss": 46.0, - "step": 859 - }, - { - "epoch": 0.1384918877571561, - "grad_norm": 0.0002941296552307904, - "learning_rate": 0.0001999999907538537, - "loss": 46.0, - "step": 860 - }, - { - "epoch": 0.13865292483594346, - "grad_norm": 0.0004795315326191485, - "learning_rate": 0.00019999999073208526, - "loss": 46.0, - "step": 861 - }, - { - "epoch": 0.13881396191473086, - "grad_norm": 0.000500028021633625, - "learning_rate": 0.00019999999071029122, - "loss": 46.0, - "step": 862 - }, - { - "epoch": 0.13897499899351826, - "grad_norm": 0.0007521376828663051, - "learning_rate": 0.00019999999068847163, - "loss": 46.0, - "step": 863 - }, - { - "epoch": 0.13913603607230565, - "grad_norm": 0.0016052303835749626, - "learning_rate": 0.0001999999906666264, - "loss": 46.0, - "step": 864 - }, - { - "epoch": 0.13929707315109305, - "grad_norm": 0.0011634090915322304, - "learning_rate": 0.0001999999906447556, - "loss": 46.0, - "step": 865 - }, - { - "epoch": 0.13945811022988042, - "grad_norm": 0.0007818127050995827, - "learning_rate": 0.00019999999062285918, - "loss": 46.0, - "step": 866 - }, - { - "epoch": 0.1396191473086678, - "grad_norm": 0.00027503236196935177, - "learning_rate": 0.00019999999060093718, - "loss": 46.0, - "step": 867 - }, - { - "epoch": 0.1397801843874552, - "grad_norm": 0.0010879694018512964, - "learning_rate": 0.00019999999057898957, - "loss": 46.0, - "step": 868 - }, - { - "epoch": 0.1399412214662426, - "grad_norm": 0.00039905880112200975, - "learning_rate": 0.00019999999055701637, - "loss": 46.0, - "step": 869 - }, - { - "epoch": 0.14010225854503, - "grad_norm": 0.0005459731328301132, - "learning_rate": 0.00019999999053501758, - "loss": 46.0, - "step": 870 - }, - { - "epoch": 0.1402632956238174, - "grad_norm": 0.0011519825784489512, - "learning_rate": 0.0001999999905129932, - "loss": 46.0, - "step": 871 - }, - { - "epoch": 0.14042433270260476, - "grad_norm": 0.000506752694491297, - "learning_rate": 0.00019999999049094322, - "loss": 46.0, - "step": 872 - }, - { - "epoch": 0.14058536978139216, - "grad_norm": 0.0006271168240346014, - "learning_rate": 0.00019999999046886764, - "loss": 46.0, - "step": 873 - }, - { - "epoch": 0.14074640686017956, - "grad_norm": 0.0011113008949905634, - "learning_rate": 0.00019999999044676648, - "loss": 46.0, - "step": 874 - }, - { - "epoch": 0.14090744393896695, - "grad_norm": 0.0007349987863563001, - "learning_rate": 0.00019999999042463973, - "loss": 46.0, - "step": 875 - }, - { - "epoch": 0.14106848101775435, - "grad_norm": 0.0005502145504578948, - "learning_rate": 0.00019999999040248736, - "loss": 46.0, - "step": 876 - }, - { - "epoch": 0.14122951809654172, - "grad_norm": 0.0005725485389120877, - "learning_rate": 0.0001999999903803094, - "loss": 46.0, - "step": 877 - }, - { - "epoch": 0.1413905551753291, - "grad_norm": 0.0006727438885718584, - "learning_rate": 0.00019999999035810585, - "loss": 46.0, - "step": 878 - }, - { - "epoch": 0.1415515922541165, - "grad_norm": 0.00040779856499284506, - "learning_rate": 0.00019999999033587672, - "loss": 46.0, - "step": 879 - }, - { - "epoch": 0.1417126293329039, - "grad_norm": 0.0006494661211036146, - "learning_rate": 0.00019999999031362198, - "loss": 46.0, - "step": 880 - }, - { - "epoch": 0.1418736664116913, - "grad_norm": 0.001008855295367539, - "learning_rate": 0.00019999999029134165, - "loss": 46.0, - "step": 881 - }, - { - "epoch": 0.14203470349047867, - "grad_norm": 0.0003528682282194495, - "learning_rate": 0.0001999999902690357, - "loss": 46.0, - "step": 882 - }, - { - "epoch": 0.14219574056926607, - "grad_norm": 0.001214552205055952, - "learning_rate": 0.00019999999024670418, - "loss": 46.0, - "step": 883 - }, - { - "epoch": 0.14235677764805346, - "grad_norm": 0.0005489051109179854, - "learning_rate": 0.00019999999022434707, - "loss": 46.0, - "step": 884 - }, - { - "epoch": 0.14251781472684086, - "grad_norm": 0.0011040157405659556, - "learning_rate": 0.00019999999020196436, - "loss": 46.0, - "step": 885 - }, - { - "epoch": 0.14267885180562825, - "grad_norm": 0.0006406849715858698, - "learning_rate": 0.00019999999017955605, - "loss": 46.0, - "step": 886 - }, - { - "epoch": 0.14283988888441565, - "grad_norm": 0.0006732973852194846, - "learning_rate": 0.00019999999015712217, - "loss": 46.0, - "step": 887 - }, - { - "epoch": 0.14300092596320302, - "grad_norm": 0.0010554519249126315, - "learning_rate": 0.00019999999013466268, - "loss": 46.0, - "step": 888 - }, - { - "epoch": 0.14316196304199041, - "grad_norm": 0.0007054017623886466, - "learning_rate": 0.00019999999011217757, - "loss": 46.0, - "step": 889 - }, - { - "epoch": 0.1433230001207778, - "grad_norm": 0.0011370867723599076, - "learning_rate": 0.00019999999008966688, - "loss": 46.0, - "step": 890 - }, - { - "epoch": 0.1434840371995652, - "grad_norm": 0.0006134258583188057, - "learning_rate": 0.0001999999900671306, - "loss": 46.0, - "step": 891 - }, - { - "epoch": 0.1436450742783526, - "grad_norm": 0.000407539278967306, - "learning_rate": 0.00019999999004456873, - "loss": 46.0, - "step": 892 - }, - { - "epoch": 0.14380611135713997, - "grad_norm": 0.0005476608639582992, - "learning_rate": 0.00019999999002198125, - "loss": 46.0, - "step": 893 - }, - { - "epoch": 0.14396714843592737, - "grad_norm": 0.0007749724318273365, - "learning_rate": 0.00019999998999936818, - "loss": 46.0, - "step": 894 - }, - { - "epoch": 0.14412818551471476, - "grad_norm": 0.0010029029799625278, - "learning_rate": 0.00019999998997672952, - "loss": 46.0, - "step": 895 - }, - { - "epoch": 0.14428922259350216, - "grad_norm": 0.000552076380699873, - "learning_rate": 0.00019999998995406525, - "loss": 46.0, - "step": 896 - }, - { - "epoch": 0.14445025967228955, - "grad_norm": 0.0011516448576003313, - "learning_rate": 0.00019999998993137542, - "loss": 46.0, - "step": 897 - }, - { - "epoch": 0.14461129675107692, - "grad_norm": 0.0010751265799626708, - "learning_rate": 0.00019999998990865997, - "loss": 46.0, - "step": 898 - }, - { - "epoch": 0.14477233382986432, - "grad_norm": 0.0006675152108073235, - "learning_rate": 0.00019999998988591891, - "loss": 46.0, - "step": 899 - }, - { - "epoch": 0.14493337090865172, - "grad_norm": 0.0003535577270668, - "learning_rate": 0.0001999999898631523, - "loss": 46.0, - "step": 900 - }, - { - "epoch": 0.1450944079874391, - "grad_norm": 0.0012404737062752247, - "learning_rate": 0.0001999999898403601, - "loss": 46.0, - "step": 901 - }, - { - "epoch": 0.1452554450662265, - "grad_norm": 0.0006735806819051504, - "learning_rate": 0.00019999998981754224, - "loss": 46.0, - "step": 902 - }, - { - "epoch": 0.14541648214501388, - "grad_norm": 0.0003508478112053126, - "learning_rate": 0.00019999998979469883, - "loss": 46.0, - "step": 903 - }, - { - "epoch": 0.14557751922380127, - "grad_norm": 0.0003961780748795718, - "learning_rate": 0.0001999999897718298, - "loss": 46.0, - "step": 904 - }, - { - "epoch": 0.14573855630258867, - "grad_norm": 0.0009000244317576289, - "learning_rate": 0.0001999999897489352, - "loss": 46.0, - "step": 905 - }, - { - "epoch": 0.14589959338137606, - "grad_norm": 0.001379994209855795, - "learning_rate": 0.000199999989726015, - "loss": 46.0, - "step": 906 - }, - { - "epoch": 0.14606063046016346, - "grad_norm": 0.0010532027808949351, - "learning_rate": 0.0001999999897030692, - "loss": 46.0, - "step": 907 - }, - { - "epoch": 0.14622166753895086, - "grad_norm": 0.0007648330647498369, - "learning_rate": 0.0001999999896800978, - "loss": 46.0, - "step": 908 - }, - { - "epoch": 0.14638270461773822, - "grad_norm": 0.000269240525085479, - "learning_rate": 0.0001999999896571008, - "loss": 46.0, - "step": 909 - }, - { - "epoch": 0.14654374169652562, - "grad_norm": 0.0006921233725734055, - "learning_rate": 0.0001999999896340782, - "loss": 46.0, - "step": 910 - }, - { - "epoch": 0.14670477877531302, - "grad_norm": 0.0009213497396558523, - "learning_rate": 0.00019999998961103005, - "loss": 46.0, - "step": 911 - }, - { - "epoch": 0.1468658158541004, - "grad_norm": 0.000873835408128798, - "learning_rate": 0.00019999998958795628, - "loss": 46.0, - "step": 912 - }, - { - "epoch": 0.1470268529328878, - "grad_norm": 0.00044415402226150036, - "learning_rate": 0.00019999998956485692, - "loss": 46.0, - "step": 913 - }, - { - "epoch": 0.14718789001167518, - "grad_norm": 0.0006393120274879038, - "learning_rate": 0.00019999998954173194, - "loss": 46.0, - "step": 914 - }, - { - "epoch": 0.14734892709046257, - "grad_norm": 0.0005304512451402843, - "learning_rate": 0.00019999998951858138, - "loss": 46.0, - "step": 915 - }, - { - "epoch": 0.14750996416924997, - "grad_norm": 0.0004301891603972763, - "learning_rate": 0.0001999999894954052, - "loss": 46.0, - "step": 916 - }, - { - "epoch": 0.14767100124803736, - "grad_norm": 0.0005527819157578051, - "learning_rate": 0.0001999999894722035, - "loss": 46.0, - "step": 917 - }, - { - "epoch": 0.14783203832682476, - "grad_norm": 0.0008386385161429644, - "learning_rate": 0.00019999998944897614, - "loss": 46.0, - "step": 918 - }, - { - "epoch": 0.14799307540561213, - "grad_norm": 0.0010009764228016138, - "learning_rate": 0.0001999999894257232, - "loss": 46.0, - "step": 919 - }, - { - "epoch": 0.14815411248439952, - "grad_norm": 0.0014237198047339916, - "learning_rate": 0.00019999998940244468, - "loss": 46.0, - "step": 920 - }, - { - "epoch": 0.14831514956318692, - "grad_norm": 0.0007006244268268347, - "learning_rate": 0.00019999998937914054, - "loss": 46.0, - "step": 921 - }, - { - "epoch": 0.14847618664197432, - "grad_norm": 0.0006828503683209419, - "learning_rate": 0.00019999998935581084, - "loss": 46.0, - "step": 922 - }, - { - "epoch": 0.1486372237207617, - "grad_norm": 0.0011275885626673698, - "learning_rate": 0.00019999998933245553, - "loss": 46.0, - "step": 923 - }, - { - "epoch": 0.1487982607995491, - "grad_norm": 0.0003434883547015488, - "learning_rate": 0.00019999998930907457, - "loss": 46.0, - "step": 924 - }, - { - "epoch": 0.14895929787833648, - "grad_norm": 0.0004500861978158355, - "learning_rate": 0.0001999999892856681, - "loss": 46.0, - "step": 925 - }, - { - "epoch": 0.14912033495712387, - "grad_norm": 0.0009388361359015107, - "learning_rate": 0.00019999998926223598, - "loss": 46.0, - "step": 926 - }, - { - "epoch": 0.14928137203591127, - "grad_norm": 0.0006658335332758725, - "learning_rate": 0.00019999998923877826, - "loss": 46.0, - "step": 927 - }, - { - "epoch": 0.14944240911469867, - "grad_norm": 0.0005450811004266143, - "learning_rate": 0.00019999998921529498, - "loss": 46.0, - "step": 928 - }, - { - "epoch": 0.14960344619348606, - "grad_norm": 0.0003031016094610095, - "learning_rate": 0.00019999998919178612, - "loss": 46.0, - "step": 929 - }, - { - "epoch": 0.14976448327227343, - "grad_norm": 0.0008631189703010023, - "learning_rate": 0.00019999998916825162, - "loss": 46.0, - "step": 930 - }, - { - "epoch": 0.14992552035106083, - "grad_norm": 0.0008258898742496967, - "learning_rate": 0.00019999998914469152, - "loss": 46.0, - "step": 931 - }, - { - "epoch": 0.15008655742984822, - "grad_norm": 0.0003272114263381809, - "learning_rate": 0.00019999998912110587, - "loss": 46.0, - "step": 932 - }, - { - "epoch": 0.15024759450863562, - "grad_norm": 0.00045941982534714043, - "learning_rate": 0.00019999998909749463, - "loss": 46.0, - "step": 933 - }, - { - "epoch": 0.15040863158742301, - "grad_norm": 0.000885982473846525, - "learning_rate": 0.00019999998907385775, - "loss": 46.0, - "step": 934 - }, - { - "epoch": 0.15056966866621038, - "grad_norm": 0.0003253989852964878, - "learning_rate": 0.0001999999890501953, - "loss": 46.0, - "step": 935 - }, - { - "epoch": 0.15073070574499778, - "grad_norm": 0.0009691553423181176, - "learning_rate": 0.00019999998902650725, - "loss": 46.0, - "step": 936 - }, - { - "epoch": 0.15089174282378517, - "grad_norm": 0.0006153081776574254, - "learning_rate": 0.0001999999890027936, - "loss": 46.0, - "step": 937 - }, - { - "epoch": 0.15105277990257257, - "grad_norm": 0.0006243716343306005, - "learning_rate": 0.00019999998897905435, - "loss": 46.0, - "step": 938 - }, - { - "epoch": 0.15121381698135997, - "grad_norm": 0.0013074682792648673, - "learning_rate": 0.00019999998895528953, - "loss": 46.0, - "step": 939 - }, - { - "epoch": 0.15137485406014736, - "grad_norm": 0.0004748936044052243, - "learning_rate": 0.0001999999889314991, - "loss": 46.0, - "step": 940 - }, - { - "epoch": 0.15153589113893473, - "grad_norm": 0.0005445227143354714, - "learning_rate": 0.00019999998890768308, - "loss": 46.0, - "step": 941 - }, - { - "epoch": 0.15169692821772213, - "grad_norm": 0.00029829939012415707, - "learning_rate": 0.00019999998888384145, - "loss": 46.0, - "step": 942 - }, - { - "epoch": 0.15185796529650952, - "grad_norm": 0.0007962873787619174, - "learning_rate": 0.00019999998885997423, - "loss": 46.0, - "step": 943 - }, - { - "epoch": 0.15201900237529692, - "grad_norm": 0.0008284706273116171, - "learning_rate": 0.00019999998883608142, - "loss": 46.0, - "step": 944 - }, - { - "epoch": 0.15218003945408431, - "grad_norm": 0.0007207043236121535, - "learning_rate": 0.00019999998881216303, - "loss": 46.0, - "step": 945 - }, - { - "epoch": 0.15234107653287168, - "grad_norm": 0.0007585242274217308, - "learning_rate": 0.00019999998878821905, - "loss": 46.0, - "step": 946 - }, - { - "epoch": 0.15250211361165908, - "grad_norm": 0.00043909219675697386, - "learning_rate": 0.00019999998876424943, - "loss": 46.0, - "step": 947 - }, - { - "epoch": 0.15266315069044648, - "grad_norm": 0.0002628343063406646, - "learning_rate": 0.00019999998874025424, - "loss": 46.0, - "step": 948 - }, - { - "epoch": 0.15282418776923387, - "grad_norm": 0.0010836570290848613, - "learning_rate": 0.00019999998871623347, - "loss": 46.0, - "step": 949 - }, - { - "epoch": 0.15298522484802127, - "grad_norm": 0.0008976529352366924, - "learning_rate": 0.0001999999886921871, - "loss": 46.0, - "step": 950 - }, - { - "epoch": 0.15314626192680864, - "grad_norm": 0.0010474735172465444, - "learning_rate": 0.00019999998866811512, - "loss": 46.0, - "step": 951 - }, - { - "epoch": 0.15330729900559603, - "grad_norm": 0.00025471372646279633, - "learning_rate": 0.00019999998864401756, - "loss": 46.0, - "step": 952 - }, - { - "epoch": 0.15346833608438343, - "grad_norm": 0.00031341315479949117, - "learning_rate": 0.00019999998861989438, - "loss": 46.0, - "step": 953 - }, - { - "epoch": 0.15362937316317082, - "grad_norm": 0.0015352104092016816, - "learning_rate": 0.00019999998859574562, - "loss": 46.0, - "step": 954 - }, - { - "epoch": 0.15379041024195822, - "grad_norm": 0.0009060533484444022, - "learning_rate": 0.00019999998857157128, - "loss": 46.0, - "step": 955 - }, - { - "epoch": 0.1539514473207456, - "grad_norm": 0.00029884095420129597, - "learning_rate": 0.00019999998854737132, - "loss": 46.0, - "step": 956 - }, - { - "epoch": 0.15411248439953298, - "grad_norm": 0.0008722096681594849, - "learning_rate": 0.0001999999885231458, - "loss": 46.0, - "step": 957 - }, - { - "epoch": 0.15427352147832038, - "grad_norm": 0.00042822243995033205, - "learning_rate": 0.00019999998849889463, - "loss": 46.0, - "step": 958 - }, - { - "epoch": 0.15443455855710778, - "grad_norm": 0.0003030253865290433, - "learning_rate": 0.0001999999884746179, - "loss": 46.0, - "step": 959 - }, - { - "epoch": 0.15459559563589517, - "grad_norm": 0.0005117275868542492, - "learning_rate": 0.00019999998845031557, - "loss": 46.0, - "step": 960 - }, - { - "epoch": 0.15475663271468257, - "grad_norm": 0.0003974712744820863, - "learning_rate": 0.00019999998842598764, - "loss": 46.0, - "step": 961 - }, - { - "epoch": 0.15491766979346994, - "grad_norm": 0.0008037528023123741, - "learning_rate": 0.00019999998840163413, - "loss": 46.0, - "step": 962 - }, - { - "epoch": 0.15507870687225733, - "grad_norm": 0.0004638684040401131, - "learning_rate": 0.00019999998837725503, - "loss": 46.0, - "step": 963 - }, - { - "epoch": 0.15523974395104473, - "grad_norm": 0.0005257294396869838, - "learning_rate": 0.00019999998835285032, - "loss": 46.0, - "step": 964 - }, - { - "epoch": 0.15540078102983212, - "grad_norm": 0.0006085250061005354, - "learning_rate": 0.00019999998832842002, - "loss": 46.0, - "step": 965 - }, - { - "epoch": 0.15556181810861952, - "grad_norm": 0.0010381838073953986, - "learning_rate": 0.00019999998830396413, - "loss": 46.0, - "step": 966 - }, - { - "epoch": 0.1557228551874069, - "grad_norm": 0.001192597672343254, - "learning_rate": 0.00019999998827948263, - "loss": 46.0, - "step": 967 - }, - { - "epoch": 0.15588389226619428, - "grad_norm": 0.0005393070168793201, - "learning_rate": 0.00019999998825497554, - "loss": 46.0, - "step": 968 - }, - { - "epoch": 0.15604492934498168, - "grad_norm": 0.0009535938152112067, - "learning_rate": 0.00019999998823044287, - "loss": 46.0, - "step": 969 - }, - { - "epoch": 0.15620596642376908, - "grad_norm": 0.001277977367863059, - "learning_rate": 0.0001999999882058846, - "loss": 46.0, - "step": 970 - }, - { - "epoch": 0.15636700350255647, - "grad_norm": 0.000667413289193064, - "learning_rate": 0.0001999999881813007, - "loss": 46.0, - "step": 971 - }, - { - "epoch": 0.15652804058134384, - "grad_norm": 0.0013546354603022337, - "learning_rate": 0.00019999998815669126, - "loss": 46.0, - "step": 972 - }, - { - "epoch": 0.15668907766013124, - "grad_norm": 0.0005856928182765841, - "learning_rate": 0.00019999998813205618, - "loss": 46.0, - "step": 973 - }, - { - "epoch": 0.15685011473891863, - "grad_norm": 0.0004939138889312744, - "learning_rate": 0.00019999998810739554, - "loss": 46.0, - "step": 974 - }, - { - "epoch": 0.15701115181770603, - "grad_norm": 0.0005340013885870576, - "learning_rate": 0.0001999999880827093, - "loss": 46.0, - "step": 975 - }, - { - "epoch": 0.15717218889649343, - "grad_norm": 0.00032319730962626636, - "learning_rate": 0.00019999998805799742, - "loss": 46.0, - "step": 976 - }, - { - "epoch": 0.15733322597528082, - "grad_norm": 0.0008735068258829415, - "learning_rate": 0.00019999998803326002, - "loss": 46.0, - "step": 977 - }, - { - "epoch": 0.1574942630540682, - "grad_norm": 0.0009990877006202936, - "learning_rate": 0.00019999998800849698, - "loss": 46.0, - "step": 978 - }, - { - "epoch": 0.15765530013285559, - "grad_norm": 0.00027033319929614663, - "learning_rate": 0.00019999998798370835, - "loss": 46.0, - "step": 979 - }, - { - "epoch": 0.15781633721164298, - "grad_norm": 0.0009129714453592896, - "learning_rate": 0.00019999998795889413, - "loss": 46.0, - "step": 980 - }, - { - "epoch": 0.15797737429043038, - "grad_norm": 0.0018556106369942427, - "learning_rate": 0.0001999999879340543, - "loss": 46.0, - "step": 981 - }, - { - "epoch": 0.15813841136921777, - "grad_norm": 0.000484311836771667, - "learning_rate": 0.00019999998790918888, - "loss": 46.0, - "step": 982 - }, - { - "epoch": 0.15829944844800514, - "grad_norm": 0.0010563847608864307, - "learning_rate": 0.00019999998788429788, - "loss": 46.0, - "step": 983 - }, - { - "epoch": 0.15846048552679254, - "grad_norm": 0.0013567782007157803, - "learning_rate": 0.00019999998785938129, - "loss": 46.0, - "step": 984 - }, - { - "epoch": 0.15862152260557993, - "grad_norm": 0.0008116239914670587, - "learning_rate": 0.00019999998783443905, - "loss": 46.0, - "step": 985 - }, - { - "epoch": 0.15878255968436733, - "grad_norm": 0.00039403728442266583, - "learning_rate": 0.00019999998780947126, - "loss": 46.0, - "step": 986 - }, - { - "epoch": 0.15894359676315473, - "grad_norm": 0.0007989925798028708, - "learning_rate": 0.00019999998778447788, - "loss": 46.0, - "step": 987 - }, - { - "epoch": 0.1591046338419421, - "grad_norm": 0.00031061729532666504, - "learning_rate": 0.00019999998775945888, - "loss": 46.0, - "step": 988 - }, - { - "epoch": 0.1592656709207295, - "grad_norm": 0.0004982459358870983, - "learning_rate": 0.0001999999877344143, - "loss": 46.0, - "step": 989 - }, - { - "epoch": 0.1594267079995169, - "grad_norm": 0.0007720077410340309, - "learning_rate": 0.00019999998770934413, - "loss": 46.0, - "step": 990 - }, - { - "epoch": 0.15958774507830428, - "grad_norm": 0.0003660463262349367, - "learning_rate": 0.0001999999876842484, - "loss": 46.0, - "step": 991 - }, - { - "epoch": 0.15974878215709168, - "grad_norm": 0.0008429509471170604, - "learning_rate": 0.00019999998765912703, - "loss": 46.0, - "step": 992 - }, - { - "epoch": 0.15990981923587907, - "grad_norm": 0.0009334971546195447, - "learning_rate": 0.00019999998763398007, - "loss": 46.0, - "step": 993 - }, - { - "epoch": 0.16007085631466644, - "grad_norm": 0.00027421597042120993, - "learning_rate": 0.0001999999876088075, - "loss": 46.0, - "step": 994 - }, - { - "epoch": 0.16023189339345384, - "grad_norm": 0.0013092216104269028, - "learning_rate": 0.00019999998758360937, - "loss": 46.0, - "step": 995 - }, - { - "epoch": 0.16039293047224124, - "grad_norm": 0.0007798975566402078, - "learning_rate": 0.0001999999875583856, - "loss": 46.0, - "step": 996 - }, - { - "epoch": 0.16055396755102863, - "grad_norm": 0.00038831288111396134, - "learning_rate": 0.0001999999875331363, - "loss": 46.0, - "step": 997 - }, - { - "epoch": 0.16071500462981603, - "grad_norm": 0.00036058248952031136, - "learning_rate": 0.00019999998750786137, - "loss": 46.0, - "step": 998 - }, - { - "epoch": 0.1608760417086034, - "grad_norm": 0.00032413488952443004, - "learning_rate": 0.0001999999874825608, - "loss": 46.0, - "step": 999 - }, - { - "epoch": 0.1610370787873908, - "grad_norm": 0.0008596943807788193, - "learning_rate": 0.0001999999874572347, - "loss": 46.0, - "step": 1000 - }, - { - "epoch": 0.1611981158661782, - "grad_norm": 0.0003194579330738634, - "learning_rate": 0.00019999998743188297, - "loss": 46.0, - "step": 1001 - }, - { - "epoch": 0.16135915294496558, - "grad_norm": 0.00046182089135982096, - "learning_rate": 0.00019999998740650567, - "loss": 46.0, - "step": 1002 - }, - { - "epoch": 0.16152019002375298, - "grad_norm": 0.0010411414550617337, - "learning_rate": 0.00019999998738110274, - "loss": 46.0, - "step": 1003 - }, - { - "epoch": 0.16168122710254035, - "grad_norm": 0.0009827236644923687, - "learning_rate": 0.00019999998735567424, - "loss": 46.0, - "step": 1004 - }, - { - "epoch": 0.16184226418132774, - "grad_norm": 0.0006024896283634007, - "learning_rate": 0.00019999998733022015, - "loss": 46.0, - "step": 1005 - }, - { - "epoch": 0.16200330126011514, - "grad_norm": 0.00046483069309033453, - "learning_rate": 0.00019999998730474048, - "loss": 46.0, - "step": 1006 - }, - { - "epoch": 0.16216433833890254, - "grad_norm": 0.0009994750143960118, - "learning_rate": 0.0001999999872792352, - "loss": 46.0, - "step": 1007 - }, - { - "epoch": 0.16232537541768993, - "grad_norm": 0.0002608858922030777, - "learning_rate": 0.0001999999872537043, - "loss": 46.0, - "step": 1008 - }, - { - "epoch": 0.1624864124964773, - "grad_norm": 0.0008573723607696593, - "learning_rate": 0.00019999998722814783, - "loss": 46.0, - "step": 1009 - }, - { - "epoch": 0.1626474495752647, - "grad_norm": 0.00032435834873467684, - "learning_rate": 0.00019999998720256575, - "loss": 46.0, - "step": 1010 - }, - { - "epoch": 0.1628084866540521, - "grad_norm": 0.0008361474028788507, - "learning_rate": 0.0001999999871769581, - "loss": 46.0, - "step": 1011 - }, - { - "epoch": 0.1629695237328395, - "grad_norm": 0.0008367006666958332, - "learning_rate": 0.00019999998715132482, - "loss": 46.0, - "step": 1012 - }, - { - "epoch": 0.16313056081162688, - "grad_norm": 0.0007053214940242469, - "learning_rate": 0.00019999998712566598, - "loss": 46.0, - "step": 1013 - }, - { - "epoch": 0.16329159789041428, - "grad_norm": 0.000897809921298176, - "learning_rate": 0.00019999998709998153, - "loss": 46.0, - "step": 1014 - }, - { - "epoch": 0.16345263496920165, - "grad_norm": 0.0006495975540019572, - "learning_rate": 0.0001999999870742715, - "loss": 46.0, - "step": 1015 - }, - { - "epoch": 0.16361367204798904, - "grad_norm": 0.0011052775662392378, - "learning_rate": 0.00019999998704853584, - "loss": 46.0, - "step": 1016 - }, - { - "epoch": 0.16377470912677644, - "grad_norm": 0.001142477267421782, - "learning_rate": 0.00019999998702277462, - "loss": 46.0, - "step": 1017 - }, - { - "epoch": 0.16393574620556384, - "grad_norm": 0.00043422140879556537, - "learning_rate": 0.0001999999869969878, - "loss": 46.0, - "step": 1018 - }, - { - "epoch": 0.16409678328435123, - "grad_norm": 0.0006619326304644346, - "learning_rate": 0.00019999998697117538, - "loss": 46.0, - "step": 1019 - }, - { - "epoch": 0.1642578203631386, - "grad_norm": 0.0006206837715581059, - "learning_rate": 0.00019999998694533733, - "loss": 46.0, - "step": 1020 - }, - { - "epoch": 0.164418857441926, - "grad_norm": 0.0008341065840795636, - "learning_rate": 0.00019999998691947374, - "loss": 46.0, - "step": 1021 - }, - { - "epoch": 0.1645798945207134, - "grad_norm": 0.00038806578959338367, - "learning_rate": 0.0001999999868935845, - "loss": 46.0, - "step": 1022 - }, - { - "epoch": 0.1647409315995008, - "grad_norm": 0.0005078114336356521, - "learning_rate": 0.00019999998686766972, - "loss": 46.0, - "step": 1023 - }, - { - "epoch": 0.16490196867828819, - "grad_norm": 0.0003059906011912972, - "learning_rate": 0.0001999999868417293, - "loss": 46.0, - "step": 1024 - }, - { - "epoch": 0.16506300575707555, - "grad_norm": 0.0013124411925673485, - "learning_rate": 0.00019999998681576332, - "loss": 46.0, - "step": 1025 - }, - { - "epoch": 0.16522404283586295, - "grad_norm": 0.00030253586010076106, - "learning_rate": 0.00019999998678977172, - "loss": 46.0, - "step": 1026 - }, - { - "epoch": 0.16538507991465035, - "grad_norm": 0.0005021268152631819, - "learning_rate": 0.00019999998676375452, - "loss": 46.0, - "step": 1027 - }, - { - "epoch": 0.16554611699343774, - "grad_norm": 0.0009989166865125299, - "learning_rate": 0.00019999998673771177, - "loss": 46.0, - "step": 1028 - }, - { - "epoch": 0.16570715407222514, - "grad_norm": 0.001155222998932004, - "learning_rate": 0.0001999999867116434, - "loss": 46.0, - "step": 1029 - }, - { - "epoch": 0.16586819115101253, - "grad_norm": 0.0005994467646814883, - "learning_rate": 0.00019999998668554942, - "loss": 46.0, - "step": 1030 - }, - { - "epoch": 0.1660292282297999, - "grad_norm": 0.0009421958820894361, - "learning_rate": 0.00019999998665942985, - "loss": 46.0, - "step": 1031 - }, - { - "epoch": 0.1661902653085873, - "grad_norm": 0.0006251431186683476, - "learning_rate": 0.00019999998663328472, - "loss": 46.0, - "step": 1032 - }, - { - "epoch": 0.1663513023873747, - "grad_norm": 0.0008859604713506997, - "learning_rate": 0.00019999998660711395, - "loss": 46.0, - "step": 1033 - }, - { - "epoch": 0.1665123394661621, - "grad_norm": 0.0003262088866904378, - "learning_rate": 0.0001999999865809176, - "loss": 46.0, - "step": 1034 - }, - { - "epoch": 0.1666733765449495, - "grad_norm": 0.0009352091583423316, - "learning_rate": 0.00019999998655469568, - "loss": 46.0, - "step": 1035 - }, - { - "epoch": 0.16683441362373685, - "grad_norm": 0.0007035856833681464, - "learning_rate": 0.00019999998652844815, - "loss": 46.0, - "step": 1036 - }, - { - "epoch": 0.16699545070252425, - "grad_norm": 0.00041801491170190275, - "learning_rate": 0.000199999986502175, - "loss": 46.0, - "step": 1037 - }, - { - "epoch": 0.16715648778131165, - "grad_norm": 0.0001697216648608446, - "learning_rate": 0.00019999998647587627, - "loss": 46.0, - "step": 1038 - }, - { - "epoch": 0.16731752486009904, - "grad_norm": 0.0004610596806742251, - "learning_rate": 0.00019999998644955195, - "loss": 46.0, - "step": 1039 - }, - { - "epoch": 0.16747856193888644, - "grad_norm": 0.0004097422061022371, - "learning_rate": 0.00019999998642320204, - "loss": 46.0, - "step": 1040 - }, - { - "epoch": 0.1676395990176738, - "grad_norm": 0.0007244375301524997, - "learning_rate": 0.00019999998639682654, - "loss": 46.0, - "step": 1041 - }, - { - "epoch": 0.1678006360964612, - "grad_norm": 0.0004293345846235752, - "learning_rate": 0.00019999998637042544, - "loss": 46.0, - "step": 1042 - }, - { - "epoch": 0.1679616731752486, - "grad_norm": 0.0008935158839449286, - "learning_rate": 0.0001999999863439987, - "loss": 46.0, - "step": 1043 - }, - { - "epoch": 0.168122710254036, - "grad_norm": 0.0006440468714572489, - "learning_rate": 0.00019999998631754643, - "loss": 46.0, - "step": 1044 - }, - { - "epoch": 0.1682837473328234, - "grad_norm": 0.0007334767142310739, - "learning_rate": 0.00019999998629106853, - "loss": 46.0, - "step": 1045 - }, - { - "epoch": 0.16844478441161076, - "grad_norm": 0.0005875652423128486, - "learning_rate": 0.00019999998626456505, - "loss": 46.0, - "step": 1046 - }, - { - "epoch": 0.16860582149039816, - "grad_norm": 0.0005739011103287339, - "learning_rate": 0.00019999998623803595, - "loss": 46.0, - "step": 1047 - }, - { - "epoch": 0.16876685856918555, - "grad_norm": 0.0009351703920401633, - "learning_rate": 0.0001999999862114813, - "loss": 46.0, - "step": 1048 - }, - { - "epoch": 0.16892789564797295, - "grad_norm": 0.0002408776490483433, - "learning_rate": 0.00019999998618490102, - "loss": 46.0, - "step": 1049 - }, - { - "epoch": 0.16908893272676034, - "grad_norm": 0.0005334711167961359, - "learning_rate": 0.00019999998615829515, - "loss": 46.0, - "step": 1050 - }, - { - "epoch": 0.16924996980554774, - "grad_norm": 0.00042552888044156134, - "learning_rate": 0.00019999998613166368, - "loss": 46.0, - "step": 1051 - }, - { - "epoch": 0.1694110068843351, - "grad_norm": 0.0005725707160308957, - "learning_rate": 0.00019999998610500662, - "loss": 46.0, - "step": 1052 - }, - { - "epoch": 0.1695720439631225, - "grad_norm": 0.0008814763859845698, - "learning_rate": 0.00019999998607832397, - "loss": 46.0, - "step": 1053 - }, - { - "epoch": 0.1697330810419099, - "grad_norm": 0.0015081973979249597, - "learning_rate": 0.00019999998605161573, - "loss": 46.0, - "step": 1054 - }, - { - "epoch": 0.1698941181206973, - "grad_norm": 0.0009094245033338666, - "learning_rate": 0.0001999999860248819, - "loss": 46.0, - "step": 1055 - }, - { - "epoch": 0.1700551551994847, - "grad_norm": 0.0003089603269472718, - "learning_rate": 0.00019999998599812247, - "loss": 46.0, - "step": 1056 - }, - { - "epoch": 0.17021619227827206, - "grad_norm": 0.0007297676638700068, - "learning_rate": 0.00019999998597133742, - "loss": 46.0, - "step": 1057 - }, - { - "epoch": 0.17037722935705946, - "grad_norm": 0.000567425973713398, - "learning_rate": 0.00019999998594452683, - "loss": 46.0, - "step": 1058 - }, - { - "epoch": 0.17053826643584685, - "grad_norm": 0.0005964718293398619, - "learning_rate": 0.0001999999859176906, - "loss": 46.0, - "step": 1059 - }, - { - "epoch": 0.17069930351463425, - "grad_norm": 0.0003678709326777607, - "learning_rate": 0.00019999998589082877, - "loss": 46.0, - "step": 1060 - }, - { - "epoch": 0.17086034059342164, - "grad_norm": 0.0006606096285395324, - "learning_rate": 0.00019999998586394137, - "loss": 46.0, - "step": 1061 - }, - { - "epoch": 0.171021377672209, - "grad_norm": 0.0007891838322393596, - "learning_rate": 0.00019999998583702838, - "loss": 46.0, - "step": 1062 - }, - { - "epoch": 0.1711824147509964, - "grad_norm": 0.0009180408087559044, - "learning_rate": 0.00019999998581008978, - "loss": 46.0, - "step": 1063 - }, - { - "epoch": 0.1713434518297838, - "grad_norm": 0.0003136332961730659, - "learning_rate": 0.0001999999857831256, - "loss": 46.0, - "step": 1064 - }, - { - "epoch": 0.1715044889085712, - "grad_norm": 0.0006892234669066966, - "learning_rate": 0.00019999998575613578, - "loss": 46.0, - "step": 1065 - }, - { - "epoch": 0.1716655259873586, - "grad_norm": 0.0008585926843807101, - "learning_rate": 0.00019999998572912042, - "loss": 46.0, - "step": 1066 - }, - { - "epoch": 0.171826563066146, - "grad_norm": 0.000655372510664165, - "learning_rate": 0.00019999998570207944, - "loss": 46.0, - "step": 1067 - }, - { - "epoch": 0.17198760014493336, - "grad_norm": 0.0003225669206585735, - "learning_rate": 0.00019999998567501285, - "loss": 46.0, - "step": 1068 - }, - { - "epoch": 0.17214863722372076, - "grad_norm": 0.0006178176845423877, - "learning_rate": 0.0001999999856479207, - "loss": 46.0, - "step": 1069 - }, - { - "epoch": 0.17230967430250815, - "grad_norm": 0.00044223055010661483, - "learning_rate": 0.00019999998562080293, - "loss": 46.0, - "step": 1070 - }, - { - "epoch": 0.17247071138129555, - "grad_norm": 0.0013964077224954963, - "learning_rate": 0.00019999998559365958, - "loss": 46.0, - "step": 1071 - }, - { - "epoch": 0.17263174846008295, - "grad_norm": 0.001068666111677885, - "learning_rate": 0.0001999999855664906, - "loss": 46.0, - "step": 1072 - }, - { - "epoch": 0.1727927855388703, - "grad_norm": 0.0015525006456300616, - "learning_rate": 0.00019999998553929605, - "loss": 46.0, - "step": 1073 - }, - { - "epoch": 0.1729538226176577, - "grad_norm": 0.00045818460057489574, - "learning_rate": 0.00019999998551207594, - "loss": 46.0, - "step": 1074 - }, - { - "epoch": 0.1731148596964451, - "grad_norm": 0.0006378164980560541, - "learning_rate": 0.00019999998548483018, - "loss": 46.0, - "step": 1075 - }, - { - "epoch": 0.1732758967752325, - "grad_norm": 0.0015530576929450035, - "learning_rate": 0.00019999998545755884, - "loss": 46.0, - "step": 1076 - }, - { - "epoch": 0.1734369338540199, - "grad_norm": 0.0005802001105621457, - "learning_rate": 0.00019999998543026193, - "loss": 46.0, - "step": 1077 - }, - { - "epoch": 0.17359797093280727, - "grad_norm": 0.0009448960772715509, - "learning_rate": 0.00019999998540293942, - "loss": 46.0, - "step": 1078 - }, - { - "epoch": 0.17375900801159466, - "grad_norm": 0.0003484654880594462, - "learning_rate": 0.0001999999853755913, - "loss": 46.0, - "step": 1079 - }, - { - "epoch": 0.17392004509038206, - "grad_norm": 0.000427607330493629, - "learning_rate": 0.0001999999853482176, - "loss": 46.0, - "step": 1080 - }, - { - "epoch": 0.17408108216916945, - "grad_norm": 0.0005763288354501128, - "learning_rate": 0.00019999998532081828, - "loss": 46.0, - "step": 1081 - }, - { - "epoch": 0.17424211924795685, - "grad_norm": 0.000552642741240561, - "learning_rate": 0.00019999998529339336, - "loss": 46.0, - "step": 1082 - }, - { - "epoch": 0.17440315632674425, - "grad_norm": 0.0006849284982308745, - "learning_rate": 0.00019999998526594288, - "loss": 46.0, - "step": 1083 - }, - { - "epoch": 0.17456419340553161, - "grad_norm": 0.0011226956266909838, - "learning_rate": 0.0001999999852384668, - "loss": 46.0, - "step": 1084 - }, - { - "epoch": 0.174725230484319, - "grad_norm": 0.00029608685872517526, - "learning_rate": 0.0001999999852109651, - "loss": 46.0, - "step": 1085 - }, - { - "epoch": 0.1748862675631064, - "grad_norm": 0.00017822741938289255, - "learning_rate": 0.0001999999851834378, - "loss": 46.0, - "step": 1086 - }, - { - "epoch": 0.1750473046418938, - "grad_norm": 0.0009377987589687109, - "learning_rate": 0.00019999998515588495, - "loss": 46.0, - "step": 1087 - }, - { - "epoch": 0.1752083417206812, - "grad_norm": 0.001035149092786014, - "learning_rate": 0.00019999998512830648, - "loss": 46.0, - "step": 1088 - }, - { - "epoch": 0.17536937879946857, - "grad_norm": 0.0003625023819040507, - "learning_rate": 0.00019999998510070242, - "loss": 46.0, - "step": 1089 - }, - { - "epoch": 0.17553041587825596, - "grad_norm": 0.0008139334968291223, - "learning_rate": 0.00019999998507307275, - "loss": 46.0, - "step": 1090 - }, - { - "epoch": 0.17569145295704336, - "grad_norm": 0.00025367780472151935, - "learning_rate": 0.00019999998504541752, - "loss": 46.0, - "step": 1091 - }, - { - "epoch": 0.17585249003583076, - "grad_norm": 0.000908449525013566, - "learning_rate": 0.00019999998501773664, - "loss": 46.0, - "step": 1092 - }, - { - "epoch": 0.17601352711461815, - "grad_norm": 0.0009912943933159113, - "learning_rate": 0.0001999999849900302, - "loss": 46.0, - "step": 1093 - }, - { - "epoch": 0.17617456419340552, - "grad_norm": 0.0005765684763900936, - "learning_rate": 0.00019999998496229816, - "loss": 46.0, - "step": 1094 - }, - { - "epoch": 0.17633560127219292, - "grad_norm": 0.0009983826894313097, - "learning_rate": 0.00019999998493454052, - "loss": 46.0, - "step": 1095 - }, - { - "epoch": 0.1764966383509803, - "grad_norm": 0.001541944220662117, - "learning_rate": 0.0001999999849067573, - "loss": 46.0, - "step": 1096 - }, - { - "epoch": 0.1766576754297677, - "grad_norm": 0.0009314665803685784, - "learning_rate": 0.0001999999848789485, - "loss": 46.0, - "step": 1097 - }, - { - "epoch": 0.1768187125085551, - "grad_norm": 0.0005268935929052532, - "learning_rate": 0.00019999998485111407, - "loss": 46.0, - "step": 1098 - }, - { - "epoch": 0.17697974958734247, - "grad_norm": 0.0011514154030010104, - "learning_rate": 0.00019999998482325405, - "loss": 46.0, - "step": 1099 - }, - { - "epoch": 0.17714078666612987, - "grad_norm": 0.0004244883602950722, - "learning_rate": 0.00019999998479536843, - "loss": 46.0, - "step": 1100 - }, - { - "epoch": 0.17730182374491726, - "grad_norm": 0.001184743014164269, - "learning_rate": 0.00019999998476745722, - "loss": 46.0, - "step": 1101 - }, - { - "epoch": 0.17746286082370466, - "grad_norm": 0.0012123126070946455, - "learning_rate": 0.00019999998473952044, - "loss": 46.0, - "step": 1102 - }, - { - "epoch": 0.17762389790249206, - "grad_norm": 0.000795738014858216, - "learning_rate": 0.00019999998471155803, - "loss": 46.0, - "step": 1103 - }, - { - "epoch": 0.17778493498127945, - "grad_norm": 0.0005148123600520194, - "learning_rate": 0.00019999998468357005, - "loss": 46.0, - "step": 1104 - }, - { - "epoch": 0.17794597206006682, - "grad_norm": 0.0003261953534092754, - "learning_rate": 0.00019999998465555647, - "loss": 46.0, - "step": 1105 - }, - { - "epoch": 0.17810700913885422, - "grad_norm": 0.0004914383171126246, - "learning_rate": 0.0001999999846275173, - "loss": 46.0, - "step": 1106 - }, - { - "epoch": 0.1782680462176416, - "grad_norm": 0.0007221323903650045, - "learning_rate": 0.00019999998459945253, - "loss": 46.0, - "step": 1107 - }, - { - "epoch": 0.178429083296429, - "grad_norm": 0.0003358956892043352, - "learning_rate": 0.00019999998457136215, - "loss": 46.0, - "step": 1108 - }, - { - "epoch": 0.1785901203752164, - "grad_norm": 0.0006616240134462714, - "learning_rate": 0.0001999999845432462, - "loss": 46.0, - "step": 1109 - }, - { - "epoch": 0.17875115745400377, - "grad_norm": 0.0006990287220105529, - "learning_rate": 0.00019999998451510463, - "loss": 46.0, - "step": 1110 - }, - { - "epoch": 0.17891219453279117, - "grad_norm": 0.0008044660207815468, - "learning_rate": 0.0001999999844869375, - "loss": 46.0, - "step": 1111 - }, - { - "epoch": 0.17907323161157856, - "grad_norm": 0.00043473043479025364, - "learning_rate": 0.00019999998445874474, - "loss": 46.0, - "step": 1112 - }, - { - "epoch": 0.17923426869036596, - "grad_norm": 0.0006207488477230072, - "learning_rate": 0.00019999998443052637, - "loss": 46.0, - "step": 1113 - }, - { - "epoch": 0.17939530576915336, - "grad_norm": 0.0006231630686670542, - "learning_rate": 0.00019999998440228245, - "loss": 46.0, - "step": 1114 - }, - { - "epoch": 0.17955634284794073, - "grad_norm": 0.0007432869169861078, - "learning_rate": 0.00019999998437401293, - "loss": 46.0, - "step": 1115 - }, - { - "epoch": 0.17971737992672812, - "grad_norm": 0.0006049188668839633, - "learning_rate": 0.0001999999843457178, - "loss": 46.0, - "step": 1116 - }, - { - "epoch": 0.17987841700551552, - "grad_norm": 0.000956145056989044, - "learning_rate": 0.0001999999843173971, - "loss": 46.0, - "step": 1117 - }, - { - "epoch": 0.1800394540843029, - "grad_norm": 0.0004712227964773774, - "learning_rate": 0.00019999998428905076, - "loss": 46.0, - "step": 1118 - }, - { - "epoch": 0.1802004911630903, - "grad_norm": 0.0006539911846630275, - "learning_rate": 0.00019999998426067884, - "loss": 46.0, - "step": 1119 - }, - { - "epoch": 0.1803615282418777, - "grad_norm": 0.001276426948606968, - "learning_rate": 0.00019999998423228134, - "loss": 46.0, - "step": 1120 - }, - { - "epoch": 0.18052256532066507, - "grad_norm": 0.001008278806693852, - "learning_rate": 0.00019999998420385824, - "loss": 46.0, - "step": 1121 - }, - { - "epoch": 0.18068360239945247, - "grad_norm": 0.001220381585881114, - "learning_rate": 0.00019999998417540954, - "loss": 46.0, - "step": 1122 - }, - { - "epoch": 0.18084463947823987, - "grad_norm": 0.0005268471431918442, - "learning_rate": 0.00019999998414693527, - "loss": 46.0, - "step": 1123 - }, - { - "epoch": 0.18100567655702726, - "grad_norm": 0.002254362916573882, - "learning_rate": 0.00019999998411843536, - "loss": 46.0, - "step": 1124 - }, - { - "epoch": 0.18116671363581466, - "grad_norm": 0.0006368353497236967, - "learning_rate": 0.0001999999840899099, - "loss": 46.0, - "step": 1125 - }, - { - "epoch": 0.18132775071460203, - "grad_norm": 0.0004503419331740588, - "learning_rate": 0.00019999998406135881, - "loss": 46.0, - "step": 1126 - }, - { - "epoch": 0.18148878779338942, - "grad_norm": 0.00037043457268737257, - "learning_rate": 0.00019999998403278215, - "loss": 46.0, - "step": 1127 - }, - { - "epoch": 0.18164982487217682, - "grad_norm": 0.0008587737684138119, - "learning_rate": 0.00019999998400417986, - "loss": 46.0, - "step": 1128 - }, - { - "epoch": 0.18181086195096421, - "grad_norm": 0.0005351615254767239, - "learning_rate": 0.00019999998397555202, - "loss": 46.0, - "step": 1129 - }, - { - "epoch": 0.1819718990297516, - "grad_norm": 0.0004402895283419639, - "learning_rate": 0.0001999999839468986, - "loss": 46.0, - "step": 1130 - }, - { - "epoch": 0.18213293610853898, - "grad_norm": 0.0004181397089269012, - "learning_rate": 0.00019999998391821952, - "loss": 46.0, - "step": 1131 - }, - { - "epoch": 0.18229397318732637, - "grad_norm": 0.0014928559539839625, - "learning_rate": 0.00019999998388951488, - "loss": 46.0, - "step": 1132 - }, - { - "epoch": 0.18245501026611377, - "grad_norm": 0.0009738048538565636, - "learning_rate": 0.00019999998386078464, - "loss": 46.0, - "step": 1133 - }, - { - "epoch": 0.18261604734490117, - "grad_norm": 0.000441311567556113, - "learning_rate": 0.0001999999838320288, - "loss": 46.0, - "step": 1134 - }, - { - "epoch": 0.18277708442368856, - "grad_norm": 0.00042914546793326735, - "learning_rate": 0.00019999998380324738, - "loss": 46.0, - "step": 1135 - }, - { - "epoch": 0.18293812150247596, - "grad_norm": 0.001419546315446496, - "learning_rate": 0.00019999998377444035, - "loss": 46.0, - "step": 1136 - }, - { - "epoch": 0.18309915858126333, - "grad_norm": 0.0010176844662055373, - "learning_rate": 0.00019999998374560773, - "loss": 46.0, - "step": 1137 - }, - { - "epoch": 0.18326019566005072, - "grad_norm": 0.00042564354953356087, - "learning_rate": 0.00019999998371674952, - "loss": 46.0, - "step": 1138 - }, - { - "epoch": 0.18342123273883812, - "grad_norm": 0.000891973206307739, - "learning_rate": 0.0001999999836878657, - "loss": 46.0, - "step": 1139 - }, - { - "epoch": 0.18358226981762552, - "grad_norm": 0.0004178571398369968, - "learning_rate": 0.0001999999836589563, - "loss": 46.0, - "step": 1140 - }, - { - "epoch": 0.1837433068964129, - "grad_norm": 0.001021531643345952, - "learning_rate": 0.0001999999836300213, - "loss": 46.0, - "step": 1141 - }, - { - "epoch": 0.18390434397520028, - "grad_norm": 0.0007500174688175321, - "learning_rate": 0.00019999998360106073, - "loss": 46.0, - "step": 1142 - }, - { - "epoch": 0.18406538105398768, - "grad_norm": 0.000505536503624171, - "learning_rate": 0.00019999998357207456, - "loss": 46.0, - "step": 1143 - }, - { - "epoch": 0.18422641813277507, - "grad_norm": 0.0006568719400092959, - "learning_rate": 0.00019999998354306274, - "loss": 46.0, - "step": 1144 - }, - { - "epoch": 0.18438745521156247, - "grad_norm": 0.0003531081310939044, - "learning_rate": 0.00019999998351402537, - "loss": 46.0, - "step": 1145 - }, - { - "epoch": 0.18454849229034986, - "grad_norm": 0.0006274929619394243, - "learning_rate": 0.0001999999834849624, - "loss": 46.0, - "step": 1146 - }, - { - "epoch": 0.18470952936913723, - "grad_norm": 0.001293198554776609, - "learning_rate": 0.00019999998345587386, - "loss": 46.0, - "step": 1147 - }, - { - "epoch": 0.18487056644792463, - "grad_norm": 0.0007399072637781501, - "learning_rate": 0.00019999998342675967, - "loss": 46.0, - "step": 1148 - }, - { - "epoch": 0.18503160352671202, - "grad_norm": 0.0011488314485177398, - "learning_rate": 0.00019999998339761992, - "loss": 46.0, - "step": 1149 - }, - { - "epoch": 0.18519264060549942, - "grad_norm": 0.0017289138631895185, - "learning_rate": 0.00019999998336845458, - "loss": 46.0, - "step": 1150 - }, - { - "epoch": 0.18535367768428682, - "grad_norm": 0.00041705454350449145, - "learning_rate": 0.0001999999833392636, - "loss": 46.0, - "step": 1151 - }, - { - "epoch": 0.18551471476307418, - "grad_norm": 0.0011145389871671796, - "learning_rate": 0.00019999998331004707, - "loss": 46.0, - "step": 1152 - }, - { - "epoch": 0.18567575184186158, - "grad_norm": 0.0004316457489039749, - "learning_rate": 0.00019999998328080494, - "loss": 46.0, - "step": 1153 - }, - { - "epoch": 0.18583678892064898, - "grad_norm": 0.000757015950512141, - "learning_rate": 0.0001999999832515372, - "loss": 46.0, - "step": 1154 - }, - { - "epoch": 0.18599782599943637, - "grad_norm": 0.0006713431212119758, - "learning_rate": 0.0001999999832222439, - "loss": 46.0, - "step": 1155 - }, - { - "epoch": 0.18615886307822377, - "grad_norm": 0.0006979407626204193, - "learning_rate": 0.00019999998319292496, - "loss": 46.0, - "step": 1156 - }, - { - "epoch": 0.18631990015701116, - "grad_norm": 0.00021065879263915122, - "learning_rate": 0.00019999998316358046, - "loss": 46.0, - "step": 1157 - }, - { - "epoch": 0.18648093723579853, - "grad_norm": 0.0019219517707824707, - "learning_rate": 0.00019999998313421032, - "loss": 46.0, - "step": 1158 - }, - { - "epoch": 0.18664197431458593, - "grad_norm": 0.000829343858640641, - "learning_rate": 0.00019999998310481462, - "loss": 46.0, - "step": 1159 - }, - { - "epoch": 0.18680301139337332, - "grad_norm": 0.00031882067560218275, - "learning_rate": 0.00019999998307539333, - "loss": 46.0, - "step": 1160 - }, - { - "epoch": 0.18696404847216072, - "grad_norm": 0.000313698808895424, - "learning_rate": 0.00019999998304594642, - "loss": 46.0, - "step": 1161 - }, - { - "epoch": 0.18712508555094812, - "grad_norm": 0.0007100233342498541, - "learning_rate": 0.00019999998301647396, - "loss": 46.0, - "step": 1162 - }, - { - "epoch": 0.18728612262973549, - "grad_norm": 0.0006322844419628382, - "learning_rate": 0.00019999998298697585, - "loss": 46.0, - "step": 1163 - }, - { - "epoch": 0.18744715970852288, - "grad_norm": 0.0016928017139434814, - "learning_rate": 0.0001999999829574522, - "loss": 46.0, - "step": 1164 - }, - { - "epoch": 0.18760819678731028, - "grad_norm": 0.001201709033921361, - "learning_rate": 0.00019999998292790288, - "loss": 46.0, - "step": 1165 - }, - { - "epoch": 0.18776923386609767, - "grad_norm": 0.0006109843961894512, - "learning_rate": 0.00019999998289832804, - "loss": 46.0, - "step": 1166 - }, - { - "epoch": 0.18793027094488507, - "grad_norm": 0.0002959135454148054, - "learning_rate": 0.00019999998286872756, - "loss": 46.0, - "step": 1167 - }, - { - "epoch": 0.18809130802367244, - "grad_norm": 0.0003422789741307497, - "learning_rate": 0.00019999998283910152, - "loss": 46.0, - "step": 1168 - }, - { - "epoch": 0.18825234510245983, - "grad_norm": 0.0006142971687950194, - "learning_rate": 0.00019999998280944987, - "loss": 46.0, - "step": 1169 - }, - { - "epoch": 0.18841338218124723, - "grad_norm": 0.00048411451280117035, - "learning_rate": 0.00019999998277977262, - "loss": 46.0, - "step": 1170 - }, - { - "epoch": 0.18857441926003463, - "grad_norm": 0.0003633377200458199, - "learning_rate": 0.00019999998275006977, - "loss": 46.0, - "step": 1171 - }, - { - "epoch": 0.18873545633882202, - "grad_norm": 0.001585592282935977, - "learning_rate": 0.00019999998272034132, - "loss": 46.0, - "step": 1172 - }, - { - "epoch": 0.18889649341760942, - "grad_norm": 0.000719084870070219, - "learning_rate": 0.00019999998269058732, - "loss": 46.0, - "step": 1173 - }, - { - "epoch": 0.1890575304963968, - "grad_norm": 0.0004340797895565629, - "learning_rate": 0.00019999998266080767, - "loss": 46.0, - "step": 1174 - }, - { - "epoch": 0.18921856757518418, - "grad_norm": 0.0013872854178771377, - "learning_rate": 0.00019999998263100244, - "loss": 46.0, - "step": 1175 - }, - { - "epoch": 0.18937960465397158, - "grad_norm": 0.0007887212559580803, - "learning_rate": 0.00019999998260117162, - "loss": 46.0, - "step": 1176 - }, - { - "epoch": 0.18954064173275897, - "grad_norm": 0.0005852034664712846, - "learning_rate": 0.0001999999825713152, - "loss": 46.0, - "step": 1177 - }, - { - "epoch": 0.18970167881154637, - "grad_norm": 0.00038101040991023183, - "learning_rate": 0.0001999999825414332, - "loss": 46.0, - "step": 1178 - }, - { - "epoch": 0.18986271589033374, - "grad_norm": 0.00033767044078558683, - "learning_rate": 0.0001999999825115256, - "loss": 46.0, - "step": 1179 - }, - { - "epoch": 0.19002375296912113, - "grad_norm": 0.0005349040147848427, - "learning_rate": 0.0001999999824815924, - "loss": 46.0, - "step": 1180 - }, - { - "epoch": 0.19018479004790853, - "grad_norm": 0.000546657363884151, - "learning_rate": 0.0001999999824516336, - "loss": 46.0, - "step": 1181 - }, - { - "epoch": 0.19034582712669593, - "grad_norm": 0.0005343665834516287, - "learning_rate": 0.00019999998242164923, - "loss": 46.0, - "step": 1182 - }, - { - "epoch": 0.19050686420548332, - "grad_norm": 0.00037214389885775745, - "learning_rate": 0.00019999998239163925, - "loss": 46.0, - "step": 1183 - }, - { - "epoch": 0.1906679012842707, - "grad_norm": 0.0005355009925551713, - "learning_rate": 0.00019999998236160368, - "loss": 46.0, - "step": 1184 - }, - { - "epoch": 0.1908289383630581, - "grad_norm": 0.0005049539031460881, - "learning_rate": 0.0001999999823315425, - "loss": 46.0, - "step": 1185 - }, - { - "epoch": 0.19098997544184548, - "grad_norm": 0.0003507023211568594, - "learning_rate": 0.00019999998230145575, - "loss": 46.0, - "step": 1186 - }, - { - "epoch": 0.19115101252063288, - "grad_norm": 0.002438133815303445, - "learning_rate": 0.00019999998227134339, - "loss": 46.0, - "step": 1187 - }, - { - "epoch": 0.19131204959942028, - "grad_norm": 0.00040006329072639346, - "learning_rate": 0.00019999998224120538, - "loss": 46.0, - "step": 1188 - }, - { - "epoch": 0.19147308667820764, - "grad_norm": 0.0005568754859268665, - "learning_rate": 0.00019999998221104185, - "loss": 46.0, - "step": 1189 - }, - { - "epoch": 0.19163412375699504, - "grad_norm": 0.0005926373996771872, - "learning_rate": 0.0001999999821808527, - "loss": 46.0, - "step": 1190 - }, - { - "epoch": 0.19179516083578244, - "grad_norm": 0.000892647251021117, - "learning_rate": 0.00019999998215063797, - "loss": 46.0, - "step": 1191 - }, - { - "epoch": 0.19195619791456983, - "grad_norm": 0.0004454712907318026, - "learning_rate": 0.00019999998212039764, - "loss": 46.0, - "step": 1192 - }, - { - "epoch": 0.19211723499335723, - "grad_norm": 0.0007348660146817565, - "learning_rate": 0.0001999999820901317, - "loss": 46.0, - "step": 1193 - }, - { - "epoch": 0.19227827207214462, - "grad_norm": 0.0014717044541612267, - "learning_rate": 0.00019999998205984015, - "loss": 46.0, - "step": 1194 - }, - { - "epoch": 0.192439309150932, - "grad_norm": 0.0005465009598992765, - "learning_rate": 0.00019999998202952307, - "loss": 46.0, - "step": 1195 - }, - { - "epoch": 0.1926003462297194, - "grad_norm": 0.0009091340471059084, - "learning_rate": 0.00019999998199918034, - "loss": 46.0, - "step": 1196 - }, - { - "epoch": 0.19276138330850678, - "grad_norm": 0.0007343650213442743, - "learning_rate": 0.00019999998196881206, - "loss": 46.0, - "step": 1197 - }, - { - "epoch": 0.19292242038729418, - "grad_norm": 0.0005318308249115944, - "learning_rate": 0.00019999998193841813, - "loss": 46.0, - "step": 1198 - }, - { - "epoch": 0.19308345746608158, - "grad_norm": 0.00048128454363904893, - "learning_rate": 0.00019999998190799862, - "loss": 46.0, - "step": 1199 - }, - { - "epoch": 0.19324449454486894, - "grad_norm": 0.001856165356002748, - "learning_rate": 0.00019999998187755354, - "loss": 46.0, - "step": 1200 - }, - { - "epoch": 0.19340553162365634, - "grad_norm": 0.0003363307623658329, - "learning_rate": 0.00019999998184708285, - "loss": 46.0, - "step": 1201 - }, - { - "epoch": 0.19356656870244374, - "grad_norm": 0.0004334888653829694, - "learning_rate": 0.00019999998181658658, - "loss": 46.0, - "step": 1202 - }, - { - "epoch": 0.19372760578123113, - "grad_norm": 0.0006357750971801579, - "learning_rate": 0.00019999998178606466, - "loss": 46.0, - "step": 1203 - }, - { - "epoch": 0.19388864286001853, - "grad_norm": 0.0015225925017148256, - "learning_rate": 0.0001999999817555172, - "loss": 46.0, - "step": 1204 - }, - { - "epoch": 0.1940496799388059, - "grad_norm": 0.00039018222014419734, - "learning_rate": 0.00019999998172494414, - "loss": 46.0, - "step": 1205 - }, - { - "epoch": 0.1942107170175933, - "grad_norm": 0.0012539116432890296, - "learning_rate": 0.00019999998169434546, - "loss": 46.0, - "step": 1206 - }, - { - "epoch": 0.1943717540963807, - "grad_norm": 0.0006164974183775485, - "learning_rate": 0.0001999999816637212, - "loss": 46.0, - "step": 1207 - }, - { - "epoch": 0.19453279117516808, - "grad_norm": 0.0003595839662011713, - "learning_rate": 0.00019999998163307137, - "loss": 46.0, - "step": 1208 - }, - { - "epoch": 0.19469382825395548, - "grad_norm": 0.001383608439937234, - "learning_rate": 0.0001999999816023959, - "loss": 46.0, - "step": 1209 - }, - { - "epoch": 0.19485486533274288, - "grad_norm": 0.0003669859142974019, - "learning_rate": 0.00019999998157169485, - "loss": 46.0, - "step": 1210 - }, - { - "epoch": 0.19501590241153025, - "grad_norm": 0.0006808997713960707, - "learning_rate": 0.00019999998154096823, - "loss": 46.0, - "step": 1211 - }, - { - "epoch": 0.19517693949031764, - "grad_norm": 0.0005866778665222228, - "learning_rate": 0.000199999981510216, - "loss": 46.0, - "step": 1212 - }, - { - "epoch": 0.19533797656910504, - "grad_norm": 0.0008350455318577588, - "learning_rate": 0.00019999998147943816, - "loss": 46.0, - "step": 1213 - }, - { - "epoch": 0.19549901364789243, - "grad_norm": 0.0006567607051692903, - "learning_rate": 0.00019999998144863473, - "loss": 46.0, - "step": 1214 - }, - { - "epoch": 0.19566005072667983, - "grad_norm": 0.0005408382276073098, - "learning_rate": 0.00019999998141780574, - "loss": 46.0, - "step": 1215 - }, - { - "epoch": 0.1958210878054672, - "grad_norm": 0.00021963795006740838, - "learning_rate": 0.0001999999813869511, - "loss": 46.0, - "step": 1216 - }, - { - "epoch": 0.1959821248842546, - "grad_norm": 0.0008670611423440278, - "learning_rate": 0.0001999999813560709, - "loss": 46.0, - "step": 1217 - }, - { - "epoch": 0.196143161963042, - "grad_norm": 0.0005301130004227161, - "learning_rate": 0.00019999998132516508, - "loss": 46.0, - "step": 1218 - }, - { - "epoch": 0.19630419904182939, - "grad_norm": 0.0004676864482462406, - "learning_rate": 0.0001999999812942337, - "loss": 46.0, - "step": 1219 - }, - { - "epoch": 0.19646523612061678, - "grad_norm": 0.0006992176058702171, - "learning_rate": 0.00019999998126327674, - "loss": 46.0, - "step": 1220 - }, - { - "epoch": 0.19662627319940415, - "grad_norm": 0.00042340063373558223, - "learning_rate": 0.00019999998123229414, - "loss": 46.0, - "step": 1221 - }, - { - "epoch": 0.19678731027819155, - "grad_norm": 0.0007891609566286206, - "learning_rate": 0.00019999998120128596, - "loss": 46.0, - "step": 1222 - }, - { - "epoch": 0.19694834735697894, - "grad_norm": 0.0007281369180418551, - "learning_rate": 0.00019999998117025216, - "loss": 46.0, - "step": 1223 - }, - { - "epoch": 0.19710938443576634, - "grad_norm": 0.0013537160120904446, - "learning_rate": 0.00019999998113919278, - "loss": 46.0, - "step": 1224 - }, - { - "epoch": 0.19727042151455373, - "grad_norm": 0.0006485064513981342, - "learning_rate": 0.00019999998110810784, - "loss": 46.0, - "step": 1225 - }, - { - "epoch": 0.19743145859334113, - "grad_norm": 0.0009129114914685488, - "learning_rate": 0.00019999998107699728, - "loss": 46.0, - "step": 1226 - }, - { - "epoch": 0.1975924956721285, - "grad_norm": 0.0018358170054852962, - "learning_rate": 0.0001999999810458611, - "loss": 46.0, - "step": 1227 - }, - { - "epoch": 0.1977535327509159, - "grad_norm": 0.00032996985828503966, - "learning_rate": 0.00019999998101469935, - "loss": 46.0, - "step": 1228 - }, - { - "epoch": 0.1979145698297033, - "grad_norm": 0.0010607584845274687, - "learning_rate": 0.00019999998098351203, - "loss": 46.0, - "step": 1229 - }, - { - "epoch": 0.1980756069084907, - "grad_norm": 0.0004708537016995251, - "learning_rate": 0.0001999999809522991, - "loss": 46.0, - "step": 1230 - }, - { - "epoch": 0.19823664398727808, - "grad_norm": 0.0007520978688262403, - "learning_rate": 0.00019999998092106055, - "loss": 46.0, - "step": 1231 - }, - { - "epoch": 0.19839768106606545, - "grad_norm": 0.000604619737714529, - "learning_rate": 0.0001999999808897964, - "loss": 46.0, - "step": 1232 - }, - { - "epoch": 0.19855871814485285, - "grad_norm": 0.0009547881199978292, - "learning_rate": 0.0001999999808585067, - "loss": 46.0, - "step": 1233 - }, - { - "epoch": 0.19871975522364024, - "grad_norm": 0.0008918555686250329, - "learning_rate": 0.00019999998082719135, - "loss": 46.0, - "step": 1234 - }, - { - "epoch": 0.19888079230242764, - "grad_norm": 0.00038530738675035536, - "learning_rate": 0.00019999998079585045, - "loss": 46.0, - "step": 1235 - }, - { - "epoch": 0.19904182938121504, - "grad_norm": 0.0005893002962693572, - "learning_rate": 0.00019999998076448394, - "loss": 46.0, - "step": 1236 - }, - { - "epoch": 0.1992028664600024, - "grad_norm": 0.0002431150496704504, - "learning_rate": 0.00019999998073309182, - "loss": 46.0, - "step": 1237 - }, - { - "epoch": 0.1993639035387898, - "grad_norm": 0.0013590792659670115, - "learning_rate": 0.00019999998070167413, - "loss": 46.0, - "step": 1238 - }, - { - "epoch": 0.1995249406175772, - "grad_norm": 0.0006564633222296834, - "learning_rate": 0.00019999998067023083, - "loss": 46.0, - "step": 1239 - }, - { - "epoch": 0.1996859776963646, - "grad_norm": 0.001041515963152051, - "learning_rate": 0.00019999998063876192, - "loss": 46.0, - "step": 1240 - }, - { - "epoch": 0.199847014775152, - "grad_norm": 0.0007092384621500969, - "learning_rate": 0.00019999998060726745, - "loss": 46.0, - "step": 1241 - }, - { - "epoch": 0.20000805185393936, - "grad_norm": 0.000286154419882223, - "learning_rate": 0.00019999998057574738, - "loss": 46.0, - "step": 1242 - }, - { - "epoch": 0.20016908893272675, - "grad_norm": 0.0004713652888312936, - "learning_rate": 0.0001999999805442017, - "loss": 46.0, - "step": 1243 - }, - { - "epoch": 0.20033012601151415, - "grad_norm": 0.0015317403012886643, - "learning_rate": 0.00019999998051263042, - "loss": 46.0, - "step": 1244 - }, - { - "epoch": 0.20049116309030154, - "grad_norm": 0.000713564979378134, - "learning_rate": 0.00019999998048103357, - "loss": 46.0, - "step": 1245 - }, - { - "epoch": 0.20065220016908894, - "grad_norm": 0.0008483220590278506, - "learning_rate": 0.00019999998044941113, - "loss": 46.0, - "step": 1246 - }, - { - "epoch": 0.20081323724787634, - "grad_norm": 0.001816748408600688, - "learning_rate": 0.00019999998041776305, - "loss": 46.0, - "step": 1247 - }, - { - "epoch": 0.2009742743266637, - "grad_norm": 0.0003024954639840871, - "learning_rate": 0.00019999998038608942, - "loss": 46.0, - "step": 1248 - }, - { - "epoch": 0.2011353114054511, - "grad_norm": 0.0003261783276684582, - "learning_rate": 0.00019999998035439016, - "loss": 46.0, - "step": 1249 - }, - { - "epoch": 0.2012963484842385, - "grad_norm": 0.0006194661254994571, - "learning_rate": 0.00019999998032266532, - "loss": 46.0, - "step": 1250 - }, - { - "epoch": 0.2014573855630259, - "grad_norm": 0.0017727031372487545, - "learning_rate": 0.0001999999802909149, - "loss": 46.0, - "step": 1251 - }, - { - "epoch": 0.2016184226418133, - "grad_norm": 0.0008676075958646834, - "learning_rate": 0.00019999998025913888, - "loss": 46.0, - "step": 1252 - }, - { - "epoch": 0.20177945972060066, - "grad_norm": 0.0007908233674243093, - "learning_rate": 0.00019999998022733725, - "loss": 46.0, - "step": 1253 - }, - { - "epoch": 0.20194049679938805, - "grad_norm": 0.0010229131439700723, - "learning_rate": 0.00019999998019551004, - "loss": 46.0, - "step": 1254 - }, - { - "epoch": 0.20210153387817545, - "grad_norm": 0.0005363317322917283, - "learning_rate": 0.0001999999801636572, - "loss": 46.0, - "step": 1255 - }, - { - "epoch": 0.20226257095696284, - "grad_norm": 0.00044931445154361427, - "learning_rate": 0.00019999998013177882, - "loss": 46.0, - "step": 1256 - }, - { - "epoch": 0.20242360803575024, - "grad_norm": 0.0004354381235316396, - "learning_rate": 0.00019999998009987481, - "loss": 46.0, - "step": 1257 - }, - { - "epoch": 0.2025846451145376, - "grad_norm": 0.0011329345870763063, - "learning_rate": 0.0001999999800679452, - "loss": 46.0, - "step": 1258 - }, - { - "epoch": 0.202745682193325, - "grad_norm": 0.0013573268661275506, - "learning_rate": 0.00019999998003599002, - "loss": 46.0, - "step": 1259 - }, - { - "epoch": 0.2029067192721124, - "grad_norm": 0.0004111604648642242, - "learning_rate": 0.00019999998000400923, - "loss": 46.0, - "step": 1260 - }, - { - "epoch": 0.2030677563508998, - "grad_norm": 0.00033504419843666255, - "learning_rate": 0.00019999997997200285, - "loss": 46.0, - "step": 1261 - }, - { - "epoch": 0.2032287934296872, - "grad_norm": 0.001046760007739067, - "learning_rate": 0.00019999997993997085, - "loss": 46.0, - "step": 1262 - }, - { - "epoch": 0.2033898305084746, - "grad_norm": 0.000282672350294888, - "learning_rate": 0.0001999999799079133, - "loss": 46.0, - "step": 1263 - }, - { - "epoch": 0.20355086758726196, - "grad_norm": 0.0005610276130028069, - "learning_rate": 0.00019999997987583013, - "loss": 46.0, - "step": 1264 - }, - { - "epoch": 0.20371190466604935, - "grad_norm": 0.0006805678131058812, - "learning_rate": 0.00019999997984372137, - "loss": 46.0, - "step": 1265 - }, - { - "epoch": 0.20387294174483675, - "grad_norm": 0.00043429923243820667, - "learning_rate": 0.00019999997981158703, - "loss": 46.0, - "step": 1266 - }, - { - "epoch": 0.20403397882362415, - "grad_norm": 0.0004495600878726691, - "learning_rate": 0.00019999997977942707, - "loss": 46.0, - "step": 1267 - }, - { - "epoch": 0.20419501590241154, - "grad_norm": 0.0007008801912888885, - "learning_rate": 0.00019999997974724153, - "loss": 46.0, - "step": 1268 - }, - { - "epoch": 0.2043560529811989, - "grad_norm": 0.0013177571818232536, - "learning_rate": 0.0001999999797150304, - "loss": 46.0, - "step": 1269 - }, - { - "epoch": 0.2045170900599863, - "grad_norm": 0.0006462169112637639, - "learning_rate": 0.00019999997968279365, - "loss": 46.0, - "step": 1270 - }, - { - "epoch": 0.2046781271387737, - "grad_norm": 0.0005713836289942265, - "learning_rate": 0.00019999997965053132, - "loss": 46.0, - "step": 1271 - }, - { - "epoch": 0.2048391642175611, - "grad_norm": 0.0008084340370260179, - "learning_rate": 0.0001999999796182434, - "loss": 46.0, - "step": 1272 - }, - { - "epoch": 0.2050002012963485, - "grad_norm": 0.003203538479283452, - "learning_rate": 0.0001999999795859299, - "loss": 46.0, - "step": 1273 - }, - { - "epoch": 0.20516123837513586, - "grad_norm": 0.0016390079399570823, - "learning_rate": 0.00019999997955359077, - "loss": 46.0, - "step": 1274 - }, - { - "epoch": 0.20532227545392326, - "grad_norm": 0.0006589957047253847, - "learning_rate": 0.00019999997952122603, - "loss": 46.0, - "step": 1275 - }, - { - "epoch": 0.20548331253271065, - "grad_norm": 0.0004350185627117753, - "learning_rate": 0.00019999997948883574, - "loss": 46.0, - "step": 1276 - }, - { - "epoch": 0.20564434961149805, - "grad_norm": 0.0006646730471402407, - "learning_rate": 0.00019999997945641983, - "loss": 46.0, - "step": 1277 - }, - { - "epoch": 0.20580538669028545, - "grad_norm": 0.0010368756484240294, - "learning_rate": 0.00019999997942397836, - "loss": 46.0, - "step": 1278 - }, - { - "epoch": 0.20596642376907284, - "grad_norm": 0.001357734901830554, - "learning_rate": 0.00019999997939151127, - "loss": 46.0, - "step": 1279 - }, - { - "epoch": 0.2061274608478602, - "grad_norm": 0.0006798878312110901, - "learning_rate": 0.00019999997935901857, - "loss": 46.0, - "step": 1280 - }, - { - "epoch": 0.2062884979266476, - "grad_norm": 0.0006989031098783016, - "learning_rate": 0.0001999999793265003, - "loss": 46.0, - "step": 1281 - }, - { - "epoch": 0.206449535005435, - "grad_norm": 0.0006288923905231059, - "learning_rate": 0.0001999999792939564, - "loss": 46.0, - "step": 1282 - }, - { - "epoch": 0.2066105720842224, - "grad_norm": 0.0011941434349864721, - "learning_rate": 0.00019999997926138695, - "loss": 46.0, - "step": 1283 - }, - { - "epoch": 0.2067716091630098, - "grad_norm": 0.00041245456668548286, - "learning_rate": 0.00019999997922879188, - "loss": 46.0, - "step": 1284 - }, - { - "epoch": 0.20693264624179716, - "grad_norm": 0.0009822394931688905, - "learning_rate": 0.00019999997919617124, - "loss": 46.0, - "step": 1285 - }, - { - "epoch": 0.20709368332058456, - "grad_norm": 0.0004020059132017195, - "learning_rate": 0.00019999997916352497, - "loss": 46.0, - "step": 1286 - }, - { - "epoch": 0.20725472039937196, - "grad_norm": 0.00035606411984190345, - "learning_rate": 0.00019999997913085313, - "loss": 46.0, - "step": 1287 - }, - { - "epoch": 0.20741575747815935, - "grad_norm": 0.0002625926863402128, - "learning_rate": 0.00019999997909815568, - "loss": 46.0, - "step": 1288 - }, - { - "epoch": 0.20757679455694675, - "grad_norm": 0.0003559159522410482, - "learning_rate": 0.00019999997906543264, - "loss": 46.0, - "step": 1289 - }, - { - "epoch": 0.20773783163573412, - "grad_norm": 0.00042531246435828507, - "learning_rate": 0.00019999997903268405, - "loss": 46.0, - "step": 1290 - }, - { - "epoch": 0.2078988687145215, - "grad_norm": 0.0011229532537981868, - "learning_rate": 0.0001999999789999098, - "loss": 46.0, - "step": 1291 - }, - { - "epoch": 0.2080599057933089, - "grad_norm": 0.0007212982163764536, - "learning_rate": 0.00019999997896710995, - "loss": 46.0, - "step": 1292 - }, - { - "epoch": 0.2082209428720963, - "grad_norm": 0.00037363465526141226, - "learning_rate": 0.00019999997893428454, - "loss": 46.0, - "step": 1293 - }, - { - "epoch": 0.2083819799508837, - "grad_norm": 0.0004636388330254704, - "learning_rate": 0.00019999997890143354, - "loss": 46.0, - "step": 1294 - }, - { - "epoch": 0.20854301702967107, - "grad_norm": 0.00041166823939420283, - "learning_rate": 0.00019999997886855692, - "loss": 46.0, - "step": 1295 - }, - { - "epoch": 0.20870405410845846, - "grad_norm": 0.0011840173974633217, - "learning_rate": 0.00019999997883565472, - "loss": 46.0, - "step": 1296 - }, - { - "epoch": 0.20886509118724586, - "grad_norm": 0.0007331250235438347, - "learning_rate": 0.00019999997880272693, - "loss": 46.0, - "step": 1297 - }, - { - "epoch": 0.20902612826603326, - "grad_norm": 0.0004854659200645983, - "learning_rate": 0.00019999997876977356, - "loss": 46.0, - "step": 1298 - }, - { - "epoch": 0.20918716534482065, - "grad_norm": 0.0006746966973878443, - "learning_rate": 0.00019999997873679454, - "loss": 46.0, - "step": 1299 - }, - { - "epoch": 0.20934820242360805, - "grad_norm": 0.00042606753413565457, - "learning_rate": 0.00019999997870378996, - "loss": 46.0, - "step": 1300 - }, - { - "epoch": 0.20950923950239542, - "grad_norm": 0.00040148888365365565, - "learning_rate": 0.00019999997867075982, - "loss": 46.0, - "step": 1301 - }, - { - "epoch": 0.2096702765811828, - "grad_norm": 0.0003798995167016983, - "learning_rate": 0.00019999997863770404, - "loss": 46.0, - "step": 1302 - }, - { - "epoch": 0.2098313136599702, - "grad_norm": 0.0013018834870308638, - "learning_rate": 0.00019999997860462268, - "loss": 46.0, - "step": 1303 - }, - { - "epoch": 0.2099923507387576, - "grad_norm": 0.0006651169969700277, - "learning_rate": 0.0001999999785715157, - "loss": 46.0, - "step": 1304 - }, - { - "epoch": 0.210153387817545, - "grad_norm": 0.0007748394273221493, - "learning_rate": 0.00019999997853838316, - "loss": 46.0, - "step": 1305 - }, - { - "epoch": 0.21031442489633237, - "grad_norm": 0.00041452041477896273, - "learning_rate": 0.00019999997850522497, - "loss": 46.0, - "step": 1306 - }, - { - "epoch": 0.21047546197511977, - "grad_norm": 0.0011760251363739371, - "learning_rate": 0.00019999997847204126, - "loss": 46.0, - "step": 1307 - }, - { - "epoch": 0.21063649905390716, - "grad_norm": 0.0009818397229537368, - "learning_rate": 0.0001999999784388319, - "loss": 46.0, - "step": 1308 - }, - { - "epoch": 0.21079753613269456, - "grad_norm": 0.000671629561111331, - "learning_rate": 0.00019999997840559699, - "loss": 46.0, - "step": 1309 - }, - { - "epoch": 0.21095857321148195, - "grad_norm": 0.0003047497302759439, - "learning_rate": 0.00019999997837233643, - "loss": 46.0, - "step": 1310 - }, - { - "epoch": 0.21111961029026932, - "grad_norm": 0.00030269133276306093, - "learning_rate": 0.0001999999783390503, - "loss": 46.0, - "step": 1311 - }, - { - "epoch": 0.21128064736905672, - "grad_norm": 0.0009661979274824262, - "learning_rate": 0.0001999999783057386, - "loss": 46.0, - "step": 1312 - }, - { - "epoch": 0.2114416844478441, - "grad_norm": 0.0008302502683363855, - "learning_rate": 0.00019999997827240129, - "loss": 46.0, - "step": 1313 - }, - { - "epoch": 0.2116027215266315, - "grad_norm": 0.0007613637717440724, - "learning_rate": 0.00019999997823903838, - "loss": 46.0, - "step": 1314 - }, - { - "epoch": 0.2117637586054189, - "grad_norm": 0.0009449763456359506, - "learning_rate": 0.00019999997820564986, - "loss": 46.0, - "step": 1315 - }, - { - "epoch": 0.2119247956842063, - "grad_norm": 0.0008858305518515408, - "learning_rate": 0.0001999999781722358, - "loss": 46.0, - "step": 1316 - }, - { - "epoch": 0.21208583276299367, - "grad_norm": 0.001000791322439909, - "learning_rate": 0.0001999999781387961, - "loss": 46.0, - "step": 1317 - }, - { - "epoch": 0.21224686984178107, - "grad_norm": 0.0008186783525161445, - "learning_rate": 0.00019999997810533077, - "loss": 46.0, - "step": 1318 - }, - { - "epoch": 0.21240790692056846, - "grad_norm": 0.002846318995580077, - "learning_rate": 0.00019999997807183993, - "loss": 46.0, - "step": 1319 - }, - { - "epoch": 0.21256894399935586, - "grad_norm": 0.000925748492591083, - "learning_rate": 0.00019999997803832345, - "loss": 46.0, - "step": 1320 - }, - { - "epoch": 0.21272998107814325, - "grad_norm": 0.0009930927772074938, - "learning_rate": 0.00019999997800478135, - "loss": 46.0, - "step": 1321 - }, - { - "epoch": 0.21289101815693062, - "grad_norm": 0.0015030791983008385, - "learning_rate": 0.00019999997797121372, - "loss": 46.0, - "step": 1322 - }, - { - "epoch": 0.21305205523571802, - "grad_norm": 0.0009468320058658719, - "learning_rate": 0.00019999997793762042, - "loss": 46.0, - "step": 1323 - }, - { - "epoch": 0.21321309231450541, - "grad_norm": 0.0008606510818935931, - "learning_rate": 0.00019999997790400156, - "loss": 46.0, - "step": 1324 - }, - { - "epoch": 0.2133741293932928, - "grad_norm": 0.0002674665302038193, - "learning_rate": 0.0001999999778703571, - "loss": 46.0, - "step": 1325 - }, - { - "epoch": 0.2135351664720802, - "grad_norm": 0.00042528240010142326, - "learning_rate": 0.00019999997783668708, - "loss": 46.0, - "step": 1326 - }, - { - "epoch": 0.21369620355086758, - "grad_norm": 0.000520424684509635, - "learning_rate": 0.00019999997780299143, - "loss": 46.0, - "step": 1327 - }, - { - "epoch": 0.21385724062965497, - "grad_norm": 0.0005076273810118437, - "learning_rate": 0.00019999997776927017, - "loss": 46.0, - "step": 1328 - }, - { - "epoch": 0.21401827770844237, - "grad_norm": 0.0007098311325535178, - "learning_rate": 0.00019999997773552334, - "loss": 46.0, - "step": 1329 - }, - { - "epoch": 0.21417931478722976, - "grad_norm": 0.001110561192035675, - "learning_rate": 0.00019999997770175093, - "loss": 46.0, - "step": 1330 - }, - { - "epoch": 0.21434035186601716, - "grad_norm": 0.002431141911074519, - "learning_rate": 0.0001999999776679529, - "loss": 46.0, - "step": 1331 - }, - { - "epoch": 0.21450138894480453, - "grad_norm": 0.0009852462681010365, - "learning_rate": 0.0001999999776341293, - "loss": 46.0, - "step": 1332 - }, - { - "epoch": 0.21466242602359192, - "grad_norm": 0.0011847583809867501, - "learning_rate": 0.00019999997760028007, - "loss": 46.0, - "step": 1333 - }, - { - "epoch": 0.21482346310237932, - "grad_norm": 0.0006205018726177514, - "learning_rate": 0.00019999997756640526, - "loss": 46.0, - "step": 1334 - }, - { - "epoch": 0.21498450018116672, - "grad_norm": 0.0006113162962719798, - "learning_rate": 0.00019999997753250486, - "loss": 46.0, - "step": 1335 - }, - { - "epoch": 0.2151455372599541, - "grad_norm": 0.0008273277198895812, - "learning_rate": 0.00019999997749857887, - "loss": 46.0, - "step": 1336 - }, - { - "epoch": 0.2153065743387415, - "grad_norm": 0.0008812635205686092, - "learning_rate": 0.0001999999774646273, - "loss": 46.0, - "step": 1337 - }, - { - "epoch": 0.21546761141752888, - "grad_norm": 0.0002859734231606126, - "learning_rate": 0.0001999999774306501, - "loss": 46.0, - "step": 1338 - }, - { - "epoch": 0.21562864849631627, - "grad_norm": 0.0016791780944913626, - "learning_rate": 0.0001999999773966473, - "loss": 46.0, - "step": 1339 - }, - { - "epoch": 0.21578968557510367, - "grad_norm": 0.0007860228652134538, - "learning_rate": 0.00019999997736261892, - "loss": 46.0, - "step": 1340 - }, - { - "epoch": 0.21595072265389106, - "grad_norm": 0.001189288916066289, - "learning_rate": 0.00019999997732856497, - "loss": 46.0, - "step": 1341 - }, - { - "epoch": 0.21611175973267846, - "grad_norm": 0.0004669920017477125, - "learning_rate": 0.00019999997729448538, - "loss": 46.0, - "step": 1342 - }, - { - "epoch": 0.21627279681146583, - "grad_norm": 0.00029777351301163435, - "learning_rate": 0.00019999997726038025, - "loss": 46.0, - "step": 1343 - }, - { - "epoch": 0.21643383389025322, - "grad_norm": 0.0005833743489347398, - "learning_rate": 0.00019999997722624949, - "loss": 46.0, - "step": 1344 - }, - { - "epoch": 0.21659487096904062, - "grad_norm": 0.0005694436840713024, - "learning_rate": 0.00019999997719209313, - "loss": 46.0, - "step": 1345 - }, - { - "epoch": 0.21675590804782802, - "grad_norm": 0.0004959586076438427, - "learning_rate": 0.0001999999771579112, - "loss": 46.0, - "step": 1346 - }, - { - "epoch": 0.2169169451266154, - "grad_norm": 0.0007592539768666029, - "learning_rate": 0.00019999997712370364, - "loss": 46.0, - "step": 1347 - }, - { - "epoch": 0.21707798220540278, - "grad_norm": 0.0009125350625254214, - "learning_rate": 0.00019999997708947052, - "loss": 46.0, - "step": 1348 - }, - { - "epoch": 0.21723901928419018, - "grad_norm": 0.0004352336982265115, - "learning_rate": 0.0001999999770552118, - "loss": 46.0, - "step": 1349 - }, - { - "epoch": 0.21740005636297757, - "grad_norm": 0.0004969755536876619, - "learning_rate": 0.00019999997702092745, - "loss": 46.0, - "step": 1350 - }, - { - "epoch": 0.21756109344176497, - "grad_norm": 0.0008792067528702319, - "learning_rate": 0.00019999997698661755, - "loss": 46.0, - "step": 1351 - }, - { - "epoch": 0.21772213052055236, - "grad_norm": 0.002492264611646533, - "learning_rate": 0.00019999997695228203, - "loss": 46.0, - "step": 1352 - }, - { - "epoch": 0.21788316759933976, - "grad_norm": 0.0009090930689126253, - "learning_rate": 0.00019999997691792093, - "loss": 46.0, - "step": 1353 - }, - { - "epoch": 0.21804420467812713, - "grad_norm": 0.0006379113183356822, - "learning_rate": 0.0001999999768835342, - "loss": 46.0, - "step": 1354 - }, - { - "epoch": 0.21820524175691453, - "grad_norm": 0.000359359139110893, - "learning_rate": 0.0001999999768491219, - "loss": 46.0, - "step": 1355 - }, - { - "epoch": 0.21836627883570192, - "grad_norm": 0.001027589780278504, - "learning_rate": 0.00019999997681468404, - "loss": 46.0, - "step": 1356 - }, - { - "epoch": 0.21852731591448932, - "grad_norm": 0.000839768152218312, - "learning_rate": 0.00019999997678022053, - "loss": 46.0, - "step": 1357 - }, - { - "epoch": 0.2186883529932767, - "grad_norm": 0.0015528691001236439, - "learning_rate": 0.00019999997674573146, - "loss": 46.0, - "step": 1358 - }, - { - "epoch": 0.21884939007206408, - "grad_norm": 0.00020052723994012922, - "learning_rate": 0.00019999997671121678, - "loss": 46.0, - "step": 1359 - }, - { - "epoch": 0.21901042715085148, - "grad_norm": 0.0004890299751423299, - "learning_rate": 0.0001999999766766765, - "loss": 46.0, - "step": 1360 - }, - { - "epoch": 0.21917146422963887, - "grad_norm": 0.0008463022531941533, - "learning_rate": 0.00019999997664211063, - "loss": 46.0, - "step": 1361 - }, - { - "epoch": 0.21933250130842627, - "grad_norm": 0.0009772107005119324, - "learning_rate": 0.00019999997660751916, - "loss": 46.0, - "step": 1362 - }, - { - "epoch": 0.21949353838721367, - "grad_norm": 0.0022033867426216602, - "learning_rate": 0.0001999999765729021, - "loss": 46.0, - "step": 1363 - }, - { - "epoch": 0.21965457546600103, - "grad_norm": 0.0009736161446198821, - "learning_rate": 0.00019999997653825946, - "loss": 46.0, - "step": 1364 - }, - { - "epoch": 0.21981561254478843, - "grad_norm": 0.002472112188115716, - "learning_rate": 0.00019999997650359123, - "loss": 46.0, - "step": 1365 - }, - { - "epoch": 0.21997664962357583, - "grad_norm": 0.0005026605795137584, - "learning_rate": 0.00019999997646889738, - "loss": 46.0, - "step": 1366 - }, - { - "epoch": 0.22013768670236322, - "grad_norm": 0.00116539909504354, - "learning_rate": 0.00019999997643417792, - "loss": 46.0, - "step": 1367 - }, - { - "epoch": 0.22029872378115062, - "grad_norm": 0.0004651888448279351, - "learning_rate": 0.0001999999763994329, - "loss": 46.0, - "step": 1368 - }, - { - "epoch": 0.22045976085993801, - "grad_norm": 0.0014744664076715708, - "learning_rate": 0.0001999999763646623, - "loss": 46.0, - "step": 1369 - }, - { - "epoch": 0.22062079793872538, - "grad_norm": 0.00045267806854099035, - "learning_rate": 0.00019999997632986604, - "loss": 46.0, - "step": 1370 - }, - { - "epoch": 0.22078183501751278, - "grad_norm": 0.000523657537996769, - "learning_rate": 0.00019999997629504423, - "loss": 46.0, - "step": 1371 - }, - { - "epoch": 0.22094287209630017, - "grad_norm": 0.0012504342012107372, - "learning_rate": 0.0001999999762601968, - "loss": 46.0, - "step": 1372 - }, - { - "epoch": 0.22110390917508757, - "grad_norm": 0.0005932914209552109, - "learning_rate": 0.00019999997622532383, - "loss": 46.0, - "step": 1373 - }, - { - "epoch": 0.22126494625387497, - "grad_norm": 0.0027143647894263268, - "learning_rate": 0.0001999999761904252, - "loss": 46.0, - "step": 1374 - }, - { - "epoch": 0.22142598333266234, - "grad_norm": 0.000593595381360501, - "learning_rate": 0.00019999997615550102, - "loss": 46.0, - "step": 1375 - }, - { - "epoch": 0.22158702041144973, - "grad_norm": 0.0009034270187839866, - "learning_rate": 0.00019999997612055124, - "loss": 46.0, - "step": 1376 - }, - { - "epoch": 0.22174805749023713, - "grad_norm": 0.0006523123593069613, - "learning_rate": 0.00019999997608557583, - "loss": 46.0, - "step": 1377 - }, - { - "epoch": 0.22190909456902452, - "grad_norm": 0.005185622721910477, - "learning_rate": 0.00019999997605057486, - "loss": 46.0, - "step": 1378 - }, - { - "epoch": 0.22207013164781192, - "grad_norm": 0.0007409183890558779, - "learning_rate": 0.0001999999760155483, - "loss": 46.0, - "step": 1379 - }, - { - "epoch": 0.2222311687265993, - "grad_norm": 0.0009297197684645653, - "learning_rate": 0.00019999997598049612, - "loss": 46.0, - "step": 1380 - }, - { - "epoch": 0.22239220580538668, - "grad_norm": 0.001152454991824925, - "learning_rate": 0.00019999997594541836, - "loss": 46.0, - "step": 1381 - }, - { - "epoch": 0.22255324288417408, - "grad_norm": 0.00031069456599652767, - "learning_rate": 0.000199999975910315, - "loss": 46.0, - "step": 1382 - }, - { - "epoch": 0.22271427996296148, - "grad_norm": 0.0006136883166618645, - "learning_rate": 0.00019999997587518605, - "loss": 46.0, - "step": 1383 - }, - { - "epoch": 0.22287531704174887, - "grad_norm": 0.001425751717761159, - "learning_rate": 0.0001999999758400315, - "loss": 46.0, - "step": 1384 - }, - { - "epoch": 0.22303635412053624, - "grad_norm": 0.0005713800201192498, - "learning_rate": 0.00019999997580485133, - "loss": 46.0, - "step": 1385 - }, - { - "epoch": 0.22319739119932364, - "grad_norm": 0.0012481024023145437, - "learning_rate": 0.0001999999757696456, - "loss": 46.0, - "step": 1386 - }, - { - "epoch": 0.22335842827811103, - "grad_norm": 0.0007147170254029334, - "learning_rate": 0.00019999997573441427, - "loss": 46.0, - "step": 1387 - }, - { - "epoch": 0.22351946535689843, - "grad_norm": 0.0009140543988905847, - "learning_rate": 0.00019999997569915734, - "loss": 46.0, - "step": 1388 - }, - { - "epoch": 0.22368050243568582, - "grad_norm": 0.0003687191638164222, - "learning_rate": 0.0001999999756638748, - "loss": 46.0, - "step": 1389 - }, - { - "epoch": 0.22384153951447322, - "grad_norm": 0.0006373602082021534, - "learning_rate": 0.0001999999756285667, - "loss": 46.0, - "step": 1390 - }, - { - "epoch": 0.2240025765932606, - "grad_norm": 0.0003636250621639192, - "learning_rate": 0.00019999997559323298, - "loss": 46.0, - "step": 1391 - }, - { - "epoch": 0.22416361367204798, - "grad_norm": 0.0018738931976258755, - "learning_rate": 0.00019999997555787368, - "loss": 46.0, - "step": 1392 - }, - { - "epoch": 0.22432465075083538, - "grad_norm": 0.00021359573293011636, - "learning_rate": 0.00019999997552248876, - "loss": 46.0, - "step": 1393 - }, - { - "epoch": 0.22448568782962278, - "grad_norm": 0.0005083411233499646, - "learning_rate": 0.00019999997548707826, - "loss": 46.0, - "step": 1394 - }, - { - "epoch": 0.22464672490841017, - "grad_norm": 0.0004379588644951582, - "learning_rate": 0.00019999997545164217, - "loss": 46.0, - "step": 1395 - }, - { - "epoch": 0.22480776198719754, - "grad_norm": 0.0023488837759941816, - "learning_rate": 0.0001999999754161805, - "loss": 46.0, - "step": 1396 - }, - { - "epoch": 0.22496879906598494, - "grad_norm": 0.001294159796088934, - "learning_rate": 0.0001999999753806932, - "loss": 46.0, - "step": 1397 - }, - { - "epoch": 0.22512983614477233, - "grad_norm": 0.000325981731293723, - "learning_rate": 0.00019999997534518032, - "loss": 46.0, - "step": 1398 - }, - { - "epoch": 0.22529087322355973, - "grad_norm": 0.0004426139057613909, - "learning_rate": 0.00019999997530964185, - "loss": 46.0, - "step": 1399 - }, - { - "epoch": 0.22545191030234712, - "grad_norm": 0.0008275063592009246, - "learning_rate": 0.0001999999752740778, - "loss": 46.0, - "step": 1400 - }, - { - "epoch": 0.2256129473811345, - "grad_norm": 0.000515216845087707, - "learning_rate": 0.00019999997523848813, - "loss": 46.0, - "step": 1401 - }, - { - "epoch": 0.2257739844599219, - "grad_norm": 0.00035010234569199383, - "learning_rate": 0.00019999997520287285, - "loss": 46.0, - "step": 1402 - }, - { - "epoch": 0.22593502153870929, - "grad_norm": 0.0015862504951655865, - "learning_rate": 0.000199999975167232, - "loss": 46.0, - "step": 1403 - }, - { - "epoch": 0.22609605861749668, - "grad_norm": 0.0005513600190170109, - "learning_rate": 0.00019999997513156558, - "loss": 46.0, - "step": 1404 - }, - { - "epoch": 0.22625709569628408, - "grad_norm": 0.001568580511957407, - "learning_rate": 0.0001999999750958735, - "loss": 46.0, - "step": 1405 - }, - { - "epoch": 0.22641813277507147, - "grad_norm": 0.002001952612772584, - "learning_rate": 0.00019999997506015587, - "loss": 46.0, - "step": 1406 - }, - { - "epoch": 0.22657916985385884, - "grad_norm": 0.0004378009762149304, - "learning_rate": 0.00019999997502441266, - "loss": 46.0, - "step": 1407 - }, - { - "epoch": 0.22674020693264624, - "grad_norm": 0.0003171944117639214, - "learning_rate": 0.00019999997498864382, - "loss": 46.0, - "step": 1408 - }, - { - "epoch": 0.22690124401143363, - "grad_norm": 0.0013251467607915401, - "learning_rate": 0.0001999999749528494, - "loss": 46.0, - "step": 1409 - }, - { - "epoch": 0.22706228109022103, - "grad_norm": 0.0007552897441200912, - "learning_rate": 0.0001999999749170294, - "loss": 46.0, - "step": 1410 - }, - { - "epoch": 0.22722331816900843, - "grad_norm": 0.0005726873641833663, - "learning_rate": 0.00019999997488118378, - "loss": 46.0, - "step": 1411 - }, - { - "epoch": 0.2273843552477958, - "grad_norm": 0.0004938723286613822, - "learning_rate": 0.00019999997484531257, - "loss": 46.0, - "step": 1412 - }, - { - "epoch": 0.2275453923265832, - "grad_norm": 0.00035536818904802203, - "learning_rate": 0.00019999997480941577, - "loss": 46.0, - "step": 1413 - }, - { - "epoch": 0.2277064294053706, - "grad_norm": 0.00028908287640661, - "learning_rate": 0.0001999999747734934, - "loss": 46.0, - "step": 1414 - }, - { - "epoch": 0.22786746648415798, - "grad_norm": 0.0012565568322315812, - "learning_rate": 0.0001999999747375454, - "loss": 46.0, - "step": 1415 - }, - { - "epoch": 0.22802850356294538, - "grad_norm": 0.0004596236685756594, - "learning_rate": 0.0001999999747015718, - "loss": 46.0, - "step": 1416 - }, - { - "epoch": 0.22818954064173275, - "grad_norm": 0.000986226019449532, - "learning_rate": 0.0001999999746655726, - "loss": 46.0, - "step": 1417 - }, - { - "epoch": 0.22835057772052014, - "grad_norm": 0.0005775231984443963, - "learning_rate": 0.00019999997462954785, - "loss": 46.0, - "step": 1418 - }, - { - "epoch": 0.22851161479930754, - "grad_norm": 0.000912361079826951, - "learning_rate": 0.00019999997459349745, - "loss": 46.0, - "step": 1419 - }, - { - "epoch": 0.22867265187809493, - "grad_norm": 0.00042985662003047764, - "learning_rate": 0.00019999997455742152, - "loss": 46.0, - "step": 1420 - }, - { - "epoch": 0.22883368895688233, - "grad_norm": 0.0005762381479144096, - "learning_rate": 0.00019999997452131994, - "loss": 46.0, - "step": 1421 - }, - { - "epoch": 0.22899472603566973, - "grad_norm": 0.0005960650159977376, - "learning_rate": 0.0001999999744851928, - "loss": 46.0, - "step": 1422 - }, - { - "epoch": 0.2291557631144571, - "grad_norm": 0.0006273018661886454, - "learning_rate": 0.00019999997444904003, - "loss": 46.0, - "step": 1423 - }, - { - "epoch": 0.2293168001932445, - "grad_norm": 0.00036709007690660655, - "learning_rate": 0.0001999999744128617, - "loss": 46.0, - "step": 1424 - }, - { - "epoch": 0.2294778372720319, - "grad_norm": 0.0005468791932798922, - "learning_rate": 0.00019999997437665775, - "loss": 46.0, - "step": 1425 - }, - { - "epoch": 0.22963887435081928, - "grad_norm": 0.0006789955659769475, - "learning_rate": 0.00019999997434042824, - "loss": 46.0, - "step": 1426 - }, - { - "epoch": 0.22979991142960668, - "grad_norm": 0.0006706491694785655, - "learning_rate": 0.0001999999743041731, - "loss": 46.0, - "step": 1427 - }, - { - "epoch": 0.22996094850839405, - "grad_norm": 0.0005731749115511775, - "learning_rate": 0.00019999997426789238, - "loss": 46.0, - "step": 1428 - }, - { - "epoch": 0.23012198558718144, - "grad_norm": 0.0005523664294742048, - "learning_rate": 0.00019999997423158608, - "loss": 46.0, - "step": 1429 - }, - { - "epoch": 0.23028302266596884, - "grad_norm": 0.000494952779263258, - "learning_rate": 0.00019999997419525416, - "loss": 46.0, - "step": 1430 - }, - { - "epoch": 0.23044405974475624, - "grad_norm": 0.0006475234404206276, - "learning_rate": 0.00019999997415889664, - "loss": 46.0, - "step": 1431 - }, - { - "epoch": 0.23060509682354363, - "grad_norm": 0.0004894636804237962, - "learning_rate": 0.00019999997412251355, - "loss": 46.0, - "step": 1432 - }, - { - "epoch": 0.230766133902331, - "grad_norm": 0.000386284664273262, - "learning_rate": 0.00019999997408610485, - "loss": 46.0, - "step": 1433 - }, - { - "epoch": 0.2309271709811184, - "grad_norm": 0.0005470376927405596, - "learning_rate": 0.00019999997404967056, - "loss": 46.0, - "step": 1434 - }, - { - "epoch": 0.2310882080599058, - "grad_norm": 0.0005402215174399316, - "learning_rate": 0.00019999997401321066, - "loss": 46.0, - "step": 1435 - }, - { - "epoch": 0.2312492451386932, - "grad_norm": 0.00040768837789073586, - "learning_rate": 0.0001999999739767252, - "loss": 46.0, - "step": 1436 - }, - { - "epoch": 0.23141028221748058, - "grad_norm": 0.0004655256343539804, - "learning_rate": 0.00019999997394021412, - "loss": 46.0, - "step": 1437 - }, - { - "epoch": 0.23157131929626795, - "grad_norm": 0.002065239707008004, - "learning_rate": 0.00019999997390367745, - "loss": 46.0, - "step": 1438 - }, - { - "epoch": 0.23173235637505535, - "grad_norm": 0.0010129864094778895, - "learning_rate": 0.00019999997386711515, - "loss": 46.0, - "step": 1439 - }, - { - "epoch": 0.23189339345384274, - "grad_norm": 0.0005043009296059608, - "learning_rate": 0.00019999997383052728, - "loss": 46.0, - "step": 1440 - }, - { - "epoch": 0.23205443053263014, - "grad_norm": 0.0005455774371512234, - "learning_rate": 0.00019999997379391385, - "loss": 46.0, - "step": 1441 - }, - { - "epoch": 0.23221546761141754, - "grad_norm": 0.0008954782970249653, - "learning_rate": 0.00019999997375727479, - "loss": 46.0, - "step": 1442 - }, - { - "epoch": 0.23237650469020493, - "grad_norm": 0.00048723258078098297, - "learning_rate": 0.00019999997372061016, - "loss": 46.0, - "step": 1443 - }, - { - "epoch": 0.2325375417689923, - "grad_norm": 0.000462414143839851, - "learning_rate": 0.0001999999736839199, - "loss": 46.0, - "step": 1444 - }, - { - "epoch": 0.2326985788477797, - "grad_norm": 0.00027866853633895516, - "learning_rate": 0.00019999997364720406, - "loss": 46.0, - "step": 1445 - }, - { - "epoch": 0.2328596159265671, - "grad_norm": 0.0005929471226409078, - "learning_rate": 0.00019999997361046264, - "loss": 46.0, - "step": 1446 - }, - { - "epoch": 0.2330206530053545, - "grad_norm": 0.0005723999929614365, - "learning_rate": 0.00019999997357369561, - "loss": 46.0, - "step": 1447 - }, - { - "epoch": 0.23318169008414188, - "grad_norm": 0.00035127042792737484, - "learning_rate": 0.000199999973536903, - "loss": 46.0, - "step": 1448 - }, - { - "epoch": 0.23334272716292925, - "grad_norm": 0.0011589425848796964, - "learning_rate": 0.00019999997350008476, - "loss": 46.0, - "step": 1449 - }, - { - "epoch": 0.23350376424171665, - "grad_norm": 0.0015349846798926592, - "learning_rate": 0.00019999997346324095, - "loss": 46.0, - "step": 1450 - }, - { - "epoch": 0.23366480132050405, - "grad_norm": 0.001190618029795587, - "learning_rate": 0.00019999997342637157, - "loss": 46.0, - "step": 1451 - }, - { - "epoch": 0.23382583839929144, - "grad_norm": 0.0009072727989405394, - "learning_rate": 0.00019999997338947655, - "loss": 46.0, - "step": 1452 - }, - { - "epoch": 0.23398687547807884, - "grad_norm": 0.0006187950493767858, - "learning_rate": 0.00019999997335255597, - "loss": 46.0, - "step": 1453 - }, - { - "epoch": 0.2341479125568662, - "grad_norm": 0.00041065565892495215, - "learning_rate": 0.00019999997331560974, - "loss": 46.0, - "step": 1454 - }, - { - "epoch": 0.2343089496356536, - "grad_norm": 0.0014450005255639553, - "learning_rate": 0.000199999973278638, - "loss": 46.0, - "step": 1455 - }, - { - "epoch": 0.234469986714441, - "grad_norm": 0.0004300459986552596, - "learning_rate": 0.00019999997324164062, - "loss": 46.0, - "step": 1456 - }, - { - "epoch": 0.2346310237932284, - "grad_norm": 0.000641741615254432, - "learning_rate": 0.00019999997320461764, - "loss": 46.0, - "step": 1457 - }, - { - "epoch": 0.2347920608720158, - "grad_norm": 0.0007824747590348125, - "learning_rate": 0.00019999997316756907, - "loss": 46.0, - "step": 1458 - }, - { - "epoch": 0.23495309795080319, - "grad_norm": 0.0004166323342360556, - "learning_rate": 0.0001999999731304949, - "loss": 46.0, - "step": 1459 - }, - { - "epoch": 0.23511413502959055, - "grad_norm": 0.0008934013312682509, - "learning_rate": 0.00019999997309339514, - "loss": 46.0, - "step": 1460 - }, - { - "epoch": 0.23527517210837795, - "grad_norm": 0.0007518811617046595, - "learning_rate": 0.00019999997305626978, - "loss": 46.0, - "step": 1461 - }, - { - "epoch": 0.23543620918716535, - "grad_norm": 0.0011339082848280668, - "learning_rate": 0.00019999997301911883, - "loss": 46.0, - "step": 1462 - }, - { - "epoch": 0.23559724626595274, - "grad_norm": 0.0009774438804015517, - "learning_rate": 0.0001999999729819423, - "loss": 46.0, - "step": 1463 - }, - { - "epoch": 0.23575828334474014, - "grad_norm": 0.0009151012054644525, - "learning_rate": 0.00019999997294474015, - "loss": 46.0, - "step": 1464 - }, - { - "epoch": 0.2359193204235275, - "grad_norm": 0.00034659350058063865, - "learning_rate": 0.00019999997290751241, - "loss": 46.0, - "step": 1465 - }, - { - "epoch": 0.2360803575023149, - "grad_norm": 0.0022008137311786413, - "learning_rate": 0.0001999999728702591, - "loss": 46.0, - "step": 1466 - }, - { - "epoch": 0.2362413945811023, - "grad_norm": 0.0004182037664577365, - "learning_rate": 0.00019999997283298018, - "loss": 46.0, - "step": 1467 - }, - { - "epoch": 0.2364024316598897, - "grad_norm": 0.0003482593165244907, - "learning_rate": 0.00019999997279567563, - "loss": 46.0, - "step": 1468 - }, - { - "epoch": 0.2365634687386771, - "grad_norm": 0.0003351763589307666, - "learning_rate": 0.00019999997275834552, - "loss": 46.0, - "step": 1469 - }, - { - "epoch": 0.23672450581746446, - "grad_norm": 0.001521661994047463, - "learning_rate": 0.00019999997272098982, - "loss": 46.0, - "step": 1470 - }, - { - "epoch": 0.23688554289625186, - "grad_norm": 0.0005002696998417377, - "learning_rate": 0.0001999999726836085, - "loss": 46.0, - "step": 1471 - }, - { - "epoch": 0.23704657997503925, - "grad_norm": 0.0005701422924175858, - "learning_rate": 0.00019999997264620164, - "loss": 46.0, - "step": 1472 - }, - { - "epoch": 0.23720761705382665, - "grad_norm": 0.0021671743597835302, - "learning_rate": 0.00019999997260876912, - "loss": 46.0, - "step": 1473 - }, - { - "epoch": 0.23736865413261404, - "grad_norm": 0.0006085827481001616, - "learning_rate": 0.00019999997257131105, - "loss": 46.0, - "step": 1474 - }, - { - "epoch": 0.2375296912114014, - "grad_norm": 0.0009190710843540728, - "learning_rate": 0.00019999997253382736, - "loss": 46.0, - "step": 1475 - }, - { - "epoch": 0.2376907282901888, - "grad_norm": 0.0007860172190703452, - "learning_rate": 0.0001999999724963181, - "loss": 46.0, - "step": 1476 - }, - { - "epoch": 0.2378517653689762, - "grad_norm": 0.0005892843473702669, - "learning_rate": 0.00019999997245878323, - "loss": 46.0, - "step": 1477 - }, - { - "epoch": 0.2380128024477636, - "grad_norm": 0.0005900952382944524, - "learning_rate": 0.00019999997242122275, - "loss": 46.0, - "step": 1478 - }, - { - "epoch": 0.238173839526551, - "grad_norm": 0.0016260374104604125, - "learning_rate": 0.00019999997238363668, - "loss": 46.0, - "step": 1479 - }, - { - "epoch": 0.2383348766053384, - "grad_norm": 0.0005029069725424051, - "learning_rate": 0.00019999997234602503, - "loss": 46.0, - "step": 1480 - }, - { - "epoch": 0.23849591368412576, - "grad_norm": 0.0009254749165847898, - "learning_rate": 0.00019999997230838777, - "loss": 46.0, - "step": 1481 - }, - { - "epoch": 0.23865695076291316, - "grad_norm": 0.0003914689878001809, - "learning_rate": 0.00019999997227072492, - "loss": 46.0, - "step": 1482 - }, - { - "epoch": 0.23881798784170055, - "grad_norm": 0.0007488206611014903, - "learning_rate": 0.00019999997223303648, - "loss": 46.0, - "step": 1483 - }, - { - "epoch": 0.23897902492048795, - "grad_norm": 0.0002851858444046229, - "learning_rate": 0.00019999997219532245, - "loss": 46.0, - "step": 1484 - }, - { - "epoch": 0.23914006199927534, - "grad_norm": 0.0003480883315205574, - "learning_rate": 0.0001999999721575828, - "loss": 46.0, - "step": 1485 - }, - { - "epoch": 0.2393010990780627, - "grad_norm": 0.0012112134136259556, - "learning_rate": 0.00019999997211981758, - "loss": 46.0, - "step": 1486 - }, - { - "epoch": 0.2394621361568501, - "grad_norm": 0.0008839130750857294, - "learning_rate": 0.0001999999720820268, - "loss": 46.0, - "step": 1487 - }, - { - "epoch": 0.2396231732356375, - "grad_norm": 0.0004892251454293728, - "learning_rate": 0.00019999997204421036, - "loss": 46.0, - "step": 1488 - }, - { - "epoch": 0.2397842103144249, - "grad_norm": 0.00029318995075300336, - "learning_rate": 0.00019999997200636832, - "loss": 46.0, - "step": 1489 - }, - { - "epoch": 0.2399452473932123, - "grad_norm": 0.0005369277787394822, - "learning_rate": 0.00019999997196850074, - "loss": 46.0, - "step": 1490 - }, - { - "epoch": 0.24010628447199966, - "grad_norm": 0.000695554306730628, - "learning_rate": 0.00019999997193060755, - "loss": 46.0, - "step": 1491 - }, - { - "epoch": 0.24026732155078706, - "grad_norm": 0.0009490074007771909, - "learning_rate": 0.00019999997189268872, - "loss": 46.0, - "step": 1492 - }, - { - "epoch": 0.24042835862957446, - "grad_norm": 0.0004491792933549732, - "learning_rate": 0.00019999997185474438, - "loss": 46.0, - "step": 1493 - }, - { - "epoch": 0.24058939570836185, - "grad_norm": 0.0015042545273900032, - "learning_rate": 0.00019999997181677437, - "loss": 46.0, - "step": 1494 - }, - { - "epoch": 0.24075043278714925, - "grad_norm": 0.002967716893181205, - "learning_rate": 0.00019999997177877878, - "loss": 46.0, - "step": 1495 - }, - { - "epoch": 0.24091146986593664, - "grad_norm": 0.0004654759541153908, - "learning_rate": 0.00019999997174075762, - "loss": 46.0, - "step": 1496 - }, - { - "epoch": 0.241072506944724, - "grad_norm": 0.0005797880003228784, - "learning_rate": 0.00019999997170271086, - "loss": 46.0, - "step": 1497 - }, - { - "epoch": 0.2412335440235114, - "grad_norm": 0.0014245264464989305, - "learning_rate": 0.00019999997166463847, - "loss": 46.0, - "step": 1498 - }, - { - "epoch": 0.2413945811022988, - "grad_norm": 0.0009832995710894465, - "learning_rate": 0.0001999999716265405, - "loss": 46.0, - "step": 1499 - }, - { - "epoch": 0.2415556181810862, - "grad_norm": 0.0005896422662772238, - "learning_rate": 0.00019999997158841697, - "loss": 46.0, - "step": 1500 - }, - { - "epoch": 0.2417166552598736, - "grad_norm": 0.000540532055310905, - "learning_rate": 0.00019999997155026783, - "loss": 46.0, - "step": 1501 - }, - { - "epoch": 0.24187769233866097, - "grad_norm": 0.0005968528566882014, - "learning_rate": 0.0001999999715120931, - "loss": 46.0, - "step": 1502 - }, - { - "epoch": 0.24203872941744836, - "grad_norm": 0.0003688789438456297, - "learning_rate": 0.00019999997147389275, - "loss": 46.0, - "step": 1503 - }, - { - "epoch": 0.24219976649623576, - "grad_norm": 0.0022087355609983206, - "learning_rate": 0.0001999999714356668, - "loss": 46.0, - "step": 1504 - }, - { - "epoch": 0.24236080357502315, - "grad_norm": 0.0012558646267279983, - "learning_rate": 0.00019999997139741527, - "loss": 46.0, - "step": 1505 - }, - { - "epoch": 0.24252184065381055, - "grad_norm": 0.000379395904019475, - "learning_rate": 0.00019999997135913814, - "loss": 46.0, - "step": 1506 - }, - { - "epoch": 0.24268287773259792, - "grad_norm": 0.0005132973310537636, - "learning_rate": 0.00019999997132083544, - "loss": 46.0, - "step": 1507 - }, - { - "epoch": 0.24284391481138531, - "grad_norm": 0.0005915425135754049, - "learning_rate": 0.00019999997128250713, - "loss": 46.0, - "step": 1508 - }, - { - "epoch": 0.2430049518901727, - "grad_norm": 0.0006312188343144953, - "learning_rate": 0.0001999999712441532, - "loss": 46.0, - "step": 1509 - }, - { - "epoch": 0.2431659889689601, - "grad_norm": 0.0008536765235476196, - "learning_rate": 0.00019999997120577373, - "loss": 46.0, - "step": 1510 - }, - { - "epoch": 0.2433270260477475, - "grad_norm": 0.0006685783737339079, - "learning_rate": 0.0001999999711673686, - "loss": 46.0, - "step": 1511 - }, - { - "epoch": 0.2434880631265349, - "grad_norm": 0.0004960522637702525, - "learning_rate": 0.00019999997112893795, - "loss": 46.0, - "step": 1512 - }, - { - "epoch": 0.24364910020532227, - "grad_norm": 0.0005449623567983508, - "learning_rate": 0.00019999997109048162, - "loss": 46.0, - "step": 1513 - }, - { - "epoch": 0.24381013728410966, - "grad_norm": 0.0006992222042754292, - "learning_rate": 0.00019999997105199974, - "loss": 46.0, - "step": 1514 - }, - { - "epoch": 0.24397117436289706, - "grad_norm": 0.0009419902344234288, - "learning_rate": 0.0001999999710134923, - "loss": 46.0, - "step": 1515 - }, - { - "epoch": 0.24413221144168445, - "grad_norm": 0.0003883341560140252, - "learning_rate": 0.0001999999709749592, - "loss": 46.0, - "step": 1516 - }, - { - "epoch": 0.24429324852047185, - "grad_norm": 0.0006075865821912885, - "learning_rate": 0.00019999997093640053, - "loss": 46.0, - "step": 1517 - }, - { - "epoch": 0.24445428559925922, - "grad_norm": 0.00044282834278419614, - "learning_rate": 0.0001999999708978163, - "loss": 46.0, - "step": 1518 - }, - { - "epoch": 0.24461532267804662, - "grad_norm": 0.0008768010884523392, - "learning_rate": 0.00019999997085920642, - "loss": 46.0, - "step": 1519 - }, - { - "epoch": 0.244776359756834, - "grad_norm": 0.0017720020841807127, - "learning_rate": 0.00019999997082057095, - "loss": 46.0, - "step": 1520 - }, - { - "epoch": 0.2449373968356214, - "grad_norm": 0.0008989129564724863, - "learning_rate": 0.00019999997078190993, - "loss": 46.0, - "step": 1521 - }, - { - "epoch": 0.2450984339144088, - "grad_norm": 0.0010180702665820718, - "learning_rate": 0.00019999997074322327, - "loss": 46.0, - "step": 1522 - }, - { - "epoch": 0.24525947099319617, - "grad_norm": 0.0005450667231343687, - "learning_rate": 0.00019999997070451104, - "loss": 46.0, - "step": 1523 - }, - { - "epoch": 0.24542050807198357, - "grad_norm": 0.00030308065470308065, - "learning_rate": 0.00019999997066577323, - "loss": 46.0, - "step": 1524 - }, - { - "epoch": 0.24558154515077096, - "grad_norm": 0.0004089412686880678, - "learning_rate": 0.0001999999706270098, - "loss": 46.0, - "step": 1525 - }, - { - "epoch": 0.24574258222955836, - "grad_norm": 0.0007387937512248755, - "learning_rate": 0.00019999997058822076, - "loss": 46.0, - "step": 1526 - }, - { - "epoch": 0.24590361930834576, - "grad_norm": 0.00023531209444627166, - "learning_rate": 0.00019999997054940616, - "loss": 46.0, - "step": 1527 - }, - { - "epoch": 0.24606465638713312, - "grad_norm": 0.0010702188592404127, - "learning_rate": 0.00019999997051056595, - "loss": 46.0, - "step": 1528 - }, - { - "epoch": 0.24622569346592052, - "grad_norm": 0.0003514440613798797, - "learning_rate": 0.00019999997047170015, - "loss": 46.0, - "step": 1529 - }, - { - "epoch": 0.24638673054470792, - "grad_norm": 0.00024456362007185817, - "learning_rate": 0.00019999997043280873, - "loss": 46.0, - "step": 1530 - }, - { - "epoch": 0.2465477676234953, - "grad_norm": 0.0003912733809556812, - "learning_rate": 0.00019999997039389175, - "loss": 46.0, - "step": 1531 - }, - { - "epoch": 0.2467088047022827, - "grad_norm": 0.00044792823609896004, - "learning_rate": 0.00019999997035494916, - "loss": 46.0, - "step": 1532 - }, - { - "epoch": 0.2468698417810701, - "grad_norm": 0.000588231545407325, - "learning_rate": 0.00019999997031598099, - "loss": 46.0, - "step": 1533 - }, - { - "epoch": 0.24703087885985747, - "grad_norm": 0.0015653539448976517, - "learning_rate": 0.0001999999702769872, - "loss": 46.0, - "step": 1534 - }, - { - "epoch": 0.24719191593864487, - "grad_norm": 0.0003892197273671627, - "learning_rate": 0.00019999997023796784, - "loss": 46.0, - "step": 1535 - }, - { - "epoch": 0.24735295301743226, - "grad_norm": 0.0002733547589741647, - "learning_rate": 0.00019999997019892285, - "loss": 46.0, - "step": 1536 - }, - { - "epoch": 0.24751399009621966, - "grad_norm": 0.0002564641763456166, - "learning_rate": 0.00019999997015985227, - "loss": 46.0, - "step": 1537 - }, - { - "epoch": 0.24767502717500706, - "grad_norm": 0.0014239680022001266, - "learning_rate": 0.00019999997012075613, - "loss": 46.0, - "step": 1538 - }, - { - "epoch": 0.24783606425379442, - "grad_norm": 0.0004348398360889405, - "learning_rate": 0.00019999997008163437, - "loss": 46.0, - "step": 1539 - }, - { - "epoch": 0.24799710133258182, - "grad_norm": 0.000407269224524498, - "learning_rate": 0.00019999997004248703, - "loss": 46.0, - "step": 1540 - }, - { - "epoch": 0.24815813841136922, - "grad_norm": 0.0006929379305802286, - "learning_rate": 0.00019999997000331407, - "loss": 46.0, - "step": 1541 - }, - { - "epoch": 0.2483191754901566, - "grad_norm": 0.0007344314944930375, - "learning_rate": 0.00019999996996411556, - "loss": 46.0, - "step": 1542 - }, - { - "epoch": 0.248480212568944, - "grad_norm": 0.0005696816951967776, - "learning_rate": 0.00019999996992489143, - "loss": 46.0, - "step": 1543 - }, - { - "epoch": 0.24864124964773138, - "grad_norm": 0.00044112952309660614, - "learning_rate": 0.00019999996988564168, - "loss": 46.0, - "step": 1544 - }, - { - "epoch": 0.24880228672651877, - "grad_norm": 0.0008369360584765673, - "learning_rate": 0.00019999996984636638, - "loss": 46.0, - "step": 1545 - }, - { - "epoch": 0.24896332380530617, - "grad_norm": 0.00041345180943608284, - "learning_rate": 0.00019999996980706546, - "loss": 46.0, - "step": 1546 - }, - { - "epoch": 0.24912436088409357, - "grad_norm": 0.0006784378783777356, - "learning_rate": 0.00019999996976773892, - "loss": 46.0, - "step": 1547 - }, - { - "epoch": 0.24928539796288096, - "grad_norm": 0.0013008955866098404, - "learning_rate": 0.00019999996972838683, - "loss": 46.0, - "step": 1548 - }, - { - "epoch": 0.24944643504166836, - "grad_norm": 0.0007540644728578627, - "learning_rate": 0.00019999996968900912, - "loss": 46.0, - "step": 1549 - }, - { - "epoch": 0.24960747212045573, - "grad_norm": 0.0006829394842498004, - "learning_rate": 0.00019999996964960583, - "loss": 46.0, - "step": 1550 - }, - { - "epoch": 0.24976850919924312, - "grad_norm": 0.0015458069974556565, - "learning_rate": 0.00019999996961017695, - "loss": 46.0, - "step": 1551 - }, - { - "epoch": 0.24992954627803052, - "grad_norm": 0.0007320896256715059, - "learning_rate": 0.00019999996957072248, - "loss": 46.0, - "step": 1552 - }, - { - "epoch": 0.2500905833568179, - "grad_norm": 0.001418554806150496, - "learning_rate": 0.00019999996953124237, - "loss": 46.0, - "step": 1553 - }, - { - "epoch": 0.2502516204356053, - "grad_norm": 0.001416199142113328, - "learning_rate": 0.0001999999694917367, - "loss": 46.0, - "step": 1554 - }, - { - "epoch": 0.2504126575143927, - "grad_norm": 0.0007409258396364748, - "learning_rate": 0.00019999996945220544, - "loss": 46.0, - "step": 1555 - }, - { - "epoch": 0.2505736945931801, - "grad_norm": 0.0005304866353981197, - "learning_rate": 0.00019999996941264856, - "loss": 46.0, - "step": 1556 - }, - { - "epoch": 0.25073473167196747, - "grad_norm": 0.0006201507640071213, - "learning_rate": 0.00019999996937306608, - "loss": 46.0, - "step": 1557 - }, - { - "epoch": 0.25089576875075487, - "grad_norm": 0.00029246992198750377, - "learning_rate": 0.00019999996933345803, - "loss": 46.0, - "step": 1558 - }, - { - "epoch": 0.25105680582954226, - "grad_norm": 0.00046166894026100636, - "learning_rate": 0.0001999999692938244, - "loss": 46.0, - "step": 1559 - }, - { - "epoch": 0.25121784290832966, - "grad_norm": 0.002228065626695752, - "learning_rate": 0.00019999996925416515, - "loss": 46.0, - "step": 1560 - }, - { - "epoch": 0.25137887998711705, - "grad_norm": 0.001267742714844644, - "learning_rate": 0.00019999996921448031, - "loss": 46.0, - "step": 1561 - }, - { - "epoch": 0.25153991706590445, - "grad_norm": 0.0004505883844103664, - "learning_rate": 0.00019999996917476986, - "loss": 46.0, - "step": 1562 - }, - { - "epoch": 0.2517009541446918, - "grad_norm": 0.00058057124260813, - "learning_rate": 0.00019999996913503383, - "loss": 46.0, - "step": 1563 - }, - { - "epoch": 0.2518619912234792, - "grad_norm": 0.0009239677456207573, - "learning_rate": 0.0001999999690952722, - "loss": 46.0, - "step": 1564 - }, - { - "epoch": 0.2520230283022666, - "grad_norm": 0.0006732263718731701, - "learning_rate": 0.000199999969055485, - "loss": 46.0, - "step": 1565 - }, - { - "epoch": 0.252184065381054, - "grad_norm": 0.0004894639714621007, - "learning_rate": 0.0001999999690156722, - "loss": 46.0, - "step": 1566 - }, - { - "epoch": 0.2523451024598414, - "grad_norm": 0.0012960133608430624, - "learning_rate": 0.00019999996897583378, - "loss": 46.0, - "step": 1567 - }, - { - "epoch": 0.25250613953862877, - "grad_norm": 0.0005833309260196984, - "learning_rate": 0.00019999996893596978, - "loss": 46.0, - "step": 1568 - }, - { - "epoch": 0.25266717661741617, - "grad_norm": 0.0004100526275578886, - "learning_rate": 0.00019999996889608017, - "loss": 46.0, - "step": 1569 - }, - { - "epoch": 0.25282821369620356, - "grad_norm": 0.0006177093018777668, - "learning_rate": 0.000199999968856165, - "loss": 46.0, - "step": 1570 - }, - { - "epoch": 0.25298925077499096, - "grad_norm": 0.0012867854675278068, - "learning_rate": 0.00019999996881622418, - "loss": 46.0, - "step": 1571 - }, - { - "epoch": 0.25315028785377836, - "grad_norm": 0.0005361064104363322, - "learning_rate": 0.0001999999687762578, - "loss": 46.0, - "step": 1572 - }, - { - "epoch": 0.2533113249325657, - "grad_norm": 0.00029784213984385133, - "learning_rate": 0.00019999996873626584, - "loss": 46.0, - "step": 1573 - }, - { - "epoch": 0.2534723620113531, - "grad_norm": 0.000444651348516345, - "learning_rate": 0.00019999996869624824, - "loss": 46.0, - "step": 1574 - }, - { - "epoch": 0.2536333990901405, - "grad_norm": 0.0005632179090753198, - "learning_rate": 0.00019999996865620507, - "loss": 46.0, - "step": 1575 - }, - { - "epoch": 0.2537944361689279, - "grad_norm": 0.0005233383853919804, - "learning_rate": 0.00019999996861613632, - "loss": 46.0, - "step": 1576 - }, - { - "epoch": 0.2539554732477153, - "grad_norm": 0.0007815441931597888, - "learning_rate": 0.00019999996857604196, - "loss": 46.0, - "step": 1577 - }, - { - "epoch": 0.2541165103265027, - "grad_norm": 0.0005321700591593981, - "learning_rate": 0.000199999968535922, - "loss": 46.0, - "step": 1578 - }, - { - "epoch": 0.25427754740529007, - "grad_norm": 0.0005488034221343696, - "learning_rate": 0.00019999996849577646, - "loss": 46.0, - "step": 1579 - }, - { - "epoch": 0.25443858448407747, - "grad_norm": 0.00032734667183831334, - "learning_rate": 0.0001999999684556053, - "loss": 46.0, - "step": 1580 - }, - { - "epoch": 0.25459962156286486, - "grad_norm": 0.00039045518497005105, - "learning_rate": 0.00019999996841540857, - "loss": 46.0, - "step": 1581 - }, - { - "epoch": 0.25476065864165226, - "grad_norm": 0.0007887433748692274, - "learning_rate": 0.00019999996837518624, - "loss": 46.0, - "step": 1582 - }, - { - "epoch": 0.25492169572043966, - "grad_norm": 0.0002828974393196404, - "learning_rate": 0.00019999996833493832, - "loss": 46.0, - "step": 1583 - }, - { - "epoch": 0.255082732799227, - "grad_norm": 0.0010236426023766398, - "learning_rate": 0.00019999996829466482, - "loss": 46.0, - "step": 1584 - }, - { - "epoch": 0.2552437698780144, - "grad_norm": 0.0007546267588622868, - "learning_rate": 0.00019999996825436568, - "loss": 46.0, - "step": 1585 - }, - { - "epoch": 0.2554048069568018, - "grad_norm": 0.001640638685785234, - "learning_rate": 0.00019999996821404097, - "loss": 46.0, - "step": 1586 - }, - { - "epoch": 0.2555658440355892, - "grad_norm": 0.0006235822220332921, - "learning_rate": 0.00019999996817369065, - "loss": 46.0, - "step": 1587 - }, - { - "epoch": 0.2557268811143766, - "grad_norm": 0.0017373906448483467, - "learning_rate": 0.00019999996813331477, - "loss": 46.0, - "step": 1588 - }, - { - "epoch": 0.255887918193164, - "grad_norm": 0.0007070084102451801, - "learning_rate": 0.00019999996809291325, - "loss": 46.0, - "step": 1589 - }, - { - "epoch": 0.2560489552719514, - "grad_norm": 0.00045126734767109156, - "learning_rate": 0.00019999996805248617, - "loss": 46.0, - "step": 1590 - }, - { - "epoch": 0.25620999235073877, - "grad_norm": 0.0006181900971569121, - "learning_rate": 0.00019999996801203348, - "loss": 46.0, - "step": 1591 - }, - { - "epoch": 0.25637102942952616, - "grad_norm": 0.0002809020515996963, - "learning_rate": 0.0001999999679715552, - "loss": 46.0, - "step": 1592 - }, - { - "epoch": 0.25653206650831356, - "grad_norm": 0.002091732807457447, - "learning_rate": 0.0001999999679310513, - "loss": 46.0, - "step": 1593 - }, - { - "epoch": 0.25669310358710096, - "grad_norm": 0.00030604659696109593, - "learning_rate": 0.00019999996789052187, - "loss": 46.0, - "step": 1594 - }, - { - "epoch": 0.2568541406658883, - "grad_norm": 0.0003619135241024196, - "learning_rate": 0.0001999999678499668, - "loss": 46.0, - "step": 1595 - }, - { - "epoch": 0.2570151777446757, - "grad_norm": 0.0003118929744232446, - "learning_rate": 0.00019999996780938612, - "loss": 46.0, - "step": 1596 - }, - { - "epoch": 0.2571762148234631, - "grad_norm": 0.0004634863289538771, - "learning_rate": 0.00019999996776877988, - "loss": 46.0, - "step": 1597 - }, - { - "epoch": 0.2573372519022505, - "grad_norm": 0.0014591977233067155, - "learning_rate": 0.00019999996772814802, - "loss": 46.0, - "step": 1598 - }, - { - "epoch": 0.2574982889810379, - "grad_norm": 0.00050339475274086, - "learning_rate": 0.0001999999676874906, - "loss": 46.0, - "step": 1599 - }, - { - "epoch": 0.2576593260598253, - "grad_norm": 0.0008291835547424853, - "learning_rate": 0.00019999996764680754, - "loss": 46.0, - "step": 1600 - }, - { - "epoch": 0.2578203631386127, - "grad_norm": 0.0006652891752310097, - "learning_rate": 0.0001999999676060989, - "loss": 46.0, - "step": 1601 - }, - { - "epoch": 0.25798140021740007, - "grad_norm": 0.002328990027308464, - "learning_rate": 0.00019999996756536468, - "loss": 46.0, - "step": 1602 - }, - { - "epoch": 0.25814243729618747, - "grad_norm": 0.0004221296403557062, - "learning_rate": 0.00019999996752460483, - "loss": 46.0, - "step": 1603 - }, - { - "epoch": 0.25830347437497486, - "grad_norm": 0.0019949637353420258, - "learning_rate": 0.00019999996748381942, - "loss": 46.0, - "step": 1604 - }, - { - "epoch": 0.2584645114537622, - "grad_norm": 0.000833825848530978, - "learning_rate": 0.00019999996744300843, - "loss": 46.0, - "step": 1605 - }, - { - "epoch": 0.2586255485325496, - "grad_norm": 0.00023156612587627023, - "learning_rate": 0.0001999999674021718, - "loss": 46.0, - "step": 1606 - }, - { - "epoch": 0.258786585611337, - "grad_norm": 0.00037104793591424823, - "learning_rate": 0.0001999999673613096, - "loss": 46.0, - "step": 1607 - }, - { - "epoch": 0.2589476226901244, - "grad_norm": 0.0005041220574639738, - "learning_rate": 0.0001999999673204218, - "loss": 46.0, - "step": 1608 - }, - { - "epoch": 0.2591086597689118, - "grad_norm": 0.0009729259763844311, - "learning_rate": 0.0001999999672795084, - "loss": 46.0, - "step": 1609 - }, - { - "epoch": 0.2592696968476992, - "grad_norm": 0.0005305565427988768, - "learning_rate": 0.00019999996723856942, - "loss": 46.0, - "step": 1610 - }, - { - "epoch": 0.2594307339264866, - "grad_norm": 0.0005919802933931351, - "learning_rate": 0.00019999996719760485, - "loss": 46.0, - "step": 1611 - }, - { - "epoch": 0.259591771005274, - "grad_norm": 0.0008214295958168805, - "learning_rate": 0.00019999996715661466, - "loss": 46.0, - "step": 1612 - }, - { - "epoch": 0.25975280808406137, - "grad_norm": 0.0016582038952037692, - "learning_rate": 0.0001999999671155989, - "loss": 46.0, - "step": 1613 - }, - { - "epoch": 0.25991384516284877, - "grad_norm": 0.0005017261137254536, - "learning_rate": 0.0001999999670745575, - "loss": 46.0, - "step": 1614 - }, - { - "epoch": 0.26007488224163616, - "grad_norm": 0.001049538841471076, - "learning_rate": 0.00019999996703349055, - "loss": 46.0, - "step": 1615 - }, - { - "epoch": 0.2602359193204235, - "grad_norm": 0.0008739667246118188, - "learning_rate": 0.00019999996699239801, - "loss": 46.0, - "step": 1616 - }, - { - "epoch": 0.2603969563992109, - "grad_norm": 0.0003842436126433313, - "learning_rate": 0.00019999996695127984, - "loss": 46.0, - "step": 1617 - }, - { - "epoch": 0.2605579934779983, - "grad_norm": 0.0005943210562691092, - "learning_rate": 0.0001999999669101361, - "loss": 46.0, - "step": 1618 - }, - { - "epoch": 0.2607190305567857, - "grad_norm": 0.0008918180246837437, - "learning_rate": 0.00019999996686896675, - "loss": 46.0, - "step": 1619 - }, - { - "epoch": 0.2608800676355731, - "grad_norm": 0.0005695180152542889, - "learning_rate": 0.00019999996682777183, - "loss": 46.0, - "step": 1620 - }, - { - "epoch": 0.2610411047143605, - "grad_norm": 0.0008891428587958217, - "learning_rate": 0.00019999996678655128, - "loss": 46.0, - "step": 1621 - }, - { - "epoch": 0.2612021417931479, - "grad_norm": 0.001256552874110639, - "learning_rate": 0.00019999996674530514, - "loss": 46.0, - "step": 1622 - }, - { - "epoch": 0.2613631788719353, - "grad_norm": 0.0011620331788435578, - "learning_rate": 0.00019999996670403344, - "loss": 46.0, - "step": 1623 - }, - { - "epoch": 0.26152421595072267, - "grad_norm": 0.00022432550031226128, - "learning_rate": 0.00019999996666273612, - "loss": 46.0, - "step": 1624 - }, - { - "epoch": 0.26168525302951007, - "grad_norm": 0.0008183136233128607, - "learning_rate": 0.0001999999666214132, - "loss": 46.0, - "step": 1625 - }, - { - "epoch": 0.2618462901082974, - "grad_norm": 0.0006643966771662235, - "learning_rate": 0.0001999999665800647, - "loss": 46.0, - "step": 1626 - }, - { - "epoch": 0.2620073271870848, - "grad_norm": 0.0006474255351349711, - "learning_rate": 0.0001999999665386906, - "loss": 46.0, - "step": 1627 - }, - { - "epoch": 0.2621683642658722, - "grad_norm": 0.0021476037800312042, - "learning_rate": 0.0001999999664972909, - "loss": 46.0, - "step": 1628 - }, - { - "epoch": 0.2623294013446596, - "grad_norm": 0.0009265922126360238, - "learning_rate": 0.0001999999664558656, - "loss": 46.0, - "step": 1629 - }, - { - "epoch": 0.262490438423447, - "grad_norm": 0.0010417335433885455, - "learning_rate": 0.00019999996641441471, - "loss": 46.0, - "step": 1630 - }, - { - "epoch": 0.2626514755022344, - "grad_norm": 0.00042905265581794083, - "learning_rate": 0.00019999996637293826, - "loss": 46.0, - "step": 1631 - }, - { - "epoch": 0.2628125125810218, - "grad_norm": 0.0004169638850726187, - "learning_rate": 0.00019999996633143617, - "loss": 46.0, - "step": 1632 - }, - { - "epoch": 0.2629735496598092, - "grad_norm": 0.0007529768045060337, - "learning_rate": 0.0001999999662899085, - "loss": 46.0, - "step": 1633 - }, - { - "epoch": 0.2631345867385966, - "grad_norm": 0.0016266483580693603, - "learning_rate": 0.00019999996624835525, - "loss": 46.0, - "step": 1634 - }, - { - "epoch": 0.263295623817384, - "grad_norm": 0.0009302504477091134, - "learning_rate": 0.00019999996620677636, - "loss": 46.0, - "step": 1635 - }, - { - "epoch": 0.26345666089617137, - "grad_norm": 0.00047719170106574893, - "learning_rate": 0.00019999996616517192, - "loss": 46.0, - "step": 1636 - }, - { - "epoch": 0.2636176979749587, - "grad_norm": 0.000791926053352654, - "learning_rate": 0.0001999999661235419, - "loss": 46.0, - "step": 1637 - }, - { - "epoch": 0.2637787350537461, - "grad_norm": 0.00034146930556744337, - "learning_rate": 0.00019999996608188622, - "loss": 46.0, - "step": 1638 - }, - { - "epoch": 0.2639397721325335, - "grad_norm": 0.0005008805310353637, - "learning_rate": 0.000199999966040205, - "loss": 46.0, - "step": 1639 - }, - { - "epoch": 0.2641008092113209, - "grad_norm": 0.00041181285632774234, - "learning_rate": 0.00019999996599849815, - "loss": 46.0, - "step": 1640 - }, - { - "epoch": 0.2642618462901083, - "grad_norm": 0.0003014621906913817, - "learning_rate": 0.00019999996595676574, - "loss": 46.0, - "step": 1641 - }, - { - "epoch": 0.2644228833688957, - "grad_norm": 0.0006959281163290143, - "learning_rate": 0.00019999996591500772, - "loss": 46.0, - "step": 1642 - }, - { - "epoch": 0.2645839204476831, - "grad_norm": 0.0006425845203921199, - "learning_rate": 0.00019999996587322406, - "loss": 46.0, - "step": 1643 - }, - { - "epoch": 0.2647449575264705, - "grad_norm": 0.0005940453265793622, - "learning_rate": 0.00019999996583141487, - "loss": 46.0, - "step": 1644 - }, - { - "epoch": 0.2649059946052579, - "grad_norm": 0.0006798694957979023, - "learning_rate": 0.00019999996578958006, - "loss": 46.0, - "step": 1645 - }, - { - "epoch": 0.2650670316840453, - "grad_norm": 0.0007211341289803386, - "learning_rate": 0.00019999996574771966, - "loss": 46.0, - "step": 1646 - }, - { - "epoch": 0.26522806876283267, - "grad_norm": 0.0005820472142659128, - "learning_rate": 0.00019999996570583365, - "loss": 46.0, - "step": 1647 - }, - { - "epoch": 0.26538910584162, - "grad_norm": 0.0013958399649709463, - "learning_rate": 0.00019999996566392208, - "loss": 46.0, - "step": 1648 - }, - { - "epoch": 0.2655501429204074, - "grad_norm": 0.0003573969879653305, - "learning_rate": 0.00019999996562198487, - "loss": 46.0, - "step": 1649 - }, - { - "epoch": 0.2657111799991948, - "grad_norm": 0.0004746705817524344, - "learning_rate": 0.0001999999655800221, - "loss": 46.0, - "step": 1650 - }, - { - "epoch": 0.2658722170779822, - "grad_norm": 0.0003913062100764364, - "learning_rate": 0.00019999996553803372, - "loss": 46.0, - "step": 1651 - }, - { - "epoch": 0.2660332541567696, - "grad_norm": 0.0010346017079427838, - "learning_rate": 0.00019999996549601974, - "loss": 46.0, - "step": 1652 - }, - { - "epoch": 0.266194291235557, - "grad_norm": 0.0005458653904497623, - "learning_rate": 0.00019999996545398018, - "loss": 46.0, - "step": 1653 - }, - { - "epoch": 0.2663553283143444, - "grad_norm": 0.0004970126319676638, - "learning_rate": 0.000199999965411915, - "loss": 46.0, - "step": 1654 - }, - { - "epoch": 0.2665163653931318, - "grad_norm": 0.00036395867937244475, - "learning_rate": 0.00019999996536982425, - "loss": 46.0, - "step": 1655 - }, - { - "epoch": 0.2666774024719192, - "grad_norm": 0.0016682945424690843, - "learning_rate": 0.0001999999653277079, - "loss": 46.0, - "step": 1656 - }, - { - "epoch": 0.2668384395507066, - "grad_norm": 0.0003370000922586769, - "learning_rate": 0.00019999996528556596, - "loss": 46.0, - "step": 1657 - }, - { - "epoch": 0.2669994766294939, - "grad_norm": 0.0012163551291450858, - "learning_rate": 0.00019999996524339841, - "loss": 46.0, - "step": 1658 - }, - { - "epoch": 0.2671605137082813, - "grad_norm": 0.0010939136845991015, - "learning_rate": 0.00019999996520120528, - "loss": 46.0, - "step": 1659 - }, - { - "epoch": 0.2673215507870687, - "grad_norm": 0.0007409893441945314, - "learning_rate": 0.00019999996515898655, - "loss": 46.0, - "step": 1660 - }, - { - "epoch": 0.2674825878658561, - "grad_norm": 0.00038251234218478203, - "learning_rate": 0.00019999996511674221, - "loss": 46.0, - "step": 1661 - }, - { - "epoch": 0.2676436249446435, - "grad_norm": 0.0004333512915764004, - "learning_rate": 0.0001999999650744723, - "loss": 46.0, - "step": 1662 - }, - { - "epoch": 0.2678046620234309, - "grad_norm": 0.0010560126975178719, - "learning_rate": 0.0001999999650321768, - "loss": 46.0, - "step": 1663 - }, - { - "epoch": 0.2679656991022183, - "grad_norm": 0.000265865441178903, - "learning_rate": 0.00019999996498985568, - "loss": 46.0, - "step": 1664 - }, - { - "epoch": 0.2681267361810057, - "grad_norm": 0.0004550080338958651, - "learning_rate": 0.00019999996494750896, - "loss": 46.0, - "step": 1665 - }, - { - "epoch": 0.2682877732597931, - "grad_norm": 0.000625417975243181, - "learning_rate": 0.00019999996490513666, - "loss": 46.0, - "step": 1666 - }, - { - "epoch": 0.2684488103385805, - "grad_norm": 0.0003973632992710918, - "learning_rate": 0.00019999996486273877, - "loss": 46.0, - "step": 1667 - }, - { - "epoch": 0.2686098474173679, - "grad_norm": 0.0009851433569565415, - "learning_rate": 0.0001999999648203153, - "loss": 46.0, - "step": 1668 - }, - { - "epoch": 0.2687708844961552, - "grad_norm": 0.0006015888066031039, - "learning_rate": 0.00019999996477786618, - "loss": 46.0, - "step": 1669 - }, - { - "epoch": 0.2689319215749426, - "grad_norm": 0.0022469062823802233, - "learning_rate": 0.0001999999647353915, - "loss": 46.0, - "step": 1670 - }, - { - "epoch": 0.26909295865373, - "grad_norm": 0.0010461158817633986, - "learning_rate": 0.00019999996469289124, - "loss": 46.0, - "step": 1671 - }, - { - "epoch": 0.2692539957325174, - "grad_norm": 0.0011682932963594794, - "learning_rate": 0.00019999996465036536, - "loss": 46.0, - "step": 1672 - }, - { - "epoch": 0.2694150328113048, - "grad_norm": 0.0020578380208462477, - "learning_rate": 0.0001999999646078139, - "loss": 46.0, - "step": 1673 - }, - { - "epoch": 0.2695760698900922, - "grad_norm": 0.0016831484390422702, - "learning_rate": 0.00019999996456523684, - "loss": 46.0, - "step": 1674 - }, - { - "epoch": 0.2697371069688796, - "grad_norm": 0.0006138753378763795, - "learning_rate": 0.0001999999645226342, - "loss": 46.0, - "step": 1675 - }, - { - "epoch": 0.269898144047667, - "grad_norm": 0.00041789753595367074, - "learning_rate": 0.00019999996448000595, - "loss": 46.0, - "step": 1676 - }, - { - "epoch": 0.2700591811264544, - "grad_norm": 0.0006504368502646685, - "learning_rate": 0.0001999999644373521, - "loss": 46.0, - "step": 1677 - }, - { - "epoch": 0.2702202182052418, - "grad_norm": 0.0007486810791306198, - "learning_rate": 0.00019999996439467265, - "loss": 46.0, - "step": 1678 - }, - { - "epoch": 0.2703812552840291, - "grad_norm": 0.0007228718604892492, - "learning_rate": 0.0001999999643519676, - "loss": 46.0, - "step": 1679 - }, - { - "epoch": 0.2705422923628165, - "grad_norm": 0.00026839703787118196, - "learning_rate": 0.000199999964309237, - "loss": 46.0, - "step": 1680 - }, - { - "epoch": 0.2707033294416039, - "grad_norm": 0.00033148820511996746, - "learning_rate": 0.00019999996426648076, - "loss": 46.0, - "step": 1681 - }, - { - "epoch": 0.2708643665203913, - "grad_norm": 0.00047075640759430826, - "learning_rate": 0.00019999996422369895, - "loss": 46.0, - "step": 1682 - }, - { - "epoch": 0.2710254035991787, - "grad_norm": 0.00040921446634456515, - "learning_rate": 0.00019999996418089153, - "loss": 46.0, - "step": 1683 - }, - { - "epoch": 0.2711864406779661, - "grad_norm": 0.0005522763822227716, - "learning_rate": 0.00019999996413805853, - "loss": 46.0, - "step": 1684 - }, - { - "epoch": 0.2713474777567535, - "grad_norm": 0.0018814326031133533, - "learning_rate": 0.0001999999640951999, - "loss": 46.0, - "step": 1685 - }, - { - "epoch": 0.2715085148355409, - "grad_norm": 0.0008086210582405329, - "learning_rate": 0.00019999996405231573, - "loss": 46.0, - "step": 1686 - }, - { - "epoch": 0.2716695519143283, - "grad_norm": 0.003679858520627022, - "learning_rate": 0.00019999996400940596, - "loss": 46.0, - "step": 1687 - }, - { - "epoch": 0.2718305889931157, - "grad_norm": 0.0006994935101829469, - "learning_rate": 0.00019999996396647055, - "loss": 46.0, - "step": 1688 - }, - { - "epoch": 0.2719916260719031, - "grad_norm": 0.001574354711920023, - "learning_rate": 0.00019999996392350958, - "loss": 46.0, - "step": 1689 - }, - { - "epoch": 0.2721526631506904, - "grad_norm": 0.0005769500276073813, - "learning_rate": 0.000199999963880523, - "loss": 46.0, - "step": 1690 - }, - { - "epoch": 0.2723137002294778, - "grad_norm": 0.00021050751092843711, - "learning_rate": 0.00019999996383751083, - "loss": 46.0, - "step": 1691 - }, - { - "epoch": 0.2724747373082652, - "grad_norm": 0.0010245003504678607, - "learning_rate": 0.00019999996379447304, - "loss": 46.0, - "step": 1692 - }, - { - "epoch": 0.2726357743870526, - "grad_norm": 0.0017468957230448723, - "learning_rate": 0.0001999999637514097, - "loss": 46.0, - "step": 1693 - }, - { - "epoch": 0.27279681146584, - "grad_norm": 0.0009225663961842656, - "learning_rate": 0.0001999999637083207, - "loss": 46.0, - "step": 1694 - }, - { - "epoch": 0.2729578485446274, - "grad_norm": 0.0004138411895837635, - "learning_rate": 0.00019999996366520617, - "loss": 46.0, - "step": 1695 - }, - { - "epoch": 0.2731188856234148, - "grad_norm": 0.000979991746135056, - "learning_rate": 0.00019999996362206603, - "loss": 46.0, - "step": 1696 - }, - { - "epoch": 0.2732799227022022, - "grad_norm": 0.0005196937127038836, - "learning_rate": 0.00019999996357890029, - "loss": 46.0, - "step": 1697 - }, - { - "epoch": 0.2734409597809896, - "grad_norm": 0.0005258449236862361, - "learning_rate": 0.00019999996353570895, - "loss": 46.0, - "step": 1698 - }, - { - "epoch": 0.273601996859777, - "grad_norm": 0.0002927335153799504, - "learning_rate": 0.00019999996349249203, - "loss": 46.0, - "step": 1699 - }, - { - "epoch": 0.2737630339385644, - "grad_norm": 0.00042445422150194645, - "learning_rate": 0.0001999999634492495, - "loss": 46.0, - "step": 1700 - }, - { - "epoch": 0.2739240710173517, - "grad_norm": 0.0007652249187231064, - "learning_rate": 0.00019999996340598137, - "loss": 46.0, - "step": 1701 - }, - { - "epoch": 0.2740851080961391, - "grad_norm": 0.00042803556425496936, - "learning_rate": 0.00019999996336268766, - "loss": 46.0, - "step": 1702 - }, - { - "epoch": 0.2742461451749265, - "grad_norm": 0.0005782022490166128, - "learning_rate": 0.00019999996331936833, - "loss": 46.0, - "step": 1703 - }, - { - "epoch": 0.2744071822537139, - "grad_norm": 0.0004274678649380803, - "learning_rate": 0.00019999996327602345, - "loss": 46.0, - "step": 1704 - }, - { - "epoch": 0.2745682193325013, - "grad_norm": 0.000336539902491495, - "learning_rate": 0.00019999996323265292, - "loss": 46.0, - "step": 1705 - }, - { - "epoch": 0.2747292564112887, - "grad_norm": 0.0009749602759256959, - "learning_rate": 0.00019999996318925684, - "loss": 46.0, - "step": 1706 - }, - { - "epoch": 0.2748902934900761, - "grad_norm": 0.0003062020696233958, - "learning_rate": 0.00019999996314583516, - "loss": 46.0, - "step": 1707 - }, - { - "epoch": 0.2750513305688635, - "grad_norm": 0.0005401447997428477, - "learning_rate": 0.0001999999631023879, - "loss": 46.0, - "step": 1708 - }, - { - "epoch": 0.2752123676476509, - "grad_norm": 0.0007868324755690992, - "learning_rate": 0.000199999963058915, - "loss": 46.0, - "step": 1709 - }, - { - "epoch": 0.2753734047264383, - "grad_norm": 0.0003401754656806588, - "learning_rate": 0.00019999996301541654, - "loss": 46.0, - "step": 1710 - }, - { - "epoch": 0.2755344418052256, - "grad_norm": 0.0010305638425052166, - "learning_rate": 0.00019999996297189246, - "loss": 46.0, - "step": 1711 - }, - { - "epoch": 0.275695478884013, - "grad_norm": 0.0006922843167558312, - "learning_rate": 0.0001999999629283428, - "loss": 46.0, - "step": 1712 - }, - { - "epoch": 0.2758565159628004, - "grad_norm": 0.0005645108758471906, - "learning_rate": 0.00019999996288476752, - "loss": 46.0, - "step": 1713 - }, - { - "epoch": 0.2760175530415878, - "grad_norm": 0.00034462963230907917, - "learning_rate": 0.00019999996284116665, - "loss": 46.0, - "step": 1714 - }, - { - "epoch": 0.2761785901203752, - "grad_norm": 0.0004854914150200784, - "learning_rate": 0.00019999996279754023, - "loss": 46.0, - "step": 1715 - }, - { - "epoch": 0.2763396271991626, - "grad_norm": 0.0007340558222495019, - "learning_rate": 0.00019999996275388816, - "loss": 46.0, - "step": 1716 - }, - { - "epoch": 0.27650066427795, - "grad_norm": 0.0007356297574006021, - "learning_rate": 0.00019999996271021054, - "loss": 46.0, - "step": 1717 - }, - { - "epoch": 0.2766617013567374, - "grad_norm": 0.0003644342941697687, - "learning_rate": 0.00019999996266650732, - "loss": 46.0, - "step": 1718 - }, - { - "epoch": 0.2768227384355248, - "grad_norm": 0.0006618080660700798, - "learning_rate": 0.00019999996262277847, - "loss": 46.0, - "step": 1719 - }, - { - "epoch": 0.2769837755143122, - "grad_norm": 0.0002794034662656486, - "learning_rate": 0.00019999996257902405, - "loss": 46.0, - "step": 1720 - }, - { - "epoch": 0.2771448125930996, - "grad_norm": 0.0007610495667904615, - "learning_rate": 0.00019999996253524405, - "loss": 46.0, - "step": 1721 - }, - { - "epoch": 0.27730584967188693, - "grad_norm": 0.0004926969413645566, - "learning_rate": 0.00019999996249143843, - "loss": 46.0, - "step": 1722 - }, - { - "epoch": 0.2774668867506743, - "grad_norm": 0.0005112563376314938, - "learning_rate": 0.0001999999624476072, - "loss": 46.0, - "step": 1723 - }, - { - "epoch": 0.2776279238294617, - "grad_norm": 0.0007417351589538157, - "learning_rate": 0.00019999996240375044, - "loss": 46.0, - "step": 1724 - }, - { - "epoch": 0.2777889609082491, - "grad_norm": 0.0008114571101032197, - "learning_rate": 0.00019999996235986803, - "loss": 46.0, - "step": 1725 - }, - { - "epoch": 0.2779499979870365, - "grad_norm": 0.0004694694362115115, - "learning_rate": 0.00019999996231596004, - "loss": 46.0, - "step": 1726 - }, - { - "epoch": 0.2781110350658239, - "grad_norm": 0.0013439524918794632, - "learning_rate": 0.00019999996227202643, - "loss": 46.0, - "step": 1727 - }, - { - "epoch": 0.2782720721446113, - "grad_norm": 0.0005587266059592366, - "learning_rate": 0.00019999996222806727, - "loss": 46.0, - "step": 1728 - }, - { - "epoch": 0.2784331092233987, - "grad_norm": 0.0008996734395623207, - "learning_rate": 0.00019999996218408248, - "loss": 46.0, - "step": 1729 - }, - { - "epoch": 0.2785941463021861, - "grad_norm": 0.0003521718899719417, - "learning_rate": 0.00019999996214007212, - "loss": 46.0, - "step": 1730 - }, - { - "epoch": 0.2787551833809735, - "grad_norm": 0.0021810263860970736, - "learning_rate": 0.00019999996209603613, - "loss": 46.0, - "step": 1731 - }, - { - "epoch": 0.27891622045976083, - "grad_norm": 0.000892314943484962, - "learning_rate": 0.00019999996205197456, - "loss": 46.0, - "step": 1732 - }, - { - "epoch": 0.27907725753854823, - "grad_norm": 0.0002361432125326246, - "learning_rate": 0.0001999999620078874, - "loss": 46.0, - "step": 1733 - }, - { - "epoch": 0.2792382946173356, - "grad_norm": 0.0004186244332231581, - "learning_rate": 0.0001999999619637747, - "loss": 46.0, - "step": 1734 - }, - { - "epoch": 0.279399331696123, - "grad_norm": 0.00048368930583819747, - "learning_rate": 0.00019999996191963633, - "loss": 46.0, - "step": 1735 - }, - { - "epoch": 0.2795603687749104, - "grad_norm": 0.0003011162916664034, - "learning_rate": 0.00019999996187547239, - "loss": 46.0, - "step": 1736 - }, - { - "epoch": 0.2797214058536978, - "grad_norm": 0.0010776141425594687, - "learning_rate": 0.00019999996183128285, - "loss": 46.0, - "step": 1737 - }, - { - "epoch": 0.2798824429324852, - "grad_norm": 0.0005428716540336609, - "learning_rate": 0.00019999996178706773, - "loss": 46.0, - "step": 1738 - }, - { - "epoch": 0.2800434800112726, - "grad_norm": 0.0012752683833241463, - "learning_rate": 0.00019999996174282697, - "loss": 46.0, - "step": 1739 - }, - { - "epoch": 0.28020451709006, - "grad_norm": 0.0007044172962196171, - "learning_rate": 0.00019999996169856065, - "loss": 46.0, - "step": 1740 - }, - { - "epoch": 0.2803655541688474, - "grad_norm": 0.0008881782414391637, - "learning_rate": 0.00019999996165426874, - "loss": 46.0, - "step": 1741 - }, - { - "epoch": 0.2805265912476348, - "grad_norm": 0.0005630353698506951, - "learning_rate": 0.00019999996160995125, - "loss": 46.0, - "step": 1742 - }, - { - "epoch": 0.28068762832642213, - "grad_norm": 0.0005720113986171782, - "learning_rate": 0.00019999996156560814, - "loss": 46.0, - "step": 1743 - }, - { - "epoch": 0.28084866540520953, - "grad_norm": 0.0006910123047418892, - "learning_rate": 0.00019999996152123944, - "loss": 46.0, - "step": 1744 - }, - { - "epoch": 0.2810097024839969, - "grad_norm": 0.0009649637504480779, - "learning_rate": 0.00019999996147684516, - "loss": 46.0, - "step": 1745 - }, - { - "epoch": 0.2811707395627843, - "grad_norm": 0.0015174208674579859, - "learning_rate": 0.00019999996143242526, - "loss": 46.0, - "step": 1746 - }, - { - "epoch": 0.2813317766415717, - "grad_norm": 0.0003052711545024067, - "learning_rate": 0.00019999996138797977, - "loss": 46.0, - "step": 1747 - }, - { - "epoch": 0.2814928137203591, - "grad_norm": 0.0021164272911846638, - "learning_rate": 0.0001999999613435087, - "loss": 46.0, - "step": 1748 - }, - { - "epoch": 0.2816538507991465, - "grad_norm": 0.00044605176663026214, - "learning_rate": 0.000199999961299012, - "loss": 46.0, - "step": 1749 - }, - { - "epoch": 0.2818148878779339, - "grad_norm": 0.0008384662214666605, - "learning_rate": 0.00019999996125448974, - "loss": 46.0, - "step": 1750 - }, - { - "epoch": 0.2819759249567213, - "grad_norm": 0.0014187163906171918, - "learning_rate": 0.00019999996120994188, - "loss": 46.0, - "step": 1751 - }, - { - "epoch": 0.2821369620355087, - "grad_norm": 0.0004106323467567563, - "learning_rate": 0.00019999996116536843, - "loss": 46.0, - "step": 1752 - }, - { - "epoch": 0.2822979991142961, - "grad_norm": 0.000659812125377357, - "learning_rate": 0.00019999996112076936, - "loss": 46.0, - "step": 1753 - }, - { - "epoch": 0.28245903619308343, - "grad_norm": 0.00042445125291123986, - "learning_rate": 0.00019999996107614471, - "loss": 46.0, - "step": 1754 - }, - { - "epoch": 0.28262007327187083, - "grad_norm": 0.0009564983774907887, - "learning_rate": 0.00019999996103149448, - "loss": 46.0, - "step": 1755 - }, - { - "epoch": 0.2827811103506582, - "grad_norm": 0.0003712513134814799, - "learning_rate": 0.00019999996098681865, - "loss": 46.0, - "step": 1756 - }, - { - "epoch": 0.2829421474294456, - "grad_norm": 0.001536081195808947, - "learning_rate": 0.0001999999609421172, - "loss": 46.0, - "step": 1757 - }, - { - "epoch": 0.283103184508233, - "grad_norm": 0.0016187526052817702, - "learning_rate": 0.0001999999608973902, - "loss": 46.0, - "step": 1758 - }, - { - "epoch": 0.2832642215870204, - "grad_norm": 0.00040868844371289015, - "learning_rate": 0.00019999996085263755, - "loss": 46.0, - "step": 1759 - }, - { - "epoch": 0.2834252586658078, - "grad_norm": 0.00116222002543509, - "learning_rate": 0.00019999996080785935, - "loss": 46.0, - "step": 1760 - }, - { - "epoch": 0.2835862957445952, - "grad_norm": 0.00030192965641617775, - "learning_rate": 0.0001999999607630555, - "loss": 46.0, - "step": 1761 - }, - { - "epoch": 0.2837473328233826, - "grad_norm": 0.0005130195058882236, - "learning_rate": 0.00019999996071822613, - "loss": 46.0, - "step": 1762 - }, - { - "epoch": 0.28390836990217, - "grad_norm": 0.0002736506867222488, - "learning_rate": 0.00019999996067337112, - "loss": 46.0, - "step": 1763 - }, - { - "epoch": 0.28406940698095734, - "grad_norm": 0.00039211285184137523, - "learning_rate": 0.00019999996062849054, - "loss": 46.0, - "step": 1764 - }, - { - "epoch": 0.28423044405974474, - "grad_norm": 0.0006182106444612145, - "learning_rate": 0.00019999996058358432, - "loss": 46.0, - "step": 1765 - }, - { - "epoch": 0.28439148113853213, - "grad_norm": 0.00035284028854221106, - "learning_rate": 0.00019999996053865254, - "loss": 46.0, - "step": 1766 - }, - { - "epoch": 0.28455251821731953, - "grad_norm": 0.00034616593620739877, - "learning_rate": 0.00019999996049369515, - "loss": 46.0, - "step": 1767 - }, - { - "epoch": 0.2847135552961069, - "grad_norm": 0.0005541214486584067, - "learning_rate": 0.0001999999604487122, - "loss": 46.0, - "step": 1768 - }, - { - "epoch": 0.2848745923748943, - "grad_norm": 0.00048042586422525346, - "learning_rate": 0.00019999996040370363, - "loss": 46.0, - "step": 1769 - }, - { - "epoch": 0.2850356294536817, - "grad_norm": 0.0005554311792366207, - "learning_rate": 0.00019999996035866943, - "loss": 46.0, - "step": 1770 - }, - { - "epoch": 0.2851966665324691, - "grad_norm": 0.00022341113071888685, - "learning_rate": 0.00019999996031360971, - "loss": 46.0, - "step": 1771 - }, - { - "epoch": 0.2853577036112565, - "grad_norm": 0.0006562733906321228, - "learning_rate": 0.00019999996026852433, - "loss": 46.0, - "step": 1772 - }, - { - "epoch": 0.2855187406900439, - "grad_norm": 0.0004613788623828441, - "learning_rate": 0.0001999999602234134, - "loss": 46.0, - "step": 1773 - }, - { - "epoch": 0.2856797777688313, - "grad_norm": 0.0006948672817088664, - "learning_rate": 0.00019999996017827683, - "loss": 46.0, - "step": 1774 - }, - { - "epoch": 0.28584081484761864, - "grad_norm": 0.0021478149574249983, - "learning_rate": 0.00019999996013311472, - "loss": 46.0, - "step": 1775 - }, - { - "epoch": 0.28600185192640604, - "grad_norm": 0.0006172028952278197, - "learning_rate": 0.00019999996008792696, - "loss": 46.0, - "step": 1776 - }, - { - "epoch": 0.28616288900519343, - "grad_norm": 0.001169553492218256, - "learning_rate": 0.00019999996004271364, - "loss": 46.0, - "step": 1777 - }, - { - "epoch": 0.28632392608398083, - "grad_norm": 0.0011354945600032806, - "learning_rate": 0.0001999999599974747, - "loss": 46.0, - "step": 1778 - }, - { - "epoch": 0.2864849631627682, - "grad_norm": 0.0019422700861468911, - "learning_rate": 0.0001999999599522102, - "loss": 46.0, - "step": 1779 - }, - { - "epoch": 0.2866460002415556, - "grad_norm": 0.00022413030092138797, - "learning_rate": 0.00019999995990692008, - "loss": 46.0, - "step": 1780 - }, - { - "epoch": 0.286807037320343, - "grad_norm": 0.001351299462839961, - "learning_rate": 0.0001999999598616044, - "loss": 46.0, - "step": 1781 - }, - { - "epoch": 0.2869680743991304, - "grad_norm": 0.00043627008562907577, - "learning_rate": 0.00019999995981626306, - "loss": 46.0, - "step": 1782 - }, - { - "epoch": 0.2871291114779178, - "grad_norm": 0.00038289171061478555, - "learning_rate": 0.00019999995977089616, - "loss": 46.0, - "step": 1783 - }, - { - "epoch": 0.2872901485567052, - "grad_norm": 0.0005784197128377855, - "learning_rate": 0.00019999995972550365, - "loss": 46.0, - "step": 1784 - }, - { - "epoch": 0.28745118563549255, - "grad_norm": 0.0012538628652691841, - "learning_rate": 0.00019999995968008558, - "loss": 46.0, - "step": 1785 - }, - { - "epoch": 0.28761222271427994, - "grad_norm": 0.00039487218600697815, - "learning_rate": 0.0001999999596346419, - "loss": 46.0, - "step": 1786 - }, - { - "epoch": 0.28777325979306734, - "grad_norm": 0.0005266358493827283, - "learning_rate": 0.0001999999595891726, - "loss": 46.0, - "step": 1787 - }, - { - "epoch": 0.28793429687185473, - "grad_norm": 0.0002815193438436836, - "learning_rate": 0.00019999995954367774, - "loss": 46.0, - "step": 1788 - }, - { - "epoch": 0.28809533395064213, - "grad_norm": 0.0004312753735575825, - "learning_rate": 0.0001999999594981573, - "loss": 46.0, - "step": 1789 - }, - { - "epoch": 0.2882563710294295, - "grad_norm": 0.0004867517272941768, - "learning_rate": 0.0001999999594526112, - "loss": 46.0, - "step": 1790 - }, - { - "epoch": 0.2884174081082169, - "grad_norm": 0.0015913225943222642, - "learning_rate": 0.00019999995940703954, - "loss": 46.0, - "step": 1791 - }, - { - "epoch": 0.2885784451870043, - "grad_norm": 0.0006699271034449339, - "learning_rate": 0.00019999995936144228, - "loss": 46.0, - "step": 1792 - }, - { - "epoch": 0.2887394822657917, - "grad_norm": 0.0006766504375264049, - "learning_rate": 0.00019999995931581946, - "loss": 46.0, - "step": 1793 - }, - { - "epoch": 0.2889005193445791, - "grad_norm": 0.00027622951893135905, - "learning_rate": 0.00019999995927017102, - "loss": 46.0, - "step": 1794 - }, - { - "epoch": 0.2890615564233665, - "grad_norm": 0.0013221491826698184, - "learning_rate": 0.00019999995922449697, - "loss": 46.0, - "step": 1795 - }, - { - "epoch": 0.28922259350215385, - "grad_norm": 0.0005121470312587917, - "learning_rate": 0.00019999995917879734, - "loss": 46.0, - "step": 1796 - }, - { - "epoch": 0.28938363058094124, - "grad_norm": 0.0005169783835299313, - "learning_rate": 0.0001999999591330721, - "loss": 46.0, - "step": 1797 - }, - { - "epoch": 0.28954466765972864, - "grad_norm": 0.001369944540783763, - "learning_rate": 0.0001999999590873213, - "loss": 46.0, - "step": 1798 - }, - { - "epoch": 0.28970570473851603, - "grad_norm": 0.00030911123030819, - "learning_rate": 0.00019999995904154488, - "loss": 46.0, - "step": 1799 - }, - { - "epoch": 0.28986674181730343, - "grad_norm": 0.0007034396403469145, - "learning_rate": 0.00019999995899574286, - "loss": 46.0, - "step": 1800 - }, - { - "epoch": 0.2900277788960908, - "grad_norm": 0.0007518244092352688, - "learning_rate": 0.00019999995894991526, - "loss": 46.0, - "step": 1801 - }, - { - "epoch": 0.2901888159748782, - "grad_norm": 0.0005977744003757834, - "learning_rate": 0.00019999995890406205, - "loss": 46.0, - "step": 1802 - }, - { - "epoch": 0.2903498530536656, - "grad_norm": 0.0005462935660034418, - "learning_rate": 0.00019999995885818325, - "loss": 46.0, - "step": 1803 - }, - { - "epoch": 0.290510890132453, - "grad_norm": 0.0005249497480690479, - "learning_rate": 0.00019999995881227886, - "loss": 46.0, - "step": 1804 - }, - { - "epoch": 0.2906719272112404, - "grad_norm": 0.000635012227576226, - "learning_rate": 0.00019999995876634886, - "loss": 46.0, - "step": 1805 - }, - { - "epoch": 0.29083296429002775, - "grad_norm": 0.0003316863439977169, - "learning_rate": 0.0001999999587203933, - "loss": 46.0, - "step": 1806 - }, - { - "epoch": 0.29099400136881515, - "grad_norm": 0.0018930124351754785, - "learning_rate": 0.00019999995867441212, - "loss": 46.0, - "step": 1807 - }, - { - "epoch": 0.29115503844760254, - "grad_norm": 0.0013742187293246388, - "learning_rate": 0.00019999995862840536, - "loss": 46.0, - "step": 1808 - }, - { - "epoch": 0.29131607552638994, - "grad_norm": 0.0005785526591353118, - "learning_rate": 0.000199999958582373, - "loss": 46.0, - "step": 1809 - }, - { - "epoch": 0.29147711260517734, - "grad_norm": 0.0006674519390799105, - "learning_rate": 0.00019999995853631504, - "loss": 46.0, - "step": 1810 - }, - { - "epoch": 0.29163814968396473, - "grad_norm": 0.0011885145213454962, - "learning_rate": 0.00019999995849023146, - "loss": 46.0, - "step": 1811 - }, - { - "epoch": 0.2917991867627521, - "grad_norm": 0.0010971532901749015, - "learning_rate": 0.00019999995844412232, - "loss": 46.0, - "step": 1812 - }, - { - "epoch": 0.2919602238415395, - "grad_norm": 0.0006365017616190016, - "learning_rate": 0.00019999995839798757, - "loss": 46.0, - "step": 1813 - }, - { - "epoch": 0.2921212609203269, - "grad_norm": 0.00039623305201530457, - "learning_rate": 0.00019999995835182722, - "loss": 46.0, - "step": 1814 - }, - { - "epoch": 0.2922822979991143, - "grad_norm": 0.0014342054491862655, - "learning_rate": 0.0001999999583056413, - "loss": 46.0, - "step": 1815 - }, - { - "epoch": 0.2924433350779017, - "grad_norm": 0.00039575566188432276, - "learning_rate": 0.00019999995825942975, - "loss": 46.0, - "step": 1816 - }, - { - "epoch": 0.29260437215668905, - "grad_norm": 0.0017069606110453606, - "learning_rate": 0.00019999995821319265, - "loss": 46.0, - "step": 1817 - }, - { - "epoch": 0.29276540923547645, - "grad_norm": 0.0016336991684511304, - "learning_rate": 0.00019999995816692993, - "loss": 46.0, - "step": 1818 - }, - { - "epoch": 0.29292644631426384, - "grad_norm": 0.00032223513699136674, - "learning_rate": 0.0001999999581206416, - "loss": 46.0, - "step": 1819 - }, - { - "epoch": 0.29308748339305124, - "grad_norm": 0.00021341664250940084, - "learning_rate": 0.00019999995807432768, - "loss": 46.0, - "step": 1820 - }, - { - "epoch": 0.29324852047183864, - "grad_norm": 0.0006592419813387096, - "learning_rate": 0.00019999995802798818, - "loss": 46.0, - "step": 1821 - }, - { - "epoch": 0.29340955755062603, - "grad_norm": 0.000712876149918884, - "learning_rate": 0.00019999995798162309, - "loss": 46.0, - "step": 1822 - }, - { - "epoch": 0.29357059462941343, - "grad_norm": 0.00023029805743135512, - "learning_rate": 0.0001999999579352324, - "loss": 46.0, - "step": 1823 - }, - { - "epoch": 0.2937316317082008, - "grad_norm": 0.0007901446660980582, - "learning_rate": 0.0001999999578888161, - "loss": 46.0, - "step": 1824 - }, - { - "epoch": 0.2938926687869882, - "grad_norm": 0.0005559787969104946, - "learning_rate": 0.00019999995784237423, - "loss": 46.0, - "step": 1825 - }, - { - "epoch": 0.2940537058657756, - "grad_norm": 0.00031766644679009914, - "learning_rate": 0.00019999995779590676, - "loss": 46.0, - "step": 1826 - }, - { - "epoch": 0.294214742944563, - "grad_norm": 0.0003780285478569567, - "learning_rate": 0.00019999995774941368, - "loss": 46.0, - "step": 1827 - }, - { - "epoch": 0.29437578002335035, - "grad_norm": 0.0007771283271722496, - "learning_rate": 0.000199999957702895, - "loss": 46.0, - "step": 1828 - }, - { - "epoch": 0.29453681710213775, - "grad_norm": 0.0005054048378951848, - "learning_rate": 0.00019999995765635075, - "loss": 46.0, - "step": 1829 - }, - { - "epoch": 0.29469785418092515, - "grad_norm": 0.0005131353973411024, - "learning_rate": 0.0001999999576097809, - "loss": 46.0, - "step": 1830 - }, - { - "epoch": 0.29485889125971254, - "grad_norm": 0.00022505295055452734, - "learning_rate": 0.00019999995756318545, - "loss": 46.0, - "step": 1831 - }, - { - "epoch": 0.29501992833849994, - "grad_norm": 0.00022824318148195744, - "learning_rate": 0.0001999999575165644, - "loss": 46.0, - "step": 1832 - }, - { - "epoch": 0.29518096541728733, - "grad_norm": 0.0005779191269539297, - "learning_rate": 0.00019999995746991774, - "loss": 46.0, - "step": 1833 - }, - { - "epoch": 0.29534200249607473, - "grad_norm": 0.0006373411160893738, - "learning_rate": 0.00019999995742324552, - "loss": 46.0, - "step": 1834 - }, - { - "epoch": 0.2955030395748621, - "grad_norm": 0.00031253762426786125, - "learning_rate": 0.0001999999573765477, - "loss": 46.0, - "step": 1835 - }, - { - "epoch": 0.2956640766536495, - "grad_norm": 0.0007603811682201922, - "learning_rate": 0.00019999995732982427, - "loss": 46.0, - "step": 1836 - }, - { - "epoch": 0.2958251137324369, - "grad_norm": 0.0003260927915107459, - "learning_rate": 0.00019999995728307523, - "loss": 46.0, - "step": 1837 - }, - { - "epoch": 0.29598615081122426, - "grad_norm": 0.0005162190063856542, - "learning_rate": 0.00019999995723630064, - "loss": 46.0, - "step": 1838 - }, - { - "epoch": 0.29614718789001165, - "grad_norm": 0.0004505754041019827, - "learning_rate": 0.00019999995718950043, - "loss": 46.0, - "step": 1839 - }, - { - "epoch": 0.29630822496879905, - "grad_norm": 0.0008108963374979794, - "learning_rate": 0.0001999999571426746, - "loss": 46.0, - "step": 1840 - }, - { - "epoch": 0.29646926204758645, - "grad_norm": 0.0007972571766003966, - "learning_rate": 0.0001999999570958232, - "loss": 46.0, - "step": 1841 - }, - { - "epoch": 0.29663029912637384, - "grad_norm": 0.0012649011332541704, - "learning_rate": 0.00019999995704894622, - "loss": 46.0, - "step": 1842 - }, - { - "epoch": 0.29679133620516124, - "grad_norm": 0.0007034620502963662, - "learning_rate": 0.0001999999570020436, - "loss": 46.0, - "step": 1843 - }, - { - "epoch": 0.29695237328394863, - "grad_norm": 0.0008080760599114001, - "learning_rate": 0.00019999995695511543, - "loss": 46.0, - "step": 1844 - }, - { - "epoch": 0.29711341036273603, - "grad_norm": 0.00039131162338890135, - "learning_rate": 0.00019999995690816164, - "loss": 46.0, - "step": 1845 - }, - { - "epoch": 0.2972744474415234, - "grad_norm": 0.0004916760371997952, - "learning_rate": 0.00019999995686118227, - "loss": 46.0, - "step": 1846 - }, - { - "epoch": 0.2974354845203108, - "grad_norm": 0.0005050976760685444, - "learning_rate": 0.0001999999568141773, - "loss": 46.0, - "step": 1847 - }, - { - "epoch": 0.2975965215990982, - "grad_norm": 0.00033225907827727497, - "learning_rate": 0.00019999995676714673, - "loss": 46.0, - "step": 1848 - }, - { - "epoch": 0.29775755867788556, - "grad_norm": 0.0008810764993540943, - "learning_rate": 0.00019999995672009057, - "loss": 46.0, - "step": 1849 - }, - { - "epoch": 0.29791859575667295, - "grad_norm": 0.0004395331779960543, - "learning_rate": 0.00019999995667300882, - "loss": 46.0, - "step": 1850 - }, - { - "epoch": 0.29807963283546035, - "grad_norm": 0.00041944763506762683, - "learning_rate": 0.00019999995662590148, - "loss": 46.0, - "step": 1851 - }, - { - "epoch": 0.29824066991424775, - "grad_norm": 0.00198348262347281, - "learning_rate": 0.0001999999565787685, - "loss": 46.0, - "step": 1852 - }, - { - "epoch": 0.29840170699303514, - "grad_norm": 0.0006780798430554569, - "learning_rate": 0.00019999995653161, - "loss": 46.0, - "step": 1853 - }, - { - "epoch": 0.29856274407182254, - "grad_norm": 0.0005949756596237421, - "learning_rate": 0.00019999995648442584, - "loss": 46.0, - "step": 1854 - }, - { - "epoch": 0.29872378115060993, - "grad_norm": 0.00024164844944607466, - "learning_rate": 0.00019999995643721612, - "loss": 46.0, - "step": 1855 - }, - { - "epoch": 0.29888481822939733, - "grad_norm": 0.0006569984834641218, - "learning_rate": 0.00019999995638998082, - "loss": 46.0, - "step": 1856 - }, - { - "epoch": 0.2990458553081847, - "grad_norm": 0.0007791115203872323, - "learning_rate": 0.00019999995634271985, - "loss": 46.0, - "step": 1857 - }, - { - "epoch": 0.2992068923869721, - "grad_norm": 0.0005597388371825218, - "learning_rate": 0.00019999995629543338, - "loss": 46.0, - "step": 1858 - }, - { - "epoch": 0.29936792946575946, - "grad_norm": 0.0009192762663587928, - "learning_rate": 0.00019999995624812126, - "loss": 46.0, - "step": 1859 - }, - { - "epoch": 0.29952896654454686, - "grad_norm": 0.0004769730439875275, - "learning_rate": 0.00019999995620078356, - "loss": 46.0, - "step": 1860 - }, - { - "epoch": 0.29969000362333426, - "grad_norm": 0.0004369158123154193, - "learning_rate": 0.00019999995615342027, - "loss": 46.0, - "step": 1861 - }, - { - "epoch": 0.29985104070212165, - "grad_norm": 0.0014577005058526993, - "learning_rate": 0.00019999995610603136, - "loss": 46.0, - "step": 1862 - }, - { - "epoch": 0.30001207778090905, - "grad_norm": 0.00032134598586708307, - "learning_rate": 0.00019999995605861687, - "loss": 46.0, - "step": 1863 - }, - { - "epoch": 0.30017311485969644, - "grad_norm": 0.00032133038621395826, - "learning_rate": 0.0001999999560111768, - "loss": 46.0, - "step": 1864 - }, - { - "epoch": 0.30033415193848384, - "grad_norm": 0.0007147595752030611, - "learning_rate": 0.00019999995596371113, - "loss": 46.0, - "step": 1865 - }, - { - "epoch": 0.30049518901727124, - "grad_norm": 0.0004683503066189587, - "learning_rate": 0.00019999995591621982, - "loss": 46.0, - "step": 1866 - }, - { - "epoch": 0.30065622609605863, - "grad_norm": 0.0004294590326026082, - "learning_rate": 0.00019999995586870298, - "loss": 46.0, - "step": 1867 - }, - { - "epoch": 0.30081726317484603, - "grad_norm": 0.0006481785676442087, - "learning_rate": 0.00019999995582116052, - "loss": 46.0, - "step": 1868 - }, - { - "epoch": 0.3009783002536334, - "grad_norm": 0.00149905018042773, - "learning_rate": 0.00019999995577359245, - "loss": 46.0, - "step": 1869 - }, - { - "epoch": 0.30113933733242076, - "grad_norm": 0.0002647887449711561, - "learning_rate": 0.0001999999557259988, - "loss": 46.0, - "step": 1870 - }, - { - "epoch": 0.30130037441120816, - "grad_norm": 0.0018763943808153272, - "learning_rate": 0.00019999995567837955, - "loss": 46.0, - "step": 1871 - }, - { - "epoch": 0.30146141148999556, - "grad_norm": 0.0003123406204394996, - "learning_rate": 0.00019999995563073472, - "loss": 46.0, - "step": 1872 - }, - { - "epoch": 0.30162244856878295, - "grad_norm": 0.00041101162787526846, - "learning_rate": 0.00019999995558306428, - "loss": 46.0, - "step": 1873 - }, - { - "epoch": 0.30178348564757035, - "grad_norm": 0.00036199679016135633, - "learning_rate": 0.00019999995553536824, - "loss": 46.0, - "step": 1874 - }, - { - "epoch": 0.30194452272635774, - "grad_norm": 0.0006806057062931359, - "learning_rate": 0.0001999999554876466, - "loss": 46.0, - "step": 1875 - }, - { - "epoch": 0.30210555980514514, - "grad_norm": 0.0007571068708784878, - "learning_rate": 0.0001999999554398994, - "loss": 46.0, - "step": 1876 - }, - { - "epoch": 0.30226659688393254, - "grad_norm": 0.00042474898509681225, - "learning_rate": 0.00019999995539212657, - "loss": 46.0, - "step": 1877 - }, - { - "epoch": 0.30242763396271993, - "grad_norm": 0.00032792898127809167, - "learning_rate": 0.0001999999553443282, - "loss": 46.0, - "step": 1878 - }, - { - "epoch": 0.30258867104150733, - "grad_norm": 0.0013749575009569526, - "learning_rate": 0.00019999995529650417, - "loss": 46.0, - "step": 1879 - }, - { - "epoch": 0.3027497081202947, - "grad_norm": 0.00045742292422801256, - "learning_rate": 0.00019999995524865459, - "loss": 46.0, - "step": 1880 - }, - { - "epoch": 0.30291074519908207, - "grad_norm": 0.0005735918530263007, - "learning_rate": 0.00019999995520077936, - "loss": 46.0, - "step": 1881 - }, - { - "epoch": 0.30307178227786946, - "grad_norm": 0.00024322968965861946, - "learning_rate": 0.00019999995515287858, - "loss": 46.0, - "step": 1882 - }, - { - "epoch": 0.30323281935665686, - "grad_norm": 0.0006605838425457478, - "learning_rate": 0.0001999999551049522, - "loss": 46.0, - "step": 1883 - }, - { - "epoch": 0.30339385643544425, - "grad_norm": 0.0010462775826454163, - "learning_rate": 0.0001999999550570002, - "loss": 46.0, - "step": 1884 - }, - { - "epoch": 0.30355489351423165, - "grad_norm": 0.0007160481764003634, - "learning_rate": 0.00019999995500902265, - "loss": 46.0, - "step": 1885 - }, - { - "epoch": 0.30371593059301905, - "grad_norm": 0.00022240905673243105, - "learning_rate": 0.00019999995496101946, - "loss": 46.0, - "step": 1886 - }, - { - "epoch": 0.30387696767180644, - "grad_norm": 0.00023290369426831603, - "learning_rate": 0.00019999995491299072, - "loss": 46.0, - "step": 1887 - }, - { - "epoch": 0.30403800475059384, - "grad_norm": 0.0012489889049902558, - "learning_rate": 0.00019999995486493635, - "loss": 46.0, - "step": 1888 - }, - { - "epoch": 0.30419904182938123, - "grad_norm": 0.00040387638728134334, - "learning_rate": 0.00019999995481685638, - "loss": 46.0, - "step": 1889 - }, - { - "epoch": 0.30436007890816863, - "grad_norm": 0.0003309334279038012, - "learning_rate": 0.00019999995476875084, - "loss": 46.0, - "step": 1890 - }, - { - "epoch": 0.30452111598695597, - "grad_norm": 0.00021112282411195338, - "learning_rate": 0.0001999999547206197, - "loss": 46.0, - "step": 1891 - }, - { - "epoch": 0.30468215306574337, - "grad_norm": 0.0014652787940576673, - "learning_rate": 0.00019999995467246296, - "loss": 46.0, - "step": 1892 - }, - { - "epoch": 0.30484319014453076, - "grad_norm": 0.0016692185308784246, - "learning_rate": 0.00019999995462428063, - "loss": 46.0, - "step": 1893 - }, - { - "epoch": 0.30500422722331816, - "grad_norm": 0.0006531266262754798, - "learning_rate": 0.0001999999545760727, - "loss": 46.0, - "step": 1894 - }, - { - "epoch": 0.30516526430210555, - "grad_norm": 0.001210064860060811, - "learning_rate": 0.00019999995452783917, - "loss": 46.0, - "step": 1895 - }, - { - "epoch": 0.30532630138089295, - "grad_norm": 0.0005247330409474671, - "learning_rate": 0.00019999995447958003, - "loss": 46.0, - "step": 1896 - }, - { - "epoch": 0.30548733845968035, - "grad_norm": 0.0012705554254353046, - "learning_rate": 0.00019999995443129533, - "loss": 46.0, - "step": 1897 - }, - { - "epoch": 0.30564837553846774, - "grad_norm": 0.0013942826772108674, - "learning_rate": 0.00019999995438298504, - "loss": 46.0, - "step": 1898 - }, - { - "epoch": 0.30580941261725514, - "grad_norm": 0.0005664772470481694, - "learning_rate": 0.00019999995433464914, - "loss": 46.0, - "step": 1899 - }, - { - "epoch": 0.30597044969604253, - "grad_norm": 0.0010166022693738341, - "learning_rate": 0.00019999995428628763, - "loss": 46.0, - "step": 1900 - }, - { - "epoch": 0.30613148677482993, - "grad_norm": 0.00048385770060122013, - "learning_rate": 0.00019999995423790055, - "loss": 46.0, - "step": 1901 - }, - { - "epoch": 0.30629252385361727, - "grad_norm": 0.0007341625750996172, - "learning_rate": 0.00019999995418948786, - "loss": 46.0, - "step": 1902 - }, - { - "epoch": 0.30645356093240467, - "grad_norm": 0.0019976783078163862, - "learning_rate": 0.00019999995414104958, - "loss": 46.0, - "step": 1903 - }, - { - "epoch": 0.30661459801119206, - "grad_norm": 0.0008493874338455498, - "learning_rate": 0.0001999999540925857, - "loss": 46.0, - "step": 1904 - }, - { - "epoch": 0.30677563508997946, - "grad_norm": 0.0024145091883838177, - "learning_rate": 0.00019999995404409621, - "loss": 46.0, - "step": 1905 - }, - { - "epoch": 0.30693667216876686, - "grad_norm": 0.00027187468367628753, - "learning_rate": 0.00019999995399558115, - "loss": 46.0, - "step": 1906 - }, - { - "epoch": 0.30709770924755425, - "grad_norm": 0.000542456575203687, - "learning_rate": 0.0001999999539470405, - "loss": 46.0, - "step": 1907 - }, - { - "epoch": 0.30725874632634165, - "grad_norm": 0.00044663192238658667, - "learning_rate": 0.00019999995389847423, - "loss": 46.0, - "step": 1908 - }, - { - "epoch": 0.30741978340512904, - "grad_norm": 0.0008326354436576366, - "learning_rate": 0.00019999995384988237, - "loss": 46.0, - "step": 1909 - }, - { - "epoch": 0.30758082048391644, - "grad_norm": 0.0005407662829384208, - "learning_rate": 0.00019999995380126496, - "loss": 46.0, - "step": 1910 - }, - { - "epoch": 0.30774185756270384, - "grad_norm": 0.0008776931208558381, - "learning_rate": 0.0001999999537526219, - "loss": 46.0, - "step": 1911 - }, - { - "epoch": 0.3079028946414912, - "grad_norm": 0.00022998114582151175, - "learning_rate": 0.0001999999537039533, - "loss": 46.0, - "step": 1912 - }, - { - "epoch": 0.30806393172027857, - "grad_norm": 0.0004771912354044616, - "learning_rate": 0.00019999995365525903, - "loss": 46.0, - "step": 1913 - }, - { - "epoch": 0.30822496879906597, - "grad_norm": 0.0014869171427562833, - "learning_rate": 0.0001999999536065392, - "loss": 46.0, - "step": 1914 - }, - { - "epoch": 0.30838600587785336, - "grad_norm": 0.0011783756781369448, - "learning_rate": 0.0001999999535577938, - "loss": 46.0, - "step": 1915 - }, - { - "epoch": 0.30854704295664076, - "grad_norm": 0.001212630420923233, - "learning_rate": 0.00019999995350902276, - "loss": 46.0, - "step": 1916 - }, - { - "epoch": 0.30870808003542816, - "grad_norm": 0.0020968548487871885, - "learning_rate": 0.00019999995346022616, - "loss": 46.0, - "step": 1917 - }, - { - "epoch": 0.30886911711421555, - "grad_norm": 0.0010417501907795668, - "learning_rate": 0.00019999995341140396, - "loss": 46.0, - "step": 1918 - }, - { - "epoch": 0.30903015419300295, - "grad_norm": 0.0009635143796913326, - "learning_rate": 0.00019999995336255616, - "loss": 46.0, - "step": 1919 - }, - { - "epoch": 0.30919119127179034, - "grad_norm": 0.0006447298219427466, - "learning_rate": 0.00019999995331368276, - "loss": 46.0, - "step": 1920 - }, - { - "epoch": 0.30935222835057774, - "grad_norm": 0.0010088207200169563, - "learning_rate": 0.00019999995326478378, - "loss": 46.0, - "step": 1921 - }, - { - "epoch": 0.30951326542936514, - "grad_norm": 0.0005225542699918151, - "learning_rate": 0.00019999995321585918, - "loss": 46.0, - "step": 1922 - }, - { - "epoch": 0.3096743025081525, - "grad_norm": 0.0003586713573895395, - "learning_rate": 0.000199999953166909, - "loss": 46.0, - "step": 1923 - }, - { - "epoch": 0.3098353395869399, - "grad_norm": 0.0003926061326637864, - "learning_rate": 0.00019999995311793326, - "loss": 46.0, - "step": 1924 - }, - { - "epoch": 0.30999637666572727, - "grad_norm": 0.0008712766575627029, - "learning_rate": 0.00019999995306893187, - "loss": 46.0, - "step": 1925 - }, - { - "epoch": 0.31015741374451467, - "grad_norm": 0.0004809441161341965, - "learning_rate": 0.0001999999530199049, - "loss": 46.0, - "step": 1926 - }, - { - "epoch": 0.31031845082330206, - "grad_norm": 0.000390014291042462, - "learning_rate": 0.00019999995297085234, - "loss": 46.0, - "step": 1927 - }, - { - "epoch": 0.31047948790208946, - "grad_norm": 0.0004524428804870695, - "learning_rate": 0.0001999999529217742, - "loss": 46.0, - "step": 1928 - }, - { - "epoch": 0.31064052498087685, - "grad_norm": 0.0022342661395668983, - "learning_rate": 0.00019999995287267044, - "loss": 46.0, - "step": 1929 - }, - { - "epoch": 0.31080156205966425, - "grad_norm": 0.0007858508615754545, - "learning_rate": 0.0001999999528235411, - "loss": 46.0, - "step": 1930 - }, - { - "epoch": 0.31096259913845165, - "grad_norm": 0.002166828140616417, - "learning_rate": 0.00019999995277438615, - "loss": 46.0, - "step": 1931 - }, - { - "epoch": 0.31112363621723904, - "grad_norm": 0.00030961562879383564, - "learning_rate": 0.0001999999527252056, - "loss": 46.0, - "step": 1932 - }, - { - "epoch": 0.31128467329602644, - "grad_norm": 0.0007051625871099532, - "learning_rate": 0.0001999999526759995, - "loss": 46.0, - "step": 1933 - }, - { - "epoch": 0.3114457103748138, - "grad_norm": 0.0006752362241968513, - "learning_rate": 0.00019999995262676777, - "loss": 46.0, - "step": 1934 - }, - { - "epoch": 0.3116067474536012, - "grad_norm": 0.0004780669114552438, - "learning_rate": 0.00019999995257751046, - "loss": 46.0, - "step": 1935 - }, - { - "epoch": 0.31176778453238857, - "grad_norm": 0.00041254935786128044, - "learning_rate": 0.00019999995252822754, - "loss": 46.0, - "step": 1936 - }, - { - "epoch": 0.31192882161117597, - "grad_norm": 0.001679743523709476, - "learning_rate": 0.00019999995247891903, - "loss": 46.0, - "step": 1937 - }, - { - "epoch": 0.31208985868996336, - "grad_norm": 0.0017879728693515062, - "learning_rate": 0.00019999995242958493, - "loss": 46.0, - "step": 1938 - }, - { - "epoch": 0.31225089576875076, - "grad_norm": 0.001382913556881249, - "learning_rate": 0.00019999995238022524, - "loss": 46.0, - "step": 1939 - }, - { - "epoch": 0.31241193284753815, - "grad_norm": 0.003139752196148038, - "learning_rate": 0.00019999995233083994, - "loss": 46.0, - "step": 1940 - }, - { - "epoch": 0.31257296992632555, - "grad_norm": 0.0009320085518993437, - "learning_rate": 0.00019999995228142905, - "loss": 46.0, - "step": 1941 - }, - { - "epoch": 0.31273400700511295, - "grad_norm": 0.0003911656385753304, - "learning_rate": 0.00019999995223199258, - "loss": 46.0, - "step": 1942 - }, - { - "epoch": 0.31289504408390034, - "grad_norm": 0.0010725856991484761, - "learning_rate": 0.00019999995218253052, - "loss": 46.0, - "step": 1943 - }, - { - "epoch": 0.3130560811626877, - "grad_norm": 0.00045514703379012644, - "learning_rate": 0.0001999999521330428, - "loss": 46.0, - "step": 1944 - }, - { - "epoch": 0.3132171182414751, - "grad_norm": 0.00111088075209409, - "learning_rate": 0.00019999995208352958, - "loss": 46.0, - "step": 1945 - }, - { - "epoch": 0.3133781553202625, - "grad_norm": 0.0006126022199168801, - "learning_rate": 0.0001999999520339907, - "loss": 46.0, - "step": 1946 - }, - { - "epoch": 0.31353919239904987, - "grad_norm": 0.000604628527071327, - "learning_rate": 0.00019999995198442626, - "loss": 46.0, - "step": 1947 - }, - { - "epoch": 0.31370022947783727, - "grad_norm": 0.0004172946501057595, - "learning_rate": 0.0001999999519348362, - "loss": 46.0, - "step": 1948 - }, - { - "epoch": 0.31386126655662466, - "grad_norm": 0.0006414538365788758, - "learning_rate": 0.00019999995188522057, - "loss": 46.0, - "step": 1949 - }, - { - "epoch": 0.31402230363541206, - "grad_norm": 0.0002925368899013847, - "learning_rate": 0.0001999999518355793, - "loss": 46.0, - "step": 1950 - }, - { - "epoch": 0.31418334071419945, - "grad_norm": 0.0005588184576481581, - "learning_rate": 0.00019999995178591247, - "loss": 46.0, - "step": 1951 - }, - { - "epoch": 0.31434437779298685, - "grad_norm": 0.0006587941315956414, - "learning_rate": 0.00019999995173622005, - "loss": 46.0, - "step": 1952 - }, - { - "epoch": 0.31450541487177425, - "grad_norm": 0.0017494851490482688, - "learning_rate": 0.000199999951686502, - "loss": 46.0, - "step": 1953 - }, - { - "epoch": 0.31466645195056164, - "grad_norm": 0.0011188029311597347, - "learning_rate": 0.0001999999516367584, - "loss": 46.0, - "step": 1954 - }, - { - "epoch": 0.314827489029349, - "grad_norm": 0.0006506229401566088, - "learning_rate": 0.00019999995158698919, - "loss": 46.0, - "step": 1955 - }, - { - "epoch": 0.3149885261081364, - "grad_norm": 0.0010482812067493796, - "learning_rate": 0.00019999995153719438, - "loss": 46.0, - "step": 1956 - }, - { - "epoch": 0.3151495631869238, - "grad_norm": 0.0005690926918759942, - "learning_rate": 0.00019999995148737396, - "loss": 46.0, - "step": 1957 - }, - { - "epoch": 0.31531060026571117, - "grad_norm": 0.0004758228315040469, - "learning_rate": 0.00019999995143752796, - "loss": 46.0, - "step": 1958 - }, - { - "epoch": 0.31547163734449857, - "grad_norm": 0.0005149694625288248, - "learning_rate": 0.00019999995138765637, - "loss": 46.0, - "step": 1959 - }, - { - "epoch": 0.31563267442328596, - "grad_norm": 0.0005577944102697074, - "learning_rate": 0.0001999999513377592, - "loss": 46.0, - "step": 1960 - }, - { - "epoch": 0.31579371150207336, - "grad_norm": 0.0010247377213090658, - "learning_rate": 0.0001999999512878364, - "loss": 46.0, - "step": 1961 - }, - { - "epoch": 0.31595474858086076, - "grad_norm": 0.0005274456925690174, - "learning_rate": 0.000199999951237888, - "loss": 46.0, - "step": 1962 - }, - { - "epoch": 0.31611578565964815, - "grad_norm": 0.0008289752877317369, - "learning_rate": 0.00019999995118791404, - "loss": 46.0, - "step": 1963 - }, - { - "epoch": 0.31627682273843555, - "grad_norm": 0.0006728868465870619, - "learning_rate": 0.0001999999511379145, - "loss": 46.0, - "step": 1964 - }, - { - "epoch": 0.3164378598172229, - "grad_norm": 0.00047442642971873283, - "learning_rate": 0.00019999995108788932, - "loss": 46.0, - "step": 1965 - }, - { - "epoch": 0.3165988968960103, - "grad_norm": 0.0007199350511655211, - "learning_rate": 0.00019999995103783856, - "loss": 46.0, - "step": 1966 - }, - { - "epoch": 0.3167599339747977, - "grad_norm": 0.0003270877932664007, - "learning_rate": 0.00019999995098776222, - "loss": 46.0, - "step": 1967 - }, - { - "epoch": 0.3169209710535851, - "grad_norm": 0.0014233592664822936, - "learning_rate": 0.00019999995093766026, - "loss": 46.0, - "step": 1968 - }, - { - "epoch": 0.3170820081323725, - "grad_norm": 0.0005007522413507104, - "learning_rate": 0.00019999995088753272, - "loss": 46.0, - "step": 1969 - }, - { - "epoch": 0.31724304521115987, - "grad_norm": 0.0006731337634846568, - "learning_rate": 0.00019999995083737958, - "loss": 46.0, - "step": 1970 - }, - { - "epoch": 0.31740408228994726, - "grad_norm": 0.0010688817128539085, - "learning_rate": 0.00019999995078720084, - "loss": 46.0, - "step": 1971 - }, - { - "epoch": 0.31756511936873466, - "grad_norm": 0.0017510128673166037, - "learning_rate": 0.0001999999507369965, - "loss": 46.0, - "step": 1972 - }, - { - "epoch": 0.31772615644752206, - "grad_norm": 0.000877570768352598, - "learning_rate": 0.0001999999506867666, - "loss": 46.0, - "step": 1973 - }, - { - "epoch": 0.31788719352630945, - "grad_norm": 0.00034507166128605604, - "learning_rate": 0.00019999995063651107, - "loss": 46.0, - "step": 1974 - }, - { - "epoch": 0.31804823060509685, - "grad_norm": 0.0005786537076346576, - "learning_rate": 0.00019999995058622995, - "loss": 46.0, - "step": 1975 - }, - { - "epoch": 0.3182092676838842, - "grad_norm": 0.0009048181818798184, - "learning_rate": 0.00019999995053592327, - "loss": 46.0, - "step": 1976 - }, - { - "epoch": 0.3183703047626716, - "grad_norm": 0.0012877408880740404, - "learning_rate": 0.00019999995048559094, - "loss": 46.0, - "step": 1977 - }, - { - "epoch": 0.318531341841459, - "grad_norm": 0.001697192550636828, - "learning_rate": 0.00019999995043523306, - "loss": 46.0, - "step": 1978 - }, - { - "epoch": 0.3186923789202464, - "grad_norm": 0.0004150728927925229, - "learning_rate": 0.0001999999503848496, - "loss": 46.0, - "step": 1979 - }, - { - "epoch": 0.3188534159990338, - "grad_norm": 0.0007999094668775797, - "learning_rate": 0.00019999995033444048, - "loss": 46.0, - "step": 1980 - }, - { - "epoch": 0.31901445307782117, - "grad_norm": 0.0008080822299234569, - "learning_rate": 0.0001999999502840058, - "loss": 46.0, - "step": 1981 - }, - { - "epoch": 0.31917549015660857, - "grad_norm": 0.0006414721719920635, - "learning_rate": 0.00019999995023354554, - "loss": 46.0, - "step": 1982 - }, - { - "epoch": 0.31933652723539596, - "grad_norm": 0.0012701860396191478, - "learning_rate": 0.00019999995018305967, - "loss": 46.0, - "step": 1983 - }, - { - "epoch": 0.31949756431418336, - "grad_norm": 0.0016254791989922523, - "learning_rate": 0.0001999999501325482, - "loss": 46.0, - "step": 1984 - }, - { - "epoch": 0.31965860139297075, - "grad_norm": 0.000467137957457453, - "learning_rate": 0.00019999995008201113, - "loss": 46.0, - "step": 1985 - }, - { - "epoch": 0.31981963847175815, - "grad_norm": 0.0004153347690589726, - "learning_rate": 0.00019999995003144847, - "loss": 46.0, - "step": 1986 - }, - { - "epoch": 0.3199806755505455, - "grad_norm": 0.000923548883292824, - "learning_rate": 0.00019999994998086022, - "loss": 46.0, - "step": 1987 - }, - { - "epoch": 0.3201417126293329, - "grad_norm": 0.0004752822278533131, - "learning_rate": 0.00019999994993024638, - "loss": 46.0, - "step": 1988 - }, - { - "epoch": 0.3203027497081203, - "grad_norm": 0.0006223291275091469, - "learning_rate": 0.00019999994987960693, - "loss": 46.0, - "step": 1989 - }, - { - "epoch": 0.3204637867869077, - "grad_norm": 0.0008064461289905012, - "learning_rate": 0.0001999999498289419, - "loss": 46.0, - "step": 1990 - }, - { - "epoch": 0.3206248238656951, - "grad_norm": 0.0010274579981341958, - "learning_rate": 0.00019999994977825127, - "loss": 46.0, - "step": 1991 - }, - { - "epoch": 0.32078586094448247, - "grad_norm": 0.0006910872762091458, - "learning_rate": 0.00019999994972753505, - "loss": 46.0, - "step": 1992 - }, - { - "epoch": 0.32094689802326987, - "grad_norm": 0.0013477810425683856, - "learning_rate": 0.00019999994967679323, - "loss": 46.0, - "step": 1993 - }, - { - "epoch": 0.32110793510205726, - "grad_norm": 0.00043071535765193403, - "learning_rate": 0.0001999999496260258, - "loss": 46.0, - "step": 1994 - }, - { - "epoch": 0.32126897218084466, - "grad_norm": 0.0011416514171287417, - "learning_rate": 0.0001999999495752328, - "loss": 46.0, - "step": 1995 - }, - { - "epoch": 0.32143000925963205, - "grad_norm": 0.0008783790399320424, - "learning_rate": 0.00019999994952441422, - "loss": 46.0, - "step": 1996 - }, - { - "epoch": 0.3215910463384194, - "grad_norm": 0.00042994358227588236, - "learning_rate": 0.00019999994947357, - "loss": 46.0, - "step": 1997 - }, - { - "epoch": 0.3217520834172068, - "grad_norm": 0.0008973407093435526, - "learning_rate": 0.0001999999494227002, - "loss": 46.0, - "step": 1998 - }, - { - "epoch": 0.3219131204959942, - "grad_norm": 0.00044807515223510563, - "learning_rate": 0.00019999994937180482, - "loss": 46.0, - "step": 1999 - }, - { - "epoch": 0.3220741575747816, - "grad_norm": 0.00028068304527550936, - "learning_rate": 0.00019999994932088383, - "loss": 46.0, - "step": 2000 - }, - { - "epoch": 0.322235194653569, - "grad_norm": 0.0004950006259605289, - "learning_rate": 0.00019999994926993725, - "loss": 46.0, - "step": 2001 - }, - { - "epoch": 0.3223962317323564, - "grad_norm": 0.0008484144927933812, - "learning_rate": 0.0001999999492189651, - "loss": 46.0, - "step": 2002 - }, - { - "epoch": 0.32255726881114377, - "grad_norm": 0.0011145814787596464, - "learning_rate": 0.0001999999491679673, - "loss": 46.0, - "step": 2003 - }, - { - "epoch": 0.32271830588993117, - "grad_norm": 0.000589332077652216, - "learning_rate": 0.00019999994911694394, - "loss": 46.0, - "step": 2004 - }, - { - "epoch": 0.32287934296871856, - "grad_norm": 0.0019545366521924734, - "learning_rate": 0.00019999994906589499, - "loss": 46.0, - "step": 2005 - }, - { - "epoch": 0.32304038004750596, - "grad_norm": 0.0008821808150969446, - "learning_rate": 0.00019999994901482044, - "loss": 46.0, - "step": 2006 - }, - { - "epoch": 0.32320141712629336, - "grad_norm": 0.0005089048063382506, - "learning_rate": 0.0001999999489637203, - "loss": 46.0, - "step": 2007 - }, - { - "epoch": 0.3233624542050807, - "grad_norm": 0.0002948503533843905, - "learning_rate": 0.00019999994891259454, - "loss": 46.0, - "step": 2008 - }, - { - "epoch": 0.3235234912838681, - "grad_norm": 0.0019477446330711246, - "learning_rate": 0.00019999994886144321, - "loss": 46.0, - "step": 2009 - }, - { - "epoch": 0.3236845283626555, - "grad_norm": 0.0003937899600714445, - "learning_rate": 0.0001999999488102663, - "loss": 46.0, - "step": 2010 - }, - { - "epoch": 0.3238455654414429, - "grad_norm": 0.00036119233118370175, - "learning_rate": 0.00019999994875906377, - "loss": 46.0, - "step": 2011 - }, - { - "epoch": 0.3240066025202303, - "grad_norm": 0.00027369082090444863, - "learning_rate": 0.00019999994870783562, - "loss": 46.0, - "step": 2012 - }, - { - "epoch": 0.3241676395990177, - "grad_norm": 0.0005119135603308678, - "learning_rate": 0.0001999999486565819, - "loss": 46.0, - "step": 2013 - }, - { - "epoch": 0.3243286766778051, - "grad_norm": 0.001278889481909573, - "learning_rate": 0.00019999994860530262, - "loss": 46.0, - "step": 2014 - }, - { - "epoch": 0.32448971375659247, - "grad_norm": 0.00034376498661004007, - "learning_rate": 0.0001999999485539977, - "loss": 46.0, - "step": 2015 - }, - { - "epoch": 0.32465075083537986, - "grad_norm": 0.0003255880146753043, - "learning_rate": 0.00019999994850266722, - "loss": 46.0, - "step": 2016 - }, - { - "epoch": 0.32481178791416726, - "grad_norm": 0.00047895527677610517, - "learning_rate": 0.00019999994845131108, - "loss": 46.0, - "step": 2017 - }, - { - "epoch": 0.3249728249929546, - "grad_norm": 0.0009383424185216427, - "learning_rate": 0.0001999999483999294, - "loss": 46.0, - "step": 2018 - }, - { - "epoch": 0.325133862071742, - "grad_norm": 0.0008149516070261598, - "learning_rate": 0.00019999994834852213, - "loss": 46.0, - "step": 2019 - }, - { - "epoch": 0.3252948991505294, - "grad_norm": 0.0013515559257939458, - "learning_rate": 0.00019999994829708926, - "loss": 46.0, - "step": 2020 - }, - { - "epoch": 0.3254559362293168, - "grad_norm": 0.000812262820545584, - "learning_rate": 0.00019999994824563077, - "loss": 46.0, - "step": 2021 - }, - { - "epoch": 0.3256169733081042, - "grad_norm": 0.0009132091654464602, - "learning_rate": 0.0001999999481941467, - "loss": 46.0, - "step": 2022 - }, - { - "epoch": 0.3257780103868916, - "grad_norm": 0.002700040815398097, - "learning_rate": 0.00019999994814263701, - "loss": 46.0, - "step": 2023 - }, - { - "epoch": 0.325939047465679, - "grad_norm": 0.0010764887556433678, - "learning_rate": 0.0001999999480911018, - "loss": 46.0, - "step": 2024 - }, - { - "epoch": 0.3261000845444664, - "grad_norm": 0.0014558681286871433, - "learning_rate": 0.0001999999480395409, - "loss": 46.0, - "step": 2025 - }, - { - "epoch": 0.32626112162325377, - "grad_norm": 0.00041977630462497473, - "learning_rate": 0.00019999994798795446, - "loss": 46.0, - "step": 2026 - }, - { - "epoch": 0.32642215870204117, - "grad_norm": 0.0005945920129306614, - "learning_rate": 0.00019999994793634245, - "loss": 46.0, - "step": 2027 - }, - { - "epoch": 0.32658319578082856, - "grad_norm": 0.00208011525683105, - "learning_rate": 0.0001999999478847048, - "loss": 46.0, - "step": 2028 - }, - { - "epoch": 0.3267442328596159, - "grad_norm": 0.0008152250084094703, - "learning_rate": 0.00019999994783304157, - "loss": 46.0, - "step": 2029 - }, - { - "epoch": 0.3269052699384033, - "grad_norm": 0.0003730282187461853, - "learning_rate": 0.00019999994778135274, - "loss": 46.0, - "step": 2030 - }, - { - "epoch": 0.3270663070171907, - "grad_norm": 0.00042543260497041047, - "learning_rate": 0.0001999999477296383, - "loss": 46.0, - "step": 2031 - }, - { - "epoch": 0.3272273440959781, - "grad_norm": 0.002453569555655122, - "learning_rate": 0.00019999994767789828, - "loss": 46.0, - "step": 2032 - }, - { - "epoch": 0.3273883811747655, - "grad_norm": 0.00020690720702987164, - "learning_rate": 0.00019999994762613267, - "loss": 46.0, - "step": 2033 - }, - { - "epoch": 0.3275494182535529, - "grad_norm": 0.0013863329077139497, - "learning_rate": 0.00019999994757434147, - "loss": 46.0, - "step": 2034 - }, - { - "epoch": 0.3277104553323403, - "grad_norm": 0.00252534169703722, - "learning_rate": 0.00019999994752252466, - "loss": 46.0, - "step": 2035 - }, - { - "epoch": 0.3278714924111277, - "grad_norm": 0.0031253830529749393, - "learning_rate": 0.00019999994747068228, - "loss": 46.0, - "step": 2036 - }, - { - "epoch": 0.32803252948991507, - "grad_norm": 0.0006806250894442201, - "learning_rate": 0.00019999994741881427, - "loss": 46.0, - "step": 2037 - }, - { - "epoch": 0.32819356656870247, - "grad_norm": 0.0010512153385207057, - "learning_rate": 0.0001999999473669207, - "loss": 46.0, - "step": 2038 - }, - { - "epoch": 0.32835460364748986, - "grad_norm": 0.001149252406321466, - "learning_rate": 0.0001999999473150015, - "loss": 46.0, - "step": 2039 - }, - { - "epoch": 0.3285156407262772, - "grad_norm": 0.0019053128780797124, - "learning_rate": 0.0001999999472630567, - "loss": 46.0, - "step": 2040 - }, - { - "epoch": 0.3286766778050646, - "grad_norm": 0.0009342010016553104, - "learning_rate": 0.00019999994721108633, - "loss": 46.0, - "step": 2041 - }, - { - "epoch": 0.328837714883852, - "grad_norm": 0.0003408027987461537, - "learning_rate": 0.00019999994715909038, - "loss": 46.0, - "step": 2042 - }, - { - "epoch": 0.3289987519626394, - "grad_norm": 0.0008491163607686758, - "learning_rate": 0.00019999994710706882, - "loss": 46.0, - "step": 2043 - }, - { - "epoch": 0.3291597890414268, - "grad_norm": 0.0006491068634204566, - "learning_rate": 0.00019999994705502167, - "loss": 46.0, - "step": 2044 - }, - { - "epoch": 0.3293208261202142, - "grad_norm": 0.0005705612129531801, - "learning_rate": 0.0001999999470029489, - "loss": 46.0, - "step": 2045 - }, - { - "epoch": 0.3294818631990016, - "grad_norm": 0.0004783225595019758, - "learning_rate": 0.00019999994695085055, - "loss": 46.0, - "step": 2046 - }, - { - "epoch": 0.329642900277789, - "grad_norm": 0.0003770444600377232, - "learning_rate": 0.00019999994689872663, - "loss": 46.0, - "step": 2047 - }, - { - "epoch": 0.32980393735657637, - "grad_norm": 0.0007112284074537456, - "learning_rate": 0.0001999999468465771, - "loss": 46.0, - "step": 2048 - }, - { - "epoch": 0.32996497443536377, - "grad_norm": 0.001417339313775301, - "learning_rate": 0.00019999994679440196, - "loss": 46.0, - "step": 2049 - }, - { - "epoch": 0.3301260115141511, - "grad_norm": 0.00069203827297315, - "learning_rate": 0.0001999999467422012, - "loss": 46.0, - "step": 2050 - }, - { - "epoch": 0.3302870485929385, - "grad_norm": 0.0004047035181429237, - "learning_rate": 0.00019999994668997491, - "loss": 46.0, - "step": 2051 - }, - { - "epoch": 0.3304480856717259, - "grad_norm": 0.00046402288717217743, - "learning_rate": 0.00019999994663772298, - "loss": 46.0, - "step": 2052 - }, - { - "epoch": 0.3306091227505133, - "grad_norm": 0.000519428460393101, - "learning_rate": 0.0001999999465854455, - "loss": 46.0, - "step": 2053 - }, - { - "epoch": 0.3307701598293007, - "grad_norm": 0.0007428214885294437, - "learning_rate": 0.00019999994653314239, - "loss": 46.0, - "step": 2054 - }, - { - "epoch": 0.3309311969080881, - "grad_norm": 0.0007421969785355031, - "learning_rate": 0.0001999999464808137, - "loss": 46.0, - "step": 2055 - }, - { - "epoch": 0.3310922339868755, - "grad_norm": 0.0006881988374516368, - "learning_rate": 0.0001999999464284594, - "loss": 46.0, - "step": 2056 - }, - { - "epoch": 0.3312532710656629, - "grad_norm": 0.001343220821581781, - "learning_rate": 0.0001999999463760795, - "loss": 46.0, - "step": 2057 - }, - { - "epoch": 0.3314143081444503, - "grad_norm": 0.0006997298332862556, - "learning_rate": 0.000199999946323674, - "loss": 46.0, - "step": 2058 - }, - { - "epoch": 0.33157534522323767, - "grad_norm": 0.0005575785762630403, - "learning_rate": 0.00019999994627124292, - "loss": 46.0, - "step": 2059 - }, - { - "epoch": 0.33173638230202507, - "grad_norm": 0.00034918313031084836, - "learning_rate": 0.00019999994621878626, - "loss": 46.0, - "step": 2060 - }, - { - "epoch": 0.3318974193808124, - "grad_norm": 0.00030340024386532605, - "learning_rate": 0.00019999994616630396, - "loss": 46.0, - "step": 2061 - }, - { - "epoch": 0.3320584564595998, - "grad_norm": 0.0005032563931308687, - "learning_rate": 0.0001999999461137961, - "loss": 46.0, - "step": 2062 - }, - { - "epoch": 0.3322194935383872, - "grad_norm": 0.0008769478881731629, - "learning_rate": 0.00019999994606126266, - "loss": 46.0, - "step": 2063 - }, - { - "epoch": 0.3323805306171746, - "grad_norm": 0.0005646526697091758, - "learning_rate": 0.0001999999460087036, - "loss": 46.0, - "step": 2064 - }, - { - "epoch": 0.332541567695962, - "grad_norm": 0.0005444358685053885, - "learning_rate": 0.00019999994595611896, - "loss": 46.0, - "step": 2065 - }, - { - "epoch": 0.3327026047747494, - "grad_norm": 0.0009491333621554077, - "learning_rate": 0.00019999994590350873, - "loss": 46.0, - "step": 2066 - }, - { - "epoch": 0.3328636418535368, - "grad_norm": 0.003124666167423129, - "learning_rate": 0.00019999994585087288, - "loss": 46.0, - "step": 2067 - }, - { - "epoch": 0.3330246789323242, - "grad_norm": 0.0009676054469309747, - "learning_rate": 0.00019999994579821144, - "loss": 46.0, - "step": 2068 - }, - { - "epoch": 0.3331857160111116, - "grad_norm": 0.0006022557499818504, - "learning_rate": 0.0001999999457455244, - "loss": 46.0, - "step": 2069 - }, - { - "epoch": 0.333346753089899, - "grad_norm": 0.0006285131676122546, - "learning_rate": 0.0001999999456928118, - "loss": 46.0, - "step": 2070 - }, - { - "epoch": 0.3335077901686863, - "grad_norm": 0.0010589825687929988, - "learning_rate": 0.0001999999456400736, - "loss": 46.0, - "step": 2071 - }, - { - "epoch": 0.3336688272474737, - "grad_norm": 0.0002169949293602258, - "learning_rate": 0.00019999994558730975, - "loss": 46.0, - "step": 2072 - }, - { - "epoch": 0.3338298643262611, - "grad_norm": 0.000426082726335153, - "learning_rate": 0.00019999994553452036, - "loss": 46.0, - "step": 2073 - }, - { - "epoch": 0.3339909014050485, - "grad_norm": 0.0018415822414681315, - "learning_rate": 0.00019999994548170532, - "loss": 46.0, - "step": 2074 - }, - { - "epoch": 0.3341519384838359, - "grad_norm": 0.0005673546111211181, - "learning_rate": 0.00019999994542886475, - "loss": 46.0, - "step": 2075 - }, - { - "epoch": 0.3343129755626233, - "grad_norm": 0.0007914051529951394, - "learning_rate": 0.00019999994537599856, - "loss": 46.0, - "step": 2076 - }, - { - "epoch": 0.3344740126414107, - "grad_norm": 0.0019066901877522469, - "learning_rate": 0.00019999994532310676, - "loss": 46.0, - "step": 2077 - }, - { - "epoch": 0.3346350497201981, - "grad_norm": 0.0008363793604075909, - "learning_rate": 0.00019999994527018937, - "loss": 46.0, - "step": 2078 - }, - { - "epoch": 0.3347960867989855, - "grad_norm": 0.0004497472255025059, - "learning_rate": 0.0001999999452172464, - "loss": 46.0, - "step": 2079 - }, - { - "epoch": 0.3349571238777729, - "grad_norm": 0.0014955061487853527, - "learning_rate": 0.00019999994516427784, - "loss": 46.0, - "step": 2080 - }, - { - "epoch": 0.3351181609565603, - "grad_norm": 0.001897261361591518, - "learning_rate": 0.0001999999451112837, - "loss": 46.0, - "step": 2081 - }, - { - "epoch": 0.3352791980353476, - "grad_norm": 0.0014495557406917214, - "learning_rate": 0.00019999994505826393, - "loss": 46.0, - "step": 2082 - }, - { - "epoch": 0.335440235114135, - "grad_norm": 0.0004804792406503111, - "learning_rate": 0.00019999994500521855, - "loss": 46.0, - "step": 2083 - }, - { - "epoch": 0.3356012721929224, - "grad_norm": 0.0007584398263134062, - "learning_rate": 0.0001999999449521476, - "loss": 46.0, - "step": 2084 - }, - { - "epoch": 0.3357623092717098, - "grad_norm": 0.00047130463644862175, - "learning_rate": 0.00019999994489905106, - "loss": 46.0, - "step": 2085 - }, - { - "epoch": 0.3359233463504972, - "grad_norm": 0.0008734721341170371, - "learning_rate": 0.00019999994484592892, - "loss": 46.0, - "step": 2086 - }, - { - "epoch": 0.3360843834292846, - "grad_norm": 0.0008928874740377069, - "learning_rate": 0.00019999994479278117, - "loss": 46.0, - "step": 2087 - }, - { - "epoch": 0.336245420508072, - "grad_norm": 0.0017567990580573678, - "learning_rate": 0.00019999994473960783, - "loss": 46.0, - "step": 2088 - }, - { - "epoch": 0.3364064575868594, - "grad_norm": 0.0010819401359185576, - "learning_rate": 0.00019999994468640893, - "loss": 46.0, - "step": 2089 - }, - { - "epoch": 0.3365674946656468, - "grad_norm": 0.0021045005414634943, - "learning_rate": 0.00019999994463318439, - "loss": 46.0, - "step": 2090 - }, - { - "epoch": 0.3367285317444342, - "grad_norm": 0.0004154415219090879, - "learning_rate": 0.00019999994457993428, - "loss": 46.0, - "step": 2091 - }, - { - "epoch": 0.3368895688232215, - "grad_norm": 0.0018240243662148714, - "learning_rate": 0.00019999994452665857, - "loss": 46.0, - "step": 2092 - }, - { - "epoch": 0.3370506059020089, - "grad_norm": 0.0012899583671241999, - "learning_rate": 0.00019999994447335726, - "loss": 46.0, - "step": 2093 - }, - { - "epoch": 0.3372116429807963, - "grad_norm": 0.0009415480890311301, - "learning_rate": 0.00019999994442003037, - "loss": 46.0, - "step": 2094 - }, - { - "epoch": 0.3373726800595837, - "grad_norm": 0.00041233381489291787, - "learning_rate": 0.0001999999443666779, - "loss": 46.0, - "step": 2095 - }, - { - "epoch": 0.3375337171383711, - "grad_norm": 0.0005574105307459831, - "learning_rate": 0.00019999994431329978, - "loss": 46.0, - "step": 2096 - }, - { - "epoch": 0.3376947542171585, - "grad_norm": 0.00043876664130948484, - "learning_rate": 0.0001999999442598961, - "loss": 46.0, - "step": 2097 - }, - { - "epoch": 0.3378557912959459, - "grad_norm": 0.00038646222674287856, - "learning_rate": 0.00019999994420646683, - "loss": 46.0, - "step": 2098 - }, - { - "epoch": 0.3380168283747333, - "grad_norm": 0.0008918823441490531, - "learning_rate": 0.00019999994415301195, - "loss": 46.0, - "step": 2099 - }, - { - "epoch": 0.3381778654535207, - "grad_norm": 0.00026927090948447585, - "learning_rate": 0.00019999994409953148, - "loss": 46.0, - "step": 2100 - }, - { - "epoch": 0.3383389025323081, - "grad_norm": 0.00041321897879242897, - "learning_rate": 0.0001999999440460254, - "loss": 46.0, - "step": 2101 - }, - { - "epoch": 0.3384999396110955, - "grad_norm": 0.0005375874461606145, - "learning_rate": 0.00019999994399249376, - "loss": 46.0, - "step": 2102 - }, - { - "epoch": 0.3386609766898828, - "grad_norm": 0.00026700831949710846, - "learning_rate": 0.0001999999439389365, - "loss": 46.0, - "step": 2103 - }, - { - "epoch": 0.3388220137686702, - "grad_norm": 0.000915630254894495, - "learning_rate": 0.00019999994388535366, - "loss": 46.0, - "step": 2104 - }, - { - "epoch": 0.3389830508474576, - "grad_norm": 0.0006769729661755264, - "learning_rate": 0.0001999999438317452, - "loss": 46.0, - "step": 2105 - }, - { - "epoch": 0.339144087926245, - "grad_norm": 0.0007546727429144084, - "learning_rate": 0.00019999994377811118, - "loss": 46.0, - "step": 2106 - }, - { - "epoch": 0.3393051250050324, - "grad_norm": 0.0009583216742612422, - "learning_rate": 0.00019999994372445155, - "loss": 46.0, - "step": 2107 - }, - { - "epoch": 0.3394661620838198, - "grad_norm": 0.00047133295447565615, - "learning_rate": 0.0001999999436707663, - "loss": 46.0, - "step": 2108 - }, - { - "epoch": 0.3396271991626072, - "grad_norm": 0.00082333420868963, - "learning_rate": 0.0001999999436170555, - "loss": 46.0, - "step": 2109 - }, - { - "epoch": 0.3397882362413946, - "grad_norm": 0.0006518145091831684, - "learning_rate": 0.00019999994356331908, - "loss": 46.0, - "step": 2110 - }, - { - "epoch": 0.339949273320182, - "grad_norm": 0.0005809074500575662, - "learning_rate": 0.00019999994350955707, - "loss": 46.0, - "step": 2111 - }, - { - "epoch": 0.3401103103989694, - "grad_norm": 0.0006611874559894204, - "learning_rate": 0.00019999994345576948, - "loss": 46.0, - "step": 2112 - }, - { - "epoch": 0.3402713474777568, - "grad_norm": 0.00037456219433806837, - "learning_rate": 0.00019999994340195624, - "loss": 46.0, - "step": 2113 - }, - { - "epoch": 0.3404323845565441, - "grad_norm": 0.0002828915312420577, - "learning_rate": 0.00019999994334811744, - "loss": 46.0, - "step": 2114 - }, - { - "epoch": 0.3405934216353315, - "grad_norm": 0.00040224610711447895, - "learning_rate": 0.00019999994329425306, - "loss": 46.0, - "step": 2115 - }, - { - "epoch": 0.3407544587141189, - "grad_norm": 0.0005563198355957866, - "learning_rate": 0.00019999994324036306, - "loss": 46.0, - "step": 2116 - }, - { - "epoch": 0.3409154957929063, - "grad_norm": 0.0008193428511731327, - "learning_rate": 0.00019999994318644748, - "loss": 46.0, - "step": 2117 - }, - { - "epoch": 0.3410765328716937, - "grad_norm": 0.0004005931841675192, - "learning_rate": 0.00019999994313250633, - "loss": 46.0, - "step": 2118 - }, - { - "epoch": 0.3412375699504811, - "grad_norm": 0.0003109571698587388, - "learning_rate": 0.00019999994307853954, - "loss": 46.0, - "step": 2119 - }, - { - "epoch": 0.3413986070292685, - "grad_norm": 0.0004226868913974613, - "learning_rate": 0.0001999999430245472, - "loss": 46.0, - "step": 2120 - }, - { - "epoch": 0.3415596441080559, - "grad_norm": 0.0006647769478149712, - "learning_rate": 0.0001999999429705292, - "loss": 46.0, - "step": 2121 - }, - { - "epoch": 0.3417206811868433, - "grad_norm": 0.001019866205751896, - "learning_rate": 0.00019999994291648566, - "loss": 46.0, - "step": 2122 - }, - { - "epoch": 0.3418817182656307, - "grad_norm": 0.0006915233097970486, - "learning_rate": 0.0001999999428624165, - "loss": 46.0, - "step": 2123 - }, - { - "epoch": 0.342042755344418, - "grad_norm": 0.0012263247044757009, - "learning_rate": 0.00019999994280832178, - "loss": 46.0, - "step": 2124 - }, - { - "epoch": 0.3422037924232054, - "grad_norm": 0.0009923606412485242, - "learning_rate": 0.0001999999427542014, - "loss": 46.0, - "step": 2125 - }, - { - "epoch": 0.3423648295019928, - "grad_norm": 0.0009882006561383605, - "learning_rate": 0.0001999999427000555, - "loss": 46.0, - "step": 2126 - }, - { - "epoch": 0.3425258665807802, - "grad_norm": 0.00047184983850456774, - "learning_rate": 0.00019999994264588398, - "loss": 46.0, - "step": 2127 - }, - { - "epoch": 0.3426869036595676, - "grad_norm": 0.0005706138908863068, - "learning_rate": 0.00019999994259168685, - "loss": 46.0, - "step": 2128 - }, - { - "epoch": 0.342847940738355, - "grad_norm": 0.0003796783566940576, - "learning_rate": 0.00019999994253746411, - "loss": 46.0, - "step": 2129 - }, - { - "epoch": 0.3430089778171424, - "grad_norm": 0.0002847843279596418, - "learning_rate": 0.00019999994248321581, - "loss": 46.0, - "step": 2130 - }, - { - "epoch": 0.3431700148959298, - "grad_norm": 0.0005586830084212124, - "learning_rate": 0.0001999999424289419, - "loss": 46.0, - "step": 2131 - }, - { - "epoch": 0.3433310519747172, - "grad_norm": 0.0005941084818914533, - "learning_rate": 0.0001999999423746424, - "loss": 46.0, - "step": 2132 - }, - { - "epoch": 0.3434920890535046, - "grad_norm": 0.0003910789673682302, - "learning_rate": 0.00019999994232031728, - "loss": 46.0, - "step": 2133 - }, - { - "epoch": 0.343653126132292, - "grad_norm": 0.0002927309542428702, - "learning_rate": 0.00019999994226596658, - "loss": 46.0, - "step": 2134 - }, - { - "epoch": 0.3438141632110793, - "grad_norm": 0.0016613490879535675, - "learning_rate": 0.00019999994221159032, - "loss": 46.0, - "step": 2135 - }, - { - "epoch": 0.3439752002898667, - "grad_norm": 0.00047319408622570336, - "learning_rate": 0.00019999994215718841, - "loss": 46.0, - "step": 2136 - }, - { - "epoch": 0.3441362373686541, - "grad_norm": 0.00018736837955657393, - "learning_rate": 0.00019999994210276095, - "loss": 46.0, - "step": 2137 - }, - { - "epoch": 0.3442972744474415, - "grad_norm": 0.0004510493890848011, - "learning_rate": 0.00019999994204830787, - "loss": 46.0, - "step": 2138 - }, - { - "epoch": 0.3444583115262289, - "grad_norm": 0.0006545883952639997, - "learning_rate": 0.0001999999419938292, - "loss": 46.0, - "step": 2139 - }, - { - "epoch": 0.3446193486050163, - "grad_norm": 0.0008365369867533445, - "learning_rate": 0.00019999994193932495, - "loss": 46.0, - "step": 2140 - }, - { - "epoch": 0.3447803856838037, - "grad_norm": 0.00029896796331740916, - "learning_rate": 0.0001999999418847951, - "loss": 46.0, - "step": 2141 - }, - { - "epoch": 0.3449414227625911, - "grad_norm": 0.001084184623323381, - "learning_rate": 0.00019999994183023963, - "loss": 46.0, - "step": 2142 - }, - { - "epoch": 0.3451024598413785, - "grad_norm": 0.0005514733493328094, - "learning_rate": 0.00019999994177565856, - "loss": 46.0, - "step": 2143 - }, - { - "epoch": 0.3452634969201659, - "grad_norm": 0.0004598818486556411, - "learning_rate": 0.00019999994172105194, - "loss": 46.0, - "step": 2144 - }, - { - "epoch": 0.34542453399895323, - "grad_norm": 0.0004823756462428719, - "learning_rate": 0.00019999994166641972, - "loss": 46.0, - "step": 2145 - }, - { - "epoch": 0.3455855710777406, - "grad_norm": 0.0004527256533037871, - "learning_rate": 0.00019999994161176186, - "loss": 46.0, - "step": 2146 - }, - { - "epoch": 0.345746608156528, - "grad_norm": 0.00024040110292844474, - "learning_rate": 0.00019999994155707845, - "loss": 46.0, - "step": 2147 - }, - { - "epoch": 0.3459076452353154, - "grad_norm": 0.0005315089947544038, - "learning_rate": 0.00019999994150236941, - "loss": 46.0, - "step": 2148 - }, - { - "epoch": 0.3460686823141028, - "grad_norm": 0.00047090795123949647, - "learning_rate": 0.0001999999414476348, - "loss": 46.0, - "step": 2149 - }, - { - "epoch": 0.3462297193928902, - "grad_norm": 0.0005423012189567089, - "learning_rate": 0.0001999999413928746, - "loss": 46.0, - "step": 2150 - }, - { - "epoch": 0.3463907564716776, - "grad_norm": 0.000755565008148551, - "learning_rate": 0.0001999999413380888, - "loss": 46.0, - "step": 2151 - }, - { - "epoch": 0.346551793550465, - "grad_norm": 0.0002905244182329625, - "learning_rate": 0.0001999999412832774, - "loss": 46.0, - "step": 2152 - }, - { - "epoch": 0.3467128306292524, - "grad_norm": 0.0005571144865825772, - "learning_rate": 0.0001999999412284404, - "loss": 46.0, - "step": 2153 - }, - { - "epoch": 0.3468738677080398, - "grad_norm": 0.00036992679815739393, - "learning_rate": 0.0001999999411735778, - "loss": 46.0, - "step": 2154 - }, - { - "epoch": 0.3470349047868272, - "grad_norm": 0.001313650282099843, - "learning_rate": 0.0001999999411186896, - "loss": 46.0, - "step": 2155 - }, - { - "epoch": 0.34719594186561453, - "grad_norm": 0.0012090746313333511, - "learning_rate": 0.00019999994106377584, - "loss": 46.0, - "step": 2156 - }, - { - "epoch": 0.34735697894440193, - "grad_norm": 0.0005325045203790069, - "learning_rate": 0.00019999994100883647, - "loss": 46.0, - "step": 2157 - }, - { - "epoch": 0.3475180160231893, - "grad_norm": 0.000600369821768254, - "learning_rate": 0.00019999994095387148, - "loss": 46.0, - "step": 2158 - }, - { - "epoch": 0.3476790531019767, - "grad_norm": 0.00028068615938536823, - "learning_rate": 0.0001999999408988809, - "loss": 46.0, - "step": 2159 - }, - { - "epoch": 0.3478400901807641, - "grad_norm": 0.0009161363705061376, - "learning_rate": 0.00019999994084386478, - "loss": 46.0, - "step": 2160 - }, - { - "epoch": 0.3480011272595515, - "grad_norm": 0.0017773328581824899, - "learning_rate": 0.000199999940788823, - "loss": 46.0, - "step": 2161 - }, - { - "epoch": 0.3481621643383389, - "grad_norm": 0.00041139687527902424, - "learning_rate": 0.00019999994073375565, - "loss": 46.0, - "step": 2162 - }, - { - "epoch": 0.3483232014171263, - "grad_norm": 0.0003068914811592549, - "learning_rate": 0.0001999999406786627, - "loss": 46.0, - "step": 2163 - }, - { - "epoch": 0.3484842384959137, - "grad_norm": 0.0003006128827109933, - "learning_rate": 0.00019999994062354416, - "loss": 46.0, - "step": 2164 - }, - { - "epoch": 0.3486452755747011, - "grad_norm": 0.0007143109105527401, - "learning_rate": 0.00019999994056840004, - "loss": 46.0, - "step": 2165 - }, - { - "epoch": 0.3488063126534885, - "grad_norm": 0.0005406810087151825, - "learning_rate": 0.00019999994051323033, - "loss": 46.0, - "step": 2166 - }, - { - "epoch": 0.34896734973227583, - "grad_norm": 0.0006577089661732316, - "learning_rate": 0.00019999994045803498, - "loss": 46.0, - "step": 2167 - }, - { - "epoch": 0.34912838681106323, - "grad_norm": 0.0008731005946174264, - "learning_rate": 0.00019999994040281407, - "loss": 46.0, - "step": 2168 - }, - { - "epoch": 0.3492894238898506, - "grad_norm": 0.0011856661876663566, - "learning_rate": 0.00019999994034756755, - "loss": 46.0, - "step": 2169 - }, - { - "epoch": 0.349450460968638, - "grad_norm": 0.0011892268666997552, - "learning_rate": 0.00019999994029229546, - "loss": 46.0, - "step": 2170 - }, - { - "epoch": 0.3496114980474254, - "grad_norm": 0.0003948225639760494, - "learning_rate": 0.00019999994023699774, - "loss": 46.0, - "step": 2171 - }, - { - "epoch": 0.3497725351262128, - "grad_norm": 0.00041553142364136875, - "learning_rate": 0.00019999994018167442, - "loss": 46.0, - "step": 2172 - }, - { - "epoch": 0.3499335722050002, - "grad_norm": 0.0009228391572833061, - "learning_rate": 0.00019999994012632555, - "loss": 46.0, - "step": 2173 - }, - { - "epoch": 0.3500946092837876, - "grad_norm": 0.0006806949386373162, - "learning_rate": 0.00019999994007095106, - "loss": 46.0, - "step": 2174 - }, - { - "epoch": 0.350255646362575, - "grad_norm": 0.0005127970362082124, - "learning_rate": 0.000199999940015551, - "loss": 46.0, - "step": 2175 - }, - { - "epoch": 0.3504166834413624, - "grad_norm": 0.0007062973454594612, - "learning_rate": 0.0001999999399601253, - "loss": 46.0, - "step": 2176 - }, - { - "epoch": 0.35057772052014974, - "grad_norm": 0.00033460246049799025, - "learning_rate": 0.00019999993990467402, - "loss": 46.0, - "step": 2177 - }, - { - "epoch": 0.35073875759893713, - "grad_norm": 0.0003914613334927708, - "learning_rate": 0.00019999993984919716, - "loss": 46.0, - "step": 2178 - }, - { - "epoch": 0.35089979467772453, - "grad_norm": 0.0007057320908643305, - "learning_rate": 0.00019999993979369468, - "loss": 46.0, - "step": 2179 - }, - { - "epoch": 0.3510608317565119, - "grad_norm": 0.0005471279146149755, - "learning_rate": 0.00019999993973816664, - "loss": 46.0, - "step": 2180 - }, - { - "epoch": 0.3512218688352993, - "grad_norm": 0.0006510689272545278, - "learning_rate": 0.000199999939682613, - "loss": 46.0, - "step": 2181 - }, - { - "epoch": 0.3513829059140867, - "grad_norm": 0.0006412953371182084, - "learning_rate": 0.00019999993962703373, - "loss": 46.0, - "step": 2182 - }, - { - "epoch": 0.3515439429928741, - "grad_norm": 0.0005228205700404942, - "learning_rate": 0.00019999993957142887, - "loss": 46.0, - "step": 2183 - }, - { - "epoch": 0.3517049800716615, - "grad_norm": 0.0008050735341385007, - "learning_rate": 0.00019999993951579846, - "loss": 46.0, - "step": 2184 - }, - { - "epoch": 0.3518660171504489, - "grad_norm": 0.0014420299557968974, - "learning_rate": 0.00019999993946014243, - "loss": 46.0, - "step": 2185 - }, - { - "epoch": 0.3520270542292363, - "grad_norm": 0.0005427033174782991, - "learning_rate": 0.0001999999394044608, - "loss": 46.0, - "step": 2186 - }, - { - "epoch": 0.3521880913080237, - "grad_norm": 0.0015302340034395456, - "learning_rate": 0.0001999999393487536, - "loss": 46.0, - "step": 2187 - }, - { - "epoch": 0.35234912838681104, - "grad_norm": 0.0017362490762025118, - "learning_rate": 0.00019999993929302077, - "loss": 46.0, - "step": 2188 - }, - { - "epoch": 0.35251016546559844, - "grad_norm": 0.0018614483997225761, - "learning_rate": 0.00019999993923726237, - "loss": 46.0, - "step": 2189 - }, - { - "epoch": 0.35267120254438583, - "grad_norm": 0.00042189229861833155, - "learning_rate": 0.00019999993918147835, - "loss": 46.0, - "step": 2190 - }, - { - "epoch": 0.3528322396231732, - "grad_norm": 0.0005222007748670876, - "learning_rate": 0.00019999993912566874, - "loss": 46.0, - "step": 2191 - }, - { - "epoch": 0.3529932767019606, - "grad_norm": 0.0005323282093741, - "learning_rate": 0.00019999993906983355, - "loss": 46.0, - "step": 2192 - }, - { - "epoch": 0.353154313780748, - "grad_norm": 0.00101379188708961, - "learning_rate": 0.00019999993901397275, - "loss": 46.0, - "step": 2193 - }, - { - "epoch": 0.3533153508595354, - "grad_norm": 0.0004553424078039825, - "learning_rate": 0.00019999993895808638, - "loss": 46.0, - "step": 2194 - }, - { - "epoch": 0.3534763879383228, - "grad_norm": 0.00032894639298319817, - "learning_rate": 0.0001999999389021744, - "loss": 46.0, - "step": 2195 - }, - { - "epoch": 0.3536374250171102, - "grad_norm": 0.0012497679563239217, - "learning_rate": 0.00019999993884623683, - "loss": 46.0, - "step": 2196 - }, - { - "epoch": 0.3537984620958976, - "grad_norm": 0.001451124087907374, - "learning_rate": 0.00019999993879027365, - "loss": 46.0, - "step": 2197 - }, - { - "epoch": 0.35395949917468494, - "grad_norm": 0.0010124671971425414, - "learning_rate": 0.00019999993873428488, - "loss": 46.0, - "step": 2198 - }, - { - "epoch": 0.35412053625347234, - "grad_norm": 0.000441828859038651, - "learning_rate": 0.00019999993867827055, - "loss": 46.0, - "step": 2199 - }, - { - "epoch": 0.35428157333225974, - "grad_norm": 0.00082436139928177, - "learning_rate": 0.00019999993862223055, - "loss": 46.0, - "step": 2200 - }, - { - "epoch": 0.35444261041104713, - "grad_norm": 0.0008977174293249846, - "learning_rate": 0.00019999993856616502, - "loss": 46.0, - "step": 2201 - }, - { - "epoch": 0.35460364748983453, - "grad_norm": 0.0006015023100189865, - "learning_rate": 0.00019999993851007388, - "loss": 46.0, - "step": 2202 - }, - { - "epoch": 0.3547646845686219, - "grad_norm": 0.0005927614984102547, - "learning_rate": 0.00019999993845395714, - "loss": 46.0, - "step": 2203 - }, - { - "epoch": 0.3549257216474093, - "grad_norm": 0.0011519651161506772, - "learning_rate": 0.00019999993839781482, - "loss": 46.0, - "step": 2204 - }, - { - "epoch": 0.3550867587261967, - "grad_norm": 0.0005731416749767959, - "learning_rate": 0.00019999993834164686, - "loss": 46.0, - "step": 2205 - }, - { - "epoch": 0.3552477958049841, - "grad_norm": 0.0007182052358984947, - "learning_rate": 0.00019999993828545337, - "loss": 46.0, - "step": 2206 - }, - { - "epoch": 0.3554088328837715, - "grad_norm": 0.0006719041266478598, - "learning_rate": 0.00019999993822923423, - "loss": 46.0, - "step": 2207 - }, - { - "epoch": 0.3555698699625589, - "grad_norm": 0.0005202871398068964, - "learning_rate": 0.0001999999381729895, - "loss": 46.0, - "step": 2208 - }, - { - "epoch": 0.35573090704134624, - "grad_norm": 0.0008943456341512501, - "learning_rate": 0.00019999993811671923, - "loss": 46.0, - "step": 2209 - }, - { - "epoch": 0.35589194412013364, - "grad_norm": 0.00039654961437918246, - "learning_rate": 0.00019999993806042333, - "loss": 46.0, - "step": 2210 - }, - { - "epoch": 0.35605298119892104, - "grad_norm": 0.0008803161908872426, - "learning_rate": 0.00019999993800410182, - "loss": 46.0, - "step": 2211 - }, - { - "epoch": 0.35621401827770843, - "grad_norm": 0.0012736518401652575, - "learning_rate": 0.00019999993794775472, - "loss": 46.0, - "step": 2212 - }, - { - "epoch": 0.35637505535649583, - "grad_norm": 0.0010360858868807554, - "learning_rate": 0.00019999993789138204, - "loss": 46.0, - "step": 2213 - }, - { - "epoch": 0.3565360924352832, - "grad_norm": 0.0007000893820077181, - "learning_rate": 0.00019999993783498377, - "loss": 46.0, - "step": 2214 - }, - { - "epoch": 0.3566971295140706, - "grad_norm": 0.00021741408272646368, - "learning_rate": 0.00019999993777855988, - "loss": 46.0, - "step": 2215 - }, - { - "epoch": 0.356858166592858, - "grad_norm": 0.00039426906732842326, - "learning_rate": 0.0001999999377221104, - "loss": 46.0, - "step": 2216 - }, - { - "epoch": 0.3570192036716454, - "grad_norm": 0.001243783626705408, - "learning_rate": 0.00019999993766563534, - "loss": 46.0, - "step": 2217 - }, - { - "epoch": 0.3571802407504328, - "grad_norm": 0.0012748971348628402, - "learning_rate": 0.0001999999376091347, - "loss": 46.0, - "step": 2218 - }, - { - "epoch": 0.3573412778292202, - "grad_norm": 0.0005534195224754512, - "learning_rate": 0.0001999999375526084, - "loss": 46.0, - "step": 2219 - }, - { - "epoch": 0.35750231490800755, - "grad_norm": 0.000428906234446913, - "learning_rate": 0.00019999993749605656, - "loss": 46.0, - "step": 2220 - }, - { - "epoch": 0.35766335198679494, - "grad_norm": 0.0006642548833042383, - "learning_rate": 0.00019999993743947912, - "loss": 46.0, - "step": 2221 - }, - { - "epoch": 0.35782438906558234, - "grad_norm": 0.0005308387917466462, - "learning_rate": 0.00019999993738287607, - "loss": 46.0, - "step": 2222 - }, - { - "epoch": 0.35798542614436973, - "grad_norm": 0.0007748939679004252, - "learning_rate": 0.00019999993732624746, - "loss": 46.0, - "step": 2223 - }, - { - "epoch": 0.35814646322315713, - "grad_norm": 0.000501910166349262, - "learning_rate": 0.00019999993726959323, - "loss": 46.0, - "step": 2224 - }, - { - "epoch": 0.3583075003019445, - "grad_norm": 0.0004448570543900132, - "learning_rate": 0.0001999999372129134, - "loss": 46.0, - "step": 2225 - }, - { - "epoch": 0.3584685373807319, - "grad_norm": 0.0002656817378010601, - "learning_rate": 0.00019999993715620796, - "loss": 46.0, - "step": 2226 - }, - { - "epoch": 0.3586295744595193, - "grad_norm": 0.0006572982529178262, - "learning_rate": 0.00019999993709947695, - "loss": 46.0, - "step": 2227 - }, - { - "epoch": 0.3587906115383067, - "grad_norm": 0.0004981006495654583, - "learning_rate": 0.00019999993704272035, - "loss": 46.0, - "step": 2228 - }, - { - "epoch": 0.3589516486170941, - "grad_norm": 0.0004337487043812871, - "learning_rate": 0.00019999993698593813, - "loss": 46.0, - "step": 2229 - }, - { - "epoch": 0.35911268569588145, - "grad_norm": 0.0003623554075602442, - "learning_rate": 0.00019999993692913033, - "loss": 46.0, - "step": 2230 - }, - { - "epoch": 0.35927372277466885, - "grad_norm": 0.00041682831943035126, - "learning_rate": 0.00019999993687229694, - "loss": 46.0, - "step": 2231 - }, - { - "epoch": 0.35943475985345624, - "grad_norm": 0.0004244146402925253, - "learning_rate": 0.00019999993681543796, - "loss": 46.0, - "step": 2232 - }, - { - "epoch": 0.35959579693224364, - "grad_norm": 0.000629731104709208, - "learning_rate": 0.00019999993675855337, - "loss": 46.0, - "step": 2233 - }, - { - "epoch": 0.35975683401103103, - "grad_norm": 0.0007296414696611464, - "learning_rate": 0.0001999999367016432, - "loss": 46.0, - "step": 2234 - }, - { - "epoch": 0.35991787108981843, - "grad_norm": 0.0006106806686148047, - "learning_rate": 0.00019999993664470743, - "loss": 46.0, - "step": 2235 - }, - { - "epoch": 0.3600789081686058, - "grad_norm": 0.0006707884604111314, - "learning_rate": 0.00019999993658774605, - "loss": 46.0, - "step": 2236 - }, - { - "epoch": 0.3602399452473932, - "grad_norm": 0.00045559322461485863, - "learning_rate": 0.00019999993653075908, - "loss": 46.0, - "step": 2237 - }, - { - "epoch": 0.3604009823261806, - "grad_norm": 0.0005608388455584645, - "learning_rate": 0.00019999993647374652, - "loss": 46.0, - "step": 2238 - }, - { - "epoch": 0.360562019404968, - "grad_norm": 0.00019335455726832151, - "learning_rate": 0.00019999993641670836, - "loss": 46.0, - "step": 2239 - }, - { - "epoch": 0.3607230564837554, - "grad_norm": 0.00037204186082817614, - "learning_rate": 0.00019999993635964463, - "loss": 46.0, - "step": 2240 - }, - { - "epoch": 0.36088409356254275, - "grad_norm": 0.00035147430025972426, - "learning_rate": 0.00019999993630255526, - "loss": 46.0, - "step": 2241 - }, - { - "epoch": 0.36104513064133015, - "grad_norm": 0.0006060279556550086, - "learning_rate": 0.00019999993624544033, - "loss": 46.0, - "step": 2242 - }, - { - "epoch": 0.36120616772011754, - "grad_norm": 0.0017185431206598878, - "learning_rate": 0.0001999999361882998, - "loss": 46.0, - "step": 2243 - }, - { - "epoch": 0.36136720479890494, - "grad_norm": 0.003380146576091647, - "learning_rate": 0.00019999993613113368, - "loss": 46.0, - "step": 2244 - }, - { - "epoch": 0.36152824187769234, - "grad_norm": 0.0014559919945895672, - "learning_rate": 0.00019999993607394196, - "loss": 46.0, - "step": 2245 - }, - { - "epoch": 0.36168927895647973, - "grad_norm": 0.0008151179645210505, - "learning_rate": 0.00019999993601672465, - "loss": 46.0, - "step": 2246 - }, - { - "epoch": 0.3618503160352671, - "grad_norm": 0.00041224207961931825, - "learning_rate": 0.00019999993595948173, - "loss": 46.0, - "step": 2247 - }, - { - "epoch": 0.3620113531140545, - "grad_norm": 0.0012288088910281658, - "learning_rate": 0.00019999993590221323, - "loss": 46.0, - "step": 2248 - }, - { - "epoch": 0.3621723901928419, - "grad_norm": 0.002109048655256629, - "learning_rate": 0.00019999993584491913, - "loss": 46.0, - "step": 2249 - }, - { - "epoch": 0.3623334272716293, - "grad_norm": 0.0003549403336364776, - "learning_rate": 0.00019999993578759942, - "loss": 46.0, - "step": 2250 - }, - { - "epoch": 0.36249446435041666, - "grad_norm": 0.0005241574835963547, - "learning_rate": 0.00019999993573025413, - "loss": 46.0, - "step": 2251 - }, - { - "epoch": 0.36265550142920405, - "grad_norm": 0.0009149570250883698, - "learning_rate": 0.00019999993567288324, - "loss": 46.0, - "step": 2252 - }, - { - "epoch": 0.36281653850799145, - "grad_norm": 0.0008694474818184972, - "learning_rate": 0.00019999993561548677, - "loss": 46.0, - "step": 2253 - }, - { - "epoch": 0.36297757558677884, - "grad_norm": 0.0006183795048855245, - "learning_rate": 0.0001999999355580647, - "loss": 46.0, - "step": 2254 - }, - { - "epoch": 0.36313861266556624, - "grad_norm": 0.0026527612935751677, - "learning_rate": 0.00019999993550061702, - "loss": 46.0, - "step": 2255 - }, - { - "epoch": 0.36329964974435364, - "grad_norm": 0.003594527952373028, - "learning_rate": 0.00019999993544314376, - "loss": 46.0, - "step": 2256 - }, - { - "epoch": 0.36346068682314103, - "grad_norm": 0.0005805824184790254, - "learning_rate": 0.0001999999353856449, - "loss": 46.0, - "step": 2257 - }, - { - "epoch": 0.36362172390192843, - "grad_norm": 0.0006647557020187378, - "learning_rate": 0.00019999993532812042, - "loss": 46.0, - "step": 2258 - }, - { - "epoch": 0.3637827609807158, - "grad_norm": 0.00038508547004312277, - "learning_rate": 0.00019999993527057038, - "loss": 46.0, - "step": 2259 - }, - { - "epoch": 0.3639437980595032, - "grad_norm": 0.0014680176973342896, - "learning_rate": 0.00019999993521299474, - "loss": 46.0, - "step": 2260 - }, - { - "epoch": 0.3641048351382906, - "grad_norm": 0.0010807851795107126, - "learning_rate": 0.0001999999351553935, - "loss": 46.0, - "step": 2261 - }, - { - "epoch": 0.36426587221707796, - "grad_norm": 0.00028498127358034253, - "learning_rate": 0.00019999993509776666, - "loss": 46.0, - "step": 2262 - }, - { - "epoch": 0.36442690929586535, - "grad_norm": 0.0010145478881895542, - "learning_rate": 0.00019999993504011423, - "loss": 46.0, - "step": 2263 - }, - { - "epoch": 0.36458794637465275, - "grad_norm": 0.000994094298221171, - "learning_rate": 0.00019999993498243622, - "loss": 46.0, - "step": 2264 - }, - { - "epoch": 0.36474898345344015, - "grad_norm": 0.0034477945882827044, - "learning_rate": 0.0001999999349247326, - "loss": 46.0, - "step": 2265 - }, - { - "epoch": 0.36491002053222754, - "grad_norm": 0.0006467928760685027, - "learning_rate": 0.00019999993486700336, - "loss": 46.0, - "step": 2266 - }, - { - "epoch": 0.36507105761101494, - "grad_norm": 0.00040097400778904557, - "learning_rate": 0.00019999993480924856, - "loss": 46.0, - "step": 2267 - }, - { - "epoch": 0.36523209468980233, - "grad_norm": 0.0007467146497219801, - "learning_rate": 0.00019999993475146814, - "loss": 46.0, - "step": 2268 - }, - { - "epoch": 0.36539313176858973, - "grad_norm": 0.0005757774924859405, - "learning_rate": 0.00019999993469366217, - "loss": 46.0, - "step": 2269 - }, - { - "epoch": 0.3655541688473771, - "grad_norm": 0.0007973664905875921, - "learning_rate": 0.00019999993463583056, - "loss": 46.0, - "step": 2270 - }, - { - "epoch": 0.3657152059261645, - "grad_norm": 0.0010491148568689823, - "learning_rate": 0.00019999993457797338, - "loss": 46.0, - "step": 2271 - }, - { - "epoch": 0.3658762430049519, - "grad_norm": 0.00041741313179954886, - "learning_rate": 0.0001999999345200906, - "loss": 46.0, - "step": 2272 - }, - { - "epoch": 0.36603728008373926, - "grad_norm": 0.0014166667824611068, - "learning_rate": 0.00019999993446218222, - "loss": 46.0, - "step": 2273 - }, - { - "epoch": 0.36619831716252665, - "grad_norm": 0.001200210303068161, - "learning_rate": 0.00019999993440424825, - "loss": 46.0, - "step": 2274 - }, - { - "epoch": 0.36635935424131405, - "grad_norm": 0.0004238256369717419, - "learning_rate": 0.00019999993434628867, - "loss": 46.0, - "step": 2275 - }, - { - "epoch": 0.36652039132010145, - "grad_norm": 0.0013004005886614323, - "learning_rate": 0.0001999999342883035, - "loss": 46.0, - "step": 2276 - }, - { - "epoch": 0.36668142839888884, - "grad_norm": 0.0008044597343541682, - "learning_rate": 0.00019999993423029276, - "loss": 46.0, - "step": 2277 - }, - { - "epoch": 0.36684246547767624, - "grad_norm": 0.0007422963972203434, - "learning_rate": 0.0001999999341722564, - "loss": 46.0, - "step": 2278 - }, - { - "epoch": 0.36700350255646363, - "grad_norm": 0.0003879069408867508, - "learning_rate": 0.00019999993411419444, - "loss": 46.0, - "step": 2279 - }, - { - "epoch": 0.36716453963525103, - "grad_norm": 0.0020238852594047785, - "learning_rate": 0.0001999999340561069, - "loss": 46.0, - "step": 2280 - }, - { - "epoch": 0.3673255767140384, - "grad_norm": 0.0018274880712851882, - "learning_rate": 0.00019999993399799377, - "loss": 46.0, - "step": 2281 - }, - { - "epoch": 0.3674866137928258, - "grad_norm": 0.0013155628694221377, - "learning_rate": 0.00019999993393985503, - "loss": 46.0, - "step": 2282 - }, - { - "epoch": 0.36764765087161316, - "grad_norm": 0.0008399191428907216, - "learning_rate": 0.00019999993388169072, - "loss": 46.0, - "step": 2283 - }, - { - "epoch": 0.36780868795040056, - "grad_norm": 0.0004666945606004447, - "learning_rate": 0.0001999999338235008, - "loss": 46.0, - "step": 2284 - }, - { - "epoch": 0.36796972502918796, - "grad_norm": 0.0008019223459996283, - "learning_rate": 0.00019999993376528528, - "loss": 46.0, - "step": 2285 - }, - { - "epoch": 0.36813076210797535, - "grad_norm": 0.000614597403910011, - "learning_rate": 0.00019999993370704418, - "loss": 46.0, - "step": 2286 - }, - { - "epoch": 0.36829179918676275, - "grad_norm": 0.0001731208903947845, - "learning_rate": 0.00019999993364877745, - "loss": 46.0, - "step": 2287 - }, - { - "epoch": 0.36845283626555014, - "grad_norm": 0.0010255471570417285, - "learning_rate": 0.00019999993359048513, - "loss": 46.0, - "step": 2288 - }, - { - "epoch": 0.36861387334433754, - "grad_norm": 0.00031396085978485644, - "learning_rate": 0.00019999993353216725, - "loss": 46.0, - "step": 2289 - }, - { - "epoch": 0.36877491042312494, - "grad_norm": 0.0005155057879164815, - "learning_rate": 0.00019999993347382376, - "loss": 46.0, - "step": 2290 - }, - { - "epoch": 0.36893594750191233, - "grad_norm": 0.00048207244253717363, - "learning_rate": 0.00019999993341545468, - "loss": 46.0, - "step": 2291 - }, - { - "epoch": 0.3690969845806997, - "grad_norm": 0.0020481268875300884, - "learning_rate": 0.00019999993335706, - "loss": 46.0, - "step": 2292 - }, - { - "epoch": 0.3692580216594871, - "grad_norm": 0.0010415689321234822, - "learning_rate": 0.0001999999332986397, - "loss": 46.0, - "step": 2293 - }, - { - "epoch": 0.36941905873827446, - "grad_norm": 0.00044290709774941206, - "learning_rate": 0.00019999993324019383, - "loss": 46.0, - "step": 2294 - }, - { - "epoch": 0.36958009581706186, - "grad_norm": 0.000494443520437926, - "learning_rate": 0.0001999999331817224, - "loss": 46.0, - "step": 2295 - }, - { - "epoch": 0.36974113289584926, - "grad_norm": 0.0002783818927127868, - "learning_rate": 0.0001999999331232253, - "loss": 46.0, - "step": 2296 - }, - { - "epoch": 0.36990216997463665, - "grad_norm": 0.00034031918039545417, - "learning_rate": 0.00019999993306470267, - "loss": 46.0, - "step": 2297 - }, - { - "epoch": 0.37006320705342405, - "grad_norm": 0.0005188029608689249, - "learning_rate": 0.00019999993300615443, - "loss": 46.0, - "step": 2298 - }, - { - "epoch": 0.37022424413221144, - "grad_norm": 0.0005571770598180592, - "learning_rate": 0.00019999993294758057, - "loss": 46.0, - "step": 2299 - }, - { - "epoch": 0.37038528121099884, - "grad_norm": 0.0008157939882948995, - "learning_rate": 0.00019999993288898115, - "loss": 46.0, - "step": 2300 - }, - { - "epoch": 0.37054631828978624, - "grad_norm": 0.0016090160934254527, - "learning_rate": 0.0001999999328303561, - "loss": 46.0, - "step": 2301 - }, - { - "epoch": 0.37070735536857363, - "grad_norm": 0.0010384847410023212, - "learning_rate": 0.0001999999327717055, - "loss": 46.0, - "step": 2302 - }, - { - "epoch": 0.37086839244736103, - "grad_norm": 0.000959428318310529, - "learning_rate": 0.00019999993271302926, - "loss": 46.0, - "step": 2303 - }, - { - "epoch": 0.37102942952614837, - "grad_norm": 0.0019294407684355974, - "learning_rate": 0.0001999999326543274, - "loss": 46.0, - "step": 2304 - }, - { - "epoch": 0.37119046660493576, - "grad_norm": 0.000621182844042778, - "learning_rate": 0.00019999993259560002, - "loss": 46.0, - "step": 2305 - }, - { - "epoch": 0.37135150368372316, - "grad_norm": 0.0004290766955818981, - "learning_rate": 0.00019999993253684703, - "loss": 46.0, - "step": 2306 - }, - { - "epoch": 0.37151254076251056, - "grad_norm": 0.001392365782521665, - "learning_rate": 0.00019999993247806841, - "loss": 46.0, - "step": 2307 - }, - { - "epoch": 0.37167357784129795, - "grad_norm": 0.00041785926441662014, - "learning_rate": 0.0001999999324192642, - "loss": 46.0, - "step": 2308 - }, - { - "epoch": 0.37183461492008535, - "grad_norm": 0.0008172749658115208, - "learning_rate": 0.0001999999323604344, - "loss": 46.0, - "step": 2309 - }, - { - "epoch": 0.37199565199887275, - "grad_norm": 0.0004754325782414526, - "learning_rate": 0.00019999993230157903, - "loss": 46.0, - "step": 2310 - }, - { - "epoch": 0.37215668907766014, - "grad_norm": 0.0016481417696923018, - "learning_rate": 0.00019999993224269804, - "loss": 46.0, - "step": 2311 - }, - { - "epoch": 0.37231772615644754, - "grad_norm": 0.0010458268225193024, - "learning_rate": 0.00019999993218379144, - "loss": 46.0, - "step": 2312 - }, - { - "epoch": 0.37247876323523493, - "grad_norm": 0.0037686415016651154, - "learning_rate": 0.0001999999321248593, - "loss": 46.0, - "step": 2313 - }, - { - "epoch": 0.37263980031402233, - "grad_norm": 0.0004300907894503325, - "learning_rate": 0.00019999993206590153, - "loss": 46.0, - "step": 2314 - }, - { - "epoch": 0.37280083739280967, - "grad_norm": 0.0015523259062319994, - "learning_rate": 0.00019999993200691817, - "loss": 46.0, - "step": 2315 - }, - { - "epoch": 0.37296187447159707, - "grad_norm": 0.0010225278092548251, - "learning_rate": 0.0001999999319479092, - "loss": 46.0, - "step": 2316 - }, - { - "epoch": 0.37312291155038446, - "grad_norm": 0.00028549617854878306, - "learning_rate": 0.00019999993188887465, - "loss": 46.0, - "step": 2317 - }, - { - "epoch": 0.37328394862917186, - "grad_norm": 0.00028623847174458206, - "learning_rate": 0.0001999999318298145, - "loss": 46.0, - "step": 2318 - }, - { - "epoch": 0.37344498570795925, - "grad_norm": 0.0005056145018897951, - "learning_rate": 0.00019999993177072876, - "loss": 46.0, - "step": 2319 - }, - { - "epoch": 0.37360602278674665, - "grad_norm": 0.0005586351617239416, - "learning_rate": 0.0001999999317116174, - "loss": 46.0, - "step": 2320 - }, - { - "epoch": 0.37376705986553405, - "grad_norm": 0.0007560873636975884, - "learning_rate": 0.0001999999316524805, - "loss": 46.0, - "step": 2321 - }, - { - "epoch": 0.37392809694432144, - "grad_norm": 0.0012680510990321636, - "learning_rate": 0.00019999993159331797, - "loss": 46.0, - "step": 2322 - }, - { - "epoch": 0.37408913402310884, - "grad_norm": 0.0006473853136412799, - "learning_rate": 0.00019999993153412983, - "loss": 46.0, - "step": 2323 - }, - { - "epoch": 0.37425017110189623, - "grad_norm": 0.0005554733797907829, - "learning_rate": 0.00019999993147491613, - "loss": 46.0, - "step": 2324 - }, - { - "epoch": 0.37441120818068363, - "grad_norm": 0.000544844544492662, - "learning_rate": 0.0001999999314156768, - "loss": 46.0, - "step": 2325 - }, - { - "epoch": 0.37457224525947097, - "grad_norm": 0.0005107149481773376, - "learning_rate": 0.0001999999313564119, - "loss": 46.0, - "step": 2326 - }, - { - "epoch": 0.37473328233825837, - "grad_norm": 0.0010599495144560933, - "learning_rate": 0.00019999993129712142, - "loss": 46.0, - "step": 2327 - }, - { - "epoch": 0.37489431941704576, - "grad_norm": 0.0009644674137234688, - "learning_rate": 0.00019999993123780531, - "loss": 46.0, - "step": 2328 - }, - { - "epoch": 0.37505535649583316, - "grad_norm": 0.0015917709097266197, - "learning_rate": 0.00019999993117846362, - "loss": 46.0, - "step": 2329 - }, - { - "epoch": 0.37521639357462055, - "grad_norm": 0.0010225436417385936, - "learning_rate": 0.00019999993111909634, - "loss": 46.0, - "step": 2330 - }, - { - "epoch": 0.37537743065340795, - "grad_norm": 0.001246629050001502, - "learning_rate": 0.00019999993105970348, - "loss": 46.0, - "step": 2331 - }, - { - "epoch": 0.37553846773219535, - "grad_norm": 0.0007145669078454375, - "learning_rate": 0.000199999931000285, - "loss": 46.0, - "step": 2332 - }, - { - "epoch": 0.37569950481098274, - "grad_norm": 0.0005153944948688149, - "learning_rate": 0.0001999999309408409, - "loss": 46.0, - "step": 2333 - }, - { - "epoch": 0.37586054188977014, - "grad_norm": 0.0007187469163909554, - "learning_rate": 0.00019999993088137125, - "loss": 46.0, - "step": 2334 - }, - { - "epoch": 0.37602157896855753, - "grad_norm": 0.0010105695109814405, - "learning_rate": 0.00019999993082187598, - "loss": 46.0, - "step": 2335 - }, - { - "epoch": 0.3761826160473449, - "grad_norm": 0.0009219853673130274, - "learning_rate": 0.00019999993076235515, - "loss": 46.0, - "step": 2336 - }, - { - "epoch": 0.37634365312613227, - "grad_norm": 0.0004764607292599976, - "learning_rate": 0.00019999993070280868, - "loss": 46.0, - "step": 2337 - }, - { - "epoch": 0.37650469020491967, - "grad_norm": 0.0025382298044860363, - "learning_rate": 0.00019999993064323665, - "loss": 46.0, - "step": 2338 - }, - { - "epoch": 0.37666572728370706, - "grad_norm": 0.0008136255200952291, - "learning_rate": 0.000199999930583639, - "loss": 46.0, - "step": 2339 - }, - { - "epoch": 0.37682676436249446, - "grad_norm": 0.0017399373464286327, - "learning_rate": 0.00019999993052401577, - "loss": 46.0, - "step": 2340 - }, - { - "epoch": 0.37698780144128186, - "grad_norm": 0.00034965010127052665, - "learning_rate": 0.00019999993046436693, - "loss": 46.0, - "step": 2341 - }, - { - "epoch": 0.37714883852006925, - "grad_norm": 0.002027729758992791, - "learning_rate": 0.00019999993040469255, - "loss": 46.0, - "step": 2342 - }, - { - "epoch": 0.37730987559885665, - "grad_norm": 0.0010564856929704547, - "learning_rate": 0.00019999993034499253, - "loss": 46.0, - "step": 2343 - }, - { - "epoch": 0.37747091267764404, - "grad_norm": 0.0006513954722322524, - "learning_rate": 0.0001999999302852669, - "loss": 46.0, - "step": 2344 - }, - { - "epoch": 0.37763194975643144, - "grad_norm": 0.0009405898745171726, - "learning_rate": 0.0001999999302255157, - "loss": 46.0, - "step": 2345 - }, - { - "epoch": 0.37779298683521884, - "grad_norm": 0.0003593849833123386, - "learning_rate": 0.0001999999301657389, - "loss": 46.0, - "step": 2346 - }, - { - "epoch": 0.3779540239140062, - "grad_norm": 0.0039969636127352715, - "learning_rate": 0.0001999999301059365, - "loss": 46.0, - "step": 2347 - }, - { - "epoch": 0.3781150609927936, - "grad_norm": 0.0006771524786017835, - "learning_rate": 0.00019999993004610848, - "loss": 46.0, - "step": 2348 - }, - { - "epoch": 0.37827609807158097, - "grad_norm": 0.0008385548717342317, - "learning_rate": 0.0001999999299862549, - "loss": 46.0, - "step": 2349 - }, - { - "epoch": 0.37843713515036836, - "grad_norm": 0.0017256089486181736, - "learning_rate": 0.00019999992992637572, - "loss": 46.0, - "step": 2350 - }, - { - "epoch": 0.37859817222915576, - "grad_norm": 0.0014234265545383096, - "learning_rate": 0.00019999992986647093, - "loss": 46.0, - "step": 2351 - }, - { - "epoch": 0.37875920930794316, - "grad_norm": 0.0008165540639311075, - "learning_rate": 0.00019999992980654057, - "loss": 46.0, - "step": 2352 - }, - { - "epoch": 0.37892024638673055, - "grad_norm": 0.00033828490995801985, - "learning_rate": 0.00019999992974658462, - "loss": 46.0, - "step": 2353 - }, - { - "epoch": 0.37908128346551795, - "grad_norm": 0.0009571529226377606, - "learning_rate": 0.00019999992968660303, - "loss": 46.0, - "step": 2354 - }, - { - "epoch": 0.37924232054430534, - "grad_norm": 0.0018818129319697618, - "learning_rate": 0.00019999992962659588, - "loss": 46.0, - "step": 2355 - }, - { - "epoch": 0.37940335762309274, - "grad_norm": 0.0006002320442348719, - "learning_rate": 0.00019999992956656315, - "loss": 46.0, - "step": 2356 - }, - { - "epoch": 0.3795643947018801, - "grad_norm": 0.0019165269332006574, - "learning_rate": 0.0001999999295065048, - "loss": 46.0, - "step": 2357 - }, - { - "epoch": 0.3797254317806675, - "grad_norm": 0.0013940882636234164, - "learning_rate": 0.00019999992944642086, - "loss": 46.0, - "step": 2358 - }, - { - "epoch": 0.3798864688594549, - "grad_norm": 0.0002520715643186122, - "learning_rate": 0.0001999999293863113, - "loss": 46.0, - "step": 2359 - }, - { - "epoch": 0.38004750593824227, - "grad_norm": 0.000614637159742415, - "learning_rate": 0.0001999999293261762, - "loss": 46.0, - "step": 2360 - }, - { - "epoch": 0.38020854301702967, - "grad_norm": 0.00031276774825528264, - "learning_rate": 0.00019999992926601548, - "loss": 46.0, - "step": 2361 - }, - { - "epoch": 0.38036958009581706, - "grad_norm": 0.0005117102991789579, - "learning_rate": 0.00019999992920582914, - "loss": 46.0, - "step": 2362 - }, - { - "epoch": 0.38053061717460446, - "grad_norm": 0.00078467873390764, - "learning_rate": 0.00019999992914561724, - "loss": 46.0, - "step": 2363 - }, - { - "epoch": 0.38069165425339185, - "grad_norm": 0.0007993248291313648, - "learning_rate": 0.0001999999290853797, - "loss": 46.0, - "step": 2364 - }, - { - "epoch": 0.38085269133217925, - "grad_norm": 0.0006927712238393724, - "learning_rate": 0.00019999992902511662, - "loss": 46.0, - "step": 2365 - }, - { - "epoch": 0.38101372841096665, - "grad_norm": 0.0004904004745185375, - "learning_rate": 0.0001999999289648279, - "loss": 46.0, - "step": 2366 - }, - { - "epoch": 0.38117476548975404, - "grad_norm": 0.0007624586578458548, - "learning_rate": 0.00019999992890451363, - "loss": 46.0, - "step": 2367 - }, - { - "epoch": 0.3813358025685414, - "grad_norm": 0.0006003980524837971, - "learning_rate": 0.00019999992884417372, - "loss": 46.0, - "step": 2368 - }, - { - "epoch": 0.3814968396473288, - "grad_norm": 0.0012511751847341657, - "learning_rate": 0.00019999992878380824, - "loss": 46.0, - "step": 2369 - }, - { - "epoch": 0.3816578767261162, - "grad_norm": 0.00030171748949214816, - "learning_rate": 0.00019999992872341715, - "loss": 46.0, - "step": 2370 - }, - { - "epoch": 0.38181891380490357, - "grad_norm": 0.000552336685359478, - "learning_rate": 0.0001999999286630005, - "loss": 46.0, - "step": 2371 - }, - { - "epoch": 0.38197995088369097, - "grad_norm": 0.0003679750661831349, - "learning_rate": 0.00019999992860255823, - "loss": 46.0, - "step": 2372 - }, - { - "epoch": 0.38214098796247836, - "grad_norm": 0.0015138769522309303, - "learning_rate": 0.00019999992854209035, - "loss": 46.0, - "step": 2373 - }, - { - "epoch": 0.38230202504126576, - "grad_norm": 0.002658956218510866, - "learning_rate": 0.0001999999284815969, - "loss": 46.0, - "step": 2374 - }, - { - "epoch": 0.38246306212005315, - "grad_norm": 0.0006245792610570788, - "learning_rate": 0.00019999992842107783, - "loss": 46.0, - "step": 2375 - }, - { - "epoch": 0.38262409919884055, - "grad_norm": 0.0011079673422500491, - "learning_rate": 0.00019999992836053322, - "loss": 46.0, - "step": 2376 - }, - { - "epoch": 0.38278513627762795, - "grad_norm": 0.0005209127557463944, - "learning_rate": 0.00019999992829996293, - "loss": 46.0, - "step": 2377 - }, - { - "epoch": 0.3829461733564153, - "grad_norm": 0.0004823681665584445, - "learning_rate": 0.00019999992823936712, - "loss": 46.0, - "step": 2378 - }, - { - "epoch": 0.3831072104352027, - "grad_norm": 0.00042470896732993424, - "learning_rate": 0.0001999999281787457, - "loss": 46.0, - "step": 2379 - }, - { - "epoch": 0.3832682475139901, - "grad_norm": 0.0006351933116093278, - "learning_rate": 0.00019999992811809865, - "loss": 46.0, - "step": 2380 - }, - { - "epoch": 0.3834292845927775, - "grad_norm": 0.00033180080936290324, - "learning_rate": 0.00019999992805742601, - "loss": 46.0, - "step": 2381 - }, - { - "epoch": 0.38359032167156487, - "grad_norm": 0.0039007551968097687, - "learning_rate": 0.00019999992799672782, - "loss": 46.0, - "step": 2382 - }, - { - "epoch": 0.38375135875035227, - "grad_norm": 0.001388771110214293, - "learning_rate": 0.000199999927936004, - "loss": 46.0, - "step": 2383 - }, - { - "epoch": 0.38391239582913966, - "grad_norm": 0.001418697414919734, - "learning_rate": 0.0001999999278752546, - "loss": 46.0, - "step": 2384 - }, - { - "epoch": 0.38407343290792706, - "grad_norm": 0.0015510203083977103, - "learning_rate": 0.00019999992781447962, - "loss": 46.0, - "step": 2385 - }, - { - "epoch": 0.38423446998671446, - "grad_norm": 0.00042005235445685685, - "learning_rate": 0.000199999927753679, - "loss": 46.0, - "step": 2386 - }, - { - "epoch": 0.38439550706550185, - "grad_norm": 0.0018052227096632123, - "learning_rate": 0.00019999992769285281, - "loss": 46.0, - "step": 2387 - }, - { - "epoch": 0.38455654414428925, - "grad_norm": 0.0005109263001941144, - "learning_rate": 0.00019999992763200102, - "loss": 46.0, - "step": 2388 - }, - { - "epoch": 0.3847175812230766, - "grad_norm": 0.0006831470527686179, - "learning_rate": 0.00019999992757112364, - "loss": 46.0, - "step": 2389 - }, - { - "epoch": 0.384878618301864, - "grad_norm": 0.0006294698105193675, - "learning_rate": 0.00019999992751022067, - "loss": 46.0, - "step": 2390 - }, - { - "epoch": 0.3850396553806514, - "grad_norm": 0.001542579149827361, - "learning_rate": 0.00019999992744929208, - "loss": 46.0, - "step": 2391 - }, - { - "epoch": 0.3852006924594388, - "grad_norm": 0.00169956567697227, - "learning_rate": 0.00019999992738833794, - "loss": 46.0, - "step": 2392 - }, - { - "epoch": 0.38536172953822617, - "grad_norm": 0.0014051354955881834, - "learning_rate": 0.00019999992732735815, - "loss": 46.0, - "step": 2393 - }, - { - "epoch": 0.38552276661701357, - "grad_norm": 0.0006234274478629231, - "learning_rate": 0.0001999999272663528, - "loss": 46.0, - "step": 2394 - }, - { - "epoch": 0.38568380369580096, - "grad_norm": 0.00031594731262885034, - "learning_rate": 0.00019999992720532185, - "loss": 46.0, - "step": 2395 - }, - { - "epoch": 0.38584484077458836, - "grad_norm": 0.0011688338126987219, - "learning_rate": 0.00019999992714426533, - "loss": 46.0, - "step": 2396 - }, - { - "epoch": 0.38600587785337576, - "grad_norm": 0.000736875575967133, - "learning_rate": 0.00019999992708318317, - "loss": 46.0, - "step": 2397 - }, - { - "epoch": 0.38616691493216315, - "grad_norm": 0.0012262999080121517, - "learning_rate": 0.00019999992702207544, - "loss": 46.0, - "step": 2398 - }, - { - "epoch": 0.38632795201095055, - "grad_norm": 0.0007673557847738266, - "learning_rate": 0.0001999999269609421, - "loss": 46.0, - "step": 2399 - }, - { - "epoch": 0.3864889890897379, - "grad_norm": 0.0011410334846004844, - "learning_rate": 0.00019999992689978318, - "loss": 46.0, - "step": 2400 - }, - { - "epoch": 0.3866500261685253, - "grad_norm": 0.0007803708431310952, - "learning_rate": 0.00019999992683859867, - "loss": 46.0, - "step": 2401 - }, - { - "epoch": 0.3868110632473127, - "grad_norm": 0.00037778247497044504, - "learning_rate": 0.00019999992677738855, - "loss": 46.0, - "step": 2402 - }, - { - "epoch": 0.3869721003261001, - "grad_norm": 0.0013648406602442265, - "learning_rate": 0.00019999992671615284, - "loss": 46.0, - "step": 2403 - }, - { - "epoch": 0.3871331374048875, - "grad_norm": 0.00034797709668055177, - "learning_rate": 0.00019999992665489154, - "loss": 46.0, - "step": 2404 - }, - { - "epoch": 0.38729417448367487, - "grad_norm": 0.000945565989241004, - "learning_rate": 0.00019999992659360465, - "loss": 46.0, - "step": 2405 - }, - { - "epoch": 0.38745521156246227, - "grad_norm": 0.00022659880050923675, - "learning_rate": 0.00019999992653229213, - "loss": 46.0, - "step": 2406 - }, - { - "epoch": 0.38761624864124966, - "grad_norm": 0.0005075627123005688, - "learning_rate": 0.00019999992647095404, - "loss": 46.0, - "step": 2407 - }, - { - "epoch": 0.38777728572003706, - "grad_norm": 0.0009664405952207744, - "learning_rate": 0.00019999992640959036, - "loss": 46.0, - "step": 2408 - }, - { - "epoch": 0.38793832279882445, - "grad_norm": 0.00041972025064751506, - "learning_rate": 0.00019999992634820108, - "loss": 46.0, - "step": 2409 - }, - { - "epoch": 0.3880993598776118, - "grad_norm": 0.0007176906801760197, - "learning_rate": 0.0001999999262867862, - "loss": 46.0, - "step": 2410 - }, - { - "epoch": 0.3882603969563992, - "grad_norm": 0.0008887439616955817, - "learning_rate": 0.00019999992622534576, - "loss": 46.0, - "step": 2411 - }, - { - "epoch": 0.3884214340351866, - "grad_norm": 0.0003332272171974182, - "learning_rate": 0.00019999992616387969, - "loss": 46.0, - "step": 2412 - }, - { - "epoch": 0.388582471113974, - "grad_norm": 0.0008063961286097765, - "learning_rate": 0.00019999992610238802, - "loss": 46.0, - "step": 2413 - }, - { - "epoch": 0.3887435081927614, - "grad_norm": 0.0011459635570645332, - "learning_rate": 0.0001999999260408708, - "loss": 46.0, - "step": 2414 - }, - { - "epoch": 0.3889045452715488, - "grad_norm": 0.0006852532969787717, - "learning_rate": 0.00019999992597932793, - "loss": 46.0, - "step": 2415 - }, - { - "epoch": 0.38906558235033617, - "grad_norm": 0.0004779025912284851, - "learning_rate": 0.00019999992591775948, - "loss": 46.0, - "step": 2416 - }, - { - "epoch": 0.38922661942912357, - "grad_norm": 0.0016908718971535563, - "learning_rate": 0.00019999992585616544, - "loss": 46.0, - "step": 2417 - }, - { - "epoch": 0.38938765650791096, - "grad_norm": 0.0002849120646715164, - "learning_rate": 0.00019999992579454584, - "loss": 46.0, - "step": 2418 - }, - { - "epoch": 0.38954869358669836, - "grad_norm": 0.0003883201570715755, - "learning_rate": 0.0001999999257329006, - "loss": 46.0, - "step": 2419 - }, - { - "epoch": 0.38970973066548575, - "grad_norm": 0.001718754880130291, - "learning_rate": 0.00019999992567122977, - "loss": 46.0, - "step": 2420 - }, - { - "epoch": 0.3898707677442731, - "grad_norm": 0.0005050842883065343, - "learning_rate": 0.00019999992560953335, - "loss": 46.0, - "step": 2421 - }, - { - "epoch": 0.3900318048230605, - "grad_norm": 0.0005970178754068911, - "learning_rate": 0.00019999992554781132, - "loss": 46.0, - "step": 2422 - }, - { - "epoch": 0.3901928419018479, - "grad_norm": 0.0003626322722993791, - "learning_rate": 0.00019999992548606376, - "loss": 46.0, - "step": 2423 - }, - { - "epoch": 0.3903538789806353, - "grad_norm": 0.0005492300260812044, - "learning_rate": 0.00019999992542429052, - "loss": 46.0, - "step": 2424 - }, - { - "epoch": 0.3905149160594227, - "grad_norm": 0.0004774110275320709, - "learning_rate": 0.00019999992536249176, - "loss": 46.0, - "step": 2425 - }, - { - "epoch": 0.3906759531382101, - "grad_norm": 0.00033885202719829977, - "learning_rate": 0.00019999992530066733, - "loss": 46.0, - "step": 2426 - }, - { - "epoch": 0.39083699021699747, - "grad_norm": 0.0003523860650602728, - "learning_rate": 0.00019999992523881736, - "loss": 46.0, - "step": 2427 - }, - { - "epoch": 0.39099802729578487, - "grad_norm": 0.0005243658670224249, - "learning_rate": 0.00019999992517694178, - "loss": 46.0, - "step": 2428 - }, - { - "epoch": 0.39115906437457226, - "grad_norm": 0.0021557840518653393, - "learning_rate": 0.0001999999251150406, - "loss": 46.0, - "step": 2429 - }, - { - "epoch": 0.39132010145335966, - "grad_norm": 0.0014437641948461533, - "learning_rate": 0.00019999992505311386, - "loss": 46.0, - "step": 2430 - }, - { - "epoch": 0.391481138532147, - "grad_norm": 0.0017680978635326028, - "learning_rate": 0.00019999992499116146, - "loss": 46.0, - "step": 2431 - }, - { - "epoch": 0.3916421756109344, - "grad_norm": 0.0006148244137875736, - "learning_rate": 0.0001999999249291835, - "loss": 46.0, - "step": 2432 - }, - { - "epoch": 0.3918032126897218, - "grad_norm": 0.0006314045749604702, - "learning_rate": 0.00019999992486717996, - "loss": 46.0, - "step": 2433 - }, - { - "epoch": 0.3919642497685092, - "grad_norm": 0.0009844782762229443, - "learning_rate": 0.00019999992480515083, - "loss": 46.0, - "step": 2434 - }, - { - "epoch": 0.3921252868472966, - "grad_norm": 0.001140897162258625, - "learning_rate": 0.00019999992474309606, - "loss": 46.0, - "step": 2435 - }, - { - "epoch": 0.392286323926084, - "grad_norm": 0.0005091571947559714, - "learning_rate": 0.00019999992468101572, - "loss": 46.0, - "step": 2436 - }, - { - "epoch": 0.3924473610048714, - "grad_norm": 0.00027712646988220513, - "learning_rate": 0.0001999999246189098, - "loss": 46.0, - "step": 2437 - }, - { - "epoch": 0.39260839808365877, - "grad_norm": 0.0008853934123180807, - "learning_rate": 0.00019999992455677824, - "loss": 46.0, - "step": 2438 - }, - { - "epoch": 0.39276943516244617, - "grad_norm": 0.0011501384433358908, - "learning_rate": 0.00019999992449462112, - "loss": 46.0, - "step": 2439 - }, - { - "epoch": 0.39293047224123356, - "grad_norm": 0.000260513334069401, - "learning_rate": 0.00019999992443243842, - "loss": 46.0, - "step": 2440 - }, - { - "epoch": 0.39309150932002096, - "grad_norm": 0.0013478387845680118, - "learning_rate": 0.0001999999243702301, - "loss": 46.0, - "step": 2441 - }, - { - "epoch": 0.3932525463988083, - "grad_norm": 0.0021682940423488617, - "learning_rate": 0.00019999992430799618, - "loss": 46.0, - "step": 2442 - }, - { - "epoch": 0.3934135834775957, - "grad_norm": 0.00033404288114979863, - "learning_rate": 0.00019999992424573671, - "loss": 46.0, - "step": 2443 - }, - { - "epoch": 0.3935746205563831, - "grad_norm": 0.0008781529031693935, - "learning_rate": 0.0001999999241834516, - "loss": 46.0, - "step": 2444 - }, - { - "epoch": 0.3937356576351705, - "grad_norm": 0.0006088555091992021, - "learning_rate": 0.0001999999241211409, - "loss": 46.0, - "step": 2445 - }, - { - "epoch": 0.3938966947139579, - "grad_norm": 0.00035477944766171277, - "learning_rate": 0.00019999992405880462, - "loss": 46.0, - "step": 2446 - }, - { - "epoch": 0.3940577317927453, - "grad_norm": 0.00033839233219623566, - "learning_rate": 0.00019999992399644275, - "loss": 46.0, - "step": 2447 - }, - { - "epoch": 0.3942187688715327, - "grad_norm": 0.0007759662694297731, - "learning_rate": 0.00019999992393405526, - "loss": 46.0, - "step": 2448 - }, - { - "epoch": 0.3943798059503201, - "grad_norm": 0.0020294003188610077, - "learning_rate": 0.00019999992387164216, - "loss": 46.0, - "step": 2449 - }, - { - "epoch": 0.39454084302910747, - "grad_norm": 0.00037049237289465964, - "learning_rate": 0.00019999992380920353, - "loss": 46.0, - "step": 2450 - }, - { - "epoch": 0.39470188010789486, - "grad_norm": 0.00031506139202974737, - "learning_rate": 0.00019999992374673922, - "loss": 46.0, - "step": 2451 - }, - { - "epoch": 0.39486291718668226, - "grad_norm": 0.0017361241625621915, - "learning_rate": 0.0001999999236842494, - "loss": 46.0, - "step": 2452 - }, - { - "epoch": 0.3950239542654696, - "grad_norm": 0.000610153132583946, - "learning_rate": 0.00019999992362173394, - "loss": 46.0, - "step": 2453 - }, - { - "epoch": 0.395184991344257, - "grad_norm": 0.0006328023737296462, - "learning_rate": 0.0001999999235591929, - "loss": 46.0, - "step": 2454 - }, - { - "epoch": 0.3953460284230444, - "grad_norm": 0.0005528049659915268, - "learning_rate": 0.00019999992349662625, - "loss": 46.0, - "step": 2455 - }, - { - "epoch": 0.3955070655018318, - "grad_norm": 0.0006624308880418539, - "learning_rate": 0.000199999923434034, - "loss": 46.0, - "step": 2456 - }, - { - "epoch": 0.3956681025806192, - "grad_norm": 0.0005569378845393658, - "learning_rate": 0.0001999999233714162, - "loss": 46.0, - "step": 2457 - }, - { - "epoch": 0.3958291396594066, - "grad_norm": 0.0006670751608908176, - "learning_rate": 0.00019999992330877277, - "loss": 46.0, - "step": 2458 - }, - { - "epoch": 0.395990176738194, - "grad_norm": 0.0006159030017443001, - "learning_rate": 0.00019999992324610375, - "loss": 46.0, - "step": 2459 - }, - { - "epoch": 0.3961512138169814, - "grad_norm": 0.0028524715453386307, - "learning_rate": 0.00019999992318340913, - "loss": 46.0, - "step": 2460 - }, - { - "epoch": 0.39631225089576877, - "grad_norm": 0.0006164934602566063, - "learning_rate": 0.0001999999231206889, - "loss": 46.0, - "step": 2461 - }, - { - "epoch": 0.39647328797455617, - "grad_norm": 0.0015587115194648504, - "learning_rate": 0.00019999992305794312, - "loss": 46.0, - "step": 2462 - }, - { - "epoch": 0.3966343250533435, - "grad_norm": 0.0005712852580472827, - "learning_rate": 0.00019999992299517171, - "loss": 46.0, - "step": 2463 - }, - { - "epoch": 0.3967953621321309, - "grad_norm": 0.0016132298624143004, - "learning_rate": 0.0001999999229323747, - "loss": 46.0, - "step": 2464 - }, - { - "epoch": 0.3969563992109183, - "grad_norm": 0.0005810867296531796, - "learning_rate": 0.00019999992286955212, - "loss": 46.0, - "step": 2465 - }, - { - "epoch": 0.3971174362897057, - "grad_norm": 0.00031309822225011885, - "learning_rate": 0.00019999992280670396, - "loss": 46.0, - "step": 2466 - }, - { - "epoch": 0.3972784733684931, - "grad_norm": 0.0009651644504629076, - "learning_rate": 0.00019999992274383015, - "loss": 46.0, - "step": 2467 - }, - { - "epoch": 0.3974395104472805, - "grad_norm": 0.0007141558453440666, - "learning_rate": 0.00019999992268093079, - "loss": 46.0, - "step": 2468 - }, - { - "epoch": 0.3976005475260679, - "grad_norm": 0.00030265108216553926, - "learning_rate": 0.00019999992261800583, - "loss": 46.0, - "step": 2469 - }, - { - "epoch": 0.3977615846048553, - "grad_norm": 0.0005087955505587161, - "learning_rate": 0.00019999992255505524, - "loss": 46.0, - "step": 2470 - }, - { - "epoch": 0.3979226216836427, - "grad_norm": 0.001312676933594048, - "learning_rate": 0.00019999992249207908, - "loss": 46.0, - "step": 2471 - }, - { - "epoch": 0.39808365876243007, - "grad_norm": 0.00132571323774755, - "learning_rate": 0.00019999992242907734, - "loss": 46.0, - "step": 2472 - }, - { - "epoch": 0.39824469584121747, - "grad_norm": 0.0004774648405145854, - "learning_rate": 0.00019999992236605, - "loss": 46.0, - "step": 2473 - }, - { - "epoch": 0.3984057329200048, - "grad_norm": 0.0006745936698280275, - "learning_rate": 0.00019999992230299704, - "loss": 46.0, - "step": 2474 - }, - { - "epoch": 0.3985667699987922, - "grad_norm": 0.002986040897667408, - "learning_rate": 0.00019999992223991851, - "loss": 46.0, - "step": 2475 - }, - { - "epoch": 0.3987278070775796, - "grad_norm": 0.0011012536706402898, - "learning_rate": 0.00019999992217681437, - "loss": 46.0, - "step": 2476 - }, - { - "epoch": 0.398888844156367, - "grad_norm": 0.0002778563357423991, - "learning_rate": 0.00019999992211368467, - "loss": 46.0, - "step": 2477 - }, - { - "epoch": 0.3990498812351544, - "grad_norm": 0.0005057700909674168, - "learning_rate": 0.00019999992205052932, - "loss": 46.0, - "step": 2478 - }, - { - "epoch": 0.3992109183139418, - "grad_norm": 0.0008647431968711317, - "learning_rate": 0.00019999992198734841, - "loss": 46.0, - "step": 2479 - }, - { - "epoch": 0.3993719553927292, - "grad_norm": 0.0019028813112527132, - "learning_rate": 0.00019999992192414187, - "loss": 46.0, - "step": 2480 - }, - { - "epoch": 0.3995329924715166, - "grad_norm": 0.0002257141750305891, - "learning_rate": 0.00019999992186090979, - "loss": 46.0, - "step": 2481 - }, - { - "epoch": 0.399694029550304, - "grad_norm": 0.0005610088701359928, - "learning_rate": 0.0001999999217976521, - "loss": 46.0, - "step": 2482 - }, - { - "epoch": 0.39985506662909137, - "grad_norm": 0.002196467248722911, - "learning_rate": 0.00019999992173436878, - "loss": 46.0, - "step": 2483 - }, - { - "epoch": 0.4000161037078787, - "grad_norm": 0.00036643995554186404, - "learning_rate": 0.0001999999216710599, - "loss": 46.0, - "step": 2484 - }, - { - "epoch": 0.4001771407866661, - "grad_norm": 0.00037171339499764144, - "learning_rate": 0.0001999999216077254, - "loss": 46.0, - "step": 2485 - }, - { - "epoch": 0.4003381778654535, - "grad_norm": 0.0003727784496732056, - "learning_rate": 0.0001999999215443653, - "loss": 46.0, - "step": 2486 - }, - { - "epoch": 0.4004992149442409, - "grad_norm": 0.0017312564887106419, - "learning_rate": 0.00019999992148097965, - "loss": 46.0, - "step": 2487 - }, - { - "epoch": 0.4006602520230283, - "grad_norm": 0.0003423290909267962, - "learning_rate": 0.00019999992141756838, - "loss": 46.0, - "step": 2488 - }, - { - "epoch": 0.4008212891018157, - "grad_norm": 0.0002671041584108025, - "learning_rate": 0.00019999992135413152, - "loss": 46.0, - "step": 2489 - }, - { - "epoch": 0.4009823261806031, - "grad_norm": 0.000793307728599757, - "learning_rate": 0.00019999992129066905, - "loss": 46.0, - "step": 2490 - }, - { - "epoch": 0.4011433632593905, - "grad_norm": 0.0008093641954474151, - "learning_rate": 0.00019999992122718099, - "loss": 46.0, - "step": 2491 - }, - { - "epoch": 0.4013044003381779, - "grad_norm": 0.0028547835536301136, - "learning_rate": 0.00019999992116366734, - "loss": 46.0, - "step": 2492 - }, - { - "epoch": 0.4014654374169653, - "grad_norm": 0.00037326946039684117, - "learning_rate": 0.00019999992110012808, - "loss": 46.0, - "step": 2493 - }, - { - "epoch": 0.40162647449575267, - "grad_norm": 0.0007795938872732222, - "learning_rate": 0.00019999992103656325, - "loss": 46.0, - "step": 2494 - }, - { - "epoch": 0.40178751157454, - "grad_norm": 0.0005893181078135967, - "learning_rate": 0.00019999992097297282, - "loss": 46.0, - "step": 2495 - }, - { - "epoch": 0.4019485486533274, - "grad_norm": 0.0003579497570171952, - "learning_rate": 0.00019999992090935677, - "loss": 46.0, - "step": 2496 - }, - { - "epoch": 0.4021095857321148, - "grad_norm": 0.000713040994014591, - "learning_rate": 0.00019999992084571516, - "loss": 46.0, - "step": 2497 - }, - { - "epoch": 0.4022706228109022, - "grad_norm": 0.0009842911968007684, - "learning_rate": 0.00019999992078204793, - "loss": 46.0, - "step": 2498 - }, - { - "epoch": 0.4024316598896896, - "grad_norm": 0.0007644824800081551, - "learning_rate": 0.00019999992071835512, - "loss": 46.0, - "step": 2499 - }, - { - "epoch": 0.402592696968477, - "grad_norm": 0.001677367021329701, - "learning_rate": 0.0001999999206546367, - "loss": 46.0, - "step": 2500 - }, - { - "epoch": 0.4027537340472644, - "grad_norm": 0.0011205823393538594, - "learning_rate": 0.0001999999205908927, - "loss": 46.0, - "step": 2501 - }, - { - "epoch": 0.4029147711260518, - "grad_norm": 0.0016218560049310327, - "learning_rate": 0.0001999999205271231, - "loss": 46.0, - "step": 2502 - }, - { - "epoch": 0.4030758082048392, - "grad_norm": 0.00045920678530819714, - "learning_rate": 0.00019999992046332792, - "loss": 46.0, - "step": 2503 - }, - { - "epoch": 0.4032368452836266, - "grad_norm": 0.0013049228582531214, - "learning_rate": 0.00019999992039950715, - "loss": 46.0, - "step": 2504 - }, - { - "epoch": 0.403397882362414, - "grad_norm": 0.0003285286366008222, - "learning_rate": 0.00019999992033566076, - "loss": 46.0, - "step": 2505 - }, - { - "epoch": 0.4035589194412013, - "grad_norm": 0.0005516027449630201, - "learning_rate": 0.00019999992027178878, - "loss": 46.0, - "step": 2506 - }, - { - "epoch": 0.4037199565199887, - "grad_norm": 0.004015455488115549, - "learning_rate": 0.0001999999202078912, - "loss": 46.0, - "step": 2507 - }, - { - "epoch": 0.4038809935987761, - "grad_norm": 0.0006361909327097237, - "learning_rate": 0.00019999992014396804, - "loss": 46.0, - "step": 2508 - }, - { - "epoch": 0.4040420306775635, - "grad_norm": 0.000788997276686132, - "learning_rate": 0.00019999992008001927, - "loss": 46.0, - "step": 2509 - }, - { - "epoch": 0.4042030677563509, - "grad_norm": 0.002198253758251667, - "learning_rate": 0.0001999999200160449, - "loss": 46.0, - "step": 2510 - }, - { - "epoch": 0.4043641048351383, - "grad_norm": 0.0007323686149902642, - "learning_rate": 0.00019999991995204496, - "loss": 46.0, - "step": 2511 - }, - { - "epoch": 0.4045251419139257, - "grad_norm": 0.0005261424812488258, - "learning_rate": 0.0001999999198880194, - "loss": 46.0, - "step": 2512 - }, - { - "epoch": 0.4046861789927131, - "grad_norm": 0.0007068556733429432, - "learning_rate": 0.00019999991982396826, - "loss": 46.0, - "step": 2513 - }, - { - "epoch": 0.4048472160715005, - "grad_norm": 0.0014703463530167937, - "learning_rate": 0.00019999991975989153, - "loss": 46.0, - "step": 2514 - }, - { - "epoch": 0.4050082531502879, - "grad_norm": 0.0029368470422923565, - "learning_rate": 0.0001999999196957892, - "loss": 46.0, - "step": 2515 - }, - { - "epoch": 0.4051692902290752, - "grad_norm": 0.0005351447034627199, - "learning_rate": 0.00019999991963166126, - "loss": 46.0, - "step": 2516 - }, - { - "epoch": 0.4053303273078626, - "grad_norm": 0.0005643073818646371, - "learning_rate": 0.00019999991956750775, - "loss": 46.0, - "step": 2517 - }, - { - "epoch": 0.40549136438665, - "grad_norm": 0.0005366371478885412, - "learning_rate": 0.00019999991950332864, - "loss": 46.0, - "step": 2518 - }, - { - "epoch": 0.4056524014654374, - "grad_norm": 0.0009746613213792443, - "learning_rate": 0.0001999999194391239, - "loss": 46.0, - "step": 2519 - }, - { - "epoch": 0.4058134385442248, - "grad_norm": 0.0007656855159439147, - "learning_rate": 0.0001999999193748936, - "loss": 46.0, - "step": 2520 - }, - { - "epoch": 0.4059744756230122, - "grad_norm": 0.0004048139089718461, - "learning_rate": 0.0001999999193106377, - "loss": 46.0, - "step": 2521 - }, - { - "epoch": 0.4061355127017996, - "grad_norm": 0.002632913878187537, - "learning_rate": 0.00019999991924635622, - "loss": 46.0, - "step": 2522 - }, - { - "epoch": 0.406296549780587, - "grad_norm": 0.0005818718345835805, - "learning_rate": 0.00019999991918204913, - "loss": 46.0, - "step": 2523 - }, - { - "epoch": 0.4064575868593744, - "grad_norm": 0.0002545182651374489, - "learning_rate": 0.00019999991911771642, - "loss": 46.0, - "step": 2524 - }, - { - "epoch": 0.4066186239381618, - "grad_norm": 0.0008372089941985905, - "learning_rate": 0.00019999991905335815, - "loss": 46.0, - "step": 2525 - }, - { - "epoch": 0.4067796610169492, - "grad_norm": 0.0005814693286083639, - "learning_rate": 0.00019999991898897427, - "loss": 46.0, - "step": 2526 - }, - { - "epoch": 0.4069406980957365, - "grad_norm": 0.0012209018459543586, - "learning_rate": 0.0001999999189245648, - "loss": 46.0, - "step": 2527 - }, - { - "epoch": 0.4071017351745239, - "grad_norm": 0.0006315638311207294, - "learning_rate": 0.00019999991886012974, - "loss": 46.0, - "step": 2528 - }, - { - "epoch": 0.4072627722533113, - "grad_norm": 0.00044594131759367883, - "learning_rate": 0.00019999991879566907, - "loss": 46.0, - "step": 2529 - }, - { - "epoch": 0.4074238093320987, - "grad_norm": 0.00035840366035699844, - "learning_rate": 0.0001999999187311828, - "loss": 46.0, - "step": 2530 - }, - { - "epoch": 0.4075848464108861, - "grad_norm": 0.0013362610479816794, - "learning_rate": 0.00019999991866667097, - "loss": 46.0, - "step": 2531 - }, - { - "epoch": 0.4077458834896735, - "grad_norm": 0.00030423692078329623, - "learning_rate": 0.0001999999186021335, - "loss": 46.0, - "step": 2532 - }, - { - "epoch": 0.4079069205684609, - "grad_norm": 0.00022467058442998677, - "learning_rate": 0.0001999999185375705, - "loss": 46.0, - "step": 2533 - }, - { - "epoch": 0.4080679576472483, - "grad_norm": 0.0010856473818421364, - "learning_rate": 0.00019999991847298185, - "loss": 46.0, - "step": 2534 - }, - { - "epoch": 0.4082289947260357, - "grad_norm": 0.0006642889929935336, - "learning_rate": 0.00019999991840836763, - "loss": 46.0, - "step": 2535 - }, - { - "epoch": 0.4083900318048231, - "grad_norm": 0.0009202100918628275, - "learning_rate": 0.0001999999183437278, - "loss": 46.0, - "step": 2536 - }, - { - "epoch": 0.4085510688836104, - "grad_norm": 0.001075320178642869, - "learning_rate": 0.0001999999182790624, - "loss": 46.0, - "step": 2537 - }, - { - "epoch": 0.4087121059623978, - "grad_norm": 0.0011671797838062048, - "learning_rate": 0.0001999999182143714, - "loss": 46.0, - "step": 2538 - }, - { - "epoch": 0.4088731430411852, - "grad_norm": 0.00038806599332019687, - "learning_rate": 0.00019999991814965474, - "loss": 46.0, - "step": 2539 - }, - { - "epoch": 0.4090341801199726, - "grad_norm": 0.00021556422871071845, - "learning_rate": 0.00019999991808491256, - "loss": 46.0, - "step": 2540 - }, - { - "epoch": 0.40919521719876, - "grad_norm": 0.0013159997761249542, - "learning_rate": 0.00019999991802014476, - "loss": 46.0, - "step": 2541 - }, - { - "epoch": 0.4093562542775474, - "grad_norm": 0.0002375345939071849, - "learning_rate": 0.00019999991795535137, - "loss": 46.0, - "step": 2542 - }, - { - "epoch": 0.4095172913563348, - "grad_norm": 0.0005722291534766555, - "learning_rate": 0.00019999991789053237, - "loss": 46.0, - "step": 2543 - }, - { - "epoch": 0.4096783284351222, - "grad_norm": 0.0011116081150248647, - "learning_rate": 0.00019999991782568781, - "loss": 46.0, - "step": 2544 - }, - { - "epoch": 0.4098393655139096, - "grad_norm": 0.0003885219630319625, - "learning_rate": 0.0001999999177608176, - "loss": 46.0, - "step": 2545 - }, - { - "epoch": 0.410000402592697, - "grad_norm": 0.0011035461211577058, - "learning_rate": 0.00019999991769592182, - "loss": 46.0, - "step": 2546 - }, - { - "epoch": 0.4101614396714844, - "grad_norm": 0.0005727497627958655, - "learning_rate": 0.00019999991763100048, - "loss": 46.0, - "step": 2547 - }, - { - "epoch": 0.4103224767502717, - "grad_norm": 0.0010675222147256136, - "learning_rate": 0.0001999999175660535, - "loss": 46.0, - "step": 2548 - }, - { - "epoch": 0.4104835138290591, - "grad_norm": 0.000688559259288013, - "learning_rate": 0.00019999991750108096, - "loss": 46.0, - "step": 2549 - }, - { - "epoch": 0.4106445509078465, - "grad_norm": 0.0010269100312143564, - "learning_rate": 0.0001999999174360828, - "loss": 46.0, - "step": 2550 - }, - { - "epoch": 0.4108055879866339, - "grad_norm": 0.0005960292182862759, - "learning_rate": 0.00019999991737105908, - "loss": 46.0, - "step": 2551 - }, - { - "epoch": 0.4109666250654213, - "grad_norm": 0.0004713739617727697, - "learning_rate": 0.00019999991730600974, - "loss": 46.0, - "step": 2552 - }, - { - "epoch": 0.4111276621442087, - "grad_norm": 0.0007000573677942157, - "learning_rate": 0.00019999991724093478, - "loss": 46.0, - "step": 2553 - }, - { - "epoch": 0.4112886992229961, - "grad_norm": 0.0014249715022742748, - "learning_rate": 0.00019999991717583427, - "loss": 46.0, - "step": 2554 - }, - { - "epoch": 0.4114497363017835, - "grad_norm": 0.0004502889350987971, - "learning_rate": 0.00019999991711070814, - "loss": 46.0, - "step": 2555 - }, - { - "epoch": 0.4116107733805709, - "grad_norm": 0.0005693979328498244, - "learning_rate": 0.0001999999170455564, - "loss": 46.0, - "step": 2556 - }, - { - "epoch": 0.4117718104593583, - "grad_norm": 0.002164100529626012, - "learning_rate": 0.0001999999169803791, - "loss": 46.0, - "step": 2557 - }, - { - "epoch": 0.4119328475381457, - "grad_norm": 0.000281511020148173, - "learning_rate": 0.00019999991691517619, - "loss": 46.0, - "step": 2558 - }, - { - "epoch": 0.412093884616933, - "grad_norm": 0.0004598365630954504, - "learning_rate": 0.00019999991684994766, - "loss": 46.0, - "step": 2559 - }, - { - "epoch": 0.4122549216957204, - "grad_norm": 0.0005771976429969072, - "learning_rate": 0.00019999991678469356, - "loss": 46.0, - "step": 2560 - }, - { - "epoch": 0.4124159587745078, - "grad_norm": 0.0004161202523391694, - "learning_rate": 0.00019999991671941386, - "loss": 46.0, - "step": 2561 - }, - { - "epoch": 0.4125769958532952, - "grad_norm": 0.001491861417889595, - "learning_rate": 0.00019999991665410857, - "loss": 46.0, - "step": 2562 - }, - { - "epoch": 0.4127380329320826, - "grad_norm": 0.0007350731175392866, - "learning_rate": 0.0001999999165887777, - "loss": 46.0, - "step": 2563 - }, - { - "epoch": 0.41289907001087, - "grad_norm": 0.0006727223517373204, - "learning_rate": 0.0001999999165234212, - "loss": 46.0, - "step": 2564 - }, - { - "epoch": 0.4130601070896574, - "grad_norm": 0.000969410699326545, - "learning_rate": 0.00019999991645803914, - "loss": 46.0, - "step": 2565 - }, - { - "epoch": 0.4132211441684448, - "grad_norm": 0.0005818248027935624, - "learning_rate": 0.00019999991639263148, - "loss": 46.0, - "step": 2566 - }, - { - "epoch": 0.4133821812472322, - "grad_norm": 0.0004061332147102803, - "learning_rate": 0.00019999991632719822, - "loss": 46.0, - "step": 2567 - }, - { - "epoch": 0.4135432183260196, - "grad_norm": 0.0004701008729171008, - "learning_rate": 0.00019999991626173935, - "loss": 46.0, - "step": 2568 - }, - { - "epoch": 0.41370425540480693, - "grad_norm": 0.001591149833984673, - "learning_rate": 0.0001999999161962549, - "loss": 46.0, - "step": 2569 - }, - { - "epoch": 0.4138652924835943, - "grad_norm": 0.0022589925210922956, - "learning_rate": 0.00019999991613074485, - "loss": 46.0, - "step": 2570 - }, - { - "epoch": 0.4140263295623817, - "grad_norm": 0.0006316992803476751, - "learning_rate": 0.0001999999160652092, - "loss": 46.0, - "step": 2571 - }, - { - "epoch": 0.4141873666411691, - "grad_norm": 0.0003919966402463615, - "learning_rate": 0.00019999991599964795, - "loss": 46.0, - "step": 2572 - }, - { - "epoch": 0.4143484037199565, - "grad_norm": 0.000711875210981816, - "learning_rate": 0.00019999991593406112, - "loss": 46.0, - "step": 2573 - }, - { - "epoch": 0.4145094407987439, - "grad_norm": 0.0007769575458951294, - "learning_rate": 0.0001999999158684487, - "loss": 46.0, - "step": 2574 - }, - { - "epoch": 0.4146704778775313, - "grad_norm": 0.000219284716877155, - "learning_rate": 0.00019999991580281067, - "loss": 46.0, - "step": 2575 - }, - { - "epoch": 0.4148315149563187, - "grad_norm": 0.00028979365015402436, - "learning_rate": 0.00019999991573714705, - "loss": 46.0, - "step": 2576 - }, - { - "epoch": 0.4149925520351061, - "grad_norm": 0.0017525262665003538, - "learning_rate": 0.00019999991567145787, - "loss": 46.0, - "step": 2577 - }, - { - "epoch": 0.4151535891138935, - "grad_norm": 0.0005864475388079882, - "learning_rate": 0.00019999991560574305, - "loss": 46.0, - "step": 2578 - }, - { - "epoch": 0.4153146261926809, - "grad_norm": 0.0004776372807100415, - "learning_rate": 0.00019999991554000264, - "loss": 46.0, - "step": 2579 - }, - { - "epoch": 0.41547566327146823, - "grad_norm": 0.0011292042909190059, - "learning_rate": 0.00019999991547423664, - "loss": 46.0, - "step": 2580 - }, - { - "epoch": 0.41563670035025563, - "grad_norm": 0.0006078517180867493, - "learning_rate": 0.00019999991540844506, - "loss": 46.0, - "step": 2581 - }, - { - "epoch": 0.415797737429043, - "grad_norm": 0.0007042553625069559, - "learning_rate": 0.0001999999153426279, - "loss": 46.0, - "step": 2582 - }, - { - "epoch": 0.4159587745078304, - "grad_norm": 0.0004453930596355349, - "learning_rate": 0.00019999991527678508, - "loss": 46.0, - "step": 2583 - }, - { - "epoch": 0.4161198115866178, - "grad_norm": 0.0010095108300447464, - "learning_rate": 0.0001999999152109167, - "loss": 46.0, - "step": 2584 - }, - { - "epoch": 0.4162808486654052, - "grad_norm": 0.00038195581873878837, - "learning_rate": 0.00019999991514502274, - "loss": 46.0, - "step": 2585 - }, - { - "epoch": 0.4164418857441926, - "grad_norm": 0.0011428914731368423, - "learning_rate": 0.00019999991507910317, - "loss": 46.0, - "step": 2586 - }, - { - "epoch": 0.41660292282298, - "grad_norm": 0.001429154654033482, - "learning_rate": 0.000199999915013158, - "loss": 46.0, - "step": 2587 - }, - { - "epoch": 0.4167639599017674, - "grad_norm": 0.0011013075709342957, - "learning_rate": 0.00019999991494718726, - "loss": 46.0, - "step": 2588 - }, - { - "epoch": 0.4169249969805548, - "grad_norm": 0.0007707034819759429, - "learning_rate": 0.0001999999148811909, - "loss": 46.0, - "step": 2589 - }, - { - "epoch": 0.41708603405934214, - "grad_norm": 0.0004365165368653834, - "learning_rate": 0.00019999991481516895, - "loss": 46.0, - "step": 2590 - }, - { - "epoch": 0.41724707113812953, - "grad_norm": 0.002883465960621834, - "learning_rate": 0.00019999991474912141, - "loss": 46.0, - "step": 2591 - }, - { - "epoch": 0.41740810821691693, - "grad_norm": 0.000611049123108387, - "learning_rate": 0.0001999999146830483, - "loss": 46.0, - "step": 2592 - }, - { - "epoch": 0.4175691452957043, - "grad_norm": 0.0003664041869342327, - "learning_rate": 0.00019999991461694955, - "loss": 46.0, - "step": 2593 - }, - { - "epoch": 0.4177301823744917, - "grad_norm": 0.0004098225326742977, - "learning_rate": 0.0001999999145508252, - "loss": 46.0, - "step": 2594 - }, - { - "epoch": 0.4178912194532791, - "grad_norm": 0.0005654104752466083, - "learning_rate": 0.0001999999144846753, - "loss": 46.0, - "step": 2595 - }, - { - "epoch": 0.4180522565320665, - "grad_norm": 0.0005321140633895993, - "learning_rate": 0.00019999991441849976, - "loss": 46.0, - "step": 2596 - }, - { - "epoch": 0.4182132936108539, - "grad_norm": 0.0006334095960482955, - "learning_rate": 0.00019999991435229868, - "loss": 46.0, - "step": 2597 - }, - { - "epoch": 0.4183743306896413, - "grad_norm": 0.0011608528438955545, - "learning_rate": 0.00019999991428607198, - "loss": 46.0, - "step": 2598 - }, - { - "epoch": 0.4185353677684287, - "grad_norm": 0.0007042776560410857, - "learning_rate": 0.00019999991421981966, - "loss": 46.0, - "step": 2599 - }, - { - "epoch": 0.4186964048472161, - "grad_norm": 0.0010931257857009768, - "learning_rate": 0.00019999991415354176, - "loss": 46.0, - "step": 2600 - }, - { - "epoch": 0.41885744192600344, - "grad_norm": 0.0002743923105299473, - "learning_rate": 0.0001999999140872383, - "loss": 46.0, - "step": 2601 - }, - { - "epoch": 0.41901847900479083, - "grad_norm": 0.0011098834220319986, - "learning_rate": 0.0001999999140209092, - "loss": 46.0, - "step": 2602 - }, - { - "epoch": 0.41917951608357823, - "grad_norm": 0.0004687729524448514, - "learning_rate": 0.00019999991395455453, - "loss": 46.0, - "step": 2603 - }, - { - "epoch": 0.4193405531623656, - "grad_norm": 0.000279287516605109, - "learning_rate": 0.00019999991388817425, - "loss": 46.0, - "step": 2604 - }, - { - "epoch": 0.419501590241153, - "grad_norm": 0.0020967002492398024, - "learning_rate": 0.00019999991382176839, - "loss": 46.0, - "step": 2605 - }, - { - "epoch": 0.4196626273199404, - "grad_norm": 0.0010082245571538806, - "learning_rate": 0.0001999999137553369, - "loss": 46.0, - "step": 2606 - }, - { - "epoch": 0.4198236643987278, - "grad_norm": 0.0008503978606313467, - "learning_rate": 0.00019999991368887984, - "loss": 46.0, - "step": 2607 - }, - { - "epoch": 0.4199847014775152, - "grad_norm": 0.0002749088453128934, - "learning_rate": 0.0001999999136223972, - "loss": 46.0, - "step": 2608 - }, - { - "epoch": 0.4201457385563026, - "grad_norm": 0.000738668255507946, - "learning_rate": 0.00019999991355588894, - "loss": 46.0, - "step": 2609 - }, - { - "epoch": 0.42030677563509, - "grad_norm": 0.00029768518288619816, - "learning_rate": 0.0001999999134893551, - "loss": 46.0, - "step": 2610 - }, - { - "epoch": 0.4204678127138774, - "grad_norm": 0.0007780184969305992, - "learning_rate": 0.00019999991342279567, - "loss": 46.0, - "step": 2611 - }, - { - "epoch": 0.42062884979266474, - "grad_norm": 0.0016570388106629252, - "learning_rate": 0.00019999991335621064, - "loss": 46.0, - "step": 2612 - }, - { - "epoch": 0.42078988687145213, - "grad_norm": 0.0006330210599116981, - "learning_rate": 0.00019999991328960003, - "loss": 46.0, - "step": 2613 - }, - { - "epoch": 0.42095092395023953, - "grad_norm": 0.0019042609492316842, - "learning_rate": 0.0001999999132229638, - "loss": 46.0, - "step": 2614 - }, - { - "epoch": 0.4211119610290269, - "grad_norm": 0.0017024396220222116, - "learning_rate": 0.00019999991315630198, - "loss": 46.0, - "step": 2615 - }, - { - "epoch": 0.4212729981078143, - "grad_norm": 0.0008206578204408288, - "learning_rate": 0.00019999991308961457, - "loss": 46.0, - "step": 2616 - }, - { - "epoch": 0.4214340351866017, - "grad_norm": 0.00285553396679461, - "learning_rate": 0.00019999991302290155, - "loss": 46.0, - "step": 2617 - }, - { - "epoch": 0.4215950722653891, - "grad_norm": 0.0012596884043887258, - "learning_rate": 0.00019999991295616295, - "loss": 46.0, - "step": 2618 - }, - { - "epoch": 0.4217561093441765, - "grad_norm": 0.0003684552211780101, - "learning_rate": 0.00019999991288939875, - "loss": 46.0, - "step": 2619 - }, - { - "epoch": 0.4219171464229639, - "grad_norm": 0.0010181218385696411, - "learning_rate": 0.00019999991282260897, - "loss": 46.0, - "step": 2620 - }, - { - "epoch": 0.4220781835017513, - "grad_norm": 0.0006476754206232727, - "learning_rate": 0.00019999991275579358, - "loss": 46.0, - "step": 2621 - }, - { - "epoch": 0.42223922058053864, - "grad_norm": 0.0003088137018494308, - "learning_rate": 0.0001999999126889526, - "loss": 46.0, - "step": 2622 - }, - { - "epoch": 0.42240025765932604, - "grad_norm": 0.0006333135534077883, - "learning_rate": 0.00019999991262208605, - "loss": 46.0, - "step": 2623 - }, - { - "epoch": 0.42256129473811344, - "grad_norm": 0.0005531442002393305, - "learning_rate": 0.00019999991255519387, - "loss": 46.0, - "step": 2624 - }, - { - "epoch": 0.42272233181690083, - "grad_norm": 0.0017164757009595633, - "learning_rate": 0.0001999999124882761, - "loss": 46.0, - "step": 2625 - }, - { - "epoch": 0.4228833688956882, - "grad_norm": 0.000631219707429409, - "learning_rate": 0.0001999999124213327, - "loss": 46.0, - "step": 2626 - }, - { - "epoch": 0.4230444059744756, - "grad_norm": 0.0006477307761088014, - "learning_rate": 0.00019999991235436376, - "loss": 46.0, - "step": 2627 - }, - { - "epoch": 0.423205443053263, - "grad_norm": 0.0008212728425860405, - "learning_rate": 0.0001999999122873692, - "loss": 46.0, - "step": 2628 - }, - { - "epoch": 0.4233664801320504, - "grad_norm": 0.001346821547485888, - "learning_rate": 0.00019999991222034908, - "loss": 46.0, - "step": 2629 - }, - { - "epoch": 0.4235275172108378, - "grad_norm": 0.0004094088508281857, - "learning_rate": 0.00019999991215330335, - "loss": 46.0, - "step": 2630 - }, - { - "epoch": 0.4236885542896252, - "grad_norm": 0.00031145557295531034, - "learning_rate": 0.000199999912086232, - "loss": 46.0, - "step": 2631 - }, - { - "epoch": 0.4238495913684126, - "grad_norm": 0.0009608006803318858, - "learning_rate": 0.00019999991201913507, - "loss": 46.0, - "step": 2632 - }, - { - "epoch": 0.42401062844719994, - "grad_norm": 0.0003583583456929773, - "learning_rate": 0.00019999991195201254, - "loss": 46.0, - "step": 2633 - }, - { - "epoch": 0.42417166552598734, - "grad_norm": 0.0005778919439762831, - "learning_rate": 0.00019999991188486443, - "loss": 46.0, - "step": 2634 - }, - { - "epoch": 0.42433270260477474, - "grad_norm": 0.0006798491813242435, - "learning_rate": 0.0001999999118176907, - "loss": 46.0, - "step": 2635 - }, - { - "epoch": 0.42449373968356213, - "grad_norm": 0.0005650821258313954, - "learning_rate": 0.00019999991175049142, - "loss": 46.0, - "step": 2636 - }, - { - "epoch": 0.42465477676234953, - "grad_norm": 0.001622366369701922, - "learning_rate": 0.0001999999116832665, - "loss": 46.0, - "step": 2637 - }, - { - "epoch": 0.4248158138411369, - "grad_norm": 0.0005355989560484886, - "learning_rate": 0.000199999911616016, - "loss": 46.0, - "step": 2638 - }, - { - "epoch": 0.4249768509199243, - "grad_norm": 0.0009966815123334527, - "learning_rate": 0.0001999999115487399, - "loss": 46.0, - "step": 2639 - }, - { - "epoch": 0.4251378879987117, - "grad_norm": 0.0016135798068717122, - "learning_rate": 0.00019999991148143823, - "loss": 46.0, - "step": 2640 - }, - { - "epoch": 0.4252989250774991, - "grad_norm": 0.0004172810586169362, - "learning_rate": 0.00019999991141411095, - "loss": 46.0, - "step": 2641 - }, - { - "epoch": 0.4254599621562865, - "grad_norm": 0.00042144497274421155, - "learning_rate": 0.00019999991134675806, - "loss": 46.0, - "step": 2642 - }, - { - "epoch": 0.42562099923507385, - "grad_norm": 0.0007188401068560779, - "learning_rate": 0.0001999999112793796, - "loss": 46.0, - "step": 2643 - }, - { - "epoch": 0.42578203631386125, - "grad_norm": 0.0018502577440813184, - "learning_rate": 0.00019999991121197552, - "loss": 46.0, - "step": 2644 - }, - { - "epoch": 0.42594307339264864, - "grad_norm": 0.0006879601860418916, - "learning_rate": 0.00019999991114454585, - "loss": 46.0, - "step": 2645 - }, - { - "epoch": 0.42610411047143604, - "grad_norm": 0.0016043668147176504, - "learning_rate": 0.0001999999110770906, - "loss": 46.0, - "step": 2646 - }, - { - "epoch": 0.42626514755022343, - "grad_norm": 0.0005683312192559242, - "learning_rate": 0.00019999991100960976, - "loss": 46.0, - "step": 2647 - }, - { - "epoch": 0.42642618462901083, - "grad_norm": 0.0005000039236620069, - "learning_rate": 0.00019999991094210332, - "loss": 46.0, - "step": 2648 - }, - { - "epoch": 0.4265872217077982, - "grad_norm": 0.00046185965766198933, - "learning_rate": 0.00019999991087457127, - "loss": 46.0, - "step": 2649 - }, - { - "epoch": 0.4267482587865856, - "grad_norm": 0.00037300106487236917, - "learning_rate": 0.00019999991080701363, - "loss": 46.0, - "step": 2650 - }, - { - "epoch": 0.426909295865373, - "grad_norm": 0.0010627803858369589, - "learning_rate": 0.0001999999107394304, - "loss": 46.0, - "step": 2651 - }, - { - "epoch": 0.4270703329441604, - "grad_norm": 0.0007060438510961831, - "learning_rate": 0.00019999991067182156, - "loss": 46.0, - "step": 2652 - }, - { - "epoch": 0.4272313700229478, - "grad_norm": 0.001122227986343205, - "learning_rate": 0.00019999991060418715, - "loss": 46.0, - "step": 2653 - }, - { - "epoch": 0.42739240710173515, - "grad_norm": 0.0006794400978833437, - "learning_rate": 0.0001999999105365271, - "loss": 46.0, - "step": 2654 - }, - { - "epoch": 0.42755344418052255, - "grad_norm": 0.0012592078419402242, - "learning_rate": 0.00019999991046884154, - "loss": 46.0, - "step": 2655 - }, - { - "epoch": 0.42771448125930994, - "grad_norm": 0.0008367216214537621, - "learning_rate": 0.00019999991040113032, - "loss": 46.0, - "step": 2656 - }, - { - "epoch": 0.42787551833809734, - "grad_norm": 0.00032703651231713593, - "learning_rate": 0.0001999999103333935, - "loss": 46.0, - "step": 2657 - }, - { - "epoch": 0.42803655541688473, - "grad_norm": 0.0006271525053307414, - "learning_rate": 0.0001999999102656311, - "loss": 46.0, - "step": 2658 - }, - { - "epoch": 0.42819759249567213, - "grad_norm": 0.000993541325442493, - "learning_rate": 0.00019999991019784312, - "loss": 46.0, - "step": 2659 - }, - { - "epoch": 0.4283586295744595, - "grad_norm": 0.000801052141468972, - "learning_rate": 0.00019999991013002952, - "loss": 46.0, - "step": 2660 - }, - { - "epoch": 0.4285196666532469, - "grad_norm": 0.0006492906832136214, - "learning_rate": 0.00019999991006219037, - "loss": 46.0, - "step": 2661 - }, - { - "epoch": 0.4286807037320343, - "grad_norm": 0.0004494022286962718, - "learning_rate": 0.00019999990999432557, - "loss": 46.0, - "step": 2662 - }, - { - "epoch": 0.4288417408108217, - "grad_norm": 0.0007963523967191577, - "learning_rate": 0.00019999990992643522, - "loss": 46.0, - "step": 2663 - }, - { - "epoch": 0.42900277788960905, - "grad_norm": 0.0004384623025543988, - "learning_rate": 0.00019999990985851925, - "loss": 46.0, - "step": 2664 - }, - { - "epoch": 0.42916381496839645, - "grad_norm": 0.001220785896293819, - "learning_rate": 0.00019999990979057767, - "loss": 46.0, - "step": 2665 - }, - { - "epoch": 0.42932485204718385, - "grad_norm": 0.0003058039874304086, - "learning_rate": 0.00019999990972261052, - "loss": 46.0, - "step": 2666 - }, - { - "epoch": 0.42948588912597124, - "grad_norm": 0.00038542156107723713, - "learning_rate": 0.0001999999096546178, - "loss": 46.0, - "step": 2667 - }, - { - "epoch": 0.42964692620475864, - "grad_norm": 0.0006260395748540759, - "learning_rate": 0.00019999990958659945, - "loss": 46.0, - "step": 2668 - }, - { - "epoch": 0.42980796328354604, - "grad_norm": 0.001564955455251038, - "learning_rate": 0.0001999999095185555, - "loss": 46.0, - "step": 2669 - }, - { - "epoch": 0.42996900036233343, - "grad_norm": 0.0005347688565962017, - "learning_rate": 0.00019999990945048597, - "loss": 46.0, - "step": 2670 - }, - { - "epoch": 0.4301300374411208, - "grad_norm": 0.0005981026333756745, - "learning_rate": 0.00019999990938239084, - "loss": 46.0, - "step": 2671 - }, - { - "epoch": 0.4302910745199082, - "grad_norm": 0.0013980268267914653, - "learning_rate": 0.00019999990931427012, - "loss": 46.0, - "step": 2672 - }, - { - "epoch": 0.4304521115986956, - "grad_norm": 0.0004649643669836223, - "learning_rate": 0.0001999999092461238, - "loss": 46.0, - "step": 2673 - }, - { - "epoch": 0.430613148677483, - "grad_norm": 0.00039003134588710964, - "learning_rate": 0.00019999990917795189, - "loss": 46.0, - "step": 2674 - }, - { - "epoch": 0.43077418575627036, - "grad_norm": 0.0007489864365197718, - "learning_rate": 0.00019999990910975438, - "loss": 46.0, - "step": 2675 - }, - { - "epoch": 0.43093522283505775, - "grad_norm": 0.000439871393609792, - "learning_rate": 0.00019999990904153128, - "loss": 46.0, - "step": 2676 - }, - { - "epoch": 0.43109625991384515, - "grad_norm": 0.0006495008710771799, - "learning_rate": 0.00019999990897328257, - "loss": 46.0, - "step": 2677 - }, - { - "epoch": 0.43125729699263254, - "grad_norm": 0.0021862033754587173, - "learning_rate": 0.00019999990890500827, - "loss": 46.0, - "step": 2678 - }, - { - "epoch": 0.43141833407141994, - "grad_norm": 0.00034401609445922077, - "learning_rate": 0.00019999990883670839, - "loss": 46.0, - "step": 2679 - }, - { - "epoch": 0.43157937115020734, - "grad_norm": 0.00048437921213917434, - "learning_rate": 0.00019999990876838291, - "loss": 46.0, - "step": 2680 - }, - { - "epoch": 0.43174040822899473, - "grad_norm": 0.0007042984361760318, - "learning_rate": 0.00019999990870003185, - "loss": 46.0, - "step": 2681 - }, - { - "epoch": 0.43190144530778213, - "grad_norm": 0.0007430159021168947, - "learning_rate": 0.00019999990863165515, - "loss": 46.0, - "step": 2682 - }, - { - "epoch": 0.4320624823865695, - "grad_norm": 0.0010032978607341647, - "learning_rate": 0.0001999999085632529, - "loss": 46.0, - "step": 2683 - }, - { - "epoch": 0.4322235194653569, - "grad_norm": 0.00039155816193670034, - "learning_rate": 0.00019999990849482504, - "loss": 46.0, - "step": 2684 - }, - { - "epoch": 0.4323845565441443, - "grad_norm": 0.0009293517796322703, - "learning_rate": 0.0001999999084263716, - "loss": 46.0, - "step": 2685 - }, - { - "epoch": 0.43254559362293166, - "grad_norm": 0.00036867070593871176, - "learning_rate": 0.00019999990835789253, - "loss": 46.0, - "step": 2686 - }, - { - "epoch": 0.43270663070171905, - "grad_norm": 0.001242671045474708, - "learning_rate": 0.0001999999082893879, - "loss": 46.0, - "step": 2687 - }, - { - "epoch": 0.43286766778050645, - "grad_norm": 0.000723321340046823, - "learning_rate": 0.00019999990822085765, - "loss": 46.0, - "step": 2688 - }, - { - "epoch": 0.43302870485929384, - "grad_norm": 0.0004949741996824741, - "learning_rate": 0.0001999999081523018, - "loss": 46.0, - "step": 2689 - }, - { - "epoch": 0.43318974193808124, - "grad_norm": 0.0011749679688364267, - "learning_rate": 0.00019999990808372038, - "loss": 46.0, - "step": 2690 - }, - { - "epoch": 0.43335077901686864, - "grad_norm": 0.0015893633244559169, - "learning_rate": 0.00019999990801511334, - "loss": 46.0, - "step": 2691 - }, - { - "epoch": 0.43351181609565603, - "grad_norm": 0.0004216721863485873, - "learning_rate": 0.00019999990794648074, - "loss": 46.0, - "step": 2692 - }, - { - "epoch": 0.43367285317444343, - "grad_norm": 0.00045768602285534143, - "learning_rate": 0.00019999990787782253, - "loss": 46.0, - "step": 2693 - }, - { - "epoch": 0.4338338902532308, - "grad_norm": 0.0012356709921732545, - "learning_rate": 0.0001999999078091387, - "loss": 46.0, - "step": 2694 - }, - { - "epoch": 0.4339949273320182, - "grad_norm": 0.0006851233774796128, - "learning_rate": 0.0001999999077404293, - "loss": 46.0, - "step": 2695 - }, - { - "epoch": 0.43415596441080556, - "grad_norm": 0.000262343673966825, - "learning_rate": 0.0001999999076716943, - "loss": 46.0, - "step": 2696 - }, - { - "epoch": 0.43431700148959296, - "grad_norm": 0.0004097023338545114, - "learning_rate": 0.0001999999076029337, - "loss": 46.0, - "step": 2697 - }, - { - "epoch": 0.43447803856838035, - "grad_norm": 0.000551571836695075, - "learning_rate": 0.00019999990753414752, - "loss": 46.0, - "step": 2698 - }, - { - "epoch": 0.43463907564716775, - "grad_norm": 0.0016339104622602463, - "learning_rate": 0.00019999990746533573, - "loss": 46.0, - "step": 2699 - }, - { - "epoch": 0.43480011272595515, - "grad_norm": 0.00032277987338602543, - "learning_rate": 0.00019999990739649835, - "loss": 46.0, - "step": 2700 - }, - { - "epoch": 0.43496114980474254, - "grad_norm": 0.0004354232514742762, - "learning_rate": 0.00019999990732763538, - "loss": 46.0, - "step": 2701 - }, - { - "epoch": 0.43512218688352994, - "grad_norm": 0.0006771563785150647, - "learning_rate": 0.0001999999072587468, - "loss": 46.0, - "step": 2702 - }, - { - "epoch": 0.43528322396231733, - "grad_norm": 0.0003972511040046811, - "learning_rate": 0.00019999990718983263, - "loss": 46.0, - "step": 2703 - }, - { - "epoch": 0.43544426104110473, - "grad_norm": 0.000676590483635664, - "learning_rate": 0.00019999990712089288, - "loss": 46.0, - "step": 2704 - }, - { - "epoch": 0.4356052981198921, - "grad_norm": 0.0007195529760792851, - "learning_rate": 0.00019999990705192754, - "loss": 46.0, - "step": 2705 - }, - { - "epoch": 0.4357663351986795, - "grad_norm": 0.00038460109499283135, - "learning_rate": 0.00019999990698293658, - "loss": 46.0, - "step": 2706 - }, - { - "epoch": 0.43592737227746686, - "grad_norm": 0.0006472237873822451, - "learning_rate": 0.00019999990691392004, - "loss": 46.0, - "step": 2707 - }, - { - "epoch": 0.43608840935625426, - "grad_norm": 0.0003614487068261951, - "learning_rate": 0.0001999999068448779, - "loss": 46.0, - "step": 2708 - }, - { - "epoch": 0.43624944643504165, - "grad_norm": 0.0008919125539250672, - "learning_rate": 0.00019999990677581017, - "loss": 46.0, - "step": 2709 - }, - { - "epoch": 0.43641048351382905, - "grad_norm": 0.0006342657143250108, - "learning_rate": 0.00019999990670671684, - "loss": 46.0, - "step": 2710 - }, - { - "epoch": 0.43657152059261645, - "grad_norm": 0.0006811715429648757, - "learning_rate": 0.00019999990663759794, - "loss": 46.0, - "step": 2711 - }, - { - "epoch": 0.43673255767140384, - "grad_norm": 0.002743551740422845, - "learning_rate": 0.0001999999065684534, - "loss": 46.0, - "step": 2712 - }, - { - "epoch": 0.43689359475019124, - "grad_norm": 0.00041297305142506957, - "learning_rate": 0.0001999999064992833, - "loss": 46.0, - "step": 2713 - }, - { - "epoch": 0.43705463182897863, - "grad_norm": 0.0009428643388673663, - "learning_rate": 0.00019999990643008758, - "loss": 46.0, - "step": 2714 - }, - { - "epoch": 0.43721566890776603, - "grad_norm": 0.001723370049148798, - "learning_rate": 0.00019999990636086626, - "loss": 46.0, - "step": 2715 - }, - { - "epoch": 0.4373767059865534, - "grad_norm": 0.0008770760032348335, - "learning_rate": 0.0001999999062916194, - "loss": 46.0, - "step": 2716 - }, - { - "epoch": 0.43753774306534077, - "grad_norm": 0.000561458757147193, - "learning_rate": 0.0001999999062223469, - "loss": 46.0, - "step": 2717 - }, - { - "epoch": 0.43769878014412816, - "grad_norm": 0.0038393663708120584, - "learning_rate": 0.00019999990615304883, - "loss": 46.0, - "step": 2718 - }, - { - "epoch": 0.43785981722291556, - "grad_norm": 0.0010003532515838742, - "learning_rate": 0.0001999999060837251, - "loss": 46.0, - "step": 2719 - }, - { - "epoch": 0.43802085430170296, - "grad_norm": 0.0006829585181549191, - "learning_rate": 0.00019999990601437585, - "loss": 46.0, - "step": 2720 - }, - { - "epoch": 0.43818189138049035, - "grad_norm": 0.000540735840331763, - "learning_rate": 0.00019999990594500098, - "loss": 46.0, - "step": 2721 - }, - { - "epoch": 0.43834292845927775, - "grad_norm": 0.0006235248292796314, - "learning_rate": 0.00019999990587560055, - "loss": 46.0, - "step": 2722 - }, - { - "epoch": 0.43850396553806514, - "grad_norm": 0.0007193930214270949, - "learning_rate": 0.00019999990580617447, - "loss": 46.0, - "step": 2723 - }, - { - "epoch": 0.43866500261685254, - "grad_norm": 0.0008322568028233945, - "learning_rate": 0.0001999999057367228, - "loss": 46.0, - "step": 2724 - }, - { - "epoch": 0.43882603969563994, - "grad_norm": 0.0005510931368917227, - "learning_rate": 0.00019999990566724557, - "loss": 46.0, - "step": 2725 - }, - { - "epoch": 0.43898707677442733, - "grad_norm": 0.0006119512254372239, - "learning_rate": 0.0001999999055977427, - "loss": 46.0, - "step": 2726 - }, - { - "epoch": 0.4391481138532147, - "grad_norm": 0.000798740133177489, - "learning_rate": 0.00019999990552821428, - "loss": 46.0, - "step": 2727 - }, - { - "epoch": 0.43930915093200207, - "grad_norm": 0.0006452187662944198, - "learning_rate": 0.00019999990545866022, - "loss": 46.0, - "step": 2728 - }, - { - "epoch": 0.43947018801078946, - "grad_norm": 0.0004959151847288013, - "learning_rate": 0.0001999999053890806, - "loss": 46.0, - "step": 2729 - }, - { - "epoch": 0.43963122508957686, - "grad_norm": 0.0007036104216240346, - "learning_rate": 0.00019999990531947539, - "loss": 46.0, - "step": 2730 - }, - { - "epoch": 0.43979226216836426, - "grad_norm": 0.00037318517570383847, - "learning_rate": 0.00019999990524984456, - "loss": 46.0, - "step": 2731 - }, - { - "epoch": 0.43995329924715165, - "grad_norm": 0.0006354129291139543, - "learning_rate": 0.00019999990518018815, - "loss": 46.0, - "step": 2732 - }, - { - "epoch": 0.44011433632593905, - "grad_norm": 0.00037247739965096116, - "learning_rate": 0.00019999990511050615, - "loss": 46.0, - "step": 2733 - }, - { - "epoch": 0.44027537340472644, - "grad_norm": 0.002498733112588525, - "learning_rate": 0.00019999990504079854, - "loss": 46.0, - "step": 2734 - }, - { - "epoch": 0.44043641048351384, - "grad_norm": 0.0013193635968491435, - "learning_rate": 0.00019999990497106534, - "loss": 46.0, - "step": 2735 - }, - { - "epoch": 0.44059744756230124, - "grad_norm": 0.0005198375438340008, - "learning_rate": 0.00019999990490130655, - "loss": 46.0, - "step": 2736 - }, - { - "epoch": 0.44075848464108863, - "grad_norm": 0.0006304277339950204, - "learning_rate": 0.00019999990483152215, - "loss": 46.0, - "step": 2737 - }, - { - "epoch": 0.44091952171987603, - "grad_norm": 0.0007082073716446757, - "learning_rate": 0.00019999990476171219, - "loss": 46.0, - "step": 2738 - }, - { - "epoch": 0.44108055879866337, - "grad_norm": 0.00026121517294086516, - "learning_rate": 0.00019999990469187658, - "loss": 46.0, - "step": 2739 - }, - { - "epoch": 0.44124159587745077, - "grad_norm": 0.00047844467917457223, - "learning_rate": 0.00019999990462201542, - "loss": 46.0, - "step": 2740 - }, - { - "epoch": 0.44140263295623816, - "grad_norm": 0.0006873977254144847, - "learning_rate": 0.00019999990455212864, - "loss": 46.0, - "step": 2741 - }, - { - "epoch": 0.44156367003502556, - "grad_norm": 0.0006066896021366119, - "learning_rate": 0.00019999990448221628, - "loss": 46.0, - "step": 2742 - }, - { - "epoch": 0.44172470711381295, - "grad_norm": 0.0006010742508806288, - "learning_rate": 0.00019999990441227833, - "loss": 46.0, - "step": 2743 - }, - { - "epoch": 0.44188574419260035, - "grad_norm": 0.0017767059616744518, - "learning_rate": 0.00019999990434231476, - "loss": 46.0, - "step": 2744 - }, - { - "epoch": 0.44204678127138775, - "grad_norm": 0.0010924983071163297, - "learning_rate": 0.00019999990427232563, - "loss": 46.0, - "step": 2745 - }, - { - "epoch": 0.44220781835017514, - "grad_norm": 0.0006658118800260127, - "learning_rate": 0.0001999999042023109, - "loss": 46.0, - "step": 2746 - }, - { - "epoch": 0.44236885542896254, - "grad_norm": 0.002850266871973872, - "learning_rate": 0.00019999990413227056, - "loss": 46.0, - "step": 2747 - }, - { - "epoch": 0.44252989250774993, - "grad_norm": 0.000484477641293779, - "learning_rate": 0.00019999990406220462, - "loss": 46.0, - "step": 2748 - }, - { - "epoch": 0.4426909295865373, - "grad_norm": 0.0004909904673695564, - "learning_rate": 0.0001999999039921131, - "loss": 46.0, - "step": 2749 - }, - { - "epoch": 0.44285196666532467, - "grad_norm": 0.0005222524632699788, - "learning_rate": 0.00019999990392199598, - "loss": 46.0, - "step": 2750 - }, - { - "epoch": 0.44301300374411207, - "grad_norm": 0.00028756973915733397, - "learning_rate": 0.00019999990385185325, - "loss": 46.0, - "step": 2751 - }, - { - "epoch": 0.44317404082289946, - "grad_norm": 0.00036780882510356605, - "learning_rate": 0.00019999990378168496, - "loss": 46.0, - "step": 2752 - }, - { - "epoch": 0.44333507790168686, - "grad_norm": 0.004427620675414801, - "learning_rate": 0.00019999990371149105, - "loss": 46.0, - "step": 2753 - }, - { - "epoch": 0.44349611498047425, - "grad_norm": 0.0005069668404757977, - "learning_rate": 0.00019999990364127156, - "loss": 46.0, - "step": 2754 - }, - { - "epoch": 0.44365715205926165, - "grad_norm": 0.0011849005240947008, - "learning_rate": 0.00019999990357102645, - "loss": 46.0, - "step": 2755 - }, - { - "epoch": 0.44381818913804905, - "grad_norm": 0.0012834552908316255, - "learning_rate": 0.00019999990350075576, - "loss": 46.0, - "step": 2756 - }, - { - "epoch": 0.44397922621683644, - "grad_norm": 0.0005128609482198954, - "learning_rate": 0.00019999990343045948, - "loss": 46.0, - "step": 2757 - }, - { - "epoch": 0.44414026329562384, - "grad_norm": 0.0005335664027370512, - "learning_rate": 0.00019999990336013758, - "loss": 46.0, - "step": 2758 - }, - { - "epoch": 0.44430130037441123, - "grad_norm": 0.000431609369115904, - "learning_rate": 0.00019999990328979013, - "loss": 46.0, - "step": 2759 - }, - { - "epoch": 0.4444623374531986, - "grad_norm": 0.00036994690890423954, - "learning_rate": 0.00019999990321941703, - "loss": 46.0, - "step": 2760 - }, - { - "epoch": 0.44462337453198597, - "grad_norm": 0.0005365213728509843, - "learning_rate": 0.00019999990314901838, - "loss": 46.0, - "step": 2761 - }, - { - "epoch": 0.44478441161077337, - "grad_norm": 0.000801724148914218, - "learning_rate": 0.00019999990307859413, - "loss": 46.0, - "step": 2762 - }, - { - "epoch": 0.44494544868956076, - "grad_norm": 0.0003944375494029373, - "learning_rate": 0.00019999990300814427, - "loss": 46.0, - "step": 2763 - }, - { - "epoch": 0.44510648576834816, - "grad_norm": 0.00028719662805087864, - "learning_rate": 0.00019999990293766883, - "loss": 46.0, - "step": 2764 - }, - { - "epoch": 0.44526752284713556, - "grad_norm": 0.0023350659757852554, - "learning_rate": 0.0001999999028671678, - "loss": 46.0, - "step": 2765 - }, - { - "epoch": 0.44542855992592295, - "grad_norm": 0.0006715567433275282, - "learning_rate": 0.00019999990279664115, - "loss": 46.0, - "step": 2766 - }, - { - "epoch": 0.44558959700471035, - "grad_norm": 0.0004399584431666881, - "learning_rate": 0.00019999990272608892, - "loss": 46.0, - "step": 2767 - }, - { - "epoch": 0.44575063408349774, - "grad_norm": 0.0007615103386342525, - "learning_rate": 0.00019999990265551107, - "loss": 46.0, - "step": 2768 - }, - { - "epoch": 0.44591167116228514, - "grad_norm": 0.000605251407250762, - "learning_rate": 0.00019999990258490766, - "loss": 46.0, - "step": 2769 - }, - { - "epoch": 0.4460727082410725, - "grad_norm": 0.0006432253867387772, - "learning_rate": 0.00019999990251427864, - "loss": 46.0, - "step": 2770 - }, - { - "epoch": 0.4462337453198599, - "grad_norm": 0.0014349092962220311, - "learning_rate": 0.000199999902443624, - "loss": 46.0, - "step": 2771 - }, - { - "epoch": 0.44639478239864727, - "grad_norm": 0.0006780263502150774, - "learning_rate": 0.0001999999023729438, - "loss": 46.0, - "step": 2772 - }, - { - "epoch": 0.44655581947743467, - "grad_norm": 0.0009366684826090932, - "learning_rate": 0.000199999902302238, - "loss": 46.0, - "step": 2773 - }, - { - "epoch": 0.44671685655622206, - "grad_norm": 0.0011787187540903687, - "learning_rate": 0.00019999990223150662, - "loss": 46.0, - "step": 2774 - }, - { - "epoch": 0.44687789363500946, - "grad_norm": 0.000608434434980154, - "learning_rate": 0.0001999999021607496, - "loss": 46.0, - "step": 2775 - }, - { - "epoch": 0.44703893071379686, - "grad_norm": 0.000304481916828081, - "learning_rate": 0.00019999990208996704, - "loss": 46.0, - "step": 2776 - }, - { - "epoch": 0.44719996779258425, - "grad_norm": 0.0007709647761657834, - "learning_rate": 0.00019999990201915882, - "loss": 46.0, - "step": 2777 - }, - { - "epoch": 0.44736100487137165, - "grad_norm": 0.0014972627395763993, - "learning_rate": 0.00019999990194832508, - "loss": 46.0, - "step": 2778 - }, - { - "epoch": 0.44752204195015904, - "grad_norm": 0.0008488981984555721, - "learning_rate": 0.0001999999018774657, - "loss": 46.0, - "step": 2779 - }, - { - "epoch": 0.44768307902894644, - "grad_norm": 0.000668447813950479, - "learning_rate": 0.00019999990180658074, - "loss": 46.0, - "step": 2780 - }, - { - "epoch": 0.4478441161077338, - "grad_norm": 0.0007189158932305872, - "learning_rate": 0.00019999990173567018, - "loss": 46.0, - "step": 2781 - }, - { - "epoch": 0.4480051531865212, - "grad_norm": 0.0006612858851440251, - "learning_rate": 0.000199999901664734, - "loss": 46.0, - "step": 2782 - }, - { - "epoch": 0.4481661902653086, - "grad_norm": 0.0015983371995389462, - "learning_rate": 0.00019999990159377226, - "loss": 46.0, - "step": 2783 - }, - { - "epoch": 0.44832722734409597, - "grad_norm": 0.00026204640744253993, - "learning_rate": 0.0001999999015227849, - "loss": 46.0, - "step": 2784 - }, - { - "epoch": 0.44848826442288336, - "grad_norm": 0.0010673885699361563, - "learning_rate": 0.000199999901451772, - "loss": 46.0, - "step": 2785 - }, - { - "epoch": 0.44864930150167076, - "grad_norm": 0.0005019505624659359, - "learning_rate": 0.00019999990138073342, - "loss": 46.0, - "step": 2786 - }, - { - "epoch": 0.44881033858045816, - "grad_norm": 0.0012449523201212287, - "learning_rate": 0.0001999999013096693, - "loss": 46.0, - "step": 2787 - }, - { - "epoch": 0.44897137565924555, - "grad_norm": 0.00023666986089665443, - "learning_rate": 0.00019999990123857958, - "loss": 46.0, - "step": 2788 - }, - { - "epoch": 0.44913241273803295, - "grad_norm": 0.0006680472288280725, - "learning_rate": 0.00019999990116746427, - "loss": 46.0, - "step": 2789 - }, - { - "epoch": 0.44929344981682034, - "grad_norm": 0.0005869470769539475, - "learning_rate": 0.00019999990109632334, - "loss": 46.0, - "step": 2790 - }, - { - "epoch": 0.44945448689560774, - "grad_norm": 0.000649125431664288, - "learning_rate": 0.00019999990102515685, - "loss": 46.0, - "step": 2791 - }, - { - "epoch": 0.4496155239743951, - "grad_norm": 0.0003559860051609576, - "learning_rate": 0.00019999990095396472, - "loss": 46.0, - "step": 2792 - }, - { - "epoch": 0.4497765610531825, - "grad_norm": 0.0005704216309823096, - "learning_rate": 0.00019999990088274703, - "loss": 46.0, - "step": 2793 - }, - { - "epoch": 0.4499375981319699, - "grad_norm": 0.000314653676468879, - "learning_rate": 0.00019999990081150375, - "loss": 46.0, - "step": 2794 - }, - { - "epoch": 0.45009863521075727, - "grad_norm": 0.0011541357962414622, - "learning_rate": 0.00019999990074023483, - "loss": 46.0, - "step": 2795 - }, - { - "epoch": 0.45025967228954467, - "grad_norm": 0.0017858966020867229, - "learning_rate": 0.00019999990066894036, - "loss": 46.0, - "step": 2796 - }, - { - "epoch": 0.45042070936833206, - "grad_norm": 0.000754722161218524, - "learning_rate": 0.00019999990059762032, - "loss": 46.0, - "step": 2797 - }, - { - "epoch": 0.45058174644711946, - "grad_norm": 0.0005892252083867788, - "learning_rate": 0.0001999999005262746, - "loss": 46.0, - "step": 2798 - }, - { - "epoch": 0.45074278352590685, - "grad_norm": 0.00027911565848626196, - "learning_rate": 0.00019999990045490334, - "loss": 46.0, - "step": 2799 - }, - { - "epoch": 0.45090382060469425, - "grad_norm": 0.0007249664631672204, - "learning_rate": 0.00019999990038350646, - "loss": 46.0, - "step": 2800 - }, - { - "epoch": 0.45106485768348165, - "grad_norm": 0.0006351073388941586, - "learning_rate": 0.00019999990031208402, - "loss": 46.0, - "step": 2801 - }, - { - "epoch": 0.451225894762269, - "grad_norm": 0.000732273212634027, - "learning_rate": 0.00019999990024063596, - "loss": 46.0, - "step": 2802 - }, - { - "epoch": 0.4513869318410564, - "grad_norm": 0.00045403256081044674, - "learning_rate": 0.00019999990016916232, - "loss": 46.0, - "step": 2803 - }, - { - "epoch": 0.4515479689198438, - "grad_norm": 0.00037547352258116007, - "learning_rate": 0.00019999990009766306, - "loss": 46.0, - "step": 2804 - }, - { - "epoch": 0.4517090059986312, - "grad_norm": 0.0005481303087435663, - "learning_rate": 0.00019999990002613825, - "loss": 46.0, - "step": 2805 - }, - { - "epoch": 0.45187004307741857, - "grad_norm": 0.0012723577674478292, - "learning_rate": 0.00019999989995458782, - "loss": 46.0, - "step": 2806 - }, - { - "epoch": 0.45203108015620597, - "grad_norm": 0.0003037889546249062, - "learning_rate": 0.0001999998998830118, - "loss": 46.0, - "step": 2807 - }, - { - "epoch": 0.45219211723499336, - "grad_norm": 0.0007623456185683608, - "learning_rate": 0.00019999989981141016, - "loss": 46.0, - "step": 2808 - }, - { - "epoch": 0.45235315431378076, - "grad_norm": 0.0004901849897578359, - "learning_rate": 0.00019999989973978294, - "loss": 46.0, - "step": 2809 - }, - { - "epoch": 0.45251419139256815, - "grad_norm": 0.0006916733109392226, - "learning_rate": 0.00019999989966813014, - "loss": 46.0, - "step": 2810 - }, - { - "epoch": 0.45267522847135555, - "grad_norm": 0.0007174148922786117, - "learning_rate": 0.00019999989959645174, - "loss": 46.0, - "step": 2811 - }, - { - "epoch": 0.45283626555014295, - "grad_norm": 0.0003123388160020113, - "learning_rate": 0.0001999998995247477, - "loss": 46.0, - "step": 2812 - }, - { - "epoch": 0.4529973026289303, - "grad_norm": 0.00042951980140060186, - "learning_rate": 0.0001999998994530181, - "loss": 46.0, - "step": 2813 - }, - { - "epoch": 0.4531583397077177, - "grad_norm": 0.0006232656305655837, - "learning_rate": 0.00019999989938126293, - "loss": 46.0, - "step": 2814 - }, - { - "epoch": 0.4533193767865051, - "grad_norm": 0.0007511017029173672, - "learning_rate": 0.00019999989930948216, - "loss": 46.0, - "step": 2815 - }, - { - "epoch": 0.4534804138652925, - "grad_norm": 0.0006081481114961207, - "learning_rate": 0.00019999989923767577, - "loss": 46.0, - "step": 2816 - }, - { - "epoch": 0.45364145094407987, - "grad_norm": 0.0004693980736192316, - "learning_rate": 0.00019999989916584377, - "loss": 46.0, - "step": 2817 - }, - { - "epoch": 0.45380248802286727, - "grad_norm": 0.0008029431337490678, - "learning_rate": 0.0001999998990939862, - "loss": 46.0, - "step": 2818 - }, - { - "epoch": 0.45396352510165466, - "grad_norm": 0.00046594266314059496, - "learning_rate": 0.00019999989902210307, - "loss": 46.0, - "step": 2819 - }, - { - "epoch": 0.45412456218044206, - "grad_norm": 0.000332236522808671, - "learning_rate": 0.00019999989895019428, - "loss": 46.0, - "step": 2820 - }, - { - "epoch": 0.45428559925922946, - "grad_norm": 0.00030856754165142775, - "learning_rate": 0.00019999989887825993, - "loss": 46.0, - "step": 2821 - }, - { - "epoch": 0.45444663633801685, - "grad_norm": 0.0003584769438020885, - "learning_rate": 0.0001999998988063, - "loss": 46.0, - "step": 2822 - }, - { - "epoch": 0.4546076734168042, - "grad_norm": 0.0002391423040535301, - "learning_rate": 0.00019999989873431445, - "loss": 46.0, - "step": 2823 - }, - { - "epoch": 0.4547687104955916, - "grad_norm": 0.0005808458663523197, - "learning_rate": 0.0001999998986623033, - "loss": 46.0, - "step": 2824 - }, - { - "epoch": 0.454929747574379, - "grad_norm": 0.000656791205983609, - "learning_rate": 0.0001999998985902666, - "loss": 46.0, - "step": 2825 - }, - { - "epoch": 0.4550907846531664, - "grad_norm": 0.0007843867060728371, - "learning_rate": 0.00019999989851820425, - "loss": 46.0, - "step": 2826 - }, - { - "epoch": 0.4552518217319538, - "grad_norm": 0.0003235080512240529, - "learning_rate": 0.0001999998984461163, - "loss": 46.0, - "step": 2827 - }, - { - "epoch": 0.4554128588107412, - "grad_norm": 0.0005541152786463499, - "learning_rate": 0.0001999998983740028, - "loss": 46.0, - "step": 2828 - }, - { - "epoch": 0.45557389588952857, - "grad_norm": 0.0004606999282259494, - "learning_rate": 0.0001999998983018637, - "loss": 46.0, - "step": 2829 - }, - { - "epoch": 0.45573493296831596, - "grad_norm": 0.0011908607557415962, - "learning_rate": 0.00019999989822969897, - "loss": 46.0, - "step": 2830 - }, - { - "epoch": 0.45589597004710336, - "grad_norm": 0.0006549574318341911, - "learning_rate": 0.00019999989815750867, - "loss": 46.0, - "step": 2831 - }, - { - "epoch": 0.45605700712589076, - "grad_norm": 0.0007461561472155154, - "learning_rate": 0.00019999989808529276, - "loss": 46.0, - "step": 2832 - }, - { - "epoch": 0.45621804420467815, - "grad_norm": 0.00095327541930601, - "learning_rate": 0.00019999989801305128, - "loss": 46.0, - "step": 2833 - }, - { - "epoch": 0.4563790812834655, - "grad_norm": 0.0013621571706607938, - "learning_rate": 0.00019999989794078417, - "loss": 46.0, - "step": 2834 - }, - { - "epoch": 0.4565401183622529, - "grad_norm": 0.0004566084826365113, - "learning_rate": 0.00019999989786849152, - "loss": 46.0, - "step": 2835 - }, - { - "epoch": 0.4567011554410403, - "grad_norm": 0.0011674485867843032, - "learning_rate": 0.00019999989779617323, - "loss": 46.0, - "step": 2836 - }, - { - "epoch": 0.4568621925198277, - "grad_norm": 0.0005980342975817621, - "learning_rate": 0.00019999989772382938, - "loss": 46.0, - "step": 2837 - }, - { - "epoch": 0.4570232295986151, - "grad_norm": 0.0032675962429493666, - "learning_rate": 0.00019999989765145988, - "loss": 46.0, - "step": 2838 - }, - { - "epoch": 0.4571842666774025, - "grad_norm": 0.0006431429646909237, - "learning_rate": 0.00019999989757906483, - "loss": 46.0, - "step": 2839 - }, - { - "epoch": 0.45734530375618987, - "grad_norm": 0.0008999565034173429, - "learning_rate": 0.00019999989750664417, - "loss": 46.0, - "step": 2840 - }, - { - "epoch": 0.45750634083497727, - "grad_norm": 0.00036903267027810216, - "learning_rate": 0.00019999989743419794, - "loss": 46.0, - "step": 2841 - }, - { - "epoch": 0.45766737791376466, - "grad_norm": 0.0006419433047994971, - "learning_rate": 0.00019999989736172607, - "loss": 46.0, - "step": 2842 - }, - { - "epoch": 0.45782841499255206, - "grad_norm": 0.0025293666403740644, - "learning_rate": 0.00019999989728922864, - "loss": 46.0, - "step": 2843 - }, - { - "epoch": 0.45798945207133945, - "grad_norm": 0.002030485775321722, - "learning_rate": 0.0001999998972167056, - "loss": 46.0, - "step": 2844 - }, - { - "epoch": 0.4581504891501268, - "grad_norm": 0.0009818007238209248, - "learning_rate": 0.00019999989714415697, - "loss": 46.0, - "step": 2845 - }, - { - "epoch": 0.4583115262289142, - "grad_norm": 0.00035288967774249613, - "learning_rate": 0.00019999989707158273, - "loss": 46.0, - "step": 2846 - }, - { - "epoch": 0.4584725633077016, - "grad_norm": 0.0026120112743228674, - "learning_rate": 0.00019999989699898292, - "loss": 46.0, - "step": 2847 - }, - { - "epoch": 0.458633600386489, - "grad_norm": 0.0004638070531655103, - "learning_rate": 0.0001999998969263575, - "loss": 46.0, - "step": 2848 - }, - { - "epoch": 0.4587946374652764, - "grad_norm": 0.0006521355826407671, - "learning_rate": 0.0001999998968537065, - "loss": 46.0, - "step": 2849 - }, - { - "epoch": 0.4589556745440638, - "grad_norm": 0.00038754616980440915, - "learning_rate": 0.00019999989678102988, - "loss": 46.0, - "step": 2850 - }, - { - "epoch": 0.45911671162285117, - "grad_norm": 0.0005511146737262607, - "learning_rate": 0.00019999989670832768, - "loss": 46.0, - "step": 2851 - }, - { - "epoch": 0.45927774870163857, - "grad_norm": 0.0006465655169449747, - "learning_rate": 0.00019999989663559988, - "loss": 46.0, - "step": 2852 - }, - { - "epoch": 0.45943878578042596, - "grad_norm": 0.0006740029202774167, - "learning_rate": 0.0001999998965628465, - "loss": 46.0, - "step": 2853 - }, - { - "epoch": 0.45959982285921336, - "grad_norm": 0.001330913626588881, - "learning_rate": 0.0001999998964900675, - "loss": 46.0, - "step": 2854 - }, - { - "epoch": 0.4597608599380007, - "grad_norm": 0.00043661901145242155, - "learning_rate": 0.00019999989641726292, - "loss": 46.0, - "step": 2855 - }, - { - "epoch": 0.4599218970167881, - "grad_norm": 0.00043866271153092384, - "learning_rate": 0.00019999989634443278, - "loss": 46.0, - "step": 2856 - }, - { - "epoch": 0.4600829340955755, - "grad_norm": 0.0036805500276386738, - "learning_rate": 0.000199999896271577, - "loss": 46.0, - "step": 2857 - }, - { - "epoch": 0.4602439711743629, - "grad_norm": 0.0005626160418614745, - "learning_rate": 0.00019999989619869562, - "loss": 46.0, - "step": 2858 - }, - { - "epoch": 0.4604050082531503, - "grad_norm": 0.001611289568245411, - "learning_rate": 0.00019999989612578867, - "loss": 46.0, - "step": 2859 - }, - { - "epoch": 0.4605660453319377, - "grad_norm": 0.0005461085238493979, - "learning_rate": 0.0001999998960528561, - "loss": 46.0, - "step": 2860 - }, - { - "epoch": 0.4607270824107251, - "grad_norm": 0.0008688995148986578, - "learning_rate": 0.00019999989597989796, - "loss": 46.0, - "step": 2861 - }, - { - "epoch": 0.46088811948951247, - "grad_norm": 0.00169644714333117, - "learning_rate": 0.00019999989590691421, - "loss": 46.0, - "step": 2862 - }, - { - "epoch": 0.46104915656829987, - "grad_norm": 0.0005847355350852013, - "learning_rate": 0.00019999989583390488, - "loss": 46.0, - "step": 2863 - }, - { - "epoch": 0.46121019364708726, - "grad_norm": 0.0006305525312200189, - "learning_rate": 0.00019999989576086996, - "loss": 46.0, - "step": 2864 - }, - { - "epoch": 0.46137123072587466, - "grad_norm": 0.00047217195970006287, - "learning_rate": 0.00019999989568780942, - "loss": 46.0, - "step": 2865 - }, - { - "epoch": 0.461532267804662, - "grad_norm": 0.0029122272972017527, - "learning_rate": 0.0001999998956147233, - "loss": 46.0, - "step": 2866 - }, - { - "epoch": 0.4616933048834494, - "grad_norm": 0.0008619397412985563, - "learning_rate": 0.0001999998955416116, - "loss": 46.0, - "step": 2867 - }, - { - "epoch": 0.4618543419622368, - "grad_norm": 0.0006143774371594191, - "learning_rate": 0.00019999989546847427, - "loss": 46.0, - "step": 2868 - }, - { - "epoch": 0.4620153790410242, - "grad_norm": 0.0007360770250670612, - "learning_rate": 0.00019999989539531136, - "loss": 46.0, - "step": 2869 - }, - { - "epoch": 0.4621764161198116, - "grad_norm": 0.00042803262476809323, - "learning_rate": 0.00019999989532212283, - "loss": 46.0, - "step": 2870 - }, - { - "epoch": 0.462337453198599, - "grad_norm": 0.0009471254306845367, - "learning_rate": 0.00019999989524890874, - "loss": 46.0, - "step": 2871 - }, - { - "epoch": 0.4624984902773864, - "grad_norm": 0.0013519171625375748, - "learning_rate": 0.00019999989517566907, - "loss": 46.0, - "step": 2872 - }, - { - "epoch": 0.46265952735617377, - "grad_norm": 0.0003170229902025312, - "learning_rate": 0.00019999989510240379, - "loss": 46.0, - "step": 2873 - }, - { - "epoch": 0.46282056443496117, - "grad_norm": 0.0005881020915694535, - "learning_rate": 0.00019999989502911288, - "loss": 46.0, - "step": 2874 - }, - { - "epoch": 0.46298160151374856, - "grad_norm": 0.000855987542308867, - "learning_rate": 0.0001999998949557964, - "loss": 46.0, - "step": 2875 - }, - { - "epoch": 0.4631426385925359, - "grad_norm": 0.0006057831342332065, - "learning_rate": 0.00019999989488245435, - "loss": 46.0, - "step": 2876 - }, - { - "epoch": 0.4633036756713233, - "grad_norm": 0.0009846428874880075, - "learning_rate": 0.00019999989480908668, - "loss": 46.0, - "step": 2877 - }, - { - "epoch": 0.4634647127501107, - "grad_norm": 0.0002604479086585343, - "learning_rate": 0.0001999998947356934, - "loss": 46.0, - "step": 2878 - }, - { - "epoch": 0.4636257498288981, - "grad_norm": 0.00033883212017826736, - "learning_rate": 0.00019999989466227457, - "loss": 46.0, - "step": 2879 - }, - { - "epoch": 0.4637867869076855, - "grad_norm": 0.001364146126434207, - "learning_rate": 0.00019999989458883012, - "loss": 46.0, - "step": 2880 - }, - { - "epoch": 0.4639478239864729, - "grad_norm": 0.00047338224248960614, - "learning_rate": 0.00019999989451536005, - "loss": 46.0, - "step": 2881 - }, - { - "epoch": 0.4641088610652603, - "grad_norm": 0.000514526735059917, - "learning_rate": 0.00019999989444186443, - "loss": 46.0, - "step": 2882 - }, - { - "epoch": 0.4642698981440477, - "grad_norm": 0.0002897829399444163, - "learning_rate": 0.00019999989436834319, - "loss": 46.0, - "step": 2883 - }, - { - "epoch": 0.4644309352228351, - "grad_norm": 0.0007724882452748716, - "learning_rate": 0.00019999989429479636, - "loss": 46.0, - "step": 2884 - }, - { - "epoch": 0.46459197230162247, - "grad_norm": 0.0005975780077278614, - "learning_rate": 0.00019999989422122394, - "loss": 46.0, - "step": 2885 - }, - { - "epoch": 0.46475300938040986, - "grad_norm": 0.00030973469256423414, - "learning_rate": 0.0001999998941476259, - "loss": 46.0, - "step": 2886 - }, - { - "epoch": 0.4649140464591972, - "grad_norm": 0.000649851921480149, - "learning_rate": 0.0001999998940740023, - "loss": 46.0, - "step": 2887 - }, - { - "epoch": 0.4650750835379846, - "grad_norm": 0.000337887613568455, - "learning_rate": 0.00019999989400035307, - "loss": 46.0, - "step": 2888 - }, - { - "epoch": 0.465236120616772, - "grad_norm": 0.0003283452242612839, - "learning_rate": 0.00019999989392667828, - "loss": 46.0, - "step": 2889 - }, - { - "epoch": 0.4653971576955594, - "grad_norm": 0.0003954330750275403, - "learning_rate": 0.00019999989385297788, - "loss": 46.0, - "step": 2890 - }, - { - "epoch": 0.4655581947743468, - "grad_norm": 0.0007709976052865386, - "learning_rate": 0.0001999998937792519, - "loss": 46.0, - "step": 2891 - }, - { - "epoch": 0.4657192318531342, - "grad_norm": 0.0003327724989503622, - "learning_rate": 0.00019999989370550033, - "loss": 46.0, - "step": 2892 - }, - { - "epoch": 0.4658802689319216, - "grad_norm": 0.0004651611379813403, - "learning_rate": 0.0001999998936317231, - "loss": 46.0, - "step": 2893 - }, - { - "epoch": 0.466041306010709, - "grad_norm": 0.001808120054192841, - "learning_rate": 0.00019999989355792033, - "loss": 46.0, - "step": 2894 - }, - { - "epoch": 0.4662023430894964, - "grad_norm": 0.000348433677572757, - "learning_rate": 0.00019999989348409196, - "loss": 46.0, - "step": 2895 - }, - { - "epoch": 0.46636338016828377, - "grad_norm": 0.0008953766664490104, - "learning_rate": 0.00019999989341023798, - "loss": 46.0, - "step": 2896 - }, - { - "epoch": 0.46652441724707117, - "grad_norm": 0.0004735529946628958, - "learning_rate": 0.00019999989333635844, - "loss": 46.0, - "step": 2897 - }, - { - "epoch": 0.4666854543258585, - "grad_norm": 0.0022318630944937468, - "learning_rate": 0.00019999989326245328, - "loss": 46.0, - "step": 2898 - }, - { - "epoch": 0.4668464914046459, - "grad_norm": 0.00036539489519782364, - "learning_rate": 0.00019999989318852251, - "loss": 46.0, - "step": 2899 - }, - { - "epoch": 0.4670075284834333, - "grad_norm": 0.0010025915689766407, - "learning_rate": 0.00019999989311456618, - "loss": 46.0, - "step": 2900 - }, - { - "epoch": 0.4671685655622207, - "grad_norm": 0.0004213416832499206, - "learning_rate": 0.00019999989304058424, - "loss": 46.0, - "step": 2901 - }, - { - "epoch": 0.4673296026410081, - "grad_norm": 0.0013688647886738181, - "learning_rate": 0.00019999989296657668, - "loss": 46.0, - "step": 2902 - }, - { - "epoch": 0.4674906397197955, - "grad_norm": 0.000347552151652053, - "learning_rate": 0.00019999989289254356, - "loss": 46.0, - "step": 2903 - }, - { - "epoch": 0.4676516767985829, - "grad_norm": 0.0003829733468592167, - "learning_rate": 0.00019999989281848483, - "loss": 46.0, - "step": 2904 - }, - { - "epoch": 0.4678127138773703, - "grad_norm": 0.0007045012316666543, - "learning_rate": 0.0001999998927444005, - "loss": 46.0, - "step": 2905 - }, - { - "epoch": 0.4679737509561577, - "grad_norm": 0.0012553539127111435, - "learning_rate": 0.0001999998926702906, - "loss": 46.0, - "step": 2906 - }, - { - "epoch": 0.46813478803494507, - "grad_norm": 0.0005822149687446654, - "learning_rate": 0.00019999989259615508, - "loss": 46.0, - "step": 2907 - }, - { - "epoch": 0.4682958251137324, - "grad_norm": 0.0007119124638848007, - "learning_rate": 0.00019999989252199397, - "loss": 46.0, - "step": 2908 - }, - { - "epoch": 0.4684568621925198, - "grad_norm": 0.0009871729416772723, - "learning_rate": 0.00019999989244780727, - "loss": 46.0, - "step": 2909 - }, - { - "epoch": 0.4686178992713072, - "grad_norm": 0.0006980373291298747, - "learning_rate": 0.000199999892373595, - "loss": 46.0, - "step": 2910 - }, - { - "epoch": 0.4687789363500946, - "grad_norm": 0.0002902986598201096, - "learning_rate": 0.0001999998922993571, - "loss": 46.0, - "step": 2911 - }, - { - "epoch": 0.468939973428882, - "grad_norm": 0.0006203086813911796, - "learning_rate": 0.0001999998922250936, - "loss": 46.0, - "step": 2912 - }, - { - "epoch": 0.4691010105076694, - "grad_norm": 0.0005238538724370301, - "learning_rate": 0.00019999989215080453, - "loss": 46.0, - "step": 2913 - }, - { - "epoch": 0.4692620475864568, - "grad_norm": 0.0011008810251951218, - "learning_rate": 0.00019999989207648985, - "loss": 46.0, - "step": 2914 - }, - { - "epoch": 0.4694230846652442, - "grad_norm": 0.0006297993822954595, - "learning_rate": 0.00019999989200214957, - "loss": 46.0, - "step": 2915 - }, - { - "epoch": 0.4695841217440316, - "grad_norm": 0.00040685865678824484, - "learning_rate": 0.0001999998919277837, - "loss": 46.0, - "step": 2916 - }, - { - "epoch": 0.469745158822819, - "grad_norm": 0.0005493556964211166, - "learning_rate": 0.00019999989185339224, - "loss": 46.0, - "step": 2917 - }, - { - "epoch": 0.46990619590160637, - "grad_norm": 0.0005613280809484422, - "learning_rate": 0.0001999998917789752, - "loss": 46.0, - "step": 2918 - }, - { - "epoch": 0.4700672329803937, - "grad_norm": 0.000449535611551255, - "learning_rate": 0.00019999989170453255, - "loss": 46.0, - "step": 2919 - }, - { - "epoch": 0.4702282700591811, - "grad_norm": 0.00031375791877508163, - "learning_rate": 0.00019999989163006432, - "loss": 46.0, - "step": 2920 - }, - { - "epoch": 0.4703893071379685, - "grad_norm": 0.0007060517091304064, - "learning_rate": 0.00019999989155557047, - "loss": 46.0, - "step": 2921 - }, - { - "epoch": 0.4705503442167559, - "grad_norm": 0.0008331080316565931, - "learning_rate": 0.00019999989148105103, - "loss": 46.0, - "step": 2922 - }, - { - "epoch": 0.4707113812955433, - "grad_norm": 0.0007294303504750133, - "learning_rate": 0.000199999891406506, - "loss": 46.0, - "step": 2923 - }, - { - "epoch": 0.4708724183743307, - "grad_norm": 0.0011409181170165539, - "learning_rate": 0.00019999989133193536, - "loss": 46.0, - "step": 2924 - }, - { - "epoch": 0.4710334554531181, - "grad_norm": 0.0006150680710561574, - "learning_rate": 0.00019999989125733916, - "loss": 46.0, - "step": 2925 - }, - { - "epoch": 0.4711944925319055, - "grad_norm": 0.0014988044276833534, - "learning_rate": 0.00019999989118271735, - "loss": 46.0, - "step": 2926 - }, - { - "epoch": 0.4713555296106929, - "grad_norm": 0.0005371063598431647, - "learning_rate": 0.00019999989110806995, - "loss": 46.0, - "step": 2927 - }, - { - "epoch": 0.4715165666894803, - "grad_norm": 0.00030780400265939534, - "learning_rate": 0.00019999989103339693, - "loss": 46.0, - "step": 2928 - }, - { - "epoch": 0.4716776037682676, - "grad_norm": 0.0006254728650674224, - "learning_rate": 0.00019999989095869833, - "loss": 46.0, - "step": 2929 - }, - { - "epoch": 0.471838640847055, - "grad_norm": 0.0007008228567428887, - "learning_rate": 0.00019999989088397414, - "loss": 46.0, - "step": 2930 - }, - { - "epoch": 0.4719996779258424, - "grad_norm": 0.0005706832162104547, - "learning_rate": 0.00019999989080922434, - "loss": 46.0, - "step": 2931 - }, - { - "epoch": 0.4721607150046298, - "grad_norm": 0.002139221178367734, - "learning_rate": 0.00019999989073444895, - "loss": 46.0, - "step": 2932 - }, - { - "epoch": 0.4723217520834172, - "grad_norm": 0.0012941916938871145, - "learning_rate": 0.000199999890659648, - "loss": 46.0, - "step": 2933 - }, - { - "epoch": 0.4724827891622046, - "grad_norm": 0.0009738983935676515, - "learning_rate": 0.00019999989058482143, - "loss": 46.0, - "step": 2934 - }, - { - "epoch": 0.472643826240992, - "grad_norm": 0.00020528545428533107, - "learning_rate": 0.00019999989050996925, - "loss": 46.0, - "step": 2935 - }, - { - "epoch": 0.4728048633197794, - "grad_norm": 0.0004141554527450353, - "learning_rate": 0.00019999989043509149, - "loss": 46.0, - "step": 2936 - }, - { - "epoch": 0.4729659003985668, - "grad_norm": 0.0006022877059876919, - "learning_rate": 0.00019999989036018813, - "loss": 46.0, - "step": 2937 - }, - { - "epoch": 0.4731269374773542, - "grad_norm": 0.0015486562624573708, - "learning_rate": 0.0001999998902852592, - "loss": 46.0, - "step": 2938 - }, - { - "epoch": 0.4732879745561416, - "grad_norm": 0.0005348848062567413, - "learning_rate": 0.00019999989021030464, - "loss": 46.0, - "step": 2939 - }, - { - "epoch": 0.4734490116349289, - "grad_norm": 0.0005023290286771953, - "learning_rate": 0.0001999998901353245, - "loss": 46.0, - "step": 2940 - }, - { - "epoch": 0.4736100487137163, - "grad_norm": 0.002133709844201803, - "learning_rate": 0.00019999989006031876, - "loss": 46.0, - "step": 2941 - }, - { - "epoch": 0.4737710857925037, - "grad_norm": 0.001634693006053567, - "learning_rate": 0.00019999988998528742, - "loss": 46.0, - "step": 2942 - }, - { - "epoch": 0.4739321228712911, - "grad_norm": 0.0002572726516518742, - "learning_rate": 0.0001999998899102305, - "loss": 46.0, - "step": 2943 - }, - { - "epoch": 0.4740931599500785, - "grad_norm": 0.0014945456059649587, - "learning_rate": 0.00019999988983514797, - "loss": 46.0, - "step": 2944 - }, - { - "epoch": 0.4742541970288659, - "grad_norm": 0.0008017955697141588, - "learning_rate": 0.00019999988976003986, - "loss": 46.0, - "step": 2945 - }, - { - "epoch": 0.4744152341076533, - "grad_norm": 0.00044260459253564477, - "learning_rate": 0.00019999988968490614, - "loss": 46.0, - "step": 2946 - }, - { - "epoch": 0.4745762711864407, - "grad_norm": 0.0005199291626922786, - "learning_rate": 0.00019999988960974684, - "loss": 46.0, - "step": 2947 - }, - { - "epoch": 0.4747373082652281, - "grad_norm": 0.0008907295414246619, - "learning_rate": 0.00019999988953456194, - "loss": 46.0, - "step": 2948 - }, - { - "epoch": 0.4748983453440155, - "grad_norm": 0.0004547477583400905, - "learning_rate": 0.00019999988945935146, - "loss": 46.0, - "step": 2949 - }, - { - "epoch": 0.4750593824228028, - "grad_norm": 0.0004121024685446173, - "learning_rate": 0.00019999988938411537, - "loss": 46.0, - "step": 2950 - }, - { - "epoch": 0.4752204195015902, - "grad_norm": 0.0003815851523540914, - "learning_rate": 0.00019999988930885368, - "loss": 46.0, - "step": 2951 - }, - { - "epoch": 0.4753814565803776, - "grad_norm": 0.0004361814062576741, - "learning_rate": 0.0001999998892335664, - "loss": 46.0, - "step": 2952 - }, - { - "epoch": 0.475542493659165, - "grad_norm": 0.0012374628568068147, - "learning_rate": 0.00019999988915825353, - "loss": 46.0, - "step": 2953 - }, - { - "epoch": 0.4757035307379524, - "grad_norm": 0.0008402442908845842, - "learning_rate": 0.00019999988908291506, - "loss": 46.0, - "step": 2954 - }, - { - "epoch": 0.4758645678167398, - "grad_norm": 0.0012711393646895885, - "learning_rate": 0.000199999889007551, - "loss": 46.0, - "step": 2955 - }, - { - "epoch": 0.4760256048955272, - "grad_norm": 0.00039713442674838006, - "learning_rate": 0.00019999988893216135, - "loss": 46.0, - "step": 2956 - }, - { - "epoch": 0.4761866419743146, - "grad_norm": 0.000489401223603636, - "learning_rate": 0.00019999988885674607, - "loss": 46.0, - "step": 2957 - }, - { - "epoch": 0.476347679053102, - "grad_norm": 0.0008890616591088474, - "learning_rate": 0.00019999988878130522, - "loss": 46.0, - "step": 2958 - }, - { - "epoch": 0.4765087161318894, - "grad_norm": 0.0005563640152104199, - "learning_rate": 0.00019999988870583879, - "loss": 46.0, - "step": 2959 - }, - { - "epoch": 0.4766697532106768, - "grad_norm": 0.0015805872390046716, - "learning_rate": 0.00019999988863034676, - "loss": 46.0, - "step": 2960 - }, - { - "epoch": 0.4768307902894641, - "grad_norm": 0.0005280330660752952, - "learning_rate": 0.00019999988855482913, - "loss": 46.0, - "step": 2961 - }, - { - "epoch": 0.4769918273682515, - "grad_norm": 0.0005939175607636571, - "learning_rate": 0.00019999988847928588, - "loss": 46.0, - "step": 2962 - }, - { - "epoch": 0.4771528644470389, - "grad_norm": 0.0005209036753512919, - "learning_rate": 0.00019999988840371707, - "loss": 46.0, - "step": 2963 - }, - { - "epoch": 0.4773139015258263, - "grad_norm": 0.0004026646784041077, - "learning_rate": 0.00019999988832812267, - "loss": 46.0, - "step": 2964 - }, - { - "epoch": 0.4774749386046137, - "grad_norm": 0.0004014724399894476, - "learning_rate": 0.00019999988825250263, - "loss": 46.0, - "step": 2965 - }, - { - "epoch": 0.4776359756834011, - "grad_norm": 0.0016275751404464245, - "learning_rate": 0.00019999988817685703, - "loss": 46.0, - "step": 2966 - }, - { - "epoch": 0.4777970127621885, - "grad_norm": 0.00042475457303225994, - "learning_rate": 0.00019999988810118585, - "loss": 46.0, - "step": 2967 - }, - { - "epoch": 0.4779580498409759, - "grad_norm": 0.0003831503272522241, - "learning_rate": 0.00019999988802548905, - "loss": 46.0, - "step": 2968 - }, - { - "epoch": 0.4781190869197633, - "grad_norm": 0.0003797115059569478, - "learning_rate": 0.00019999988794976666, - "loss": 46.0, - "step": 2969 - }, - { - "epoch": 0.4782801239985507, - "grad_norm": 0.0003801226557698101, - "learning_rate": 0.00019999988787401866, - "loss": 46.0, - "step": 2970 - }, - { - "epoch": 0.4784411610773381, - "grad_norm": 0.0017341175116598606, - "learning_rate": 0.00019999988779824504, - "loss": 46.0, - "step": 2971 - }, - { - "epoch": 0.4786021981561254, - "grad_norm": 0.0006976477452553809, - "learning_rate": 0.0001999998877224459, - "loss": 46.0, - "step": 2972 - }, - { - "epoch": 0.4787632352349128, - "grad_norm": 0.0014606605982407928, - "learning_rate": 0.00019999988764662113, - "loss": 46.0, - "step": 2973 - }, - { - "epoch": 0.4789242723137002, - "grad_norm": 0.0003381700662430376, - "learning_rate": 0.00019999988757077075, - "loss": 46.0, - "step": 2974 - }, - { - "epoch": 0.4790853093924876, - "grad_norm": 0.00047852908028289676, - "learning_rate": 0.0001999998874948948, - "loss": 46.0, - "step": 2975 - }, - { - "epoch": 0.479246346471275, - "grad_norm": 0.0004297027480788529, - "learning_rate": 0.00019999988741899324, - "loss": 46.0, - "step": 2976 - }, - { - "epoch": 0.4794073835500624, - "grad_norm": 0.0004784474731422961, - "learning_rate": 0.0001999998873430661, - "loss": 46.0, - "step": 2977 - }, - { - "epoch": 0.4795684206288498, - "grad_norm": 0.00032422482036054134, - "learning_rate": 0.00019999988726711334, - "loss": 46.0, - "step": 2978 - }, - { - "epoch": 0.4797294577076372, - "grad_norm": 0.00032233702950179577, - "learning_rate": 0.000199999887191135, - "loss": 46.0, - "step": 2979 - }, - { - "epoch": 0.4798904947864246, - "grad_norm": 0.0008858292130753398, - "learning_rate": 0.00019999988711513108, - "loss": 46.0, - "step": 2980 - }, - { - "epoch": 0.480051531865212, - "grad_norm": 0.0009945160709321499, - "learning_rate": 0.00019999988703910156, - "loss": 46.0, - "step": 2981 - }, - { - "epoch": 0.48021256894399933, - "grad_norm": 0.00042676139855757356, - "learning_rate": 0.0001999998869630464, - "loss": 46.0, - "step": 2982 - }, - { - "epoch": 0.4803736060227867, - "grad_norm": 0.0012016601394861937, - "learning_rate": 0.0001999998868869657, - "loss": 46.0, - "step": 2983 - }, - { - "epoch": 0.4805346431015741, - "grad_norm": 0.000258055079029873, - "learning_rate": 0.00019999988681085936, - "loss": 46.0, - "step": 2984 - }, - { - "epoch": 0.4806956801803615, - "grad_norm": 0.0005329045816324651, - "learning_rate": 0.00019999988673472747, - "loss": 46.0, - "step": 2985 - }, - { - "epoch": 0.4808567172591489, - "grad_norm": 0.0013322388986125588, - "learning_rate": 0.00019999988665856996, - "loss": 46.0, - "step": 2986 - }, - { - "epoch": 0.4810177543379363, - "grad_norm": 0.0009696754277683794, - "learning_rate": 0.00019999988658238687, - "loss": 46.0, - "step": 2987 - }, - { - "epoch": 0.4811787914167237, - "grad_norm": 0.0004098875797353685, - "learning_rate": 0.0001999998865061782, - "loss": 46.0, - "step": 2988 - }, - { - "epoch": 0.4813398284955111, - "grad_norm": 0.0009699148940853775, - "learning_rate": 0.0001999998864299439, - "loss": 46.0, - "step": 2989 - }, - { - "epoch": 0.4815008655742985, - "grad_norm": 0.0005095636588521302, - "learning_rate": 0.00019999988635368402, - "loss": 46.0, - "step": 2990 - }, - { - "epoch": 0.4816619026530859, - "grad_norm": 0.0008789250277914107, - "learning_rate": 0.00019999988627739855, - "loss": 46.0, - "step": 2991 - }, - { - "epoch": 0.4818229397318733, - "grad_norm": 0.0006461109733209014, - "learning_rate": 0.00019999988620108744, - "loss": 46.0, - "step": 2992 - }, - { - "epoch": 0.48198397681066063, - "grad_norm": 0.0009350635809823871, - "learning_rate": 0.00019999988612475077, - "loss": 46.0, - "step": 2993 - }, - { - "epoch": 0.482145013889448, - "grad_norm": 0.001425218302756548, - "learning_rate": 0.00019999988604838852, - "loss": 46.0, - "step": 2994 - }, - { - "epoch": 0.4823060509682354, - "grad_norm": 0.0005072914063930511, - "learning_rate": 0.00019999988597200065, - "loss": 46.0, - "step": 2995 - }, - { - "epoch": 0.4824670880470228, - "grad_norm": 0.0014354382874444127, - "learning_rate": 0.00019999988589558722, - "loss": 46.0, - "step": 2996 - }, - { - "epoch": 0.4826281251258102, - "grad_norm": 0.001478009158745408, - "learning_rate": 0.00019999988581914817, - "loss": 46.0, - "step": 2997 - }, - { - "epoch": 0.4827891622045976, - "grad_norm": 0.0011180785950273275, - "learning_rate": 0.0001999998857426835, - "loss": 46.0, - "step": 2998 - }, - { - "epoch": 0.482950199283385, - "grad_norm": 0.0007119972142390907, - "learning_rate": 0.00019999988566619326, - "loss": 46.0, - "step": 2999 - }, - { - "epoch": 0.4831112363621724, - "grad_norm": 0.00042794988257810473, - "learning_rate": 0.00019999988558967746, - "loss": 46.0, - "step": 3000 - }, - { - "epoch": 0.4832722734409598, - "grad_norm": 0.00032044705585576594, - "learning_rate": 0.000199999885513136, - "loss": 46.0, - "step": 3001 - }, - { - "epoch": 0.4834333105197472, - "grad_norm": 0.0009639356285333633, - "learning_rate": 0.000199999885436569, - "loss": 46.0, - "step": 3002 - }, - { - "epoch": 0.48359434759853454, - "grad_norm": 0.0007271425565704703, - "learning_rate": 0.0001999998853599764, - "loss": 46.0, - "step": 3003 - }, - { - "epoch": 0.48375538467732193, - "grad_norm": 0.0011597374686971307, - "learning_rate": 0.00019999988528335817, - "loss": 46.0, - "step": 3004 - }, - { - "epoch": 0.4839164217561093, - "grad_norm": 0.000668774067889899, - "learning_rate": 0.00019999988520671437, - "loss": 46.0, - "step": 3005 - }, - { - "epoch": 0.4840774588348967, - "grad_norm": 0.0010206762235611677, - "learning_rate": 0.00019999988513004496, - "loss": 46.0, - "step": 3006 - }, - { - "epoch": 0.4842384959136841, - "grad_norm": 0.0016464662039652467, - "learning_rate": 0.00019999988505335, - "loss": 46.0, - "step": 3007 - }, - { - "epoch": 0.4843995329924715, - "grad_norm": 0.0003405835013836622, - "learning_rate": 0.00019999988497662938, - "loss": 46.0, - "step": 3008 - }, - { - "epoch": 0.4845605700712589, - "grad_norm": 0.0005689465324394405, - "learning_rate": 0.0001999998848998832, - "loss": 46.0, - "step": 3009 - }, - { - "epoch": 0.4847216071500463, - "grad_norm": 0.0002660571481101215, - "learning_rate": 0.0001999998848231114, - "loss": 46.0, - "step": 3010 - }, - { - "epoch": 0.4848826442288337, - "grad_norm": 0.0010441048070788383, - "learning_rate": 0.00019999988474631405, - "loss": 46.0, - "step": 3011 - }, - { - "epoch": 0.4850436813076211, - "grad_norm": 0.0018408495234325528, - "learning_rate": 0.00019999988466949109, - "loss": 46.0, - "step": 3012 - }, - { - "epoch": 0.4852047183864085, - "grad_norm": 0.0004564946866594255, - "learning_rate": 0.0001999998845926425, - "loss": 46.0, - "step": 3013 - }, - { - "epoch": 0.48536575546519584, - "grad_norm": 0.0008664042106829584, - "learning_rate": 0.00019999988451576835, - "loss": 46.0, - "step": 3014 - }, - { - "epoch": 0.48552679254398323, - "grad_norm": 0.0003404630406294018, - "learning_rate": 0.0001999998844388686, - "loss": 46.0, - "step": 3015 - }, - { - "epoch": 0.48568782962277063, - "grad_norm": 0.0006898824940435588, - "learning_rate": 0.00019999988436194324, - "loss": 46.0, - "step": 3016 - }, - { - "epoch": 0.485848866701558, - "grad_norm": 0.0003574106376618147, - "learning_rate": 0.0001999998842849923, - "loss": 46.0, - "step": 3017 - }, - { - "epoch": 0.4860099037803454, - "grad_norm": 0.0004943932290188968, - "learning_rate": 0.00019999988420801577, - "loss": 46.0, - "step": 3018 - }, - { - "epoch": 0.4861709408591328, - "grad_norm": 0.001738535356707871, - "learning_rate": 0.00019999988413101362, - "loss": 46.0, - "step": 3019 - }, - { - "epoch": 0.4863319779379202, - "grad_norm": 0.0010401951149106026, - "learning_rate": 0.00019999988405398588, - "loss": 46.0, - "step": 3020 - }, - { - "epoch": 0.4864930150167076, - "grad_norm": 0.0011530905030667782, - "learning_rate": 0.00019999988397693258, - "loss": 46.0, - "step": 3021 - }, - { - "epoch": 0.486654052095495, - "grad_norm": 0.0005458248779177666, - "learning_rate": 0.00019999988389985367, - "loss": 46.0, - "step": 3022 - }, - { - "epoch": 0.4868150891742824, - "grad_norm": 0.0005783428205177188, - "learning_rate": 0.00019999988382274914, - "loss": 46.0, - "step": 3023 - }, - { - "epoch": 0.4869761262530698, - "grad_norm": 0.0030912079382687807, - "learning_rate": 0.00019999988374561905, - "loss": 46.0, - "step": 3024 - }, - { - "epoch": 0.48713716333185714, - "grad_norm": 0.0007660651463083923, - "learning_rate": 0.00019999988366846335, - "loss": 46.0, - "step": 3025 - }, - { - "epoch": 0.48729820041064453, - "grad_norm": 0.0005894253263249993, - "learning_rate": 0.00019999988359128203, - "loss": 46.0, - "step": 3026 - }, - { - "epoch": 0.48745923748943193, - "grad_norm": 0.00044560522655956447, - "learning_rate": 0.00019999988351407515, - "loss": 46.0, - "step": 3027 - }, - { - "epoch": 0.4876202745682193, - "grad_norm": 0.0002980520948767662, - "learning_rate": 0.00019999988343684266, - "loss": 46.0, - "step": 3028 - }, - { - "epoch": 0.4877813116470067, - "grad_norm": 0.0011412062449380755, - "learning_rate": 0.00019999988335958458, - "loss": 46.0, - "step": 3029 - }, - { - "epoch": 0.4879423487257941, - "grad_norm": 0.0021029768977314234, - "learning_rate": 0.0001999998832823009, - "loss": 46.0, - "step": 3030 - }, - { - "epoch": 0.4881033858045815, - "grad_norm": 0.0005195364938117564, - "learning_rate": 0.00019999988320499164, - "loss": 46.0, - "step": 3031 - }, - { - "epoch": 0.4882644228833689, - "grad_norm": 0.0009377730893902481, - "learning_rate": 0.00019999988312765677, - "loss": 46.0, - "step": 3032 - }, - { - "epoch": 0.4884254599621563, - "grad_norm": 0.0007240478880703449, - "learning_rate": 0.00019999988305029632, - "loss": 46.0, - "step": 3033 - }, - { - "epoch": 0.4885864970409437, - "grad_norm": 0.0002842667163349688, - "learning_rate": 0.00019999988297291022, - "loss": 46.0, - "step": 3034 - }, - { - "epoch": 0.48874753411973104, - "grad_norm": 0.00034056042204611003, - "learning_rate": 0.0001999998828954986, - "loss": 46.0, - "step": 3035 - }, - { - "epoch": 0.48890857119851844, - "grad_norm": 0.0004168835876043886, - "learning_rate": 0.00019999988281806135, - "loss": 46.0, - "step": 3036 - }, - { - "epoch": 0.48906960827730583, - "grad_norm": 0.00039020171971060336, - "learning_rate": 0.0001999998827405985, - "loss": 46.0, - "step": 3037 - }, - { - "epoch": 0.48923064535609323, - "grad_norm": 0.0003701295063365251, - "learning_rate": 0.00019999988266311008, - "loss": 46.0, - "step": 3038 - }, - { - "epoch": 0.4893916824348806, - "grad_norm": 0.0005590972141362727, - "learning_rate": 0.00019999988258559605, - "loss": 46.0, - "step": 3039 - }, - { - "epoch": 0.489552719513668, - "grad_norm": 0.0006196689209900796, - "learning_rate": 0.00019999988250805643, - "loss": 46.0, - "step": 3040 - }, - { - "epoch": 0.4897137565924554, - "grad_norm": 0.0006001039873808622, - "learning_rate": 0.0001999998824304912, - "loss": 46.0, - "step": 3041 - }, - { - "epoch": 0.4898747936712428, - "grad_norm": 0.002684792736545205, - "learning_rate": 0.00019999988235290037, - "loss": 46.0, - "step": 3042 - }, - { - "epoch": 0.4900358307500302, - "grad_norm": 0.0006069751107133925, - "learning_rate": 0.00019999988227528397, - "loss": 46.0, - "step": 3043 - }, - { - "epoch": 0.4901968678288176, - "grad_norm": 0.0007200596737675369, - "learning_rate": 0.00019999988219764197, - "loss": 46.0, - "step": 3044 - }, - { - "epoch": 0.490357904907605, - "grad_norm": 0.00042322510853409767, - "learning_rate": 0.00019999988211997436, - "loss": 46.0, - "step": 3045 - }, - { - "epoch": 0.49051894198639234, - "grad_norm": 0.0005256300792098045, - "learning_rate": 0.00019999988204228117, - "loss": 46.0, - "step": 3046 - }, - { - "epoch": 0.49067997906517974, - "grad_norm": 0.0006421637954190373, - "learning_rate": 0.00019999988196456236, - "loss": 46.0, - "step": 3047 - }, - { - "epoch": 0.49084101614396713, - "grad_norm": 0.000260350905591622, - "learning_rate": 0.000199999881886818, - "loss": 46.0, - "step": 3048 - }, - { - "epoch": 0.49100205322275453, - "grad_norm": 0.0004371090035419911, - "learning_rate": 0.000199999881809048, - "loss": 46.0, - "step": 3049 - }, - { - "epoch": 0.4911630903015419, - "grad_norm": 0.0016571134328842163, - "learning_rate": 0.00019999988173125243, - "loss": 46.0, - "step": 3050 - }, - { - "epoch": 0.4913241273803293, - "grad_norm": 0.0003236646589357406, - "learning_rate": 0.0001999998816534313, - "loss": 46.0, - "step": 3051 - }, - { - "epoch": 0.4914851644591167, - "grad_norm": 0.00041940153460018337, - "learning_rate": 0.0001999998815755845, - "loss": 46.0, - "step": 3052 - }, - { - "epoch": 0.4916462015379041, - "grad_norm": 0.0014828043058514595, - "learning_rate": 0.00019999988149771217, - "loss": 46.0, - "step": 3053 - }, - { - "epoch": 0.4918072386166915, - "grad_norm": 0.0022164764814078808, - "learning_rate": 0.00019999988141981422, - "loss": 46.0, - "step": 3054 - }, - { - "epoch": 0.4919682756954789, - "grad_norm": 0.0005107664619572461, - "learning_rate": 0.00019999988134189066, - "loss": 46.0, - "step": 3055 - }, - { - "epoch": 0.49212931277426625, - "grad_norm": 0.0004044045927003026, - "learning_rate": 0.0001999998812639415, - "loss": 46.0, - "step": 3056 - }, - { - "epoch": 0.49229034985305364, - "grad_norm": 0.0009663673117756844, - "learning_rate": 0.00019999988118596677, - "loss": 46.0, - "step": 3057 - }, - { - "epoch": 0.49245138693184104, - "grad_norm": 0.00046986175584606826, - "learning_rate": 0.00019999988110796645, - "loss": 46.0, - "step": 3058 - }, - { - "epoch": 0.49261242401062844, - "grad_norm": 0.001860951422713697, - "learning_rate": 0.0001999998810299405, - "loss": 46.0, - "step": 3059 - }, - { - "epoch": 0.49277346108941583, - "grad_norm": 0.002688717795535922, - "learning_rate": 0.000199999880951889, - "loss": 46.0, - "step": 3060 - }, - { - "epoch": 0.49293449816820323, - "grad_norm": 0.0012286669807508588, - "learning_rate": 0.00019999988087381187, - "loss": 46.0, - "step": 3061 - }, - { - "epoch": 0.4930955352469906, - "grad_norm": 0.0008862640825100243, - "learning_rate": 0.00019999988079570918, - "loss": 46.0, - "step": 3062 - }, - { - "epoch": 0.493256572325778, - "grad_norm": 0.000542375142686069, - "learning_rate": 0.00019999988071758084, - "loss": 46.0, - "step": 3063 - }, - { - "epoch": 0.4934176094045654, - "grad_norm": 0.00142467871773988, - "learning_rate": 0.00019999988063942693, - "loss": 46.0, - "step": 3064 - }, - { - "epoch": 0.4935786464833528, - "grad_norm": 0.000658223289065063, - "learning_rate": 0.00019999988056124745, - "loss": 46.0, - "step": 3065 - }, - { - "epoch": 0.4937396835621402, - "grad_norm": 0.00035880127688869834, - "learning_rate": 0.00019999988048304235, - "loss": 46.0, - "step": 3066 - }, - { - "epoch": 0.49390072064092755, - "grad_norm": 0.0008620198932476342, - "learning_rate": 0.00019999988040481168, - "loss": 46.0, - "step": 3067 - }, - { - "epoch": 0.49406175771971494, - "grad_norm": 0.0012930762022733688, - "learning_rate": 0.00019999988032655538, - "loss": 46.0, - "step": 3068 - }, - { - "epoch": 0.49422279479850234, - "grad_norm": 0.0018414922524243593, - "learning_rate": 0.00019999988024827352, - "loss": 46.0, - "step": 3069 - }, - { - "epoch": 0.49438383187728974, - "grad_norm": 0.0003766584559343755, - "learning_rate": 0.00019999988016996604, - "loss": 46.0, - "step": 3070 - }, - { - "epoch": 0.49454486895607713, - "grad_norm": 0.0010134689509868622, - "learning_rate": 0.000199999880091633, - "loss": 46.0, - "step": 3071 - }, - { - "epoch": 0.49470590603486453, - "grad_norm": 0.0007995095220394433, - "learning_rate": 0.00019999988001327432, - "loss": 46.0, - "step": 3072 - }, - { - "epoch": 0.4948669431136519, - "grad_norm": 0.00047520571388304234, - "learning_rate": 0.00019999987993489006, - "loss": 46.0, - "step": 3073 - }, - { - "epoch": 0.4950279801924393, - "grad_norm": 0.0032507237046957016, - "learning_rate": 0.0001999998798564802, - "loss": 46.0, - "step": 3074 - }, - { - "epoch": 0.4951890172712267, - "grad_norm": 0.001113923848606646, - "learning_rate": 0.00019999987977804476, - "loss": 46.0, - "step": 3075 - }, - { - "epoch": 0.4953500543500141, - "grad_norm": 0.001107540330849588, - "learning_rate": 0.00019999987969958373, - "loss": 46.0, - "step": 3076 - }, - { - "epoch": 0.4955110914288015, - "grad_norm": 0.0029324879869818687, - "learning_rate": 0.0001999998796210971, - "loss": 46.0, - "step": 3077 - }, - { - "epoch": 0.49567212850758885, - "grad_norm": 0.001303511206060648, - "learning_rate": 0.00019999987954258486, - "loss": 46.0, - "step": 3078 - }, - { - "epoch": 0.49583316558637625, - "grad_norm": 0.0004946336848661304, - "learning_rate": 0.00019999987946404705, - "loss": 46.0, - "step": 3079 - }, - { - "epoch": 0.49599420266516364, - "grad_norm": 0.0004824888310395181, - "learning_rate": 0.00019999987938548364, - "loss": 46.0, - "step": 3080 - }, - { - "epoch": 0.49615523974395104, - "grad_norm": 0.0008335686870850623, - "learning_rate": 0.0001999998793068946, - "loss": 46.0, - "step": 3081 - }, - { - "epoch": 0.49631627682273843, - "grad_norm": 0.0003709551237989217, - "learning_rate": 0.00019999987922828002, - "loss": 46.0, - "step": 3082 - }, - { - "epoch": 0.49647731390152583, - "grad_norm": 0.0006122171762399375, - "learning_rate": 0.0001999998791496398, - "loss": 46.0, - "step": 3083 - }, - { - "epoch": 0.4966383509803132, - "grad_norm": 0.0009241683874279261, - "learning_rate": 0.00019999987907097402, - "loss": 46.0, - "step": 3084 - }, - { - "epoch": 0.4967993880591006, - "grad_norm": 0.000317135127261281, - "learning_rate": 0.0001999998789922826, - "loss": 46.0, - "step": 3085 - }, - { - "epoch": 0.496960425137888, - "grad_norm": 0.0007719341083429754, - "learning_rate": 0.00019999987891356562, - "loss": 46.0, - "step": 3086 - }, - { - "epoch": 0.4971214622166754, - "grad_norm": 0.0006923080654814839, - "learning_rate": 0.00019999987883482305, - "loss": 46.0, - "step": 3087 - }, - { - "epoch": 0.49728249929546275, - "grad_norm": 0.0008883123518899083, - "learning_rate": 0.00019999987875605487, - "loss": 46.0, - "step": 3088 - }, - { - "epoch": 0.49744353637425015, - "grad_norm": 0.0022773360833525658, - "learning_rate": 0.0001999998786772611, - "loss": 46.0, - "step": 3089 - }, - { - "epoch": 0.49760457345303755, - "grad_norm": 0.002116635674610734, - "learning_rate": 0.00019999987859844174, - "loss": 46.0, - "step": 3090 - }, - { - "epoch": 0.49776561053182494, - "grad_norm": 0.00032865567482076585, - "learning_rate": 0.00019999987851959677, - "loss": 46.0, - "step": 3091 - }, - { - "epoch": 0.49792664761061234, - "grad_norm": 0.000617999816313386, - "learning_rate": 0.00019999987844072621, - "loss": 46.0, - "step": 3092 - }, - { - "epoch": 0.49808768468939973, - "grad_norm": 0.0005839301156811416, - "learning_rate": 0.00019999987836183004, - "loss": 46.0, - "step": 3093 - }, - { - "epoch": 0.49824872176818713, - "grad_norm": 0.0005078520043753088, - "learning_rate": 0.0001999998782829083, - "loss": 46.0, - "step": 3094 - }, - { - "epoch": 0.4984097588469745, - "grad_norm": 0.0005395420012064278, - "learning_rate": 0.000199999878203961, - "loss": 46.0, - "step": 3095 - }, - { - "epoch": 0.4985707959257619, - "grad_norm": 0.000586939335335046, - "learning_rate": 0.00019999987812498803, - "loss": 46.0, - "step": 3096 - }, - { - "epoch": 0.4987318330045493, - "grad_norm": 0.0004774358822032809, - "learning_rate": 0.0001999998780459895, - "loss": 46.0, - "step": 3097 - }, - { - "epoch": 0.4988928700833367, - "grad_norm": 0.0005348059348762035, - "learning_rate": 0.00019999987796696534, - "loss": 46.0, - "step": 3098 - }, - { - "epoch": 0.49905390716212406, - "grad_norm": 0.0002666429500095546, - "learning_rate": 0.00019999987788791565, - "loss": 46.0, - "step": 3099 - }, - { - "epoch": 0.49921494424091145, - "grad_norm": 0.0008934899815358222, - "learning_rate": 0.00019999987780884034, - "loss": 46.0, - "step": 3100 - }, - { - "epoch": 0.49937598131969885, - "grad_norm": 0.0008154212846420705, - "learning_rate": 0.00019999987772973942, - "loss": 46.0, - "step": 3101 - }, - { - "epoch": 0.49953701839848624, - "grad_norm": 0.0005573169910348952, - "learning_rate": 0.0001999998776506129, - "loss": 46.0, - "step": 3102 - }, - { - "epoch": 0.49969805547727364, - "grad_norm": 0.00031111351563595235, - "learning_rate": 0.0001999998775714608, - "loss": 46.0, - "step": 3103 - }, - { - "epoch": 0.49985909255606104, - "grad_norm": 0.0006274578045122325, - "learning_rate": 0.0001999998774922831, - "loss": 46.0, - "step": 3104 - }, - { - "epoch": 0.5000201296348484, - "grad_norm": 0.00036200587055645883, - "learning_rate": 0.00019999987741307985, - "loss": 46.0, - "step": 3105 - }, - { - "epoch": 0.5001811667136358, - "grad_norm": 0.0010207627201452851, - "learning_rate": 0.00019999987733385096, - "loss": 46.0, - "step": 3106 - }, - { - "epoch": 0.5003422037924232, - "grad_norm": 0.0009090094245038927, - "learning_rate": 0.0001999998772545965, - "loss": 46.0, - "step": 3107 - }, - { - "epoch": 0.5005032408712106, - "grad_norm": 0.0009704349213279784, - "learning_rate": 0.0001999998771753164, - "loss": 46.0, - "step": 3108 - }, - { - "epoch": 0.500664277949998, - "grad_norm": 0.0020493874326348305, - "learning_rate": 0.00019999987709601072, - "loss": 46.0, - "step": 3109 - }, - { - "epoch": 0.5008253150287854, - "grad_norm": 0.0015200057532638311, - "learning_rate": 0.00019999987701667946, - "loss": 46.0, - "step": 3110 - }, - { - "epoch": 0.5009863521075728, - "grad_norm": 0.0005671605467796326, - "learning_rate": 0.00019999987693732258, - "loss": 46.0, - "step": 3111 - }, - { - "epoch": 0.5011473891863601, - "grad_norm": 0.0006468218052759767, - "learning_rate": 0.00019999987685794015, - "loss": 46.0, - "step": 3112 - }, - { - "epoch": 0.5013084262651476, - "grad_norm": 0.0005289054824970663, - "learning_rate": 0.0001999998767785321, - "loss": 46.0, - "step": 3113 - }, - { - "epoch": 0.5014694633439349, - "grad_norm": 0.000521992624271661, - "learning_rate": 0.00019999987669909846, - "loss": 46.0, - "step": 3114 - }, - { - "epoch": 0.5016305004227223, - "grad_norm": 0.0007570886518806219, - "learning_rate": 0.0001999998766196392, - "loss": 46.0, - "step": 3115 - }, - { - "epoch": 0.5017915375015097, - "grad_norm": 0.0005296626477502286, - "learning_rate": 0.00019999987654015436, - "loss": 46.0, - "step": 3116 - }, - { - "epoch": 0.5019525745802971, - "grad_norm": 0.0005140362773090601, - "learning_rate": 0.00019999987646064396, - "loss": 46.0, - "step": 3117 - }, - { - "epoch": 0.5021136116590845, - "grad_norm": 0.0008891879697330296, - "learning_rate": 0.00019999987638110792, - "loss": 46.0, - "step": 3118 - }, - { - "epoch": 0.5022746487378719, - "grad_norm": 0.00046948957606218755, - "learning_rate": 0.0001999998763015463, - "loss": 46.0, - "step": 3119 - }, - { - "epoch": 0.5024356858166593, - "grad_norm": 0.0006360865663737059, - "learning_rate": 0.0001999998762219591, - "loss": 46.0, - "step": 3120 - }, - { - "epoch": 0.5025967228954467, - "grad_norm": 0.0005822768434882164, - "learning_rate": 0.0001999998761423463, - "loss": 46.0, - "step": 3121 - }, - { - "epoch": 0.5027577599742341, - "grad_norm": 0.0007643274147994816, - "learning_rate": 0.00019999987606270789, - "loss": 46.0, - "step": 3122 - }, - { - "epoch": 0.5029187970530214, - "grad_norm": 0.0021854920778423548, - "learning_rate": 0.00019999987598304388, - "loss": 46.0, - "step": 3123 - }, - { - "epoch": 0.5030798341318089, - "grad_norm": 0.0004112437309231609, - "learning_rate": 0.0001999998759033543, - "loss": 46.0, - "step": 3124 - }, - { - "epoch": 0.5032408712105962, - "grad_norm": 0.0009620593045838177, - "learning_rate": 0.00019999987582363914, - "loss": 46.0, - "step": 3125 - }, - { - "epoch": 0.5034019082893836, - "grad_norm": 0.0005368393030948937, - "learning_rate": 0.00019999987574389835, - "loss": 46.0, - "step": 3126 - }, - { - "epoch": 0.503562945368171, - "grad_norm": 0.0003768324968405068, - "learning_rate": 0.00019999987566413197, - "loss": 46.0, - "step": 3127 - }, - { - "epoch": 0.5037239824469584, - "grad_norm": 0.001006724894978106, - "learning_rate": 0.00019999987558434, - "loss": 46.0, - "step": 3128 - }, - { - "epoch": 0.5038850195257458, - "grad_norm": 0.0004461338394321501, - "learning_rate": 0.00019999987550452242, - "loss": 46.0, - "step": 3129 - }, - { - "epoch": 0.5040460566045332, - "grad_norm": 0.0007448829710483551, - "learning_rate": 0.00019999987542467927, - "loss": 46.0, - "step": 3130 - }, - { - "epoch": 0.5042070936833206, - "grad_norm": 0.0013202362461015582, - "learning_rate": 0.00019999987534481052, - "loss": 46.0, - "step": 3131 - }, - { - "epoch": 0.504368130762108, - "grad_norm": 0.0020129114855080843, - "learning_rate": 0.00019999987526491615, - "loss": 46.0, - "step": 3132 - }, - { - "epoch": 0.5045291678408954, - "grad_norm": 0.000902953848708421, - "learning_rate": 0.00019999987518499622, - "loss": 46.0, - "step": 3133 - }, - { - "epoch": 0.5046902049196827, - "grad_norm": 0.00033976256963796914, - "learning_rate": 0.00019999987510505067, - "loss": 46.0, - "step": 3134 - }, - { - "epoch": 0.5048512419984702, - "grad_norm": 0.0006377190002240241, - "learning_rate": 0.00019999987502507954, - "loss": 46.0, - "step": 3135 - }, - { - "epoch": 0.5050122790772575, - "grad_norm": 0.0003616051108110696, - "learning_rate": 0.0001999998749450828, - "loss": 46.0, - "step": 3136 - }, - { - "epoch": 0.5051733161560449, - "grad_norm": 0.00026360744959674776, - "learning_rate": 0.0001999998748650605, - "loss": 46.0, - "step": 3137 - }, - { - "epoch": 0.5053343532348323, - "grad_norm": 0.0005565241444855928, - "learning_rate": 0.00019999987478501257, - "loss": 46.0, - "step": 3138 - }, - { - "epoch": 0.5054953903136197, - "grad_norm": 0.00028414541156962514, - "learning_rate": 0.00019999987470493906, - "loss": 46.0, - "step": 3139 - }, - { - "epoch": 0.5056564273924071, - "grad_norm": 0.0004502649244386703, - "learning_rate": 0.00019999987462483994, - "loss": 46.0, - "step": 3140 - }, - { - "epoch": 0.5058174644711945, - "grad_norm": 0.0003624082892201841, - "learning_rate": 0.00019999987454471523, - "loss": 46.0, - "step": 3141 - }, - { - "epoch": 0.5059785015499819, - "grad_norm": 0.0014462817925959826, - "learning_rate": 0.00019999987446456494, - "loss": 46.0, - "step": 3142 - }, - { - "epoch": 0.5061395386287693, - "grad_norm": 0.0007111776503734291, - "learning_rate": 0.00019999987438438902, - "loss": 46.0, - "step": 3143 - }, - { - "epoch": 0.5063005757075567, - "grad_norm": 0.00025288472534157336, - "learning_rate": 0.00019999987430418753, - "loss": 46.0, - "step": 3144 - }, - { - "epoch": 0.506461612786344, - "grad_norm": 0.0018452502554282546, - "learning_rate": 0.00019999987422396047, - "loss": 46.0, - "step": 3145 - }, - { - "epoch": 0.5066226498651314, - "grad_norm": 0.0004520209622569382, - "learning_rate": 0.00019999987414370777, - "loss": 46.0, - "step": 3146 - }, - { - "epoch": 0.5067836869439188, - "grad_norm": 0.0011336015304550529, - "learning_rate": 0.0001999998740634295, - "loss": 46.0, - "step": 3147 - }, - { - "epoch": 0.5069447240227062, - "grad_norm": 0.0006521551986224949, - "learning_rate": 0.00019999987398312567, - "loss": 46.0, - "step": 3148 - }, - { - "epoch": 0.5071057611014936, - "grad_norm": 0.0005709989927709103, - "learning_rate": 0.00019999987390279618, - "loss": 46.0, - "step": 3149 - }, - { - "epoch": 0.507266798180281, - "grad_norm": 0.00048226548824459314, - "learning_rate": 0.00019999987382244113, - "loss": 46.0, - "step": 3150 - }, - { - "epoch": 0.5074278352590684, - "grad_norm": 0.00076228694524616, - "learning_rate": 0.0001999998737420605, - "loss": 46.0, - "step": 3151 - }, - { - "epoch": 0.5075888723378558, - "grad_norm": 0.0005003261612728238, - "learning_rate": 0.00019999987366165422, - "loss": 46.0, - "step": 3152 - }, - { - "epoch": 0.5077499094166432, - "grad_norm": 0.0019028366077691317, - "learning_rate": 0.00019999987358122238, - "loss": 46.0, - "step": 3153 - }, - { - "epoch": 0.5079109464954306, - "grad_norm": 0.00042662062332965434, - "learning_rate": 0.00019999987350076493, - "loss": 46.0, - "step": 3154 - }, - { - "epoch": 0.508071983574218, - "grad_norm": 0.0017045423155650496, - "learning_rate": 0.00019999987342028192, - "loss": 46.0, - "step": 3155 - }, - { - "epoch": 0.5082330206530054, - "grad_norm": 0.0005582640296779573, - "learning_rate": 0.0001999998733397733, - "loss": 46.0, - "step": 3156 - }, - { - "epoch": 0.5083940577317927, - "grad_norm": 0.0018368320306763053, - "learning_rate": 0.00019999987325923906, - "loss": 46.0, - "step": 3157 - }, - { - "epoch": 0.5085550948105801, - "grad_norm": 0.0006485346821136773, - "learning_rate": 0.00019999987317867926, - "loss": 46.0, - "step": 3158 - }, - { - "epoch": 0.5087161318893675, - "grad_norm": 0.000958534306846559, - "learning_rate": 0.00019999987309809382, - "loss": 46.0, - "step": 3159 - }, - { - "epoch": 0.5088771689681549, - "grad_norm": 0.0006661777733825147, - "learning_rate": 0.00019999987301748284, - "loss": 46.0, - "step": 3160 - }, - { - "epoch": 0.5090382060469423, - "grad_norm": 0.0002982582664117217, - "learning_rate": 0.00019999987293684623, - "loss": 46.0, - "step": 3161 - }, - { - "epoch": 0.5091992431257297, - "grad_norm": 0.0004241916467435658, - "learning_rate": 0.00019999987285618405, - "loss": 46.0, - "step": 3162 - }, - { - "epoch": 0.5093602802045171, - "grad_norm": 0.000682660611346364, - "learning_rate": 0.00019999987277549624, - "loss": 46.0, - "step": 3163 - }, - { - "epoch": 0.5095213172833045, - "grad_norm": 0.0004575554048642516, - "learning_rate": 0.00019999987269478286, - "loss": 46.0, - "step": 3164 - }, - { - "epoch": 0.5096823543620919, - "grad_norm": 0.0004946583649143577, - "learning_rate": 0.00019999987261404387, - "loss": 46.0, - "step": 3165 - }, - { - "epoch": 0.5098433914408793, - "grad_norm": 0.00042522800504229963, - "learning_rate": 0.00019999987253327932, - "loss": 46.0, - "step": 3166 - }, - { - "epoch": 0.5100044285196667, - "grad_norm": 0.002267090603709221, - "learning_rate": 0.00019999987245248915, - "loss": 46.0, - "step": 3167 - }, - { - "epoch": 0.510165465598454, - "grad_norm": 0.0002657459699548781, - "learning_rate": 0.00019999987237167337, - "loss": 46.0, - "step": 3168 - }, - { - "epoch": 0.5103265026772414, - "grad_norm": 0.00038011313881725073, - "learning_rate": 0.000199999872290832, - "loss": 46.0, - "step": 3169 - }, - { - "epoch": 0.5104875397560288, - "grad_norm": 0.00034682228579185903, - "learning_rate": 0.00019999987220996505, - "loss": 46.0, - "step": 3170 - }, - { - "epoch": 0.5106485768348162, - "grad_norm": 0.0005717247840948403, - "learning_rate": 0.0001999998721290725, - "loss": 46.0, - "step": 3171 - }, - { - "epoch": 0.5108096139136036, - "grad_norm": 0.0006464279722422361, - "learning_rate": 0.00019999987204815436, - "loss": 46.0, - "step": 3172 - }, - { - "epoch": 0.510970650992391, - "grad_norm": 0.0006448480999097228, - "learning_rate": 0.00019999987196721062, - "loss": 46.0, - "step": 3173 - }, - { - "epoch": 0.5111316880711784, - "grad_norm": 0.0004309078212827444, - "learning_rate": 0.00019999987188624129, - "loss": 46.0, - "step": 3174 - }, - { - "epoch": 0.5112927251499658, - "grad_norm": 0.0004590658063534647, - "learning_rate": 0.00019999987180524634, - "loss": 46.0, - "step": 3175 - }, - { - "epoch": 0.5114537622287532, - "grad_norm": 0.00035977925290353596, - "learning_rate": 0.00019999987172422584, - "loss": 46.0, - "step": 3176 - }, - { - "epoch": 0.5116147993075406, - "grad_norm": 0.0006298014195635915, - "learning_rate": 0.00019999987164317972, - "loss": 46.0, - "step": 3177 - }, - { - "epoch": 0.511775836386328, - "grad_norm": 0.0014333116123452783, - "learning_rate": 0.000199999871562108, - "loss": 46.0, - "step": 3178 - }, - { - "epoch": 0.5119368734651153, - "grad_norm": 0.0005512171774171293, - "learning_rate": 0.0001999998714810107, - "loss": 46.0, - "step": 3179 - }, - { - "epoch": 0.5120979105439027, - "grad_norm": 0.00156623893417418, - "learning_rate": 0.00019999987139988782, - "loss": 46.0, - "step": 3180 - }, - { - "epoch": 0.5122589476226901, - "grad_norm": 0.00032754268613643944, - "learning_rate": 0.0001999998713187393, - "loss": 46.0, - "step": 3181 - }, - { - "epoch": 0.5124199847014775, - "grad_norm": 0.0008100890554487705, - "learning_rate": 0.0001999998712375652, - "loss": 46.0, - "step": 3182 - }, - { - "epoch": 0.5125810217802649, - "grad_norm": 0.000321648403769359, - "learning_rate": 0.00019999987115636554, - "loss": 46.0, - "step": 3183 - }, - { - "epoch": 0.5127420588590523, - "grad_norm": 0.0003590634441934526, - "learning_rate": 0.00019999987107514024, - "loss": 46.0, - "step": 3184 - }, - { - "epoch": 0.5129030959378397, - "grad_norm": 0.0005317406030371785, - "learning_rate": 0.00019999987099388937, - "loss": 46.0, - "step": 3185 - }, - { - "epoch": 0.5130641330166271, - "grad_norm": 0.001840433687902987, - "learning_rate": 0.0001999998709126129, - "loss": 46.0, - "step": 3186 - }, - { - "epoch": 0.5132251700954145, - "grad_norm": 0.0013532418524846435, - "learning_rate": 0.00019999987083131084, - "loss": 46.0, - "step": 3187 - }, - { - "epoch": 0.5133862071742019, - "grad_norm": 0.0003129248507320881, - "learning_rate": 0.00019999987074998315, - "loss": 46.0, - "step": 3188 - }, - { - "epoch": 0.5135472442529893, - "grad_norm": 0.0009054663823917508, - "learning_rate": 0.0001999998706686299, - "loss": 46.0, - "step": 3189 - }, - { - "epoch": 0.5137082813317766, - "grad_norm": 0.003508378518745303, - "learning_rate": 0.00019999987058725105, - "loss": 46.0, - "step": 3190 - }, - { - "epoch": 0.513869318410564, - "grad_norm": 0.0005252668634057045, - "learning_rate": 0.0001999998705058466, - "loss": 46.0, - "step": 3191 - }, - { - "epoch": 0.5140303554893514, - "grad_norm": 0.0011838222853839397, - "learning_rate": 0.00019999987042441657, - "loss": 46.0, - "step": 3192 - }, - { - "epoch": 0.5141913925681388, - "grad_norm": 0.0006155192386358976, - "learning_rate": 0.00019999987034296094, - "loss": 46.0, - "step": 3193 - }, - { - "epoch": 0.5143524296469262, - "grad_norm": 0.0009439602144993842, - "learning_rate": 0.0001999998702614797, - "loss": 46.0, - "step": 3194 - }, - { - "epoch": 0.5145134667257136, - "grad_norm": 0.00026224739849567413, - "learning_rate": 0.00019999987017997289, - "loss": 46.0, - "step": 3195 - }, - { - "epoch": 0.514674503804501, - "grad_norm": 0.001287496299482882, - "learning_rate": 0.00019999987009844045, - "loss": 46.0, - "step": 3196 - }, - { - "epoch": 0.5148355408832884, - "grad_norm": 0.0007224658620543778, - "learning_rate": 0.00019999987001688245, - "loss": 46.0, - "step": 3197 - }, - { - "epoch": 0.5149965779620758, - "grad_norm": 0.0005090706399641931, - "learning_rate": 0.0001999998699352988, - "loss": 46.0, - "step": 3198 - }, - { - "epoch": 0.5151576150408631, - "grad_norm": 0.0020414614118635654, - "learning_rate": 0.00019999986985368964, - "loss": 46.0, - "step": 3199 - }, - { - "epoch": 0.5153186521196506, - "grad_norm": 0.00012341848923824728, - "learning_rate": 0.00019999986977205483, - "loss": 46.0, - "step": 3200 - }, - { - "epoch": 0.5154796891984379, - "grad_norm": 0.0013383296318352222, - "learning_rate": 0.00019999986969039443, - "loss": 46.0, - "step": 3201 - }, - { - "epoch": 0.5156407262772253, - "grad_norm": 0.0005204256158322096, - "learning_rate": 0.00019999986960870844, - "loss": 46.0, - "step": 3202 - }, - { - "epoch": 0.5158017633560127, - "grad_norm": 0.0005140191060490906, - "learning_rate": 0.00019999986952699684, - "loss": 46.0, - "step": 3203 - }, - { - "epoch": 0.5159628004348001, - "grad_norm": 0.001439216430298984, - "learning_rate": 0.0001999998694452597, - "loss": 46.0, - "step": 3204 - }, - { - "epoch": 0.5161238375135875, - "grad_norm": 0.0003279148950241506, - "learning_rate": 0.00019999986936349693, - "loss": 46.0, - "step": 3205 - }, - { - "epoch": 0.5162848745923749, - "grad_norm": 0.000539019179996103, - "learning_rate": 0.00019999986928170854, - "loss": 46.0, - "step": 3206 - }, - { - "epoch": 0.5164459116711623, - "grad_norm": 0.0005979888956062496, - "learning_rate": 0.0001999998691998946, - "loss": 46.0, - "step": 3207 - }, - { - "epoch": 0.5166069487499497, - "grad_norm": 0.0006228465936146677, - "learning_rate": 0.00019999986911805505, - "loss": 46.0, - "step": 3208 - }, - { - "epoch": 0.5167679858287371, - "grad_norm": 0.0012169572291895747, - "learning_rate": 0.00019999986903618985, - "loss": 46.0, - "step": 3209 - }, - { - "epoch": 0.5169290229075244, - "grad_norm": 0.00021819921676069498, - "learning_rate": 0.0001999998689542991, - "loss": 46.0, - "step": 3210 - }, - { - "epoch": 0.5170900599863119, - "grad_norm": 0.00023319286992773414, - "learning_rate": 0.00019999986887238278, - "loss": 46.0, - "step": 3211 - }, - { - "epoch": 0.5172510970650992, - "grad_norm": 0.00025865319184958935, - "learning_rate": 0.00019999986879044084, - "loss": 46.0, - "step": 3212 - }, - { - "epoch": 0.5174121341438866, - "grad_norm": 0.0011366064427420497, - "learning_rate": 0.00019999986870847331, - "loss": 46.0, - "step": 3213 - }, - { - "epoch": 0.517573171222674, - "grad_norm": 0.0016338558634743094, - "learning_rate": 0.00019999986862648017, - "loss": 46.0, - "step": 3214 - }, - { - "epoch": 0.5177342083014614, - "grad_norm": 0.0006788591272197664, - "learning_rate": 0.00019999986854446147, - "loss": 46.0, - "step": 3215 - }, - { - "epoch": 0.5178952453802488, - "grad_norm": 0.0004349009250290692, - "learning_rate": 0.00019999986846241713, - "loss": 46.0, - "step": 3216 - }, - { - "epoch": 0.5180562824590362, - "grad_norm": 0.0006168187246657908, - "learning_rate": 0.00019999986838034722, - "loss": 46.0, - "step": 3217 - }, - { - "epoch": 0.5182173195378236, - "grad_norm": 0.0015551609685644507, - "learning_rate": 0.00019999986829825173, - "loss": 46.0, - "step": 3218 - }, - { - "epoch": 0.518378356616611, - "grad_norm": 0.0014264252968132496, - "learning_rate": 0.00019999986821613063, - "loss": 46.0, - "step": 3219 - }, - { - "epoch": 0.5185393936953984, - "grad_norm": 0.0006923124892637134, - "learning_rate": 0.00019999986813398394, - "loss": 46.0, - "step": 3220 - }, - { - "epoch": 0.5187004307741857, - "grad_norm": 0.0003963627968914807, - "learning_rate": 0.00019999986805181163, - "loss": 46.0, - "step": 3221 - }, - { - "epoch": 0.5188614678529732, - "grad_norm": 0.0020369815174490213, - "learning_rate": 0.00019999986796961374, - "loss": 46.0, - "step": 3222 - }, - { - "epoch": 0.5190225049317605, - "grad_norm": 0.0008632676908746362, - "learning_rate": 0.00019999986788739026, - "loss": 46.0, - "step": 3223 - }, - { - "epoch": 0.519183542010548, - "grad_norm": 0.001770115690305829, - "learning_rate": 0.0001999998678051412, - "loss": 46.0, - "step": 3224 - }, - { - "epoch": 0.5193445790893353, - "grad_norm": 0.0012730071321129799, - "learning_rate": 0.00019999986772286654, - "loss": 46.0, - "step": 3225 - }, - { - "epoch": 0.5195056161681227, - "grad_norm": 0.0003461350570432842, - "learning_rate": 0.00019999986764056627, - "loss": 46.0, - "step": 3226 - }, - { - "epoch": 0.5196666532469101, - "grad_norm": 0.0003870897344313562, - "learning_rate": 0.0001999998675582404, - "loss": 46.0, - "step": 3227 - }, - { - "epoch": 0.5198276903256975, - "grad_norm": 0.0012966131325811148, - "learning_rate": 0.00019999986747588894, - "loss": 46.0, - "step": 3228 - }, - { - "epoch": 0.5199887274044849, - "grad_norm": 0.002284219255670905, - "learning_rate": 0.0001999998673935119, - "loss": 46.0, - "step": 3229 - }, - { - "epoch": 0.5201497644832723, - "grad_norm": 0.0004675232048612088, - "learning_rate": 0.00019999986731110924, - "loss": 46.0, - "step": 3230 - }, - { - "epoch": 0.5203108015620597, - "grad_norm": 0.001080109621398151, - "learning_rate": 0.000199999867228681, - "loss": 46.0, - "step": 3231 - }, - { - "epoch": 0.520471838640847, - "grad_norm": 0.0012963133631274104, - "learning_rate": 0.00019999986714622719, - "loss": 46.0, - "step": 3232 - }, - { - "epoch": 0.5206328757196345, - "grad_norm": 0.0005809199064970016, - "learning_rate": 0.00019999986706374773, - "loss": 46.0, - "step": 3233 - }, - { - "epoch": 0.5207939127984218, - "grad_norm": 0.0006185378879308701, - "learning_rate": 0.0001999998669812427, - "loss": 46.0, - "step": 3234 - }, - { - "epoch": 0.5209549498772093, - "grad_norm": 0.0011855832999572158, - "learning_rate": 0.0001999998668987121, - "loss": 46.0, - "step": 3235 - }, - { - "epoch": 0.5211159869559966, - "grad_norm": 0.0011884800624102354, - "learning_rate": 0.00019999986681615588, - "loss": 46.0, - "step": 3236 - }, - { - "epoch": 0.521277024034784, - "grad_norm": 0.00159457977861166, - "learning_rate": 0.00019999986673357407, - "loss": 46.0, - "step": 3237 - }, - { - "epoch": 0.5214380611135714, - "grad_norm": 0.0015858509577810764, - "learning_rate": 0.00019999986665096667, - "loss": 46.0, - "step": 3238 - }, - { - "epoch": 0.5215990981923588, - "grad_norm": 0.0010136772179976106, - "learning_rate": 0.00019999986656833366, - "loss": 46.0, - "step": 3239 - }, - { - "epoch": 0.5217601352711462, - "grad_norm": 0.0006406564498320222, - "learning_rate": 0.0001999998664856751, - "loss": 46.0, - "step": 3240 - }, - { - "epoch": 0.5219211723499336, - "grad_norm": 0.0010428943205624819, - "learning_rate": 0.0001999998664029909, - "loss": 46.0, - "step": 3241 - }, - { - "epoch": 0.522082209428721, - "grad_norm": 0.0010522321099415421, - "learning_rate": 0.0001999998663202811, - "loss": 46.0, - "step": 3242 - }, - { - "epoch": 0.5222432465075083, - "grad_norm": 0.00025458712480030954, - "learning_rate": 0.00019999986623754574, - "loss": 46.0, - "step": 3243 - }, - { - "epoch": 0.5224042835862958, - "grad_norm": 0.0012281094677746296, - "learning_rate": 0.0001999998661547848, - "loss": 46.0, - "step": 3244 - }, - { - "epoch": 0.5225653206650831, - "grad_norm": 0.001638661720789969, - "learning_rate": 0.0001999998660719982, - "loss": 46.0, - "step": 3245 - }, - { - "epoch": 0.5227263577438706, - "grad_norm": 0.0005527132307179272, - "learning_rate": 0.00019999986598918606, - "loss": 46.0, - "step": 3246 - }, - { - "epoch": 0.5228873948226579, - "grad_norm": 0.00024619928444735706, - "learning_rate": 0.00019999986590634827, - "loss": 46.0, - "step": 3247 - }, - { - "epoch": 0.5230484319014453, - "grad_norm": 0.000301736785331741, - "learning_rate": 0.00019999986582348492, - "loss": 46.0, - "step": 3248 - }, - { - "epoch": 0.5232094689802327, - "grad_norm": 0.0031265667639672756, - "learning_rate": 0.000199999865740596, - "loss": 46.0, - "step": 3249 - }, - { - "epoch": 0.5233705060590201, - "grad_norm": 0.0005060398252680898, - "learning_rate": 0.00019999986565768146, - "loss": 46.0, - "step": 3250 - }, - { - "epoch": 0.5235315431378075, - "grad_norm": 0.0008124821470119059, - "learning_rate": 0.0001999998655747413, - "loss": 46.0, - "step": 3251 - }, - { - "epoch": 0.5236925802165948, - "grad_norm": 0.0007707438198849559, - "learning_rate": 0.00019999986549177557, - "loss": 46.0, - "step": 3252 - }, - { - "epoch": 0.5238536172953823, - "grad_norm": 0.0006837815162725747, - "learning_rate": 0.00019999986540878426, - "loss": 46.0, - "step": 3253 - }, - { - "epoch": 0.5240146543741696, - "grad_norm": 0.0007320574368350208, - "learning_rate": 0.00019999986532576733, - "loss": 46.0, - "step": 3254 - }, - { - "epoch": 0.5241756914529571, - "grad_norm": 0.00021528542856685817, - "learning_rate": 0.0001999998652427248, - "loss": 46.0, - "step": 3255 - }, - { - "epoch": 0.5243367285317444, - "grad_norm": 0.0006836784305050969, - "learning_rate": 0.00019999986515965672, - "loss": 46.0, - "step": 3256 - }, - { - "epoch": 0.5244977656105319, - "grad_norm": 0.0013585075503215194, - "learning_rate": 0.000199999865076563, - "loss": 46.0, - "step": 3257 - }, - { - "epoch": 0.5246588026893192, - "grad_norm": 0.0006389369955286384, - "learning_rate": 0.00019999986499344373, - "loss": 46.0, - "step": 3258 - }, - { - "epoch": 0.5248198397681066, - "grad_norm": 0.00033288082340732217, - "learning_rate": 0.0001999998649102988, - "loss": 46.0, - "step": 3259 - }, - { - "epoch": 0.524980876846894, - "grad_norm": 0.002165345475077629, - "learning_rate": 0.0001999998648271283, - "loss": 46.0, - "step": 3260 - }, - { - "epoch": 0.5251419139256814, - "grad_norm": 0.0005821749218739569, - "learning_rate": 0.00019999986474393224, - "loss": 46.0, - "step": 3261 - }, - { - "epoch": 0.5253029510044688, - "grad_norm": 0.0004455771413631737, - "learning_rate": 0.00019999986466071054, - "loss": 46.0, - "step": 3262 - }, - { - "epoch": 0.5254639880832561, - "grad_norm": 0.0023695342242717743, - "learning_rate": 0.00019999986457746327, - "loss": 46.0, - "step": 3263 - }, - { - "epoch": 0.5256250251620436, - "grad_norm": 0.0007352940738201141, - "learning_rate": 0.00019999986449419042, - "loss": 46.0, - "step": 3264 - }, - { - "epoch": 0.5257860622408309, - "grad_norm": 0.0006059968145564198, - "learning_rate": 0.00019999986441089193, - "loss": 46.0, - "step": 3265 - }, - { - "epoch": 0.5259470993196184, - "grad_norm": 0.0005157870473340154, - "learning_rate": 0.00019999986432756787, - "loss": 46.0, - "step": 3266 - }, - { - "epoch": 0.5261081363984057, - "grad_norm": 0.00043126571108587086, - "learning_rate": 0.00019999986424421823, - "loss": 46.0, - "step": 3267 - }, - { - "epoch": 0.5262691734771932, - "grad_norm": 0.0004135149938520044, - "learning_rate": 0.00019999986416084298, - "loss": 46.0, - "step": 3268 - }, - { - "epoch": 0.5264302105559805, - "grad_norm": 0.0007121902890503407, - "learning_rate": 0.00019999986407744213, - "loss": 46.0, - "step": 3269 - }, - { - "epoch": 0.526591247634768, - "grad_norm": 0.00033011010964401066, - "learning_rate": 0.0001999998639940157, - "loss": 46.0, - "step": 3270 - }, - { - "epoch": 0.5267522847135553, - "grad_norm": 0.001519715297035873, - "learning_rate": 0.00019999986391056366, - "loss": 46.0, - "step": 3271 - }, - { - "epoch": 0.5269133217923427, - "grad_norm": 0.00042124453466385603, - "learning_rate": 0.00019999986382708605, - "loss": 46.0, - "step": 3272 - }, - { - "epoch": 0.5270743588711301, - "grad_norm": 0.0007642715936526656, - "learning_rate": 0.00019999986374358284, - "loss": 46.0, - "step": 3273 - }, - { - "epoch": 0.5272353959499174, - "grad_norm": 0.0008970482740551233, - "learning_rate": 0.000199999863660054, - "loss": 46.0, - "step": 3274 - }, - { - "epoch": 0.5273964330287049, - "grad_norm": 0.0010133379837498069, - "learning_rate": 0.0001999998635764996, - "loss": 46.0, - "step": 3275 - }, - { - "epoch": 0.5275574701074922, - "grad_norm": 0.00031207629945129156, - "learning_rate": 0.0001999998634929196, - "loss": 46.0, - "step": 3276 - }, - { - "epoch": 0.5277185071862797, - "grad_norm": 0.0002669797686394304, - "learning_rate": 0.000199999863409314, - "loss": 46.0, - "step": 3277 - }, - { - "epoch": 0.527879544265067, - "grad_norm": 0.0014513595961034298, - "learning_rate": 0.0001999998633256828, - "loss": 46.0, - "step": 3278 - }, - { - "epoch": 0.5280405813438545, - "grad_norm": 0.0011736878659576178, - "learning_rate": 0.000199999863242026, - "loss": 46.0, - "step": 3279 - }, - { - "epoch": 0.5282016184226418, - "grad_norm": 0.0012734222691506147, - "learning_rate": 0.0001999998631583436, - "loss": 46.0, - "step": 3280 - }, - { - "epoch": 0.5283626555014292, - "grad_norm": 0.0006362651474773884, - "learning_rate": 0.00019999986307463563, - "loss": 46.0, - "step": 3281 - }, - { - "epoch": 0.5285236925802166, - "grad_norm": 0.0008188053616322577, - "learning_rate": 0.00019999986299090204, - "loss": 46.0, - "step": 3282 - }, - { - "epoch": 0.528684729659004, - "grad_norm": 0.00035949761513620615, - "learning_rate": 0.0001999998629071429, - "loss": 46.0, - "step": 3283 - }, - { - "epoch": 0.5288457667377914, - "grad_norm": 0.0005487981252372265, - "learning_rate": 0.0001999998628233581, - "loss": 46.0, - "step": 3284 - }, - { - "epoch": 0.5290068038165787, - "grad_norm": 0.001972783589735627, - "learning_rate": 0.00019999986273954776, - "loss": 46.0, - "step": 3285 - }, - { - "epoch": 0.5291678408953662, - "grad_norm": 0.0007641033735126257, - "learning_rate": 0.0001999998626557118, - "loss": 46.0, - "step": 3286 - }, - { - "epoch": 0.5293288779741535, - "grad_norm": 0.0013937512412667274, - "learning_rate": 0.00019999986257185026, - "loss": 46.0, - "step": 3287 - }, - { - "epoch": 0.529489915052941, - "grad_norm": 0.0006470530643127859, - "learning_rate": 0.0001999998624879631, - "loss": 46.0, - "step": 3288 - }, - { - "epoch": 0.5296509521317283, - "grad_norm": 0.000434192072134465, - "learning_rate": 0.00019999986240405037, - "loss": 46.0, - "step": 3289 - }, - { - "epoch": 0.5298119892105158, - "grad_norm": 0.0006054903496988118, - "learning_rate": 0.00019999986232011204, - "loss": 46.0, - "step": 3290 - }, - { - "epoch": 0.5299730262893031, - "grad_norm": 0.00040821536094881594, - "learning_rate": 0.00019999986223614812, - "loss": 46.0, - "step": 3291 - }, - { - "epoch": 0.5301340633680905, - "grad_norm": 0.0009323583799414337, - "learning_rate": 0.00019999986215215858, - "loss": 46.0, - "step": 3292 - }, - { - "epoch": 0.5302951004468779, - "grad_norm": 0.0008833401370793581, - "learning_rate": 0.00019999986206814345, - "loss": 46.0, - "step": 3293 - }, - { - "epoch": 0.5304561375256653, - "grad_norm": 0.0007340603042393923, - "learning_rate": 0.00019999986198410277, - "loss": 46.0, - "step": 3294 - }, - { - "epoch": 0.5306171746044527, - "grad_norm": 0.003398546949028969, - "learning_rate": 0.00019999986190003644, - "loss": 46.0, - "step": 3295 - }, - { - "epoch": 0.53077821168324, - "grad_norm": 0.0012400518171489239, - "learning_rate": 0.00019999986181594453, - "loss": 46.0, - "step": 3296 - }, - { - "epoch": 0.5309392487620275, - "grad_norm": 0.00046752297203056514, - "learning_rate": 0.00019999986173182706, - "loss": 46.0, - "step": 3297 - }, - { - "epoch": 0.5311002858408148, - "grad_norm": 0.000741161871701479, - "learning_rate": 0.00019999986164768397, - "loss": 46.0, - "step": 3298 - }, - { - "epoch": 0.5312613229196023, - "grad_norm": 0.0015461597358807921, - "learning_rate": 0.00019999986156351524, - "loss": 46.0, - "step": 3299 - }, - { - "epoch": 0.5314223599983896, - "grad_norm": 0.00042216136353090405, - "learning_rate": 0.00019999986147932098, - "loss": 46.0, - "step": 3300 - }, - { - "epoch": 0.5315833970771771, - "grad_norm": 0.000270228018052876, - "learning_rate": 0.0001999998613951011, - "loss": 46.0, - "step": 3301 - }, - { - "epoch": 0.5317444341559644, - "grad_norm": 0.00042081010178662837, - "learning_rate": 0.0001999998613108556, - "loss": 46.0, - "step": 3302 - }, - { - "epoch": 0.5319054712347518, - "grad_norm": 0.0019501334754750133, - "learning_rate": 0.00019999986122658456, - "loss": 46.0, - "step": 3303 - }, - { - "epoch": 0.5320665083135392, - "grad_norm": 0.000719082192517817, - "learning_rate": 0.00019999986114228787, - "loss": 46.0, - "step": 3304 - }, - { - "epoch": 0.5322275453923265, - "grad_norm": 0.00044744223123416305, - "learning_rate": 0.00019999986105796565, - "loss": 46.0, - "step": 3305 - }, - { - "epoch": 0.532388582471114, - "grad_norm": 0.0014701862819492817, - "learning_rate": 0.00019999986097361778, - "loss": 46.0, - "step": 3306 - }, - { - "epoch": 0.5325496195499013, - "grad_norm": 0.0014685611240565777, - "learning_rate": 0.00019999986088924433, - "loss": 46.0, - "step": 3307 - }, - { - "epoch": 0.5327106566286888, - "grad_norm": 0.0005893473862670362, - "learning_rate": 0.00019999986080484529, - "loss": 46.0, - "step": 3308 - }, - { - "epoch": 0.5328716937074761, - "grad_norm": 0.0017547404859215021, - "learning_rate": 0.00019999986072042068, - "loss": 46.0, - "step": 3309 - }, - { - "epoch": 0.5330327307862636, - "grad_norm": 0.0003632925800047815, - "learning_rate": 0.00019999986063597042, - "loss": 46.0, - "step": 3310 - }, - { - "epoch": 0.5331937678650509, - "grad_norm": 0.0006466899649240077, - "learning_rate": 0.0001999998605514946, - "loss": 46.0, - "step": 3311 - }, - { - "epoch": 0.5333548049438384, - "grad_norm": 0.002408044645562768, - "learning_rate": 0.00019999986046699317, - "loss": 46.0, - "step": 3312 - }, - { - "epoch": 0.5335158420226257, - "grad_norm": 0.0004881194035988301, - "learning_rate": 0.00019999986038246617, - "loss": 46.0, - "step": 3313 - }, - { - "epoch": 0.5336768791014131, - "grad_norm": 0.0006727608852088451, - "learning_rate": 0.00019999986029791355, - "loss": 46.0, - "step": 3314 - }, - { - "epoch": 0.5338379161802005, - "grad_norm": 0.0008111980860121548, - "learning_rate": 0.00019999986021333537, - "loss": 46.0, - "step": 3315 - }, - { - "epoch": 0.5339989532589878, - "grad_norm": 0.0002524749143049121, - "learning_rate": 0.00019999986012873155, - "loss": 46.0, - "step": 3316 - }, - { - "epoch": 0.5341599903377753, - "grad_norm": 0.001971939578652382, - "learning_rate": 0.00019999986004410217, - "loss": 46.0, - "step": 3317 - }, - { - "epoch": 0.5343210274165626, - "grad_norm": 0.002227312419563532, - "learning_rate": 0.00019999985995944718, - "loss": 46.0, - "step": 3318 - }, - { - "epoch": 0.5344820644953501, - "grad_norm": 0.0003603290824685246, - "learning_rate": 0.0001999998598747666, - "loss": 46.0, - "step": 3319 - }, - { - "epoch": 0.5346431015741374, - "grad_norm": 0.0008477758965454996, - "learning_rate": 0.0001999998597900604, - "loss": 46.0, - "step": 3320 - }, - { - "epoch": 0.5348041386529249, - "grad_norm": 0.00035513032344169915, - "learning_rate": 0.00019999985970532862, - "loss": 46.0, - "step": 3321 - }, - { - "epoch": 0.5349651757317122, - "grad_norm": 0.0005001723184250295, - "learning_rate": 0.00019999985962057125, - "loss": 46.0, - "step": 3322 - }, - { - "epoch": 0.5351262128104997, - "grad_norm": 0.0004099126090295613, - "learning_rate": 0.00019999985953578827, - "loss": 46.0, - "step": 3323 - }, - { - "epoch": 0.535287249889287, - "grad_norm": 0.0016978277126327157, - "learning_rate": 0.00019999985945097973, - "loss": 46.0, - "step": 3324 - }, - { - "epoch": 0.5354482869680744, - "grad_norm": 0.0008843669784255326, - "learning_rate": 0.0001999998593661456, - "loss": 46.0, - "step": 3325 - }, - { - "epoch": 0.5356093240468618, - "grad_norm": 0.0014790837885811925, - "learning_rate": 0.00019999985928128585, - "loss": 46.0, - "step": 3326 - }, - { - "epoch": 0.5357703611256491, - "grad_norm": 0.00046818848932161927, - "learning_rate": 0.0001999998591964005, - "loss": 46.0, - "step": 3327 - }, - { - "epoch": 0.5359313982044366, - "grad_norm": 0.000849020027089864, - "learning_rate": 0.00019999985911148954, - "loss": 46.0, - "step": 3328 - }, - { - "epoch": 0.5360924352832239, - "grad_norm": 0.0014687919756397605, - "learning_rate": 0.000199999859026553, - "loss": 46.0, - "step": 3329 - }, - { - "epoch": 0.5362534723620114, - "grad_norm": 0.00035368630778975785, - "learning_rate": 0.0001999998589415909, - "loss": 46.0, - "step": 3330 - }, - { - "epoch": 0.5364145094407987, - "grad_norm": 0.0006590307457372546, - "learning_rate": 0.00019999985885660318, - "loss": 46.0, - "step": 3331 - }, - { - "epoch": 0.5365755465195862, - "grad_norm": 0.001852670218795538, - "learning_rate": 0.00019999985877158983, - "loss": 46.0, - "step": 3332 - }, - { - "epoch": 0.5367365835983735, - "grad_norm": 0.0005677458830177784, - "learning_rate": 0.00019999985868655092, - "loss": 46.0, - "step": 3333 - }, - { - "epoch": 0.536897620677161, - "grad_norm": 0.000881219923030585, - "learning_rate": 0.00019999985860148645, - "loss": 46.0, - "step": 3334 - }, - { - "epoch": 0.5370586577559483, - "grad_norm": 0.0004763822944369167, - "learning_rate": 0.00019999985851639634, - "loss": 46.0, - "step": 3335 - }, - { - "epoch": 0.5372196948347358, - "grad_norm": 0.0012226671678945422, - "learning_rate": 0.00019999985843128064, - "loss": 46.0, - "step": 3336 - }, - { - "epoch": 0.5373807319135231, - "grad_norm": 0.001961508998647332, - "learning_rate": 0.00019999985834613936, - "loss": 46.0, - "step": 3337 - }, - { - "epoch": 0.5375417689923104, - "grad_norm": 0.001269041677005589, - "learning_rate": 0.00019999985826097246, - "loss": 46.0, - "step": 3338 - }, - { - "epoch": 0.5377028060710979, - "grad_norm": 0.002586194546893239, - "learning_rate": 0.00019999985817577997, - "loss": 46.0, - "step": 3339 - }, - { - "epoch": 0.5378638431498852, - "grad_norm": 0.0011924192076548934, - "learning_rate": 0.0001999998580905619, - "loss": 46.0, - "step": 3340 - }, - { - "epoch": 0.5380248802286727, - "grad_norm": 0.0016669401666149497, - "learning_rate": 0.0001999998580053182, - "loss": 46.0, - "step": 3341 - }, - { - "epoch": 0.53818591730746, - "grad_norm": 0.0013443110510706902, - "learning_rate": 0.00019999985792004896, - "loss": 46.0, - "step": 3342 - }, - { - "epoch": 0.5383469543862475, - "grad_norm": 0.000323209329508245, - "learning_rate": 0.0001999998578347541, - "loss": 46.0, - "step": 3343 - }, - { - "epoch": 0.5385079914650348, - "grad_norm": 0.0014050803147256374, - "learning_rate": 0.00019999985774943365, - "loss": 46.0, - "step": 3344 - }, - { - "epoch": 0.5386690285438223, - "grad_norm": 0.0005120033165439963, - "learning_rate": 0.00019999985766408758, - "loss": 46.0, - "step": 3345 - }, - { - "epoch": 0.5388300656226096, - "grad_norm": 0.0016074093291535974, - "learning_rate": 0.00019999985757871596, - "loss": 46.0, - "step": 3346 - }, - { - "epoch": 0.538991102701397, - "grad_norm": 0.0015373494243249297, - "learning_rate": 0.00019999985749331872, - "loss": 46.0, - "step": 3347 - }, - { - "epoch": 0.5391521397801844, - "grad_norm": 0.0005608833744190633, - "learning_rate": 0.0001999998574078959, - "loss": 46.0, - "step": 3348 - }, - { - "epoch": 0.5393131768589717, - "grad_norm": 0.0008417388307861984, - "learning_rate": 0.00019999985732244743, - "loss": 46.0, - "step": 3349 - }, - { - "epoch": 0.5394742139377592, - "grad_norm": 0.0005155186518095434, - "learning_rate": 0.0001999998572369734, - "loss": 46.0, - "step": 3350 - }, - { - "epoch": 0.5396352510165465, - "grad_norm": 0.0020404248498380184, - "learning_rate": 0.0001999998571514738, - "loss": 46.0, - "step": 3351 - }, - { - "epoch": 0.539796288095334, - "grad_norm": 0.001458734623156488, - "learning_rate": 0.00019999985706594856, - "loss": 46.0, - "step": 3352 - }, - { - "epoch": 0.5399573251741213, - "grad_norm": 0.0004648598260246217, - "learning_rate": 0.00019999985698039777, - "loss": 46.0, - "step": 3353 - }, - { - "epoch": 0.5401183622529088, - "grad_norm": 0.00084132474148646, - "learning_rate": 0.00019999985689482137, - "loss": 46.0, - "step": 3354 - }, - { - "epoch": 0.5402793993316961, - "grad_norm": 0.0010218317620456219, - "learning_rate": 0.00019999985680921938, - "loss": 46.0, - "step": 3355 - }, - { - "epoch": 0.5404404364104836, - "grad_norm": 0.0008872864418663085, - "learning_rate": 0.00019999985672359178, - "loss": 46.0, - "step": 3356 - }, - { - "epoch": 0.5406014734892709, - "grad_norm": 0.001526973326690495, - "learning_rate": 0.00019999985663793858, - "loss": 46.0, - "step": 3357 - }, - { - "epoch": 0.5407625105680582, - "grad_norm": 0.0005437143845483661, - "learning_rate": 0.0001999998565522598, - "loss": 46.0, - "step": 3358 - }, - { - "epoch": 0.5409235476468457, - "grad_norm": 0.002817622385919094, - "learning_rate": 0.00019999985646655544, - "loss": 46.0, - "step": 3359 - }, - { - "epoch": 0.541084584725633, - "grad_norm": 0.0006917702266946435, - "learning_rate": 0.00019999985638082546, - "loss": 46.0, - "step": 3360 - }, - { - "epoch": 0.5412456218044205, - "grad_norm": 0.0003476533165667206, - "learning_rate": 0.00019999985629506987, - "loss": 46.0, - "step": 3361 - }, - { - "epoch": 0.5414066588832078, - "grad_norm": 0.0006579328910447657, - "learning_rate": 0.0001999998562092887, - "loss": 46.0, - "step": 3362 - }, - { - "epoch": 0.5415676959619953, - "grad_norm": 0.0011675585992634296, - "learning_rate": 0.00019999985612348197, - "loss": 46.0, - "step": 3363 - }, - { - "epoch": 0.5417287330407826, - "grad_norm": 0.000983578385785222, - "learning_rate": 0.00019999985603764961, - "loss": 46.0, - "step": 3364 - }, - { - "epoch": 0.5418897701195701, - "grad_norm": 0.0011690407991409302, - "learning_rate": 0.00019999985595179167, - "loss": 46.0, - "step": 3365 - }, - { - "epoch": 0.5420508071983574, - "grad_norm": 0.001205615932121873, - "learning_rate": 0.00019999985586590811, - "loss": 46.0, - "step": 3366 - }, - { - "epoch": 0.5422118442771449, - "grad_norm": 0.0004708147025667131, - "learning_rate": 0.000199999855779999, - "loss": 46.0, - "step": 3367 - }, - { - "epoch": 0.5423728813559322, - "grad_norm": 0.0011563218431547284, - "learning_rate": 0.00019999985569406426, - "loss": 46.0, - "step": 3368 - }, - { - "epoch": 0.5425339184347195, - "grad_norm": 0.0012656064936891198, - "learning_rate": 0.00019999985560810392, - "loss": 46.0, - "step": 3369 - }, - { - "epoch": 0.542694955513507, - "grad_norm": 0.0007580489618703723, - "learning_rate": 0.000199999855522118, - "loss": 46.0, - "step": 3370 - }, - { - "epoch": 0.5428559925922943, - "grad_norm": 0.0025987382978200912, - "learning_rate": 0.0001999998554361065, - "loss": 46.0, - "step": 3371 - }, - { - "epoch": 0.5430170296710818, - "grad_norm": 0.000891050323843956, - "learning_rate": 0.00019999985535006939, - "loss": 46.0, - "step": 3372 - }, - { - "epoch": 0.5431780667498691, - "grad_norm": 0.0007159209926612675, - "learning_rate": 0.0001999998552640067, - "loss": 46.0, - "step": 3373 - }, - { - "epoch": 0.5433391038286566, - "grad_norm": 0.0004322503518778831, - "learning_rate": 0.00019999985517791838, - "loss": 46.0, - "step": 3374 - }, - { - "epoch": 0.5435001409074439, - "grad_norm": 0.0007701522554270923, - "learning_rate": 0.00019999985509180449, - "loss": 46.0, - "step": 3375 - }, - { - "epoch": 0.5436611779862314, - "grad_norm": 0.0008689900278113782, - "learning_rate": 0.00019999985500566498, - "loss": 46.0, - "step": 3376 - }, - { - "epoch": 0.5438222150650187, - "grad_norm": 0.00033185360371135175, - "learning_rate": 0.0001999998549194999, - "loss": 46.0, - "step": 3377 - }, - { - "epoch": 0.5439832521438062, - "grad_norm": 0.000633304298389703, - "learning_rate": 0.00019999985483330922, - "loss": 46.0, - "step": 3378 - }, - { - "epoch": 0.5441442892225935, - "grad_norm": 0.0004575739149004221, - "learning_rate": 0.00019999985474709298, - "loss": 46.0, - "step": 3379 - }, - { - "epoch": 0.5443053263013808, - "grad_norm": 0.0004649245529435575, - "learning_rate": 0.00019999985466085106, - "loss": 46.0, - "step": 3380 - }, - { - "epoch": 0.5444663633801683, - "grad_norm": 0.000492118822876364, - "learning_rate": 0.00019999985457458364, - "loss": 46.0, - "step": 3381 - }, - { - "epoch": 0.5446274004589556, - "grad_norm": 0.0006083305343054235, - "learning_rate": 0.00019999985448829058, - "loss": 46.0, - "step": 3382 - }, - { - "epoch": 0.5447884375377431, - "grad_norm": 0.001091295969672501, - "learning_rate": 0.0001999998544019719, - "loss": 46.0, - "step": 3383 - }, - { - "epoch": 0.5449494746165304, - "grad_norm": 0.00028013280825689435, - "learning_rate": 0.00019999985431562767, - "loss": 46.0, - "step": 3384 - }, - { - "epoch": 0.5451105116953179, - "grad_norm": 0.0005511103081516922, - "learning_rate": 0.00019999985422925783, - "loss": 46.0, - "step": 3385 - }, - { - "epoch": 0.5452715487741052, - "grad_norm": 0.000784877862315625, - "learning_rate": 0.00019999985414286236, - "loss": 46.0, - "step": 3386 - }, - { - "epoch": 0.5454325858528927, - "grad_norm": 0.00091799400979653, - "learning_rate": 0.00019999985405644134, - "loss": 46.0, - "step": 3387 - }, - { - "epoch": 0.54559362293168, - "grad_norm": 0.0020278797019273043, - "learning_rate": 0.00019999985396999473, - "loss": 46.0, - "step": 3388 - }, - { - "epoch": 0.5457546600104675, - "grad_norm": 0.002359907841309905, - "learning_rate": 0.0001999998538835225, - "loss": 46.0, - "step": 3389 - }, - { - "epoch": 0.5459156970892548, - "grad_norm": 0.002854179823771119, - "learning_rate": 0.0001999998537970247, - "loss": 46.0, - "step": 3390 - }, - { - "epoch": 0.5460767341680421, - "grad_norm": 0.0015953172696754336, - "learning_rate": 0.00019999985371050127, - "loss": 46.0, - "step": 3391 - }, - { - "epoch": 0.5462377712468296, - "grad_norm": 0.0018778997473418713, - "learning_rate": 0.00019999985362395225, - "loss": 46.0, - "step": 3392 - }, - { - "epoch": 0.5463988083256169, - "grad_norm": 0.0003722886322066188, - "learning_rate": 0.00019999985353737768, - "loss": 46.0, - "step": 3393 - }, - { - "epoch": 0.5465598454044044, - "grad_norm": 0.0006690325681120157, - "learning_rate": 0.00019999985345077746, - "loss": 46.0, - "step": 3394 - }, - { - "epoch": 0.5467208824831917, - "grad_norm": 0.0020559511613100767, - "learning_rate": 0.00019999985336415166, - "loss": 46.0, - "step": 3395 - }, - { - "epoch": 0.5468819195619792, - "grad_norm": 0.00043758476385846734, - "learning_rate": 0.00019999985327750027, - "loss": 46.0, - "step": 3396 - }, - { - "epoch": 0.5470429566407665, - "grad_norm": 0.00037907183286733925, - "learning_rate": 0.0001999998531908233, - "loss": 46.0, - "step": 3397 - }, - { - "epoch": 0.547203993719554, - "grad_norm": 0.0006615728489123285, - "learning_rate": 0.00019999985310412073, - "loss": 46.0, - "step": 3398 - }, - { - "epoch": 0.5473650307983413, - "grad_norm": 0.00042850023601204157, - "learning_rate": 0.00019999985301739256, - "loss": 46.0, - "step": 3399 - }, - { - "epoch": 0.5475260678771288, - "grad_norm": 0.00036770780570805073, - "learning_rate": 0.0001999998529306388, - "loss": 46.0, - "step": 3400 - }, - { - "epoch": 0.5476871049559161, - "grad_norm": 0.0006704438710585237, - "learning_rate": 0.00019999985284385944, - "loss": 46.0, - "step": 3401 - }, - { - "epoch": 0.5478481420347034, - "grad_norm": 0.0007254494121298194, - "learning_rate": 0.00019999985275705447, - "loss": 46.0, - "step": 3402 - }, - { - "epoch": 0.5480091791134909, - "grad_norm": 0.00037542221252806485, - "learning_rate": 0.00019999985267022392, - "loss": 46.0, - "step": 3403 - }, - { - "epoch": 0.5481702161922782, - "grad_norm": 0.0014997926773503423, - "learning_rate": 0.00019999985258336778, - "loss": 46.0, - "step": 3404 - }, - { - "epoch": 0.5483312532710657, - "grad_norm": 0.0007830832619220018, - "learning_rate": 0.00019999985249648602, - "loss": 46.0, - "step": 3405 - }, - { - "epoch": 0.548492290349853, - "grad_norm": 0.000924643303733319, - "learning_rate": 0.0001999998524095787, - "loss": 46.0, - "step": 3406 - }, - { - "epoch": 0.5486533274286405, - "grad_norm": 0.000445937184849754, - "learning_rate": 0.00019999985232264575, - "loss": 46.0, - "step": 3407 - }, - { - "epoch": 0.5488143645074278, - "grad_norm": 0.0025469991378486156, - "learning_rate": 0.00019999985223568723, - "loss": 46.0, - "step": 3408 - }, - { - "epoch": 0.5489754015862153, - "grad_norm": 0.0009231626754626632, - "learning_rate": 0.0001999998521487031, - "loss": 46.0, - "step": 3409 - }, - { - "epoch": 0.5491364386650026, - "grad_norm": 0.0008242886979132891, - "learning_rate": 0.0001999998520616934, - "loss": 46.0, - "step": 3410 - }, - { - "epoch": 0.54929747574379, - "grad_norm": 0.0005769507843069732, - "learning_rate": 0.00019999985197465808, - "loss": 46.0, - "step": 3411 - }, - { - "epoch": 0.5494585128225774, - "grad_norm": 0.00034231721656396985, - "learning_rate": 0.0001999998518875972, - "loss": 46.0, - "step": 3412 - }, - { - "epoch": 0.5496195499013647, - "grad_norm": 0.00037883000914007425, - "learning_rate": 0.0001999998518005107, - "loss": 46.0, - "step": 3413 - }, - { - "epoch": 0.5497805869801522, - "grad_norm": 0.0007620594697073102, - "learning_rate": 0.00019999985171339861, - "loss": 46.0, - "step": 3414 - }, - { - "epoch": 0.5499416240589395, - "grad_norm": 0.00036850827746093273, - "learning_rate": 0.0001999998516262609, - "loss": 46.0, - "step": 3415 - }, - { - "epoch": 0.550102661137727, - "grad_norm": 0.0005711701815016568, - "learning_rate": 0.00019999985153909764, - "loss": 46.0, - "step": 3416 - }, - { - "epoch": 0.5502636982165143, - "grad_norm": 0.0005954890511929989, - "learning_rate": 0.00019999985145190876, - "loss": 46.0, - "step": 3417 - }, - { - "epoch": 0.5504247352953018, - "grad_norm": 0.0002647766668815166, - "learning_rate": 0.00019999985136469429, - "loss": 46.0, - "step": 3418 - }, - { - "epoch": 0.5505857723740891, - "grad_norm": 0.0007883899961598217, - "learning_rate": 0.00019999985127745423, - "loss": 46.0, - "step": 3419 - }, - { - "epoch": 0.5507468094528766, - "grad_norm": 0.0006732104811817408, - "learning_rate": 0.00019999985119018856, - "loss": 46.0, - "step": 3420 - }, - { - "epoch": 0.5509078465316639, - "grad_norm": 0.0014344204682856798, - "learning_rate": 0.00019999985110289727, - "loss": 46.0, - "step": 3421 - }, - { - "epoch": 0.5510688836104513, - "grad_norm": 0.0004768389044329524, - "learning_rate": 0.00019999985101558045, - "loss": 46.0, - "step": 3422 - }, - { - "epoch": 0.5512299206892387, - "grad_norm": 0.0003428664058446884, - "learning_rate": 0.000199999850928238, - "loss": 46.0, - "step": 3423 - }, - { - "epoch": 0.551390957768026, - "grad_norm": 0.0006917240680195391, - "learning_rate": 0.00019999985084086995, - "loss": 46.0, - "step": 3424 - }, - { - "epoch": 0.5515519948468135, - "grad_norm": 0.00025905275833792984, - "learning_rate": 0.00019999985075347631, - "loss": 46.0, - "step": 3425 - }, - { - "epoch": 0.5517130319256008, - "grad_norm": 0.0005896909278817475, - "learning_rate": 0.00019999985066605707, - "loss": 46.0, - "step": 3426 - }, - { - "epoch": 0.5518740690043883, - "grad_norm": 0.0004898286424577236, - "learning_rate": 0.00019999985057861226, - "loss": 46.0, - "step": 3427 - }, - { - "epoch": 0.5520351060831756, - "grad_norm": 0.0007520309882238507, - "learning_rate": 0.00019999985049114183, - "loss": 46.0, - "step": 3428 - }, - { - "epoch": 0.5521961431619631, - "grad_norm": 0.0006808222969993949, - "learning_rate": 0.00019999985040364583, - "loss": 46.0, - "step": 3429 - }, - { - "epoch": 0.5523571802407504, - "grad_norm": 0.0019085080130025744, - "learning_rate": 0.0001999998503161242, - "loss": 46.0, - "step": 3430 - }, - { - "epoch": 0.5525182173195379, - "grad_norm": 0.0005711892154067755, - "learning_rate": 0.00019999985022857702, - "loss": 46.0, - "step": 3431 - }, - { - "epoch": 0.5526792543983252, - "grad_norm": 0.00027607844094745815, - "learning_rate": 0.0001999998501410042, - "loss": 46.0, - "step": 3432 - }, - { - "epoch": 0.5528402914771126, - "grad_norm": 0.0005318095209077001, - "learning_rate": 0.0001999998500534058, - "loss": 46.0, - "step": 3433 - }, - { - "epoch": 0.5530013285559, - "grad_norm": 0.0006763770361430943, - "learning_rate": 0.0001999998499657818, - "loss": 46.0, - "step": 3434 - }, - { - "epoch": 0.5531623656346873, - "grad_norm": 0.002392539056017995, - "learning_rate": 0.00019999984987813222, - "loss": 46.0, - "step": 3435 - }, - { - "epoch": 0.5533234027134748, - "grad_norm": 0.0018446936737746, - "learning_rate": 0.00019999984979045704, - "loss": 46.0, - "step": 3436 - }, - { - "epoch": 0.5534844397922621, - "grad_norm": 0.005663339979946613, - "learning_rate": 0.00019999984970275628, - "loss": 46.0, - "step": 3437 - }, - { - "epoch": 0.5536454768710496, - "grad_norm": 0.0006960775936022401, - "learning_rate": 0.00019999984961502988, - "loss": 46.0, - "step": 3438 - }, - { - "epoch": 0.5538065139498369, - "grad_norm": 0.0007040626369416714, - "learning_rate": 0.00019999984952727794, - "loss": 46.0, - "step": 3439 - }, - { - "epoch": 0.5539675510286244, - "grad_norm": 0.000625553191639483, - "learning_rate": 0.0001999998494395004, - "loss": 46.0, - "step": 3440 - }, - { - "epoch": 0.5541285881074117, - "grad_norm": 0.0009212561999447644, - "learning_rate": 0.00019999984935169723, - "loss": 46.0, - "step": 3441 - }, - { - "epoch": 0.5542896251861992, - "grad_norm": 0.00029020055080763996, - "learning_rate": 0.00019999984926386848, - "loss": 46.0, - "step": 3442 - }, - { - "epoch": 0.5544506622649865, - "grad_norm": 0.0005106495227664709, - "learning_rate": 0.00019999984917601414, - "loss": 46.0, - "step": 3443 - }, - { - "epoch": 0.5546116993437739, - "grad_norm": 0.0007982651004567742, - "learning_rate": 0.00019999984908813421, - "loss": 46.0, - "step": 3444 - }, - { - "epoch": 0.5547727364225613, - "grad_norm": 0.0014645976480096579, - "learning_rate": 0.00019999984900022865, - "loss": 46.0, - "step": 3445 - }, - { - "epoch": 0.5549337735013486, - "grad_norm": 0.0020567302126437426, - "learning_rate": 0.00019999984891229752, - "loss": 46.0, - "step": 3446 - }, - { - "epoch": 0.5550948105801361, - "grad_norm": 0.0002986452600453049, - "learning_rate": 0.0001999998488243408, - "loss": 46.0, - "step": 3447 - }, - { - "epoch": 0.5552558476589234, - "grad_norm": 0.0004691271751653403, - "learning_rate": 0.00019999984873635848, - "loss": 46.0, - "step": 3448 - }, - { - "epoch": 0.5554168847377109, - "grad_norm": 0.0008632902172394097, - "learning_rate": 0.00019999984864835056, - "loss": 46.0, - "step": 3449 - }, - { - "epoch": 0.5555779218164982, - "grad_norm": 0.000558181491214782, - "learning_rate": 0.00019999984856031709, - "loss": 46.0, - "step": 3450 - }, - { - "epoch": 0.5557389588952857, - "grad_norm": 0.001122056506574154, - "learning_rate": 0.00019999984847225797, - "loss": 46.0, - "step": 3451 - }, - { - "epoch": 0.555899995974073, - "grad_norm": 0.0004552182799670845, - "learning_rate": 0.00019999984838417326, - "loss": 46.0, - "step": 3452 - }, - { - "epoch": 0.5560610330528605, - "grad_norm": 0.0004523425595834851, - "learning_rate": 0.00019999984829606297, - "loss": 46.0, - "step": 3453 - }, - { - "epoch": 0.5562220701316478, - "grad_norm": 0.0008734787115827203, - "learning_rate": 0.0001999998482079271, - "loss": 46.0, - "step": 3454 - }, - { - "epoch": 0.5563831072104352, - "grad_norm": 0.0007075812900438905, - "learning_rate": 0.00019999984811976563, - "loss": 46.0, - "step": 3455 - }, - { - "epoch": 0.5565441442892226, - "grad_norm": 0.0007711488287895918, - "learning_rate": 0.00019999984803157852, - "loss": 46.0, - "step": 3456 - }, - { - "epoch": 0.55670518136801, - "grad_norm": 0.0006179328775033355, - "learning_rate": 0.00019999984794336585, - "loss": 46.0, - "step": 3457 - }, - { - "epoch": 0.5568662184467974, - "grad_norm": 0.0018959949957206845, - "learning_rate": 0.0001999998478551276, - "loss": 46.0, - "step": 3458 - }, - { - "epoch": 0.5570272555255847, - "grad_norm": 0.0007022268255241215, - "learning_rate": 0.00019999984776686373, - "loss": 46.0, - "step": 3459 - }, - { - "epoch": 0.5571882926043722, - "grad_norm": 0.0006481180316768587, - "learning_rate": 0.00019999984767857427, - "loss": 46.0, - "step": 3460 - }, - { - "epoch": 0.5573493296831595, - "grad_norm": 0.0015879544662311673, - "learning_rate": 0.00019999984759025923, - "loss": 46.0, - "step": 3461 - }, - { - "epoch": 0.557510366761947, - "grad_norm": 0.0003428160853218287, - "learning_rate": 0.00019999984750191857, - "loss": 46.0, - "step": 3462 - }, - { - "epoch": 0.5576714038407343, - "grad_norm": 0.000689804379362613, - "learning_rate": 0.00019999984741355236, - "loss": 46.0, - "step": 3463 - }, - { - "epoch": 0.5578324409195217, - "grad_norm": 0.0007661819108761847, - "learning_rate": 0.0001999998473251605, - "loss": 46.0, - "step": 3464 - }, - { - "epoch": 0.5579934779983091, - "grad_norm": 0.002222961513325572, - "learning_rate": 0.00019999984723674308, - "loss": 46.0, - "step": 3465 - }, - { - "epoch": 0.5581545150770965, - "grad_norm": 0.0003630129504017532, - "learning_rate": 0.00019999984714830004, - "loss": 46.0, - "step": 3466 - }, - { - "epoch": 0.5583155521558839, - "grad_norm": 0.0004200133844278753, - "learning_rate": 0.00019999984705983142, - "loss": 46.0, - "step": 3467 - }, - { - "epoch": 0.5584765892346713, - "grad_norm": 0.0006303141708485782, - "learning_rate": 0.0001999998469713372, - "loss": 46.0, - "step": 3468 - }, - { - "epoch": 0.5586376263134587, - "grad_norm": 0.0006812093779444695, - "learning_rate": 0.00019999984688281742, - "loss": 46.0, - "step": 3469 - }, - { - "epoch": 0.558798663392246, - "grad_norm": 0.0017715313006192446, - "learning_rate": 0.00019999984679427198, - "loss": 46.0, - "step": 3470 - }, - { - "epoch": 0.5589597004710335, - "grad_norm": 0.0007096350891515613, - "learning_rate": 0.000199999846705701, - "loss": 46.0, - "step": 3471 - }, - { - "epoch": 0.5591207375498208, - "grad_norm": 0.0007101345108821988, - "learning_rate": 0.00019999984661710443, - "loss": 46.0, - "step": 3472 - }, - { - "epoch": 0.5592817746286083, - "grad_norm": 0.0002391544112470001, - "learning_rate": 0.0001999998465284822, - "loss": 46.0, - "step": 3473 - }, - { - "epoch": 0.5594428117073956, - "grad_norm": 0.0005278324242681265, - "learning_rate": 0.00019999984643983445, - "loss": 46.0, - "step": 3474 - }, - { - "epoch": 0.559603848786183, - "grad_norm": 0.0004809069214388728, - "learning_rate": 0.00019999984635116108, - "loss": 46.0, - "step": 3475 - }, - { - "epoch": 0.5597648858649704, - "grad_norm": 0.001162560423836112, - "learning_rate": 0.0001999998462624621, - "loss": 46.0, - "step": 3476 - }, - { - "epoch": 0.5599259229437578, - "grad_norm": 0.0012873654486611485, - "learning_rate": 0.00019999984617373752, - "loss": 46.0, - "step": 3477 - }, - { - "epoch": 0.5600869600225452, - "grad_norm": 0.00207755109295249, - "learning_rate": 0.00019999984608498736, - "loss": 46.0, - "step": 3478 - }, - { - "epoch": 0.5602479971013326, - "grad_norm": 0.000612785283010453, - "learning_rate": 0.0001999998459962116, - "loss": 46.0, - "step": 3479 - }, - { - "epoch": 0.56040903418012, - "grad_norm": 0.0010419205063953996, - "learning_rate": 0.00019999984590741025, - "loss": 46.0, - "step": 3480 - }, - { - "epoch": 0.5605700712589073, - "grad_norm": 0.0006163626676425338, - "learning_rate": 0.0001999998458185833, - "loss": 46.0, - "step": 3481 - }, - { - "epoch": 0.5607311083376948, - "grad_norm": 0.0008884693379513919, - "learning_rate": 0.00019999984572973073, - "loss": 46.0, - "step": 3482 - }, - { - "epoch": 0.5608921454164821, - "grad_norm": 0.0009190840064547956, - "learning_rate": 0.0001999998456408526, - "loss": 46.0, - "step": 3483 - }, - { - "epoch": 0.5610531824952696, - "grad_norm": 0.0005326207028701901, - "learning_rate": 0.00019999984555194887, - "loss": 46.0, - "step": 3484 - }, - { - "epoch": 0.5612142195740569, - "grad_norm": 0.00141628657002002, - "learning_rate": 0.00019999984546301955, - "loss": 46.0, - "step": 3485 - }, - { - "epoch": 0.5613752566528443, - "grad_norm": 0.0010659357067197561, - "learning_rate": 0.0001999998453740646, - "loss": 46.0, - "step": 3486 - }, - { - "epoch": 0.5615362937316317, - "grad_norm": 0.0016936389729380608, - "learning_rate": 0.0001999998452850841, - "loss": 46.0, - "step": 3487 - }, - { - "epoch": 0.5616973308104191, - "grad_norm": 0.0022782955784350634, - "learning_rate": 0.00019999984519607802, - "loss": 46.0, - "step": 3488 - }, - { - "epoch": 0.5618583678892065, - "grad_norm": 0.0005731368437409401, - "learning_rate": 0.0001999998451070463, - "loss": 46.0, - "step": 3489 - }, - { - "epoch": 0.5620194049679939, - "grad_norm": 0.00043743624701164663, - "learning_rate": 0.000199999845017989, - "loss": 46.0, - "step": 3490 - }, - { - "epoch": 0.5621804420467813, - "grad_norm": 0.000992362853139639, - "learning_rate": 0.0001999998449289061, - "loss": 46.0, - "step": 3491 - }, - { - "epoch": 0.5623414791255686, - "grad_norm": 0.00025123724481090903, - "learning_rate": 0.00019999984483979761, - "loss": 46.0, - "step": 3492 - }, - { - "epoch": 0.5625025162043561, - "grad_norm": 0.00043814961099997163, - "learning_rate": 0.00019999984475066354, - "loss": 46.0, - "step": 3493 - }, - { - "epoch": 0.5626635532831434, - "grad_norm": 0.0011627791682258248, - "learning_rate": 0.00019999984466150385, - "loss": 46.0, - "step": 3494 - }, - { - "epoch": 0.5628245903619309, - "grad_norm": 0.0005311044515110552, - "learning_rate": 0.00019999984457231857, - "loss": 46.0, - "step": 3495 - }, - { - "epoch": 0.5629856274407182, - "grad_norm": 0.0007569093722850084, - "learning_rate": 0.0001999998444831077, - "loss": 46.0, - "step": 3496 - }, - { - "epoch": 0.5631466645195056, - "grad_norm": 0.0007590854656882584, - "learning_rate": 0.00019999984439387122, - "loss": 46.0, - "step": 3497 - }, - { - "epoch": 0.563307701598293, - "grad_norm": 0.00037324574077501893, - "learning_rate": 0.00019999984430460918, - "loss": 46.0, - "step": 3498 - }, - { - "epoch": 0.5634687386770804, - "grad_norm": 0.0006846066098660231, - "learning_rate": 0.00019999984421532153, - "loss": 46.0, - "step": 3499 - }, - { - "epoch": 0.5636297757558678, - "grad_norm": 0.0003338441310916096, - "learning_rate": 0.00019999984412600826, - "loss": 46.0, - "step": 3500 - }, - { - "epoch": 0.5637908128346552, - "grad_norm": 0.0008743259240873158, - "learning_rate": 0.00019999984403666943, - "loss": 46.0, - "step": 3501 - }, - { - "epoch": 0.5639518499134426, - "grad_norm": 0.0003305981808807701, - "learning_rate": 0.000199999843947305, - "loss": 46.0, - "step": 3502 - }, - { - "epoch": 0.56411288699223, - "grad_norm": 0.002117388416081667, - "learning_rate": 0.00019999984385791496, - "loss": 46.0, - "step": 3503 - }, - { - "epoch": 0.5642739240710174, - "grad_norm": 0.00046136858873069286, - "learning_rate": 0.00019999984376849935, - "loss": 46.0, - "step": 3504 - }, - { - "epoch": 0.5644349611498047, - "grad_norm": 0.0002929664624389261, - "learning_rate": 0.00019999984367905811, - "loss": 46.0, - "step": 3505 - }, - { - "epoch": 0.5645959982285922, - "grad_norm": 0.000627997680567205, - "learning_rate": 0.0001999998435895913, - "loss": 46.0, - "step": 3506 - }, - { - "epoch": 0.5647570353073795, - "grad_norm": 0.0005872087203897536, - "learning_rate": 0.0001999998435000989, - "loss": 46.0, - "step": 3507 - }, - { - "epoch": 0.5649180723861669, - "grad_norm": 0.0006196696776896715, - "learning_rate": 0.00019999984341058087, - "loss": 46.0, - "step": 3508 - }, - { - "epoch": 0.5650791094649543, - "grad_norm": 0.0008690227405168116, - "learning_rate": 0.00019999984332103726, - "loss": 46.0, - "step": 3509 - }, - { - "epoch": 0.5652401465437417, - "grad_norm": 0.0003540632897056639, - "learning_rate": 0.00019999984323146807, - "loss": 46.0, - "step": 3510 - }, - { - "epoch": 0.5654011836225291, - "grad_norm": 0.0020956171210855246, - "learning_rate": 0.0001999998431418733, - "loss": 46.0, - "step": 3511 - }, - { - "epoch": 0.5655622207013165, - "grad_norm": 0.0014627899508923292, - "learning_rate": 0.0001999998430522529, - "loss": 46.0, - "step": 3512 - }, - { - "epoch": 0.5657232577801039, - "grad_norm": 0.0012361931148916483, - "learning_rate": 0.0001999998429626069, - "loss": 46.0, - "step": 3513 - }, - { - "epoch": 0.5658842948588912, - "grad_norm": 0.0008946381858550012, - "learning_rate": 0.00019999984287293531, - "loss": 46.0, - "step": 3514 - }, - { - "epoch": 0.5660453319376787, - "grad_norm": 0.000591730757150799, - "learning_rate": 0.00019999984278323816, - "loss": 46.0, - "step": 3515 - }, - { - "epoch": 0.566206369016466, - "grad_norm": 0.00047976497444324195, - "learning_rate": 0.00019999984269351539, - "loss": 46.0, - "step": 3516 - }, - { - "epoch": 0.5663674060952534, - "grad_norm": 0.000979891512542963, - "learning_rate": 0.00019999984260376703, - "loss": 46.0, - "step": 3517 - }, - { - "epoch": 0.5665284431740408, - "grad_norm": 0.0014252635883167386, - "learning_rate": 0.00019999984251399305, - "loss": 46.0, - "step": 3518 - }, - { - "epoch": 0.5666894802528282, - "grad_norm": 0.0005568196647800505, - "learning_rate": 0.00019999984242419352, - "loss": 46.0, - "step": 3519 - }, - { - "epoch": 0.5668505173316156, - "grad_norm": 0.00042711684363894165, - "learning_rate": 0.0001999998423343684, - "loss": 46.0, - "step": 3520 - }, - { - "epoch": 0.567011554410403, - "grad_norm": 0.0013744635507464409, - "learning_rate": 0.00019999984224451764, - "loss": 46.0, - "step": 3521 - }, - { - "epoch": 0.5671725914891904, - "grad_norm": 0.00048566493205726147, - "learning_rate": 0.00019999984215464132, - "loss": 46.0, - "step": 3522 - }, - { - "epoch": 0.5673336285679778, - "grad_norm": 0.0003043602337129414, - "learning_rate": 0.00019999984206473936, - "loss": 46.0, - "step": 3523 - }, - { - "epoch": 0.5674946656467652, - "grad_norm": 0.004065882880240679, - "learning_rate": 0.00019999984197481186, - "loss": 46.0, - "step": 3524 - }, - { - "epoch": 0.5676557027255525, - "grad_norm": 0.0005343631491996348, - "learning_rate": 0.00019999984188485872, - "loss": 46.0, - "step": 3525 - }, - { - "epoch": 0.56781673980434, - "grad_norm": 0.0004673006769735366, - "learning_rate": 0.00019999984179488003, - "loss": 46.0, - "step": 3526 - }, - { - "epoch": 0.5679777768831273, - "grad_norm": 0.0004798337467946112, - "learning_rate": 0.00019999984170487571, - "loss": 46.0, - "step": 3527 - }, - { - "epoch": 0.5681388139619147, - "grad_norm": 0.00035520331584848464, - "learning_rate": 0.00019999984161484582, - "loss": 46.0, - "step": 3528 - }, - { - "epoch": 0.5682998510407021, - "grad_norm": 0.0007749026408419013, - "learning_rate": 0.0001999998415247903, - "loss": 46.0, - "step": 3529 - }, - { - "epoch": 0.5684608881194895, - "grad_norm": 0.0017666397616267204, - "learning_rate": 0.0001999998414347092, - "loss": 46.0, - "step": 3530 - }, - { - "epoch": 0.5686219251982769, - "grad_norm": 0.002026507630944252, - "learning_rate": 0.00019999984134460251, - "loss": 46.0, - "step": 3531 - }, - { - "epoch": 0.5687829622770643, - "grad_norm": 0.0007767747738398612, - "learning_rate": 0.0001999998412544702, - "loss": 46.0, - "step": 3532 - }, - { - "epoch": 0.5689439993558517, - "grad_norm": 0.002654020907357335, - "learning_rate": 0.00019999984116431235, - "loss": 46.0, - "step": 3533 - }, - { - "epoch": 0.5691050364346391, - "grad_norm": 0.0006586925592273474, - "learning_rate": 0.00019999984107412888, - "loss": 46.0, - "step": 3534 - }, - { - "epoch": 0.5692660735134265, - "grad_norm": 0.0027254067827016115, - "learning_rate": 0.0001999998409839198, - "loss": 46.0, - "step": 3535 - }, - { - "epoch": 0.5694271105922138, - "grad_norm": 0.0005620275624096394, - "learning_rate": 0.00019999984089368516, - "loss": 46.0, - "step": 3536 - }, - { - "epoch": 0.5695881476710013, - "grad_norm": 0.000472288578748703, - "learning_rate": 0.0001999998408034249, - "loss": 46.0, - "step": 3537 - }, - { - "epoch": 0.5697491847497886, - "grad_norm": 0.0004454654990695417, - "learning_rate": 0.00019999984071313905, - "loss": 46.0, - "step": 3538 - }, - { - "epoch": 0.569910221828576, - "grad_norm": 0.0006553643615916371, - "learning_rate": 0.0001999998406228276, - "loss": 46.0, - "step": 3539 - }, - { - "epoch": 0.5700712589073634, - "grad_norm": 0.0007052412838675082, - "learning_rate": 0.00019999984053249055, - "loss": 46.0, - "step": 3540 - }, - { - "epoch": 0.5702322959861508, - "grad_norm": 0.0002986126928590238, - "learning_rate": 0.00019999984044212791, - "loss": 46.0, - "step": 3541 - }, - { - "epoch": 0.5703933330649382, - "grad_norm": 0.0007034339942038059, - "learning_rate": 0.00019999984035173969, - "loss": 46.0, - "step": 3542 - }, - { - "epoch": 0.5705543701437256, - "grad_norm": 0.0005461368709802628, - "learning_rate": 0.00019999984026132584, - "loss": 46.0, - "step": 3543 - }, - { - "epoch": 0.570715407222513, - "grad_norm": 0.0002621538587845862, - "learning_rate": 0.00019999984017088642, - "loss": 46.0, - "step": 3544 - }, - { - "epoch": 0.5708764443013004, - "grad_norm": 0.00048391424934379756, - "learning_rate": 0.0001999998400804214, - "loss": 46.0, - "step": 3545 - }, - { - "epoch": 0.5710374813800878, - "grad_norm": 0.0005650295643135905, - "learning_rate": 0.00019999983998993082, - "loss": 46.0, - "step": 3546 - }, - { - "epoch": 0.5711985184588751, - "grad_norm": 0.0007414202555082738, - "learning_rate": 0.0001999998398994146, - "loss": 46.0, - "step": 3547 - }, - { - "epoch": 0.5713595555376626, - "grad_norm": 0.0006256731576286256, - "learning_rate": 0.0001999998398088728, - "loss": 46.0, - "step": 3548 - }, - { - "epoch": 0.5715205926164499, - "grad_norm": 0.0012972938129678369, - "learning_rate": 0.0001999998397183054, - "loss": 46.0, - "step": 3549 - }, - { - "epoch": 0.5716816296952373, - "grad_norm": 0.00039018644019961357, - "learning_rate": 0.00019999983962771243, - "loss": 46.0, - "step": 3550 - }, - { - "epoch": 0.5718426667740247, - "grad_norm": 0.0012930320808663964, - "learning_rate": 0.00019999983953709384, - "loss": 46.0, - "step": 3551 - }, - { - "epoch": 0.5720037038528121, - "grad_norm": 0.0007005230290815234, - "learning_rate": 0.00019999983944644966, - "loss": 46.0, - "step": 3552 - }, - { - "epoch": 0.5721647409315995, - "grad_norm": 0.0004933098680339754, - "learning_rate": 0.00019999983935577986, - "loss": 46.0, - "step": 3553 - }, - { - "epoch": 0.5723257780103869, - "grad_norm": 0.0012687075650319457, - "learning_rate": 0.0001999998392650845, - "loss": 46.0, - "step": 3554 - }, - { - "epoch": 0.5724868150891743, - "grad_norm": 0.00034164986573159695, - "learning_rate": 0.00019999983917436354, - "loss": 46.0, - "step": 3555 - }, - { - "epoch": 0.5726478521679617, - "grad_norm": 0.0007510171853937209, - "learning_rate": 0.00019999983908361698, - "loss": 46.0, - "step": 3556 - }, - { - "epoch": 0.5728088892467491, - "grad_norm": 0.0008446076535619795, - "learning_rate": 0.0001999998389928448, - "loss": 46.0, - "step": 3557 - }, - { - "epoch": 0.5729699263255364, - "grad_norm": 0.0005185237969271839, - "learning_rate": 0.00019999983890204708, - "loss": 46.0, - "step": 3558 - }, - { - "epoch": 0.5731309634043239, - "grad_norm": 0.0003346470766700804, - "learning_rate": 0.00019999983881122373, - "loss": 46.0, - "step": 3559 - }, - { - "epoch": 0.5732920004831112, - "grad_norm": 0.00042650633258745074, - "learning_rate": 0.0001999998387203748, - "loss": 46.0, - "step": 3560 - }, - { - "epoch": 0.5734530375618986, - "grad_norm": 0.0009748564916662872, - "learning_rate": 0.00019999983862950026, - "loss": 46.0, - "step": 3561 - }, - { - "epoch": 0.573614074640686, - "grad_norm": 0.002157721435651183, - "learning_rate": 0.00019999983853860012, - "loss": 46.0, - "step": 3562 - }, - { - "epoch": 0.5737751117194734, - "grad_norm": 0.0015437881229445338, - "learning_rate": 0.0001999998384476744, - "loss": 46.0, - "step": 3563 - }, - { - "epoch": 0.5739361487982608, - "grad_norm": 0.0020530105102807283, - "learning_rate": 0.0001999998383567231, - "loss": 46.0, - "step": 3564 - }, - { - "epoch": 0.5740971858770482, - "grad_norm": 0.0010107549605891109, - "learning_rate": 0.00019999983826574617, - "loss": 46.0, - "step": 3565 - }, - { - "epoch": 0.5742582229558356, - "grad_norm": 0.001300931558944285, - "learning_rate": 0.00019999983817474366, - "loss": 46.0, - "step": 3566 - }, - { - "epoch": 0.574419260034623, - "grad_norm": 0.001504823681898415, - "learning_rate": 0.00019999983808371557, - "loss": 46.0, - "step": 3567 - }, - { - "epoch": 0.5745802971134104, - "grad_norm": 0.00021979343728162348, - "learning_rate": 0.00019999983799266186, - "loss": 46.0, - "step": 3568 - }, - { - "epoch": 0.5747413341921978, - "grad_norm": 0.0015059680445119739, - "learning_rate": 0.00019999983790158256, - "loss": 46.0, - "step": 3569 - }, - { - "epoch": 0.5749023712709851, - "grad_norm": 0.00039003693382255733, - "learning_rate": 0.0001999998378104777, - "loss": 46.0, - "step": 3570 - }, - { - "epoch": 0.5750634083497725, - "grad_norm": 0.0007288960041478276, - "learning_rate": 0.0001999998377193472, - "loss": 46.0, - "step": 3571 - }, - { - "epoch": 0.5752244454285599, - "grad_norm": 0.0005308515974320471, - "learning_rate": 0.0001999998376281911, - "loss": 46.0, - "step": 3572 - }, - { - "epoch": 0.5753854825073473, - "grad_norm": 0.002361439634114504, - "learning_rate": 0.00019999983753700947, - "loss": 46.0, - "step": 3573 - }, - { - "epoch": 0.5755465195861347, - "grad_norm": 0.0008066802402026951, - "learning_rate": 0.0001999998374458022, - "loss": 46.0, - "step": 3574 - }, - { - "epoch": 0.5757075566649221, - "grad_norm": 0.0005559417768381536, - "learning_rate": 0.00019999983735456933, - "loss": 46.0, - "step": 3575 - }, - { - "epoch": 0.5758685937437095, - "grad_norm": 0.0020220763981342316, - "learning_rate": 0.00019999983726331087, - "loss": 46.0, - "step": 3576 - }, - { - "epoch": 0.5760296308224969, - "grad_norm": 0.0006484172772616148, - "learning_rate": 0.00019999983717202682, - "loss": 46.0, - "step": 3577 - }, - { - "epoch": 0.5761906679012843, - "grad_norm": 0.0004538370994850993, - "learning_rate": 0.00019999983708071718, - "loss": 46.0, - "step": 3578 - }, - { - "epoch": 0.5763517049800717, - "grad_norm": 0.00043682201066985726, - "learning_rate": 0.00019999983698938193, - "loss": 46.0, - "step": 3579 - }, - { - "epoch": 0.576512742058859, - "grad_norm": 0.0015078972792252898, - "learning_rate": 0.00019999983689802112, - "loss": 46.0, - "step": 3580 - }, - { - "epoch": 0.5766737791376464, - "grad_norm": 0.0007493096636608243, - "learning_rate": 0.00019999983680663467, - "loss": 46.0, - "step": 3581 - }, - { - "epoch": 0.5768348162164338, - "grad_norm": 0.0013236848171800375, - "learning_rate": 0.00019999983671522265, - "loss": 46.0, - "step": 3582 - }, - { - "epoch": 0.5769958532952212, - "grad_norm": 0.001121731474995613, - "learning_rate": 0.00019999983662378503, - "loss": 46.0, - "step": 3583 - }, - { - "epoch": 0.5771568903740086, - "grad_norm": 0.0005497890524566174, - "learning_rate": 0.00019999983653232181, - "loss": 46.0, - "step": 3584 - }, - { - "epoch": 0.577317927452796, - "grad_norm": 0.0013116998597979546, - "learning_rate": 0.000199999836440833, - "loss": 46.0, - "step": 3585 - }, - { - "epoch": 0.5774789645315834, - "grad_norm": 0.0018959867302328348, - "learning_rate": 0.0001999998363493186, - "loss": 46.0, - "step": 3586 - }, - { - "epoch": 0.5776400016103708, - "grad_norm": 0.0016209406312555075, - "learning_rate": 0.0001999998362577786, - "loss": 46.0, - "step": 3587 - }, - { - "epoch": 0.5778010386891582, - "grad_norm": 0.0005310988635756075, - "learning_rate": 0.000199999836166213, - "loss": 46.0, - "step": 3588 - }, - { - "epoch": 0.5779620757679456, - "grad_norm": 0.0009252220625057817, - "learning_rate": 0.0001999998360746218, - "loss": 46.0, - "step": 3589 - }, - { - "epoch": 0.578123112846733, - "grad_norm": 0.001061987248249352, - "learning_rate": 0.00019999983598300504, - "loss": 46.0, - "step": 3590 - }, - { - "epoch": 0.5782841499255204, - "grad_norm": 0.0008281744085252285, - "learning_rate": 0.00019999983589136266, - "loss": 46.0, - "step": 3591 - }, - { - "epoch": 0.5784451870043077, - "grad_norm": 0.000869329203851521, - "learning_rate": 0.00019999983579969467, - "loss": 46.0, - "step": 3592 - }, - { - "epoch": 0.5786062240830951, - "grad_norm": 0.0005601634038612247, - "learning_rate": 0.0001999998357080011, - "loss": 46.0, - "step": 3593 - }, - { - "epoch": 0.5787672611618825, - "grad_norm": 0.00048407120630145073, - "learning_rate": 0.00019999983561628195, - "loss": 46.0, - "step": 3594 - }, - { - "epoch": 0.5789282982406699, - "grad_norm": 0.0009586061351001263, - "learning_rate": 0.0001999998355245372, - "loss": 46.0, - "step": 3595 - }, - { - "epoch": 0.5790893353194573, - "grad_norm": 0.0009653475135564804, - "learning_rate": 0.00019999983543276682, - "loss": 46.0, - "step": 3596 - }, - { - "epoch": 0.5792503723982447, - "grad_norm": 0.00044590633478946984, - "learning_rate": 0.0001999998353409709, - "loss": 46.0, - "step": 3597 - }, - { - "epoch": 0.5794114094770321, - "grad_norm": 0.0006686762790195644, - "learning_rate": 0.00019999983524914935, - "loss": 46.0, - "step": 3598 - }, - { - "epoch": 0.5795724465558195, - "grad_norm": 0.001065866556018591, - "learning_rate": 0.0001999998351573022, - "loss": 46.0, - "step": 3599 - }, - { - "epoch": 0.5797334836346069, - "grad_norm": 0.00033228585380129516, - "learning_rate": 0.00019999983506542948, - "loss": 46.0, - "step": 3600 - }, - { - "epoch": 0.5798945207133943, - "grad_norm": 0.0004213275678921491, - "learning_rate": 0.00019999983497353117, - "loss": 46.0, - "step": 3601 - }, - { - "epoch": 0.5800555577921817, - "grad_norm": 0.00023344768851529807, - "learning_rate": 0.00019999983488160723, - "loss": 46.0, - "step": 3602 - }, - { - "epoch": 0.580216594870969, - "grad_norm": 0.0004481116484384984, - "learning_rate": 0.00019999983478965772, - "loss": 46.0, - "step": 3603 - }, - { - "epoch": 0.5803776319497564, - "grad_norm": 0.0009275152697227895, - "learning_rate": 0.00019999983469768263, - "loss": 46.0, - "step": 3604 - }, - { - "epoch": 0.5805386690285438, - "grad_norm": 0.0010258641559630632, - "learning_rate": 0.0001999998346056819, - "loss": 46.0, - "step": 3605 - }, - { - "epoch": 0.5806997061073312, - "grad_norm": 0.0012958927545696497, - "learning_rate": 0.0001999998345136556, - "loss": 46.0, - "step": 3606 - }, - { - "epoch": 0.5808607431861186, - "grad_norm": 0.0005233039846643806, - "learning_rate": 0.0001999998344216037, - "loss": 46.0, - "step": 3607 - }, - { - "epoch": 0.581021780264906, - "grad_norm": 0.0007316289120353758, - "learning_rate": 0.00019999983432952622, - "loss": 46.0, - "step": 3608 - }, - { - "epoch": 0.5811828173436934, - "grad_norm": 0.00045096175745129585, - "learning_rate": 0.0001999998342374231, - "loss": 46.0, - "step": 3609 - }, - { - "epoch": 0.5813438544224808, - "grad_norm": 0.0005383588722907007, - "learning_rate": 0.00019999983414529444, - "loss": 46.0, - "step": 3610 - }, - { - "epoch": 0.5815048915012682, - "grad_norm": 0.0005897500668652356, - "learning_rate": 0.00019999983405314016, - "loss": 46.0, - "step": 3611 - }, - { - "epoch": 0.5816659285800555, - "grad_norm": 0.00036973372334614396, - "learning_rate": 0.0001999998339609603, - "loss": 46.0, - "step": 3612 - }, - { - "epoch": 0.581826965658843, - "grad_norm": 0.001100813620723784, - "learning_rate": 0.00019999983386875486, - "loss": 46.0, - "step": 3613 - }, - { - "epoch": 0.5819880027376303, - "grad_norm": 0.000461019721115008, - "learning_rate": 0.00019999983377652378, - "loss": 46.0, - "step": 3614 - }, - { - "epoch": 0.5821490398164177, - "grad_norm": 0.0005715559236705303, - "learning_rate": 0.00019999983368426712, - "loss": 46.0, - "step": 3615 - }, - { - "epoch": 0.5823100768952051, - "grad_norm": 0.001074679777957499, - "learning_rate": 0.00019999983359198488, - "loss": 46.0, - "step": 3616 - }, - { - "epoch": 0.5824711139739925, - "grad_norm": 0.0018154182471334934, - "learning_rate": 0.000199999833499677, - "loss": 46.0, - "step": 3617 - }, - { - "epoch": 0.5826321510527799, - "grad_norm": 0.0009163039503619075, - "learning_rate": 0.0001999998334073436, - "loss": 46.0, - "step": 3618 - }, - { - "epoch": 0.5827931881315673, - "grad_norm": 0.001382135204039514, - "learning_rate": 0.00019999983331498456, - "loss": 46.0, - "step": 3619 - }, - { - "epoch": 0.5829542252103547, - "grad_norm": 0.0013830214738845825, - "learning_rate": 0.00019999983322259993, - "loss": 46.0, - "step": 3620 - }, - { - "epoch": 0.5831152622891421, - "grad_norm": 0.0014277211157605052, - "learning_rate": 0.0001999998331301897, - "loss": 46.0, - "step": 3621 - }, - { - "epoch": 0.5832762993679295, - "grad_norm": 0.000550763972569257, - "learning_rate": 0.0001999998330377539, - "loss": 46.0, - "step": 3622 - }, - { - "epoch": 0.5834373364467168, - "grad_norm": 0.0009363066637888551, - "learning_rate": 0.00019999983294529249, - "loss": 46.0, - "step": 3623 - }, - { - "epoch": 0.5835983735255043, - "grad_norm": 0.0004841428017243743, - "learning_rate": 0.0001999998328528055, - "loss": 46.0, - "step": 3624 - }, - { - "epoch": 0.5837594106042916, - "grad_norm": 0.00046791156637482345, - "learning_rate": 0.00019999983276029287, - "loss": 46.0, - "step": 3625 - }, - { - "epoch": 0.583920447683079, - "grad_norm": 0.0010113624157384038, - "learning_rate": 0.00019999983266775467, - "loss": 46.0, - "step": 3626 - }, - { - "epoch": 0.5840814847618664, - "grad_norm": 0.0011995943496003747, - "learning_rate": 0.00019999983257519089, - "loss": 46.0, - "step": 3627 - }, - { - "epoch": 0.5842425218406538, - "grad_norm": 0.00030865235021337867, - "learning_rate": 0.00019999983248260148, - "loss": 46.0, - "step": 3628 - }, - { - "epoch": 0.5844035589194412, - "grad_norm": 0.002150277839973569, - "learning_rate": 0.0001999998323899865, - "loss": 46.0, - "step": 3629 - }, - { - "epoch": 0.5845645959982286, - "grad_norm": 0.0004402558843139559, - "learning_rate": 0.00019999983229734595, - "loss": 46.0, - "step": 3630 - }, - { - "epoch": 0.584725633077016, - "grad_norm": 0.0005481082480400801, - "learning_rate": 0.00019999983220467976, - "loss": 46.0, - "step": 3631 - }, - { - "epoch": 0.5848866701558034, - "grad_norm": 0.0013047531247138977, - "learning_rate": 0.00019999983211198798, - "loss": 46.0, - "step": 3632 - }, - { - "epoch": 0.5850477072345908, - "grad_norm": 0.0029339916072785854, - "learning_rate": 0.00019999983201927064, - "loss": 46.0, - "step": 3633 - }, - { - "epoch": 0.5852087443133781, - "grad_norm": 0.000449870916781947, - "learning_rate": 0.00019999983192652766, - "loss": 46.0, - "step": 3634 - }, - { - "epoch": 0.5853697813921656, - "grad_norm": 0.00034026397042907774, - "learning_rate": 0.00019999983183375912, - "loss": 46.0, - "step": 3635 - }, - { - "epoch": 0.5855308184709529, - "grad_norm": 0.0006399744306690991, - "learning_rate": 0.00019999983174096497, - "loss": 46.0, - "step": 3636 - }, - { - "epoch": 0.5856918555497403, - "grad_norm": 0.0007925445679575205, - "learning_rate": 0.00019999983164814525, - "loss": 46.0, - "step": 3637 - }, - { - "epoch": 0.5858528926285277, - "grad_norm": 0.00047588645247742534, - "learning_rate": 0.0001999998315552999, - "loss": 46.0, - "step": 3638 - }, - { - "epoch": 0.5860139297073151, - "grad_norm": 0.000949476205278188, - "learning_rate": 0.00019999983146242896, - "loss": 46.0, - "step": 3639 - }, - { - "epoch": 0.5861749667861025, - "grad_norm": 0.0011420522350817919, - "learning_rate": 0.00019999983136953246, - "loss": 46.0, - "step": 3640 - }, - { - "epoch": 0.5863360038648899, - "grad_norm": 0.0017230648081749678, - "learning_rate": 0.00019999983127661034, - "loss": 46.0, - "step": 3641 - }, - { - "epoch": 0.5864970409436773, - "grad_norm": 0.0022951019927859306, - "learning_rate": 0.0001999998311836626, - "loss": 46.0, - "step": 3642 - }, - { - "epoch": 0.5866580780224647, - "grad_norm": 0.0002443125413265079, - "learning_rate": 0.0001999998310906893, - "loss": 46.0, - "step": 3643 - }, - { - "epoch": 0.5868191151012521, - "grad_norm": 0.000896506302524358, - "learning_rate": 0.00019999983099769039, - "loss": 46.0, - "step": 3644 - }, - { - "epoch": 0.5869801521800394, - "grad_norm": 0.0013104716781526804, - "learning_rate": 0.0001999998309046659, - "loss": 46.0, - "step": 3645 - }, - { - "epoch": 0.5871411892588269, - "grad_norm": 0.00042626337381079793, - "learning_rate": 0.00019999983081161581, - "loss": 46.0, - "step": 3646 - }, - { - "epoch": 0.5873022263376142, - "grad_norm": 0.0006456349510699511, - "learning_rate": 0.00019999983071854012, - "loss": 46.0, - "step": 3647 - }, - { - "epoch": 0.5874632634164016, - "grad_norm": 0.0021495043765753508, - "learning_rate": 0.00019999983062543884, - "loss": 46.0, - "step": 3648 - }, - { - "epoch": 0.587624300495189, - "grad_norm": 0.0008493894129060209, - "learning_rate": 0.00019999983053231194, - "loss": 46.0, - "step": 3649 - }, - { - "epoch": 0.5877853375739764, - "grad_norm": 0.0003227617416996509, - "learning_rate": 0.0001999998304391595, - "loss": 46.0, - "step": 3650 - }, - { - "epoch": 0.5879463746527638, - "grad_norm": 0.000657593656796962, - "learning_rate": 0.0001999998303459814, - "loss": 46.0, - "step": 3651 - }, - { - "epoch": 0.5881074117315512, - "grad_norm": 0.0006709296721965075, - "learning_rate": 0.00019999983025277776, - "loss": 46.0, - "step": 3652 - }, - { - "epoch": 0.5882684488103386, - "grad_norm": 0.001672015176154673, - "learning_rate": 0.0001999998301595485, - "loss": 46.0, - "step": 3653 - }, - { - "epoch": 0.588429485889126, - "grad_norm": 0.0018978442531079054, - "learning_rate": 0.00019999983006629363, - "loss": 46.0, - "step": 3654 - }, - { - "epoch": 0.5885905229679134, - "grad_norm": 0.0015170890837907791, - "learning_rate": 0.0001999998299730132, - "loss": 46.0, - "step": 3655 - }, - { - "epoch": 0.5887515600467007, - "grad_norm": 0.001059291884303093, - "learning_rate": 0.00019999982987970713, - "loss": 46.0, - "step": 3656 - }, - { - "epoch": 0.5889125971254882, - "grad_norm": 0.0006420931313186884, - "learning_rate": 0.00019999982978637554, - "loss": 46.0, - "step": 3657 - }, - { - "epoch": 0.5890736342042755, - "grad_norm": 0.003045084420591593, - "learning_rate": 0.0001999998296930183, - "loss": 46.0, - "step": 3658 - }, - { - "epoch": 0.589234671283063, - "grad_norm": 0.0008750835550017655, - "learning_rate": 0.00019999982959963546, - "loss": 46.0, - "step": 3659 - }, - { - "epoch": 0.5893957083618503, - "grad_norm": 0.0018582548946142197, - "learning_rate": 0.00019999982950622705, - "loss": 46.0, - "step": 3660 - }, - { - "epoch": 0.5895567454406377, - "grad_norm": 0.0004779713926836848, - "learning_rate": 0.00019999982941279302, - "loss": 46.0, - "step": 3661 - }, - { - "epoch": 0.5897177825194251, - "grad_norm": 0.0008654078119434416, - "learning_rate": 0.00019999982931933342, - "loss": 46.0, - "step": 3662 - }, - { - "epoch": 0.5898788195982125, - "grad_norm": 0.002062864601612091, - "learning_rate": 0.00019999982922584822, - "loss": 46.0, - "step": 3663 - }, - { - "epoch": 0.5900398566769999, - "grad_norm": 0.0011476227082312107, - "learning_rate": 0.0001999998291323374, - "loss": 46.0, - "step": 3664 - }, - { - "epoch": 0.5902008937557872, - "grad_norm": 0.0005998503183946013, - "learning_rate": 0.00019999982903880098, - "loss": 46.0, - "step": 3665 - }, - { - "epoch": 0.5903619308345747, - "grad_norm": 0.000645753345452249, - "learning_rate": 0.000199999828945239, - "loss": 46.0, - "step": 3666 - }, - { - "epoch": 0.590522967913362, - "grad_norm": 0.0010744401952251792, - "learning_rate": 0.00019999982885165142, - "loss": 46.0, - "step": 3667 - }, - { - "epoch": 0.5906840049921495, - "grad_norm": 0.000499027140904218, - "learning_rate": 0.00019999982875803824, - "loss": 46.0, - "step": 3668 - }, - { - "epoch": 0.5908450420709368, - "grad_norm": 0.0015712347812950611, - "learning_rate": 0.00019999982866439946, - "loss": 46.0, - "step": 3669 - }, - { - "epoch": 0.5910060791497243, - "grad_norm": 0.0006408823537640274, - "learning_rate": 0.0001999998285707351, - "loss": 46.0, - "step": 3670 - }, - { - "epoch": 0.5911671162285116, - "grad_norm": 0.00043742373236455023, - "learning_rate": 0.00019999982847704515, - "loss": 46.0, - "step": 3671 - }, - { - "epoch": 0.591328153307299, - "grad_norm": 0.00101959565654397, - "learning_rate": 0.0001999998283833296, - "loss": 46.0, - "step": 3672 - }, - { - "epoch": 0.5914891903860864, - "grad_norm": 0.0004744932521134615, - "learning_rate": 0.00019999982828958844, - "loss": 46.0, - "step": 3673 - }, - { - "epoch": 0.5916502274648738, - "grad_norm": 0.0006279168301261961, - "learning_rate": 0.00019999982819582165, - "loss": 46.0, - "step": 3674 - }, - { - "epoch": 0.5918112645436612, - "grad_norm": 0.0002817786007653922, - "learning_rate": 0.00019999982810202933, - "loss": 46.0, - "step": 3675 - }, - { - "epoch": 0.5919723016224485, - "grad_norm": 0.0009364317520521581, - "learning_rate": 0.0001999998280082114, - "loss": 46.0, - "step": 3676 - }, - { - "epoch": 0.592133338701236, - "grad_norm": 0.0011621599551290274, - "learning_rate": 0.00019999982791436786, - "loss": 46.0, - "step": 3677 - }, - { - "epoch": 0.5922943757800233, - "grad_norm": 0.0005299728363752365, - "learning_rate": 0.00019999982782049872, - "loss": 46.0, - "step": 3678 - }, - { - "epoch": 0.5924554128588108, - "grad_norm": 0.0007590660243295133, - "learning_rate": 0.00019999982772660402, - "loss": 46.0, - "step": 3679 - }, - { - "epoch": 0.5926164499375981, - "grad_norm": 0.002326404443010688, - "learning_rate": 0.00019999982763268368, - "loss": 46.0, - "step": 3680 - }, - { - "epoch": 0.5927774870163856, - "grad_norm": 0.0001865627127699554, - "learning_rate": 0.00019999982753873775, - "loss": 46.0, - "step": 3681 - }, - { - "epoch": 0.5929385240951729, - "grad_norm": 0.0034428501967340708, - "learning_rate": 0.00019999982744476626, - "loss": 46.0, - "step": 3682 - }, - { - "epoch": 0.5930995611739603, - "grad_norm": 0.0014843136304989457, - "learning_rate": 0.00019999982735076916, - "loss": 46.0, - "step": 3683 - }, - { - "epoch": 0.5932605982527477, - "grad_norm": 0.0003272859612479806, - "learning_rate": 0.00019999982725674647, - "loss": 46.0, - "step": 3684 - }, - { - "epoch": 0.5934216353315351, - "grad_norm": 0.0004735429538413882, - "learning_rate": 0.00019999982716269817, - "loss": 46.0, - "step": 3685 - }, - { - "epoch": 0.5935826724103225, - "grad_norm": 0.00019508595869410783, - "learning_rate": 0.0001999998270686243, - "loss": 46.0, - "step": 3686 - }, - { - "epoch": 0.5937437094891098, - "grad_norm": 0.001732675707899034, - "learning_rate": 0.00019999982697452482, - "loss": 46.0, - "step": 3687 - }, - { - "epoch": 0.5939047465678973, - "grad_norm": 0.001608144724741578, - "learning_rate": 0.00019999982688039973, - "loss": 46.0, - "step": 3688 - }, - { - "epoch": 0.5940657836466846, - "grad_norm": 0.0004894894664175808, - "learning_rate": 0.00019999982678624908, - "loss": 46.0, - "step": 3689 - }, - { - "epoch": 0.5942268207254721, - "grad_norm": 0.0029790301341563463, - "learning_rate": 0.0001999998266920728, - "loss": 46.0, - "step": 3690 - }, - { - "epoch": 0.5943878578042594, - "grad_norm": 0.0005888977902941406, - "learning_rate": 0.00019999982659787093, - "loss": 46.0, - "step": 3691 - }, - { - "epoch": 0.5945488948830469, - "grad_norm": 0.0012244251556694508, - "learning_rate": 0.0001999998265036435, - "loss": 46.0, - "step": 3692 - }, - { - "epoch": 0.5947099319618342, - "grad_norm": 0.0008973320946097374, - "learning_rate": 0.00019999982640939043, - "loss": 46.0, - "step": 3693 - }, - { - "epoch": 0.5948709690406216, - "grad_norm": 0.00048477097880095243, - "learning_rate": 0.0001999998263151118, - "loss": 46.0, - "step": 3694 - }, - { - "epoch": 0.595032006119409, - "grad_norm": 0.00031924486393108964, - "learning_rate": 0.00019999982622080756, - "loss": 46.0, - "step": 3695 - }, - { - "epoch": 0.5951930431981964, - "grad_norm": 0.0010715728858485818, - "learning_rate": 0.0001999998261264777, - "loss": 46.0, - "step": 3696 - }, - { - "epoch": 0.5953540802769838, - "grad_norm": 0.0007987231947481632, - "learning_rate": 0.00019999982603212225, - "loss": 46.0, - "step": 3697 - }, - { - "epoch": 0.5955151173557711, - "grad_norm": 0.002042955718934536, - "learning_rate": 0.00019999982593774124, - "loss": 46.0, - "step": 3698 - }, - { - "epoch": 0.5956761544345586, - "grad_norm": 0.0008362018852494657, - "learning_rate": 0.00019999982584333463, - "loss": 46.0, - "step": 3699 - }, - { - "epoch": 0.5958371915133459, - "grad_norm": 0.0007073442684486508, - "learning_rate": 0.0001999998257489024, - "loss": 46.0, - "step": 3700 - }, - { - "epoch": 0.5959982285921334, - "grad_norm": 0.0005143919261172414, - "learning_rate": 0.0001999998256544446, - "loss": 46.0, - "step": 3701 - }, - { - "epoch": 0.5961592656709207, - "grad_norm": 0.0019854875281453133, - "learning_rate": 0.0001999998255599612, - "loss": 46.0, - "step": 3702 - }, - { - "epoch": 0.5963203027497082, - "grad_norm": 0.0005943268770352006, - "learning_rate": 0.00019999982546545223, - "loss": 46.0, - "step": 3703 - }, - { - "epoch": 0.5964813398284955, - "grad_norm": 0.0009752835612744093, - "learning_rate": 0.0001999998253709176, - "loss": 46.0, - "step": 3704 - }, - { - "epoch": 0.596642376907283, - "grad_norm": 0.00037144351517781615, - "learning_rate": 0.00019999982527635745, - "loss": 46.0, - "step": 3705 - }, - { - "epoch": 0.5968034139860703, - "grad_norm": 0.0006946514477021992, - "learning_rate": 0.00019999982518177162, - "loss": 46.0, - "step": 3706 - }, - { - "epoch": 0.5969644510648577, - "grad_norm": 0.00024010856577660888, - "learning_rate": 0.00019999982508716024, - "loss": 46.0, - "step": 3707 - }, - { - "epoch": 0.5971254881436451, - "grad_norm": 0.0014027707511559129, - "learning_rate": 0.00019999982499252326, - "loss": 46.0, - "step": 3708 - }, - { - "epoch": 0.5972865252224324, - "grad_norm": 0.0010134363546967506, - "learning_rate": 0.00019999982489786073, - "loss": 46.0, - "step": 3709 - }, - { - "epoch": 0.5974475623012199, - "grad_norm": 0.0005614749970845878, - "learning_rate": 0.00019999982480317256, - "loss": 46.0, - "step": 3710 - }, - { - "epoch": 0.5976085993800072, - "grad_norm": 0.0012409163173288107, - "learning_rate": 0.0001999998247084588, - "loss": 46.0, - "step": 3711 - }, - { - "epoch": 0.5977696364587947, - "grad_norm": 0.000538306834641844, - "learning_rate": 0.00019999982461371945, - "loss": 46.0, - "step": 3712 - }, - { - "epoch": 0.597930673537582, - "grad_norm": 0.0003873163368552923, - "learning_rate": 0.0001999998245189545, - "loss": 46.0, - "step": 3713 - }, - { - "epoch": 0.5980917106163695, - "grad_norm": 0.0013742198934778571, - "learning_rate": 0.00019999982442416397, - "loss": 46.0, - "step": 3714 - }, - { - "epoch": 0.5982527476951568, - "grad_norm": 0.0007378747104667127, - "learning_rate": 0.0001999998243293478, - "loss": 46.0, - "step": 3715 - }, - { - "epoch": 0.5984137847739442, - "grad_norm": 0.0006354287033900619, - "learning_rate": 0.00019999982423450608, - "loss": 46.0, - "step": 3716 - }, - { - "epoch": 0.5985748218527316, - "grad_norm": 0.0002755013410933316, - "learning_rate": 0.00019999982413963877, - "loss": 46.0, - "step": 3717 - }, - { - "epoch": 0.5987358589315189, - "grad_norm": 0.0007118434878066182, - "learning_rate": 0.00019999982404474584, - "loss": 46.0, - "step": 3718 - }, - { - "epoch": 0.5988968960103064, - "grad_norm": 0.00195452687330544, - "learning_rate": 0.00019999982394982733, - "loss": 46.0, - "step": 3719 - }, - { - "epoch": 0.5990579330890937, - "grad_norm": 0.0009835156379267573, - "learning_rate": 0.0001999998238548832, - "loss": 46.0, - "step": 3720 - }, - { - "epoch": 0.5992189701678812, - "grad_norm": 0.0013090168358758092, - "learning_rate": 0.00019999982375991352, - "loss": 46.0, - "step": 3721 - }, - { - "epoch": 0.5993800072466685, - "grad_norm": 0.000989032443612814, - "learning_rate": 0.00019999982366491822, - "loss": 46.0, - "step": 3722 - }, - { - "epoch": 0.599541044325456, - "grad_norm": 0.0005709793185815215, - "learning_rate": 0.0001999998235698973, - "loss": 46.0, - "step": 3723 - }, - { - "epoch": 0.5997020814042433, - "grad_norm": 0.0016295472159981728, - "learning_rate": 0.00019999982347485083, - "loss": 46.0, - "step": 3724 - }, - { - "epoch": 0.5998631184830308, - "grad_norm": 0.0006179817137308419, - "learning_rate": 0.00019999982337977874, - "loss": 46.0, - "step": 3725 - }, - { - "epoch": 0.6000241555618181, - "grad_norm": 0.003281452227383852, - "learning_rate": 0.00019999982328468106, - "loss": 46.0, - "step": 3726 - }, - { - "epoch": 0.6001851926406055, - "grad_norm": 0.0005204345216043293, - "learning_rate": 0.00019999982318955777, - "loss": 46.0, - "step": 3727 - }, - { - "epoch": 0.6003462297193929, - "grad_norm": 0.0012670471332967281, - "learning_rate": 0.0001999998230944089, - "loss": 46.0, - "step": 3728 - }, - { - "epoch": 0.6005072667981802, - "grad_norm": 0.0008828460704535246, - "learning_rate": 0.00019999982299923446, - "loss": 46.0, - "step": 3729 - }, - { - "epoch": 0.6006683038769677, - "grad_norm": 0.0009197549661621451, - "learning_rate": 0.00019999982290403438, - "loss": 46.0, - "step": 3730 - }, - { - "epoch": 0.600829340955755, - "grad_norm": 0.0016467836685478687, - "learning_rate": 0.00019999982280880874, - "loss": 46.0, - "step": 3731 - }, - { - "epoch": 0.6009903780345425, - "grad_norm": 0.0011391020379960537, - "learning_rate": 0.00019999982271355748, - "loss": 46.0, - "step": 3732 - }, - { - "epoch": 0.6011514151133298, - "grad_norm": 0.0006474884576164186, - "learning_rate": 0.00019999982261828064, - "loss": 46.0, - "step": 3733 - }, - { - "epoch": 0.6013124521921173, - "grad_norm": 0.00021935020049568266, - "learning_rate": 0.0001999998225229782, - "loss": 46.0, - "step": 3734 - }, - { - "epoch": 0.6014734892709046, - "grad_norm": 0.0004743691242765635, - "learning_rate": 0.00019999982242765017, - "loss": 46.0, - "step": 3735 - }, - { - "epoch": 0.6016345263496921, - "grad_norm": 0.0007327792700380087, - "learning_rate": 0.00019999982233229652, - "loss": 46.0, - "step": 3736 - }, - { - "epoch": 0.6017955634284794, - "grad_norm": 0.0007064550300128758, - "learning_rate": 0.0001999998222369173, - "loss": 46.0, - "step": 3737 - }, - { - "epoch": 0.6019566005072668, - "grad_norm": 0.000860275118611753, - "learning_rate": 0.0001999998221415125, - "loss": 46.0, - "step": 3738 - }, - { - "epoch": 0.6021176375860542, - "grad_norm": 0.0012082658940926194, - "learning_rate": 0.0001999998220460821, - "loss": 46.0, - "step": 3739 - }, - { - "epoch": 0.6022786746648415, - "grad_norm": 0.00036825332790613174, - "learning_rate": 0.00019999982195062607, - "loss": 46.0, - "step": 3740 - }, - { - "epoch": 0.602439711743629, - "grad_norm": 0.0013934002490714192, - "learning_rate": 0.00019999982185514448, - "loss": 46.0, - "step": 3741 - }, - { - "epoch": 0.6026007488224163, - "grad_norm": 0.0005578524433076382, - "learning_rate": 0.00019999982175963727, - "loss": 46.0, - "step": 3742 - }, - { - "epoch": 0.6027617859012038, - "grad_norm": 0.00038510331069119275, - "learning_rate": 0.00019999982166410448, - "loss": 46.0, - "step": 3743 - }, - { - "epoch": 0.6029228229799911, - "grad_norm": 0.0005473191267810762, - "learning_rate": 0.0001999998215685461, - "loss": 46.0, - "step": 3744 - }, - { - "epoch": 0.6030838600587786, - "grad_norm": 0.0015968807274475694, - "learning_rate": 0.00019999982147296213, - "loss": 46.0, - "step": 3745 - }, - { - "epoch": 0.6032448971375659, - "grad_norm": 0.0008202851167879999, - "learning_rate": 0.00019999982137735255, - "loss": 46.0, - "step": 3746 - }, - { - "epoch": 0.6034059342163534, - "grad_norm": 0.0010265555465593934, - "learning_rate": 0.00019999982128171738, - "loss": 46.0, - "step": 3747 - }, - { - "epoch": 0.6035669712951407, - "grad_norm": 0.0016084174858406186, - "learning_rate": 0.0001999998211860566, - "loss": 46.0, - "step": 3748 - }, - { - "epoch": 0.6037280083739281, - "grad_norm": 0.0033480769488960505, - "learning_rate": 0.00019999982109037022, - "loss": 46.0, - "step": 3749 - }, - { - "epoch": 0.6038890454527155, - "grad_norm": 0.0015542751643806696, - "learning_rate": 0.0001999998209946583, - "loss": 46.0, - "step": 3750 - }, - { - "epoch": 0.6040500825315028, - "grad_norm": 0.0006006431649439037, - "learning_rate": 0.00019999982089892075, - "loss": 46.0, - "step": 3751 - }, - { - "epoch": 0.6042111196102903, - "grad_norm": 0.0010201828554272652, - "learning_rate": 0.00019999982080315759, - "loss": 46.0, - "step": 3752 - }, - { - "epoch": 0.6043721566890776, - "grad_norm": 0.0005846228450536728, - "learning_rate": 0.00019999982070736887, - "loss": 46.0, - "step": 3753 - }, - { - "epoch": 0.6045331937678651, - "grad_norm": 0.00048275847802869976, - "learning_rate": 0.00019999982061155453, - "loss": 46.0, - "step": 3754 - }, - { - "epoch": 0.6046942308466524, - "grad_norm": 0.0007435891893692315, - "learning_rate": 0.00019999982051571458, - "loss": 46.0, - "step": 3755 - }, - { - "epoch": 0.6048552679254399, - "grad_norm": 0.0011720845941454172, - "learning_rate": 0.00019999982041984905, - "loss": 46.0, - "step": 3756 - }, - { - "epoch": 0.6050163050042272, - "grad_norm": 0.0019239976536482573, - "learning_rate": 0.00019999982032395795, - "loss": 46.0, - "step": 3757 - }, - { - "epoch": 0.6051773420830147, - "grad_norm": 0.0009448094642721117, - "learning_rate": 0.00019999982022804122, - "loss": 46.0, - "step": 3758 - }, - { - "epoch": 0.605338379161802, - "grad_norm": 0.0015097275609150529, - "learning_rate": 0.00019999982013209892, - "loss": 46.0, - "step": 3759 - }, - { - "epoch": 0.6054994162405894, - "grad_norm": 0.0013230973854660988, - "learning_rate": 0.000199999820036131, - "loss": 46.0, - "step": 3760 - }, - { - "epoch": 0.6056604533193768, - "grad_norm": 0.0014520409749820828, - "learning_rate": 0.00019999981994013754, - "loss": 46.0, - "step": 3761 - }, - { - "epoch": 0.6058214903981641, - "grad_norm": 0.001978206681087613, - "learning_rate": 0.00019999981984411842, - "loss": 46.0, - "step": 3762 - }, - { - "epoch": 0.6059825274769516, - "grad_norm": 0.0010053090518340468, - "learning_rate": 0.00019999981974807372, - "loss": 46.0, - "step": 3763 - }, - { - "epoch": 0.6061435645557389, - "grad_norm": 0.0006428470369428396, - "learning_rate": 0.00019999981965200346, - "loss": 46.0, - "step": 3764 - }, - { - "epoch": 0.6063046016345264, - "grad_norm": 0.001770240836776793, - "learning_rate": 0.0001999998195559076, - "loss": 46.0, - "step": 3765 - }, - { - "epoch": 0.6064656387133137, - "grad_norm": 0.0008203188772313297, - "learning_rate": 0.0001999998194597861, - "loss": 46.0, - "step": 3766 - }, - { - "epoch": 0.6066266757921012, - "grad_norm": 0.000977416057139635, - "learning_rate": 0.00019999981936363902, - "loss": 46.0, - "step": 3767 - }, - { - "epoch": 0.6067877128708885, - "grad_norm": 0.0004567127616610378, - "learning_rate": 0.00019999981926746636, - "loss": 46.0, - "step": 3768 - }, - { - "epoch": 0.606948749949676, - "grad_norm": 0.0009826109744608402, - "learning_rate": 0.0001999998191712681, - "loss": 46.0, - "step": 3769 - }, - { - "epoch": 0.6071097870284633, - "grad_norm": 0.0003219879581592977, - "learning_rate": 0.00019999981907504425, - "loss": 46.0, - "step": 3770 - }, - { - "epoch": 0.6072708241072506, - "grad_norm": 0.0004293559759389609, - "learning_rate": 0.00019999981897879482, - "loss": 46.0, - "step": 3771 - }, - { - "epoch": 0.6074318611860381, - "grad_norm": 0.0004879342159256339, - "learning_rate": 0.00019999981888251978, - "loss": 46.0, - "step": 3772 - }, - { - "epoch": 0.6075928982648254, - "grad_norm": 0.0018218603217974305, - "learning_rate": 0.00019999981878621913, - "loss": 46.0, - "step": 3773 - }, - { - "epoch": 0.6077539353436129, - "grad_norm": 0.0005505846929736435, - "learning_rate": 0.0001999998186898929, - "loss": 46.0, - "step": 3774 - }, - { - "epoch": 0.6079149724224002, - "grad_norm": 0.0007565902196802199, - "learning_rate": 0.0001999998185935411, - "loss": 46.0, - "step": 3775 - }, - { - "epoch": 0.6080760095011877, - "grad_norm": 0.0012273786123842, - "learning_rate": 0.00019999981849716365, - "loss": 46.0, - "step": 3776 - }, - { - "epoch": 0.608237046579975, - "grad_norm": 0.0007383571937680244, - "learning_rate": 0.00019999981840076062, - "loss": 46.0, - "step": 3777 - }, - { - "epoch": 0.6083980836587625, - "grad_norm": 0.0007197211380116642, - "learning_rate": 0.00019999981830433203, - "loss": 46.0, - "step": 3778 - }, - { - "epoch": 0.6085591207375498, - "grad_norm": 0.0005319115007296205, - "learning_rate": 0.0001999998182078778, - "loss": 46.0, - "step": 3779 - }, - { - "epoch": 0.6087201578163373, - "grad_norm": 0.0008898625383153558, - "learning_rate": 0.00019999981811139804, - "loss": 46.0, - "step": 3780 - }, - { - "epoch": 0.6088811948951246, - "grad_norm": 0.001702308771200478, - "learning_rate": 0.00019999981801489263, - "loss": 46.0, - "step": 3781 - }, - { - "epoch": 0.6090422319739119, - "grad_norm": 0.0004603904963005334, - "learning_rate": 0.00019999981791836164, - "loss": 46.0, - "step": 3782 - }, - { - "epoch": 0.6092032690526994, - "grad_norm": 0.0015911172376945615, - "learning_rate": 0.00019999981782180506, - "loss": 46.0, - "step": 3783 - }, - { - "epoch": 0.6093643061314867, - "grad_norm": 0.0006629017880186439, - "learning_rate": 0.00019999981772522287, - "loss": 46.0, - "step": 3784 - }, - { - "epoch": 0.6095253432102742, - "grad_norm": 0.0025001366157084703, - "learning_rate": 0.00019999981762861512, - "loss": 46.0, - "step": 3785 - }, - { - "epoch": 0.6096863802890615, - "grad_norm": 0.0017279554158449173, - "learning_rate": 0.00019999981753198175, - "loss": 46.0, - "step": 3786 - }, - { - "epoch": 0.609847417367849, - "grad_norm": 0.0007181591936387122, - "learning_rate": 0.00019999981743532277, - "loss": 46.0, - "step": 3787 - }, - { - "epoch": 0.6100084544466363, - "grad_norm": 0.005317715462297201, - "learning_rate": 0.0001999998173386382, - "loss": 46.0, - "step": 3788 - }, - { - "epoch": 0.6101694915254238, - "grad_norm": 0.0013797798892483115, - "learning_rate": 0.00019999981724192804, - "loss": 46.0, - "step": 3789 - }, - { - "epoch": 0.6103305286042111, - "grad_norm": 0.0013191585894674063, - "learning_rate": 0.00019999981714519233, - "loss": 46.0, - "step": 3790 - }, - { - "epoch": 0.6104915656829986, - "grad_norm": 0.0009662522352300584, - "learning_rate": 0.00019999981704843097, - "loss": 46.0, - "step": 3791 - }, - { - "epoch": 0.6106526027617859, - "grad_norm": 0.000845549104269594, - "learning_rate": 0.00019999981695164402, - "loss": 46.0, - "step": 3792 - }, - { - "epoch": 0.6108136398405732, - "grad_norm": 0.000900446146260947, - "learning_rate": 0.0001999998168548315, - "loss": 46.0, - "step": 3793 - }, - { - "epoch": 0.6109746769193607, - "grad_norm": 0.0003443977329879999, - "learning_rate": 0.00019999981675799337, - "loss": 46.0, - "step": 3794 - }, - { - "epoch": 0.611135713998148, - "grad_norm": 0.00047858565812930465, - "learning_rate": 0.00019999981666112967, - "loss": 46.0, - "step": 3795 - }, - { - "epoch": 0.6112967510769355, - "grad_norm": 0.0012598923640325665, - "learning_rate": 0.00019999981656424032, - "loss": 46.0, - "step": 3796 - }, - { - "epoch": 0.6114577881557228, - "grad_norm": 0.0006759954849258065, - "learning_rate": 0.00019999981646732538, - "loss": 46.0, - "step": 3797 - }, - { - "epoch": 0.6116188252345103, - "grad_norm": 0.0004393019189592451, - "learning_rate": 0.0001999998163703849, - "loss": 46.0, - "step": 3798 - }, - { - "epoch": 0.6117798623132976, - "grad_norm": 0.0004934060852974653, - "learning_rate": 0.0001999998162734188, - "loss": 46.0, - "step": 3799 - }, - { - "epoch": 0.6119408993920851, - "grad_norm": 0.00030627084197476506, - "learning_rate": 0.0001999998161764271, - "loss": 46.0, - "step": 3800 - }, - { - "epoch": 0.6121019364708724, - "grad_norm": 0.0005866055726073682, - "learning_rate": 0.00019999981607940983, - "loss": 46.0, - "step": 3801 - }, - { - "epoch": 0.6122629735496599, - "grad_norm": 0.00043013528920710087, - "learning_rate": 0.00019999981598236693, - "loss": 46.0, - "step": 3802 - }, - { - "epoch": 0.6124240106284472, - "grad_norm": 0.0009788851020857692, - "learning_rate": 0.00019999981588529844, - "loss": 46.0, - "step": 3803 - }, - { - "epoch": 0.6125850477072345, - "grad_norm": 0.0005991174839437008, - "learning_rate": 0.00019999981578820437, - "loss": 46.0, - "step": 3804 - }, - { - "epoch": 0.612746084786022, - "grad_norm": 0.0006116996519267559, - "learning_rate": 0.0001999998156910847, - "loss": 46.0, - "step": 3805 - }, - { - "epoch": 0.6129071218648093, - "grad_norm": 0.0005804227548651397, - "learning_rate": 0.00019999981559393944, - "loss": 46.0, - "step": 3806 - }, - { - "epoch": 0.6130681589435968, - "grad_norm": 0.00042501665302552283, - "learning_rate": 0.00019999981549676855, - "loss": 46.0, - "step": 3807 - }, - { - "epoch": 0.6132291960223841, - "grad_norm": 0.0005513046635314822, - "learning_rate": 0.00019999981539957213, - "loss": 46.0, - "step": 3808 - }, - { - "epoch": 0.6133902331011716, - "grad_norm": 0.0009094532579183578, - "learning_rate": 0.00019999981530235007, - "loss": 46.0, - "step": 3809 - }, - { - "epoch": 0.6135512701799589, - "grad_norm": 0.0009424593299627304, - "learning_rate": 0.00019999981520510242, - "loss": 46.0, - "step": 3810 - }, - { - "epoch": 0.6137123072587464, - "grad_norm": 0.0018491118680685759, - "learning_rate": 0.00019999981510782918, - "loss": 46.0, - "step": 3811 - }, - { - "epoch": 0.6138733443375337, - "grad_norm": 0.0002931191702373326, - "learning_rate": 0.00019999981501053036, - "loss": 46.0, - "step": 3812 - }, - { - "epoch": 0.6140343814163212, - "grad_norm": 0.0005261205369606614, - "learning_rate": 0.00019999981491320592, - "loss": 46.0, - "step": 3813 - }, - { - "epoch": 0.6141954184951085, - "grad_norm": 0.0008702565683051944, - "learning_rate": 0.0001999998148158559, - "loss": 46.0, - "step": 3814 - }, - { - "epoch": 0.6143564555738958, - "grad_norm": 0.0009721557726152241, - "learning_rate": 0.00019999981471848028, - "loss": 46.0, - "step": 3815 - }, - { - "epoch": 0.6145174926526833, - "grad_norm": 0.0006699041114188731, - "learning_rate": 0.00019999981462107906, - "loss": 46.0, - "step": 3816 - }, - { - "epoch": 0.6146785297314706, - "grad_norm": 0.0013978193746879697, - "learning_rate": 0.00019999981452365227, - "loss": 46.0, - "step": 3817 - }, - { - "epoch": 0.6148395668102581, - "grad_norm": 0.0007404517964459956, - "learning_rate": 0.00019999981442619987, - "loss": 46.0, - "step": 3818 - }, - { - "epoch": 0.6150006038890454, - "grad_norm": 0.002723252633586526, - "learning_rate": 0.00019999981432872183, - "loss": 46.0, - "step": 3819 - }, - { - "epoch": 0.6151616409678329, - "grad_norm": 0.001904648612253368, - "learning_rate": 0.00019999981423121825, - "loss": 46.0, - "step": 3820 - }, - { - "epoch": 0.6153226780466202, - "grad_norm": 0.0015114553971216083, - "learning_rate": 0.00019999981413368906, - "loss": 46.0, - "step": 3821 - }, - { - "epoch": 0.6154837151254077, - "grad_norm": 0.0004959972575306892, - "learning_rate": 0.00019999981403613428, - "loss": 46.0, - "step": 3822 - }, - { - "epoch": 0.615644752204195, - "grad_norm": 0.0012471459340304136, - "learning_rate": 0.0001999998139385539, - "loss": 46.0, - "step": 3823 - }, - { - "epoch": 0.6158057892829824, - "grad_norm": 0.0011846354464069009, - "learning_rate": 0.00019999981384094791, - "loss": 46.0, - "step": 3824 - }, - { - "epoch": 0.6159668263617698, - "grad_norm": 0.0012976614525541663, - "learning_rate": 0.00019999981374331638, - "loss": 46.0, - "step": 3825 - }, - { - "epoch": 0.6161278634405571, - "grad_norm": 0.0011470116442069411, - "learning_rate": 0.0001999998136456592, - "loss": 46.0, - "step": 3826 - }, - { - "epoch": 0.6162889005193446, - "grad_norm": 0.0016877774614840746, - "learning_rate": 0.00019999981354797646, - "loss": 46.0, - "step": 3827 - }, - { - "epoch": 0.6164499375981319, - "grad_norm": 0.0009053684771060944, - "learning_rate": 0.00019999981345026808, - "loss": 46.0, - "step": 3828 - }, - { - "epoch": 0.6166109746769194, - "grad_norm": 0.0007914634770713747, - "learning_rate": 0.00019999981335253416, - "loss": 46.0, - "step": 3829 - }, - { - "epoch": 0.6167720117557067, - "grad_norm": 0.0006173317669890821, - "learning_rate": 0.0001999998132547746, - "loss": 46.0, - "step": 3830 - }, - { - "epoch": 0.6169330488344942, - "grad_norm": 0.0023323465138673782, - "learning_rate": 0.0001999998131569895, - "loss": 46.0, - "step": 3831 - }, - { - "epoch": 0.6170940859132815, - "grad_norm": 0.0005408467259258032, - "learning_rate": 0.00019999981305917873, - "loss": 46.0, - "step": 3832 - }, - { - "epoch": 0.617255122992069, - "grad_norm": 0.00047445937525480986, - "learning_rate": 0.00019999981296134242, - "loss": 46.0, - "step": 3833 - }, - { - "epoch": 0.6174161600708563, - "grad_norm": 0.00043043107143603265, - "learning_rate": 0.00019999981286348049, - "loss": 46.0, - "step": 3834 - }, - { - "epoch": 0.6175771971496437, - "grad_norm": 0.0006480773445218801, - "learning_rate": 0.00019999981276559297, - "loss": 46.0, - "step": 3835 - }, - { - "epoch": 0.6177382342284311, - "grad_norm": 0.0007769428775645792, - "learning_rate": 0.00019999981266767986, - "loss": 46.0, - "step": 3836 - }, - { - "epoch": 0.6178992713072184, - "grad_norm": 0.001116651576012373, - "learning_rate": 0.00019999981256974117, - "loss": 46.0, - "step": 3837 - }, - { - "epoch": 0.6180603083860059, - "grad_norm": 0.0008682135958224535, - "learning_rate": 0.00019999981247177686, - "loss": 46.0, - "step": 3838 - }, - { - "epoch": 0.6182213454647932, - "grad_norm": 0.00042617073631845415, - "learning_rate": 0.00019999981237378697, - "loss": 46.0, - "step": 3839 - }, - { - "epoch": 0.6183823825435807, - "grad_norm": 0.00023524355492554605, - "learning_rate": 0.0001999998122757715, - "loss": 46.0, - "step": 3840 - }, - { - "epoch": 0.618543419622368, - "grad_norm": 0.0008973352960310876, - "learning_rate": 0.00019999981217773042, - "loss": 46.0, - "step": 3841 - }, - { - "epoch": 0.6187044567011555, - "grad_norm": 0.0010380677413195372, - "learning_rate": 0.0001999998120796637, - "loss": 46.0, - "step": 3842 - }, - { - "epoch": 0.6188654937799428, - "grad_norm": 0.0005477048107422888, - "learning_rate": 0.00019999981198157144, - "loss": 46.0, - "step": 3843 - }, - { - "epoch": 0.6190265308587303, - "grad_norm": 0.0002454533241689205, - "learning_rate": 0.00019999981188345358, - "loss": 46.0, - "step": 3844 - }, - { - "epoch": 0.6191875679375176, - "grad_norm": 0.001563397003337741, - "learning_rate": 0.0001999998117853101, - "loss": 46.0, - "step": 3845 - }, - { - "epoch": 0.619348605016305, - "grad_norm": 0.0013099611969664693, - "learning_rate": 0.00019999981168714105, - "loss": 46.0, - "step": 3846 - }, - { - "epoch": 0.6195096420950924, - "grad_norm": 0.0002848474832717329, - "learning_rate": 0.0001999998115889464, - "loss": 46.0, - "step": 3847 - }, - { - "epoch": 0.6196706791738797, - "grad_norm": 0.0006217787740752101, - "learning_rate": 0.00019999981149072615, - "loss": 46.0, - "step": 3848 - }, - { - "epoch": 0.6198317162526672, - "grad_norm": 0.0007854134892113507, - "learning_rate": 0.00019999981139248032, - "loss": 46.0, - "step": 3849 - }, - { - "epoch": 0.6199927533314545, - "grad_norm": 0.0008232304826378822, - "learning_rate": 0.00019999981129420886, - "loss": 46.0, - "step": 3850 - }, - { - "epoch": 0.620153790410242, - "grad_norm": 0.0016802066238597035, - "learning_rate": 0.00019999981119591181, - "loss": 46.0, - "step": 3851 - }, - { - "epoch": 0.6203148274890293, - "grad_norm": 0.0023175273090600967, - "learning_rate": 0.0001999998110975892, - "loss": 46.0, - "step": 3852 - }, - { - "epoch": 0.6204758645678168, - "grad_norm": 0.00041702997987158597, - "learning_rate": 0.00019999981099924098, - "loss": 46.0, - "step": 3853 - }, - { - "epoch": 0.6206369016466041, - "grad_norm": 0.0015941159799695015, - "learning_rate": 0.00019999981090086717, - "loss": 46.0, - "step": 3854 - }, - { - "epoch": 0.6207979387253916, - "grad_norm": 0.0007031516870483756, - "learning_rate": 0.00019999981080246775, - "loss": 46.0, - "step": 3855 - }, - { - "epoch": 0.6209589758041789, - "grad_norm": 0.0005478821694850922, - "learning_rate": 0.00019999981070404274, - "loss": 46.0, - "step": 3856 - }, - { - "epoch": 0.6211200128829663, - "grad_norm": 0.0006148735410533845, - "learning_rate": 0.00019999981060559214, - "loss": 46.0, - "step": 3857 - }, - { - "epoch": 0.6212810499617537, - "grad_norm": 0.006427546963095665, - "learning_rate": 0.00019999981050711595, - "loss": 46.0, - "step": 3858 - }, - { - "epoch": 0.621442087040541, - "grad_norm": 0.0005981564172543585, - "learning_rate": 0.00019999981040861418, - "loss": 46.0, - "step": 3859 - }, - { - "epoch": 0.6216031241193285, - "grad_norm": 0.0006622473010793328, - "learning_rate": 0.00019999981031008676, - "loss": 46.0, - "step": 3860 - }, - { - "epoch": 0.6217641611981158, - "grad_norm": 0.0008345595560967922, - "learning_rate": 0.0001999998102115338, - "loss": 46.0, - "step": 3861 - }, - { - "epoch": 0.6219251982769033, - "grad_norm": 0.0006166543462313712, - "learning_rate": 0.0001999998101129552, - "loss": 46.0, - "step": 3862 - }, - { - "epoch": 0.6220862353556906, - "grad_norm": 0.0003607094695325941, - "learning_rate": 0.00019999981001435105, - "loss": 46.0, - "step": 3863 - }, - { - "epoch": 0.6222472724344781, - "grad_norm": 0.0033777060452848673, - "learning_rate": 0.0001999998099157213, - "loss": 46.0, - "step": 3864 - }, - { - "epoch": 0.6224083095132654, - "grad_norm": 0.0007539305952377617, - "learning_rate": 0.0001999998098170659, - "loss": 46.0, - "step": 3865 - }, - { - "epoch": 0.6225693465920529, - "grad_norm": 0.0010992271127179265, - "learning_rate": 0.00019999980971838497, - "loss": 46.0, - "step": 3866 - }, - { - "epoch": 0.6227303836708402, - "grad_norm": 0.0008015542407520115, - "learning_rate": 0.00019999980961967842, - "loss": 46.0, - "step": 3867 - }, - { - "epoch": 0.6228914207496276, - "grad_norm": 0.0003821809950750321, - "learning_rate": 0.00019999980952094628, - "loss": 46.0, - "step": 3868 - }, - { - "epoch": 0.623052457828415, - "grad_norm": 0.000647315348032862, - "learning_rate": 0.00019999980942218853, - "loss": 46.0, - "step": 3869 - }, - { - "epoch": 0.6232134949072023, - "grad_norm": 0.0005045785801485181, - "learning_rate": 0.0001999998093234052, - "loss": 46.0, - "step": 3870 - }, - { - "epoch": 0.6233745319859898, - "grad_norm": 0.0006298076477833092, - "learning_rate": 0.00019999980922459626, - "loss": 46.0, - "step": 3871 - }, - { - "epoch": 0.6235355690647771, - "grad_norm": 0.0006782549316994846, - "learning_rate": 0.00019999980912576172, - "loss": 46.0, - "step": 3872 - }, - { - "epoch": 0.6236966061435646, - "grad_norm": 0.0040741171687841415, - "learning_rate": 0.00019999980902690162, - "loss": 46.0, - "step": 3873 - }, - { - "epoch": 0.6238576432223519, - "grad_norm": 0.0009928788058459759, - "learning_rate": 0.0001999998089280159, - "loss": 46.0, - "step": 3874 - }, - { - "epoch": 0.6240186803011394, - "grad_norm": 0.0011796929175034165, - "learning_rate": 0.0001999998088291046, - "loss": 46.0, - "step": 3875 - }, - { - "epoch": 0.6241797173799267, - "grad_norm": 0.001887347549200058, - "learning_rate": 0.0001999998087301677, - "loss": 46.0, - "step": 3876 - }, - { - "epoch": 0.6243407544587141, - "grad_norm": 0.0020020632073283195, - "learning_rate": 0.0001999998086312052, - "loss": 46.0, - "step": 3877 - }, - { - "epoch": 0.6245017915375015, - "grad_norm": 0.0009481040760874748, - "learning_rate": 0.00019999980853221708, - "loss": 46.0, - "step": 3878 - }, - { - "epoch": 0.6246628286162889, - "grad_norm": 0.0003913877881132066, - "learning_rate": 0.0001999998084332034, - "loss": 46.0, - "step": 3879 - }, - { - "epoch": 0.6248238656950763, - "grad_norm": 0.0009294972405768931, - "learning_rate": 0.00019999980833416414, - "loss": 46.0, - "step": 3880 - }, - { - "epoch": 0.6249849027738636, - "grad_norm": 0.0006162965437397361, - "learning_rate": 0.00019999980823509926, - "loss": 46.0, - "step": 3881 - }, - { - "epoch": 0.6251459398526511, - "grad_norm": 0.00369398039765656, - "learning_rate": 0.0001999998081360088, - "loss": 46.0, - "step": 3882 - }, - { - "epoch": 0.6253069769314384, - "grad_norm": 0.0015181349590420723, - "learning_rate": 0.00019999980803689273, - "loss": 46.0, - "step": 3883 - }, - { - "epoch": 0.6254680140102259, - "grad_norm": 0.0005074756918475032, - "learning_rate": 0.00019999980793775107, - "loss": 46.0, - "step": 3884 - }, - { - "epoch": 0.6256290510890132, - "grad_norm": 0.00031046729418449104, - "learning_rate": 0.0001999998078385838, - "loss": 46.0, - "step": 3885 - }, - { - "epoch": 0.6257900881678007, - "grad_norm": 0.0006027091876603663, - "learning_rate": 0.00019999980773939097, - "loss": 46.0, - "step": 3886 - }, - { - "epoch": 0.625951125246588, - "grad_norm": 0.000491827551741153, - "learning_rate": 0.0001999998076401725, - "loss": 46.0, - "step": 3887 - }, - { - "epoch": 0.6261121623253754, - "grad_norm": 0.0005214366246946156, - "learning_rate": 0.00019999980754092846, - "loss": 46.0, - "step": 3888 - }, - { - "epoch": 0.6262731994041628, - "grad_norm": 0.0005825932021252811, - "learning_rate": 0.00019999980744165883, - "loss": 46.0, - "step": 3889 - }, - { - "epoch": 0.6264342364829502, - "grad_norm": 0.0003587489773053676, - "learning_rate": 0.00019999980734236358, - "loss": 46.0, - "step": 3890 - }, - { - "epoch": 0.6265952735617376, - "grad_norm": 0.0003709504962898791, - "learning_rate": 0.00019999980724304278, - "loss": 46.0, - "step": 3891 - }, - { - "epoch": 0.626756310640525, - "grad_norm": 0.0005646224017255008, - "learning_rate": 0.00019999980714369636, - "loss": 46.0, - "step": 3892 - }, - { - "epoch": 0.6269173477193124, - "grad_norm": 0.0005972803337499499, - "learning_rate": 0.00019999980704432435, - "loss": 46.0, - "step": 3893 - }, - { - "epoch": 0.6270783847980997, - "grad_norm": 0.0011230631498619914, - "learning_rate": 0.00019999980694492673, - "loss": 46.0, - "step": 3894 - }, - { - "epoch": 0.6272394218768872, - "grad_norm": 0.000535583880264312, - "learning_rate": 0.00019999980684550352, - "loss": 46.0, - "step": 3895 - }, - { - "epoch": 0.6274004589556745, - "grad_norm": 0.0016877740854397416, - "learning_rate": 0.00019999980674605472, - "loss": 46.0, - "step": 3896 - }, - { - "epoch": 0.627561496034462, - "grad_norm": 0.0014109365874901414, - "learning_rate": 0.00019999980664658034, - "loss": 46.0, - "step": 3897 - }, - { - "epoch": 0.6277225331132493, - "grad_norm": 0.001895105349831283, - "learning_rate": 0.00019999980654708034, - "loss": 46.0, - "step": 3898 - }, - { - "epoch": 0.6278835701920367, - "grad_norm": 0.000813856371678412, - "learning_rate": 0.00019999980644755473, - "loss": 46.0, - "step": 3899 - }, - { - "epoch": 0.6280446072708241, - "grad_norm": 0.0017831121804192662, - "learning_rate": 0.00019999980634800358, - "loss": 46.0, - "step": 3900 - }, - { - "epoch": 0.6282056443496115, - "grad_norm": 0.00043440848821774125, - "learning_rate": 0.0001999998062484268, - "loss": 46.0, - "step": 3901 - }, - { - "epoch": 0.6283666814283989, - "grad_norm": 0.0013220850378274918, - "learning_rate": 0.00019999980614882442, - "loss": 46.0, - "step": 3902 - }, - { - "epoch": 0.6285277185071863, - "grad_norm": 0.0003818328841589391, - "learning_rate": 0.00019999980604919646, - "loss": 46.0, - "step": 3903 - }, - { - "epoch": 0.6286887555859737, - "grad_norm": 0.0005465371068567038, - "learning_rate": 0.0001999998059495429, - "loss": 46.0, - "step": 3904 - }, - { - "epoch": 0.628849792664761, - "grad_norm": 0.003256515832617879, - "learning_rate": 0.00019999980584986377, - "loss": 46.0, - "step": 3905 - }, - { - "epoch": 0.6290108297435485, - "grad_norm": 0.0007257511606439948, - "learning_rate": 0.000199999805750159, - "loss": 46.0, - "step": 3906 - }, - { - "epoch": 0.6291718668223358, - "grad_norm": 0.0015296990750357509, - "learning_rate": 0.0001999998056504287, - "loss": 46.0, - "step": 3907 - }, - { - "epoch": 0.6293329039011233, - "grad_norm": 0.0016780991572886705, - "learning_rate": 0.00019999980555067274, - "loss": 46.0, - "step": 3908 - }, - { - "epoch": 0.6294939409799106, - "grad_norm": 0.0003995900333393365, - "learning_rate": 0.0001999998054508912, - "loss": 46.0, - "step": 3909 - }, - { - "epoch": 0.629654978058698, - "grad_norm": 0.0007608133601024747, - "learning_rate": 0.00019999980535108407, - "loss": 46.0, - "step": 3910 - }, - { - "epoch": 0.6298160151374854, - "grad_norm": 0.0023052096366882324, - "learning_rate": 0.00019999980525125136, - "loss": 46.0, - "step": 3911 - }, - { - "epoch": 0.6299770522162728, - "grad_norm": 0.0012004564050585032, - "learning_rate": 0.00019999980515139303, - "loss": 46.0, - "step": 3912 - }, - { - "epoch": 0.6301380892950602, - "grad_norm": 0.0005191197851672769, - "learning_rate": 0.00019999980505150912, - "loss": 46.0, - "step": 3913 - }, - { - "epoch": 0.6302991263738476, - "grad_norm": 0.0022934270091354847, - "learning_rate": 0.0001999998049515996, - "loss": 46.0, - "step": 3914 - }, - { - "epoch": 0.630460163452635, - "grad_norm": 0.00094611756503582, - "learning_rate": 0.00019999980485166453, - "loss": 46.0, - "step": 3915 - }, - { - "epoch": 0.6306212005314223, - "grad_norm": 0.0015847343020141125, - "learning_rate": 0.0001999998047517038, - "loss": 46.0, - "step": 3916 - }, - { - "epoch": 0.6307822376102098, - "grad_norm": 0.0008317606407217681, - "learning_rate": 0.00019999980465171754, - "loss": 46.0, - "step": 3917 - }, - { - "epoch": 0.6309432746889971, - "grad_norm": 0.0027236042078584433, - "learning_rate": 0.00019999980455170563, - "loss": 46.0, - "step": 3918 - }, - { - "epoch": 0.6311043117677846, - "grad_norm": 0.0015027662739157677, - "learning_rate": 0.00019999980445166817, - "loss": 46.0, - "step": 3919 - }, - { - "epoch": 0.6312653488465719, - "grad_norm": 0.0009333167690783739, - "learning_rate": 0.0001999998043516051, - "loss": 46.0, - "step": 3920 - }, - { - "epoch": 0.6314263859253593, - "grad_norm": 0.00038940150989219546, - "learning_rate": 0.0001999998042515164, - "loss": 46.0, - "step": 3921 - }, - { - "epoch": 0.6315874230041467, - "grad_norm": 0.0011956420494243503, - "learning_rate": 0.00019999980415140215, - "loss": 46.0, - "step": 3922 - }, - { - "epoch": 0.6317484600829341, - "grad_norm": 0.0017112076748162508, - "learning_rate": 0.00019999980405126228, - "loss": 46.0, - "step": 3923 - }, - { - "epoch": 0.6319094971617215, - "grad_norm": 0.0005666279466822743, - "learning_rate": 0.00019999980395109682, - "loss": 46.0, - "step": 3924 - }, - { - "epoch": 0.6320705342405089, - "grad_norm": 0.0002572282974142581, - "learning_rate": 0.0001999998038509058, - "loss": 46.0, - "step": 3925 - }, - { - "epoch": 0.6322315713192963, - "grad_norm": 0.0008163744932971895, - "learning_rate": 0.00019999980375068915, - "loss": 46.0, - "step": 3926 - }, - { - "epoch": 0.6323926083980836, - "grad_norm": 0.0007077203481458127, - "learning_rate": 0.0001999998036504469, - "loss": 46.0, - "step": 3927 - }, - { - "epoch": 0.6325536454768711, - "grad_norm": 0.0013686062302440405, - "learning_rate": 0.00019999980355017905, - "loss": 46.0, - "step": 3928 - }, - { - "epoch": 0.6327146825556584, - "grad_norm": 0.0018887012265622616, - "learning_rate": 0.00019999980344988564, - "loss": 46.0, - "step": 3929 - }, - { - "epoch": 0.6328757196344458, - "grad_norm": 0.0004644394794013351, - "learning_rate": 0.0001999998033495666, - "loss": 46.0, - "step": 3930 - }, - { - "epoch": 0.6330367567132332, - "grad_norm": 0.001086959382519126, - "learning_rate": 0.00019999980324922199, - "loss": 46.0, - "step": 3931 - }, - { - "epoch": 0.6331977937920206, - "grad_norm": 0.0012090452946722507, - "learning_rate": 0.00019999980314885178, - "loss": 46.0, - "step": 3932 - }, - { - "epoch": 0.633358830870808, - "grad_norm": 0.0019170187879353762, - "learning_rate": 0.000199999803048456, - "loss": 46.0, - "step": 3933 - }, - { - "epoch": 0.6335198679495954, - "grad_norm": 0.0011698042508214712, - "learning_rate": 0.00019999980294803458, - "loss": 46.0, - "step": 3934 - }, - { - "epoch": 0.6336809050283828, - "grad_norm": 0.0004384565108921379, - "learning_rate": 0.00019999980284758756, - "loss": 46.0, - "step": 3935 - }, - { - "epoch": 0.6338419421071702, - "grad_norm": 0.0014051563339307904, - "learning_rate": 0.00019999980274711498, - "loss": 46.0, - "step": 3936 - }, - { - "epoch": 0.6340029791859576, - "grad_norm": 0.00027268854319117963, - "learning_rate": 0.0001999998026466168, - "loss": 46.0, - "step": 3937 - }, - { - "epoch": 0.634164016264745, - "grad_norm": 0.0009903289610520005, - "learning_rate": 0.000199999802546093, - "loss": 46.0, - "step": 3938 - }, - { - "epoch": 0.6343250533435324, - "grad_norm": 0.00039567198837175965, - "learning_rate": 0.00019999980244554363, - "loss": 46.0, - "step": 3939 - }, - { - "epoch": 0.6344860904223197, - "grad_norm": 0.0019074567826464772, - "learning_rate": 0.00019999980234496867, - "loss": 46.0, - "step": 3940 - }, - { - "epoch": 0.6346471275011071, - "grad_norm": 0.0004465160018298775, - "learning_rate": 0.0001999998022443681, - "loss": 46.0, - "step": 3941 - }, - { - "epoch": 0.6348081645798945, - "grad_norm": 0.000665780738927424, - "learning_rate": 0.00019999980214374194, - "loss": 46.0, - "step": 3942 - }, - { - "epoch": 0.6349692016586819, - "grad_norm": 0.002296403981745243, - "learning_rate": 0.00019999980204309017, - "loss": 46.0, - "step": 3943 - }, - { - "epoch": 0.6351302387374693, - "grad_norm": 0.0014289953978732228, - "learning_rate": 0.00019999980194241283, - "loss": 46.0, - "step": 3944 - }, - { - "epoch": 0.6352912758162567, - "grad_norm": 0.0019106982508674264, - "learning_rate": 0.00019999980184170989, - "loss": 46.0, - "step": 3945 - }, - { - "epoch": 0.6354523128950441, - "grad_norm": 0.00034020375460386276, - "learning_rate": 0.00019999980174098135, - "loss": 46.0, - "step": 3946 - }, - { - "epoch": 0.6356133499738315, - "grad_norm": 0.00044324115151539445, - "learning_rate": 0.00019999980164022723, - "loss": 46.0, - "step": 3947 - }, - { - "epoch": 0.6357743870526189, - "grad_norm": 0.001357447705231607, - "learning_rate": 0.0001999998015394475, - "loss": 46.0, - "step": 3948 - }, - { - "epoch": 0.6359354241314062, - "grad_norm": 0.0007523288950324059, - "learning_rate": 0.00019999980143864217, - "loss": 46.0, - "step": 3949 - }, - { - "epoch": 0.6360964612101937, - "grad_norm": 0.0007136139320209622, - "learning_rate": 0.00019999980133781123, - "loss": 46.0, - "step": 3950 - }, - { - "epoch": 0.636257498288981, - "grad_norm": 0.0017579509876668453, - "learning_rate": 0.0001999998012369547, - "loss": 46.0, - "step": 3951 - }, - { - "epoch": 0.6364185353677684, - "grad_norm": 0.00034304361906833947, - "learning_rate": 0.0001999998011360726, - "loss": 46.0, - "step": 3952 - }, - { - "epoch": 0.6365795724465558, - "grad_norm": 0.0014558410039171576, - "learning_rate": 0.00019999980103516492, - "loss": 46.0, - "step": 3953 - }, - { - "epoch": 0.6367406095253432, - "grad_norm": 0.0004298131098039448, - "learning_rate": 0.0001999998009342316, - "loss": 46.0, - "step": 3954 - }, - { - "epoch": 0.6369016466041306, - "grad_norm": 0.0006449212087318301, - "learning_rate": 0.0001999998008332727, - "loss": 46.0, - "step": 3955 - }, - { - "epoch": 0.637062683682918, - "grad_norm": 0.0004265451862011105, - "learning_rate": 0.00019999980073228822, - "loss": 46.0, - "step": 3956 - }, - { - "epoch": 0.6372237207617054, - "grad_norm": 0.0030151167884469032, - "learning_rate": 0.00019999980063127815, - "loss": 46.0, - "step": 3957 - }, - { - "epoch": 0.6373847578404928, - "grad_norm": 0.002641751430928707, - "learning_rate": 0.00019999980053024246, - "loss": 46.0, - "step": 3958 - }, - { - "epoch": 0.6375457949192802, - "grad_norm": 0.0006884320755489171, - "learning_rate": 0.0001999998004291812, - "loss": 46.0, - "step": 3959 - }, - { - "epoch": 0.6377068319980675, - "grad_norm": 0.000506981392391026, - "learning_rate": 0.00019999980032809435, - "loss": 46.0, - "step": 3960 - }, - { - "epoch": 0.637867869076855, - "grad_norm": 0.001656600390560925, - "learning_rate": 0.00019999980022698187, - "loss": 46.0, - "step": 3961 - }, - { - "epoch": 0.6380289061556423, - "grad_norm": 0.001254578004591167, - "learning_rate": 0.0001999998001258438, - "loss": 46.0, - "step": 3962 - }, - { - "epoch": 0.6381899432344297, - "grad_norm": 0.0005974529194645584, - "learning_rate": 0.00019999980002468015, - "loss": 46.0, - "step": 3963 - }, - { - "epoch": 0.6383509803132171, - "grad_norm": 0.001432158867828548, - "learning_rate": 0.0001999997999234909, - "loss": 46.0, - "step": 3964 - }, - { - "epoch": 0.6385120173920045, - "grad_norm": 0.00041471130680292845, - "learning_rate": 0.00019999979982227606, - "loss": 46.0, - "step": 3965 - }, - { - "epoch": 0.6386730544707919, - "grad_norm": 0.0004181638068985194, - "learning_rate": 0.00019999979972103562, - "loss": 46.0, - "step": 3966 - }, - { - "epoch": 0.6388340915495793, - "grad_norm": 0.0012539547169581056, - "learning_rate": 0.0001999997996197696, - "loss": 46.0, - "step": 3967 - }, - { - "epoch": 0.6389951286283667, - "grad_norm": 0.000602704705670476, - "learning_rate": 0.00019999979951847795, - "loss": 46.0, - "step": 3968 - }, - { - "epoch": 0.6391561657071541, - "grad_norm": 0.0007977702771313488, - "learning_rate": 0.00019999979941716074, - "loss": 46.0, - "step": 3969 - }, - { - "epoch": 0.6393172027859415, - "grad_norm": 0.0003835688694380224, - "learning_rate": 0.00019999979931581793, - "loss": 46.0, - "step": 3970 - }, - { - "epoch": 0.6394782398647288, - "grad_norm": 0.0012510454980656505, - "learning_rate": 0.00019999979921444952, - "loss": 46.0, - "step": 3971 - }, - { - "epoch": 0.6396392769435163, - "grad_norm": 0.0006620041676796973, - "learning_rate": 0.0001999997991130555, - "loss": 46.0, - "step": 3972 - }, - { - "epoch": 0.6398003140223036, - "grad_norm": 0.001054050400853157, - "learning_rate": 0.00019999979901163593, - "loss": 46.0, - "step": 3973 - }, - { - "epoch": 0.639961351101091, - "grad_norm": 0.00041232031071558595, - "learning_rate": 0.0001999997989101907, - "loss": 46.0, - "step": 3974 - }, - { - "epoch": 0.6401223881798784, - "grad_norm": 0.0028472826816141605, - "learning_rate": 0.00019999979880871993, - "loss": 46.0, - "step": 3975 - }, - { - "epoch": 0.6402834252586658, - "grad_norm": 0.0006197105394676328, - "learning_rate": 0.00019999979870722354, - "loss": 46.0, - "step": 3976 - }, - { - "epoch": 0.6404444623374532, - "grad_norm": 0.0006238263449631631, - "learning_rate": 0.00019999979860570158, - "loss": 46.0, - "step": 3977 - }, - { - "epoch": 0.6406054994162406, - "grad_norm": 0.0005026328144595027, - "learning_rate": 0.000199999798504154, - "loss": 46.0, - "step": 3978 - }, - { - "epoch": 0.640766536495028, - "grad_norm": 0.0004826491349376738, - "learning_rate": 0.0001999997984025808, - "loss": 46.0, - "step": 3979 - }, - { - "epoch": 0.6409275735738154, - "grad_norm": 0.0003820724377874285, - "learning_rate": 0.00019999979830098206, - "loss": 46.0, - "step": 3980 - }, - { - "epoch": 0.6410886106526028, - "grad_norm": 0.0014432755997404456, - "learning_rate": 0.00019999979819935768, - "loss": 46.0, - "step": 3981 - }, - { - "epoch": 0.6412496477313901, - "grad_norm": 0.0013067788677290082, - "learning_rate": 0.00019999979809770773, - "loss": 46.0, - "step": 3982 - }, - { - "epoch": 0.6414106848101775, - "grad_norm": 0.002178050111979246, - "learning_rate": 0.0001999997979960322, - "loss": 46.0, - "step": 3983 - }, - { - "epoch": 0.6415717218889649, - "grad_norm": 0.0004842475464101881, - "learning_rate": 0.00019999979789433106, - "loss": 46.0, - "step": 3984 - }, - { - "epoch": 0.6417327589677523, - "grad_norm": 0.000640039099380374, - "learning_rate": 0.0001999997977926043, - "loss": 46.0, - "step": 3985 - }, - { - "epoch": 0.6418937960465397, - "grad_norm": 0.0008534935768693686, - "learning_rate": 0.00019999979769085198, - "loss": 46.0, - "step": 3986 - }, - { - "epoch": 0.6420548331253271, - "grad_norm": 0.0008801156654953957, - "learning_rate": 0.00019999979758907404, - "loss": 46.0, - "step": 3987 - }, - { - "epoch": 0.6422158702041145, - "grad_norm": 0.0003403981390874833, - "learning_rate": 0.00019999979748727052, - "loss": 46.0, - "step": 3988 - }, - { - "epoch": 0.6423769072829019, - "grad_norm": 0.0014114284422248602, - "learning_rate": 0.00019999979738544142, - "loss": 46.0, - "step": 3989 - }, - { - "epoch": 0.6425379443616893, - "grad_norm": 0.0005789893912151456, - "learning_rate": 0.0001999997972835867, - "loss": 46.0, - "step": 3990 - }, - { - "epoch": 0.6426989814404767, - "grad_norm": 0.0006970431422814727, - "learning_rate": 0.00019999979718170638, - "loss": 46.0, - "step": 3991 - }, - { - "epoch": 0.6428600185192641, - "grad_norm": 0.00064929632935673, - "learning_rate": 0.00019999979707980049, - "loss": 46.0, - "step": 3992 - }, - { - "epoch": 0.6430210555980514, - "grad_norm": 0.00061324069974944, - "learning_rate": 0.000199999796977869, - "loss": 46.0, - "step": 3993 - }, - { - "epoch": 0.6431820926768388, - "grad_norm": 0.00045321404468268156, - "learning_rate": 0.00019999979687591188, - "loss": 46.0, - "step": 3994 - }, - { - "epoch": 0.6433431297556262, - "grad_norm": 0.0008014366612769663, - "learning_rate": 0.0001999997967739292, - "loss": 46.0, - "step": 3995 - }, - { - "epoch": 0.6435041668344136, - "grad_norm": 0.0003293314657639712, - "learning_rate": 0.00019999979667192092, - "loss": 46.0, - "step": 3996 - }, - { - "epoch": 0.643665203913201, - "grad_norm": 0.001550797838717699, - "learning_rate": 0.00019999979656988703, - "loss": 46.0, - "step": 3997 - }, - { - "epoch": 0.6438262409919884, - "grad_norm": 0.00034418125869706273, - "learning_rate": 0.00019999979646782758, - "loss": 46.0, - "step": 3998 - }, - { - "epoch": 0.6439872780707758, - "grad_norm": 0.000606780347879976, - "learning_rate": 0.0001999997963657425, - "loss": 46.0, - "step": 3999 - }, - { - "epoch": 0.6441483151495632, - "grad_norm": 0.000773335515987128, - "learning_rate": 0.00019999979626363185, - "loss": 46.0, - "step": 4000 - }, - { - "epoch": 0.6443093522283506, - "grad_norm": 0.00028685005963779986, - "learning_rate": 0.00019999979616149558, - "loss": 46.0, - "step": 4001 - }, - { - "epoch": 0.644470389307138, - "grad_norm": 0.0018651674035936594, - "learning_rate": 0.00019999979605933373, - "loss": 46.0, - "step": 4002 - }, - { - "epoch": 0.6446314263859254, - "grad_norm": 0.0023687323555350304, - "learning_rate": 0.0001999997959571463, - "loss": 46.0, - "step": 4003 - }, - { - "epoch": 0.6447924634647128, - "grad_norm": 0.0006122443592175841, - "learning_rate": 0.00019999979585493325, - "loss": 46.0, - "step": 4004 - }, - { - "epoch": 0.6449535005435001, - "grad_norm": 0.0003398013941477984, - "learning_rate": 0.00019999979575269463, - "loss": 46.0, - "step": 4005 - }, - { - "epoch": 0.6451145376222875, - "grad_norm": 0.0005419240333139896, - "learning_rate": 0.00019999979565043038, - "loss": 46.0, - "step": 4006 - }, - { - "epoch": 0.6452755747010749, - "grad_norm": 0.0004764005425386131, - "learning_rate": 0.00019999979554814057, - "loss": 46.0, - "step": 4007 - }, - { - "epoch": 0.6454366117798623, - "grad_norm": 0.000622907595243305, - "learning_rate": 0.00019999979544582517, - "loss": 46.0, - "step": 4008 - }, - { - "epoch": 0.6455976488586497, - "grad_norm": 0.0005630090017803013, - "learning_rate": 0.00019999979534348415, - "loss": 46.0, - "step": 4009 - }, - { - "epoch": 0.6457586859374371, - "grad_norm": 0.0012180102057754993, - "learning_rate": 0.00019999979524111753, - "loss": 46.0, - "step": 4010 - }, - { - "epoch": 0.6459197230162245, - "grad_norm": 0.0009136570733971894, - "learning_rate": 0.0001999997951387253, - "loss": 46.0, - "step": 4011 - }, - { - "epoch": 0.6460807600950119, - "grad_norm": 0.0005233532865531743, - "learning_rate": 0.00019999979503630753, - "loss": 46.0, - "step": 4012 - }, - { - "epoch": 0.6462417971737993, - "grad_norm": 0.0008663739426992834, - "learning_rate": 0.00019999979493386414, - "loss": 46.0, - "step": 4013 - }, - { - "epoch": 0.6464028342525867, - "grad_norm": 0.0011348624248057604, - "learning_rate": 0.00019999979483139514, - "loss": 46.0, - "step": 4014 - }, - { - "epoch": 0.646563871331374, - "grad_norm": 0.001946570468135178, - "learning_rate": 0.00019999979472890055, - "loss": 46.0, - "step": 4015 - }, - { - "epoch": 0.6467249084101614, - "grad_norm": 0.0005715051665902138, - "learning_rate": 0.0001999997946263804, - "loss": 46.0, - "step": 4016 - }, - { - "epoch": 0.6468859454889488, - "grad_norm": 0.000543768925126642, - "learning_rate": 0.0001999997945238346, - "loss": 46.0, - "step": 4017 - }, - { - "epoch": 0.6470469825677362, - "grad_norm": 0.000579813786316663, - "learning_rate": 0.00019999979442126325, - "loss": 46.0, - "step": 4018 - }, - { - "epoch": 0.6472080196465236, - "grad_norm": 0.0011095544323325157, - "learning_rate": 0.00019999979431866626, - "loss": 46.0, - "step": 4019 - }, - { - "epoch": 0.647369056725311, - "grad_norm": 0.0003147822862956673, - "learning_rate": 0.00019999979421604373, - "loss": 46.0, - "step": 4020 - }, - { - "epoch": 0.6475300938040984, - "grad_norm": 0.0009261103696189821, - "learning_rate": 0.00019999979411339559, - "loss": 46.0, - "step": 4021 - }, - { - "epoch": 0.6476911308828858, - "grad_norm": 0.0006047890055924654, - "learning_rate": 0.00019999979401072183, - "loss": 46.0, - "step": 4022 - }, - { - "epoch": 0.6478521679616732, - "grad_norm": 0.0015719252405688167, - "learning_rate": 0.0001999997939080225, - "loss": 46.0, - "step": 4023 - }, - { - "epoch": 0.6480132050404606, - "grad_norm": 0.003473407356068492, - "learning_rate": 0.00019999979380529753, - "loss": 46.0, - "step": 4024 - }, - { - "epoch": 0.648174242119248, - "grad_norm": 0.0007352083339355886, - "learning_rate": 0.000199999793702547, - "loss": 46.0, - "step": 4025 - }, - { - "epoch": 0.6483352791980354, - "grad_norm": 0.0004880555206909776, - "learning_rate": 0.00019999979359977088, - "loss": 46.0, - "step": 4026 - }, - { - "epoch": 0.6484963162768227, - "grad_norm": 0.001536658382974565, - "learning_rate": 0.00019999979349696916, - "loss": 46.0, - "step": 4027 - }, - { - "epoch": 0.6486573533556101, - "grad_norm": 0.0005246897926554084, - "learning_rate": 0.00019999979339414186, - "loss": 46.0, - "step": 4028 - }, - { - "epoch": 0.6488183904343975, - "grad_norm": 0.0006400058628059924, - "learning_rate": 0.00019999979329128893, - "loss": 46.0, - "step": 4029 - }, - { - "epoch": 0.6489794275131849, - "grad_norm": 0.0013222291599959135, - "learning_rate": 0.00019999979318841043, - "loss": 46.0, - "step": 4030 - }, - { - "epoch": 0.6491404645919723, - "grad_norm": 0.0004871092096436769, - "learning_rate": 0.00019999979308550633, - "loss": 46.0, - "step": 4031 - }, - { - "epoch": 0.6493015016707597, - "grad_norm": 0.0011869113659486175, - "learning_rate": 0.00019999979298257662, - "loss": 46.0, - "step": 4032 - }, - { - "epoch": 0.6494625387495471, - "grad_norm": 0.002591888653114438, - "learning_rate": 0.00019999979287962133, - "loss": 46.0, - "step": 4033 - }, - { - "epoch": 0.6496235758283345, - "grad_norm": 0.00039742272929288447, - "learning_rate": 0.00019999979277664047, - "loss": 46.0, - "step": 4034 - }, - { - "epoch": 0.6497846129071219, - "grad_norm": 0.00040701148100197315, - "learning_rate": 0.00019999979267363397, - "loss": 46.0, - "step": 4035 - }, - { - "epoch": 0.6499456499859092, - "grad_norm": 0.00034645546111278236, - "learning_rate": 0.00019999979257060192, - "loss": 46.0, - "step": 4036 - }, - { - "epoch": 0.6501066870646967, - "grad_norm": 0.0005740129272453487, - "learning_rate": 0.00019999979246754424, - "loss": 46.0, - "step": 4037 - }, - { - "epoch": 0.650267724143484, - "grad_norm": 0.0013352630194276571, - "learning_rate": 0.00019999979236446098, - "loss": 46.0, - "step": 4038 - }, - { - "epoch": 0.6504287612222714, - "grad_norm": 0.0011539635015651584, - "learning_rate": 0.00019999979226135214, - "loss": 46.0, - "step": 4039 - }, - { - "epoch": 0.6505897983010588, - "grad_norm": 0.0005610228981822729, - "learning_rate": 0.00019999979215821768, - "loss": 46.0, - "step": 4040 - }, - { - "epoch": 0.6507508353798462, - "grad_norm": 0.0004671792557928711, - "learning_rate": 0.00019999979205505763, - "loss": 46.0, - "step": 4041 - }, - { - "epoch": 0.6509118724586336, - "grad_norm": 0.0005615011905319989, - "learning_rate": 0.000199999791951872, - "loss": 46.0, - "step": 4042 - }, - { - "epoch": 0.651072909537421, - "grad_norm": 0.0011466463329270482, - "learning_rate": 0.00019999979184866075, - "loss": 46.0, - "step": 4043 - }, - { - "epoch": 0.6512339466162084, - "grad_norm": 0.0005127380136400461, - "learning_rate": 0.0001999997917454239, - "loss": 46.0, - "step": 4044 - }, - { - "epoch": 0.6513949836949958, - "grad_norm": 0.0005644343327730894, - "learning_rate": 0.00019999979164216146, - "loss": 46.0, - "step": 4045 - }, - { - "epoch": 0.6515560207737832, - "grad_norm": 0.0009933200199157, - "learning_rate": 0.00019999979153887348, - "loss": 46.0, - "step": 4046 - }, - { - "epoch": 0.6517170578525705, - "grad_norm": 0.0008299188921228051, - "learning_rate": 0.00019999979143555985, - "loss": 46.0, - "step": 4047 - }, - { - "epoch": 0.651878094931358, - "grad_norm": 0.0002949825720861554, - "learning_rate": 0.00019999979133222064, - "loss": 46.0, - "step": 4048 - }, - { - "epoch": 0.6520391320101453, - "grad_norm": 0.0014827088452875614, - "learning_rate": 0.0001999997912288558, - "loss": 46.0, - "step": 4049 - }, - { - "epoch": 0.6522001690889327, - "grad_norm": 0.0003980410401709378, - "learning_rate": 0.0001999997911254654, - "loss": 46.0, - "step": 4050 - }, - { - "epoch": 0.6523612061677201, - "grad_norm": 0.001480201375670731, - "learning_rate": 0.00019999979102204942, - "loss": 46.0, - "step": 4051 - }, - { - "epoch": 0.6525222432465075, - "grad_norm": 0.0009770323522388935, - "learning_rate": 0.00019999979091860784, - "loss": 46.0, - "step": 4052 - }, - { - "epoch": 0.6526832803252949, - "grad_norm": 0.001219235360622406, - "learning_rate": 0.00019999979081514066, - "loss": 46.0, - "step": 4053 - }, - { - "epoch": 0.6528443174040823, - "grad_norm": 0.0005848513683304191, - "learning_rate": 0.00019999979071164787, - "loss": 46.0, - "step": 4054 - }, - { - "epoch": 0.6530053544828697, - "grad_norm": 0.0008878202643245459, - "learning_rate": 0.0001999997906081295, - "loss": 46.0, - "step": 4055 - }, - { - "epoch": 0.6531663915616571, - "grad_norm": 0.00041669944766908884, - "learning_rate": 0.00019999979050458553, - "loss": 46.0, - "step": 4056 - }, - { - "epoch": 0.6533274286404445, - "grad_norm": 0.0007548247813247144, - "learning_rate": 0.00019999979040101598, - "loss": 46.0, - "step": 4057 - }, - { - "epoch": 0.6534884657192318, - "grad_norm": 0.00044422075734473765, - "learning_rate": 0.0001999997902974208, - "loss": 46.0, - "step": 4058 - }, - { - "epoch": 0.6536495027980193, - "grad_norm": 0.0004771124222315848, - "learning_rate": 0.00019999979019380004, - "loss": 46.0, - "step": 4059 - }, - { - "epoch": 0.6538105398768066, - "grad_norm": 0.0005559561541303992, - "learning_rate": 0.0001999997900901537, - "loss": 46.0, - "step": 4060 - }, - { - "epoch": 0.653971576955594, - "grad_norm": 0.0031795059330761433, - "learning_rate": 0.00019999978998648174, - "loss": 46.0, - "step": 4061 - }, - { - "epoch": 0.6541326140343814, - "grad_norm": 0.0013497446198016405, - "learning_rate": 0.0001999997898827842, - "loss": 46.0, - "step": 4062 - }, - { - "epoch": 0.6542936511131688, - "grad_norm": 0.004771994426846504, - "learning_rate": 0.00019999978977906107, - "loss": 46.0, - "step": 4063 - }, - { - "epoch": 0.6544546881919562, - "grad_norm": 0.0005237840232439339, - "learning_rate": 0.00019999978967531233, - "loss": 46.0, - "step": 4064 - }, - { - "epoch": 0.6546157252707436, - "grad_norm": 0.0022218686062842607, - "learning_rate": 0.00019999978957153803, - "loss": 46.0, - "step": 4065 - }, - { - "epoch": 0.654776762349531, - "grad_norm": 0.0006077251164242625, - "learning_rate": 0.0001999997894677381, - "loss": 46.0, - "step": 4066 - }, - { - "epoch": 0.6549377994283184, - "grad_norm": 0.003656107233837247, - "learning_rate": 0.00019999978936391258, - "loss": 46.0, - "step": 4067 - }, - { - "epoch": 0.6550988365071058, - "grad_norm": 0.0009901061421260238, - "learning_rate": 0.0001999997892600615, - "loss": 46.0, - "step": 4068 - }, - { - "epoch": 0.6552598735858931, - "grad_norm": 0.0008633272955194116, - "learning_rate": 0.00019999978915618478, - "loss": 46.0, - "step": 4069 - }, - { - "epoch": 0.6554209106646806, - "grad_norm": 0.0005279080360196531, - "learning_rate": 0.0001999997890522825, - "loss": 46.0, - "step": 4070 - }, - { - "epoch": 0.6555819477434679, - "grad_norm": 0.0006885799812152982, - "learning_rate": 0.0001999997889483546, - "loss": 46.0, - "step": 4071 - }, - { - "epoch": 0.6557429848222553, - "grad_norm": 0.0006982874474488199, - "learning_rate": 0.0001999997888444011, - "loss": 46.0, - "step": 4072 - }, - { - "epoch": 0.6559040219010427, - "grad_norm": 0.0008390130242332816, - "learning_rate": 0.00019999978874042204, - "loss": 46.0, - "step": 4073 - }, - { - "epoch": 0.6560650589798301, - "grad_norm": 0.0003052345127798617, - "learning_rate": 0.00019999978863641734, - "loss": 46.0, - "step": 4074 - }, - { - "epoch": 0.6562260960586175, - "grad_norm": 0.0006468112114816904, - "learning_rate": 0.00019999978853238708, - "loss": 46.0, - "step": 4075 - }, - { - "epoch": 0.6563871331374049, - "grad_norm": 0.000711954606231302, - "learning_rate": 0.00019999978842833122, - "loss": 46.0, - "step": 4076 - }, - { - "epoch": 0.6565481702161923, - "grad_norm": 0.00034803905873559415, - "learning_rate": 0.00019999978832424976, - "loss": 46.0, - "step": 4077 - }, - { - "epoch": 0.6567092072949797, - "grad_norm": 0.0005067437305115163, - "learning_rate": 0.00019999978822014266, - "loss": 46.0, - "step": 4078 - }, - { - "epoch": 0.6568702443737671, - "grad_norm": 0.0004636379308067262, - "learning_rate": 0.00019999978811601003, - "loss": 46.0, - "step": 4079 - }, - { - "epoch": 0.6570312814525544, - "grad_norm": 0.0005774891469627619, - "learning_rate": 0.00019999978801185178, - "loss": 46.0, - "step": 4080 - }, - { - "epoch": 0.6571923185313419, - "grad_norm": 0.0004242188297212124, - "learning_rate": 0.00019999978790766795, - "loss": 46.0, - "step": 4081 - }, - { - "epoch": 0.6573533556101292, - "grad_norm": 0.002381916856393218, - "learning_rate": 0.0001999997878034585, - "loss": 46.0, - "step": 4082 - }, - { - "epoch": 0.6575143926889166, - "grad_norm": 0.0035492258612066507, - "learning_rate": 0.0001999997876992235, - "loss": 46.0, - "step": 4083 - }, - { - "epoch": 0.657675429767704, - "grad_norm": 0.0010473319562152028, - "learning_rate": 0.00019999978759496285, - "loss": 46.0, - "step": 4084 - }, - { - "epoch": 0.6578364668464914, - "grad_norm": 0.0007973707397468388, - "learning_rate": 0.00019999978749067664, - "loss": 46.0, - "step": 4085 - }, - { - "epoch": 0.6579975039252788, - "grad_norm": 0.0007441894267685711, - "learning_rate": 0.00019999978738636482, - "loss": 46.0, - "step": 4086 - }, - { - "epoch": 0.6581585410040662, - "grad_norm": 0.000384774204576388, - "learning_rate": 0.0001999997872820274, - "loss": 46.0, - "step": 4087 - }, - { - "epoch": 0.6583195780828536, - "grad_norm": 0.0011437773937359452, - "learning_rate": 0.0001999997871776644, - "loss": 46.0, - "step": 4088 - }, - { - "epoch": 0.6584806151616409, - "grad_norm": 0.002420299919322133, - "learning_rate": 0.0001999997870732758, - "loss": 46.0, - "step": 4089 - }, - { - "epoch": 0.6586416522404284, - "grad_norm": 0.002654311014339328, - "learning_rate": 0.00019999978696886163, - "loss": 46.0, - "step": 4090 - }, - { - "epoch": 0.6588026893192157, - "grad_norm": 0.0023667917121201754, - "learning_rate": 0.0001999997868644218, - "loss": 46.0, - "step": 4091 - }, - { - "epoch": 0.6589637263980032, - "grad_norm": 0.0006622622604481876, - "learning_rate": 0.00019999978675995644, - "loss": 46.0, - "step": 4092 - }, - { - "epoch": 0.6591247634767905, - "grad_norm": 0.000863416469655931, - "learning_rate": 0.00019999978665546545, - "loss": 46.0, - "step": 4093 - }, - { - "epoch": 0.659285800555578, - "grad_norm": 0.00053859205218032, - "learning_rate": 0.00019999978655094888, - "loss": 46.0, - "step": 4094 - }, - { - "epoch": 0.6594468376343653, - "grad_norm": 0.001705170376226306, - "learning_rate": 0.00019999978644640672, - "loss": 46.0, - "step": 4095 - }, - { - "epoch": 0.6596078747131527, - "grad_norm": 0.0013821489410474896, - "learning_rate": 0.00019999978634183897, - "loss": 46.0, - "step": 4096 - }, - { - "epoch": 0.6597689117919401, - "grad_norm": 0.0011414185864850879, - "learning_rate": 0.0001999997862372456, - "loss": 46.0, - "step": 4097 - }, - { - "epoch": 0.6599299488707275, - "grad_norm": 0.0017310691764578223, - "learning_rate": 0.00019999978613262665, - "loss": 46.0, - "step": 4098 - }, - { - "epoch": 0.6600909859495149, - "grad_norm": 0.0008604437462054193, - "learning_rate": 0.0001999997860279821, - "loss": 46.0, - "step": 4099 - }, - { - "epoch": 0.6602520230283022, - "grad_norm": 0.0005920581752434373, - "learning_rate": 0.00019999978592331194, - "loss": 46.0, - "step": 4100 - }, - { - "epoch": 0.6604130601070897, - "grad_norm": 0.000564577232580632, - "learning_rate": 0.00019999978581861623, - "loss": 46.0, - "step": 4101 - }, - { - "epoch": 0.660574097185877, - "grad_norm": 0.0005751004791818559, - "learning_rate": 0.00019999978571389487, - "loss": 46.0, - "step": 4102 - }, - { - "epoch": 0.6607351342646645, - "grad_norm": 0.0014589970232918859, - "learning_rate": 0.00019999978560914796, - "loss": 46.0, - "step": 4103 - }, - { - "epoch": 0.6608961713434518, - "grad_norm": 0.0027970534283667803, - "learning_rate": 0.00019999978550437543, - "loss": 46.0, - "step": 4104 - }, - { - "epoch": 0.6610572084222393, - "grad_norm": 0.0016785872867330909, - "learning_rate": 0.00019999978539957735, - "loss": 46.0, - "step": 4105 - }, - { - "epoch": 0.6612182455010266, - "grad_norm": 0.0010435240110382438, - "learning_rate": 0.00019999978529475362, - "loss": 46.0, - "step": 4106 - }, - { - "epoch": 0.661379282579814, - "grad_norm": 0.0005667787627317011, - "learning_rate": 0.0001999997851899043, - "loss": 46.0, - "step": 4107 - }, - { - "epoch": 0.6615403196586014, - "grad_norm": 0.0005285568186081946, - "learning_rate": 0.0001999997850850294, - "loss": 46.0, - "step": 4108 - }, - { - "epoch": 0.6617013567373888, - "grad_norm": 0.000425788079155609, - "learning_rate": 0.0001999997849801289, - "loss": 46.0, - "step": 4109 - }, - { - "epoch": 0.6618623938161762, - "grad_norm": 0.002144289668649435, - "learning_rate": 0.00019999978487520283, - "loss": 46.0, - "step": 4110 - }, - { - "epoch": 0.6620234308949635, - "grad_norm": 0.0004664475563913584, - "learning_rate": 0.00019999978477025114, - "loss": 46.0, - "step": 4111 - }, - { - "epoch": 0.662184467973751, - "grad_norm": 0.0006001853034831583, - "learning_rate": 0.00019999978466527386, - "loss": 46.0, - "step": 4112 - }, - { - "epoch": 0.6623455050525383, - "grad_norm": 0.0008905772701837122, - "learning_rate": 0.00019999978456027096, - "loss": 46.0, - "step": 4113 - }, - { - "epoch": 0.6625065421313258, - "grad_norm": 0.0006841265712864697, - "learning_rate": 0.0001999997844552425, - "loss": 46.0, - "step": 4114 - }, - { - "epoch": 0.6626675792101131, - "grad_norm": 0.0012838974362239242, - "learning_rate": 0.00019999978435018844, - "loss": 46.0, - "step": 4115 - }, - { - "epoch": 0.6628286162889006, - "grad_norm": 0.005266777705401182, - "learning_rate": 0.0001999997842451088, - "loss": 46.0, - "step": 4116 - }, - { - "epoch": 0.6629896533676879, - "grad_norm": 0.0018772606272250414, - "learning_rate": 0.00019999978414000352, - "loss": 46.0, - "step": 4117 - }, - { - "epoch": 0.6631506904464753, - "grad_norm": 0.0008285978110507131, - "learning_rate": 0.00019999978403487266, - "loss": 46.0, - "step": 4118 - }, - { - "epoch": 0.6633117275252627, - "grad_norm": 0.0004435323644429445, - "learning_rate": 0.00019999978392971622, - "loss": 46.0, - "step": 4119 - }, - { - "epoch": 0.6634727646040501, - "grad_norm": 0.0010455332230776548, - "learning_rate": 0.00019999978382453422, - "loss": 46.0, - "step": 4120 - }, - { - "epoch": 0.6636338016828375, - "grad_norm": 0.0004350183589849621, - "learning_rate": 0.00019999978371932657, - "loss": 46.0, - "step": 4121 - }, - { - "epoch": 0.6637948387616248, - "grad_norm": 0.001673016115091741, - "learning_rate": 0.00019999978361409334, - "loss": 46.0, - "step": 4122 - }, - { - "epoch": 0.6639558758404123, - "grad_norm": 0.001579698990099132, - "learning_rate": 0.00019999978350883452, - "loss": 46.0, - "step": 4123 - }, - { - "epoch": 0.6641169129191996, - "grad_norm": 0.0006469959043897688, - "learning_rate": 0.00019999978340355011, - "loss": 46.0, - "step": 4124 - }, - { - "epoch": 0.6642779499979871, - "grad_norm": 0.0008787863189354539, - "learning_rate": 0.0001999997832982401, - "loss": 46.0, - "step": 4125 - }, - { - "epoch": 0.6644389870767744, - "grad_norm": 0.0007085479446686804, - "learning_rate": 0.00019999978319290449, - "loss": 46.0, - "step": 4126 - }, - { - "epoch": 0.6646000241555619, - "grad_norm": 0.0006078773876652122, - "learning_rate": 0.0001999997830875433, - "loss": 46.0, - "step": 4127 - }, - { - "epoch": 0.6647610612343492, - "grad_norm": 0.0033882097341120243, - "learning_rate": 0.00019999978298215648, - "loss": 46.0, - "step": 4128 - }, - { - "epoch": 0.6649220983131366, - "grad_norm": 0.0006846666801720858, - "learning_rate": 0.0001999997828767441, - "loss": 46.0, - "step": 4129 - }, - { - "epoch": 0.665083135391924, - "grad_norm": 0.00039883560384623706, - "learning_rate": 0.0001999997827713061, - "loss": 46.0, - "step": 4130 - }, - { - "epoch": 0.6652441724707114, - "grad_norm": 0.00034085099468939006, - "learning_rate": 0.00019999978266584253, - "loss": 46.0, - "step": 4131 - }, - { - "epoch": 0.6654052095494988, - "grad_norm": 0.00025625157286413014, - "learning_rate": 0.00019999978256035334, - "loss": 46.0, - "step": 4132 - }, - { - "epoch": 0.6655662466282861, - "grad_norm": 0.00106684525962919, - "learning_rate": 0.00019999978245483857, - "loss": 46.0, - "step": 4133 - }, - { - "epoch": 0.6657272837070736, - "grad_norm": 0.002219436690211296, - "learning_rate": 0.00019999978234929819, - "loss": 46.0, - "step": 4134 - }, - { - "epoch": 0.6658883207858609, - "grad_norm": 0.0026410138234496117, - "learning_rate": 0.00019999978224373224, - "loss": 46.0, - "step": 4135 - }, - { - "epoch": 0.6660493578646484, - "grad_norm": 0.0004199472314212471, - "learning_rate": 0.0001999997821381407, - "loss": 46.0, - "step": 4136 - }, - { - "epoch": 0.6662103949434357, - "grad_norm": 0.000729235471226275, - "learning_rate": 0.00019999978203252353, - "loss": 46.0, - "step": 4137 - }, - { - "epoch": 0.6663714320222232, - "grad_norm": 0.00041112376493401825, - "learning_rate": 0.0001999997819268808, - "loss": 46.0, - "step": 4138 - }, - { - "epoch": 0.6665324691010105, - "grad_norm": 0.0013306516921147704, - "learning_rate": 0.00019999978182121245, - "loss": 46.0, - "step": 4139 - }, - { - "epoch": 0.666693506179798, - "grad_norm": 0.0004246679600328207, - "learning_rate": 0.0001999997817155185, - "loss": 46.0, - "step": 4140 - }, - { - "epoch": 0.6668545432585853, - "grad_norm": 0.0003912248939741403, - "learning_rate": 0.00019999978160979899, - "loss": 46.0, - "step": 4141 - }, - { - "epoch": 0.6670155803373726, - "grad_norm": 0.001465828507207334, - "learning_rate": 0.00019999978150405387, - "loss": 46.0, - "step": 4142 - }, - { - "epoch": 0.6671766174161601, - "grad_norm": 0.002138945274055004, - "learning_rate": 0.00019999978139828315, - "loss": 46.0, - "step": 4143 - }, - { - "epoch": 0.6673376544949474, - "grad_norm": 0.002108029555529356, - "learning_rate": 0.00019999978129248684, - "loss": 46.0, - "step": 4144 - }, - { - "epoch": 0.6674986915737349, - "grad_norm": 0.0013364724582061172, - "learning_rate": 0.0001999997811866649, - "loss": 46.0, - "step": 4145 - }, - { - "epoch": 0.6676597286525222, - "grad_norm": 0.0006084447377361357, - "learning_rate": 0.0001999997810808174, - "loss": 46.0, - "step": 4146 - }, - { - "epoch": 0.6678207657313097, - "grad_norm": 0.0013225043658167124, - "learning_rate": 0.00019999978097494432, - "loss": 46.0, - "step": 4147 - }, - { - "epoch": 0.667981802810097, - "grad_norm": 0.0004937726189382374, - "learning_rate": 0.00019999978086904563, - "loss": 46.0, - "step": 4148 - }, - { - "epoch": 0.6681428398888845, - "grad_norm": 0.0010499687632545829, - "learning_rate": 0.00019999978076312133, - "loss": 46.0, - "step": 4149 - }, - { - "epoch": 0.6683038769676718, - "grad_norm": 0.0005146781331859529, - "learning_rate": 0.00019999978065717144, - "loss": 46.0, - "step": 4150 - }, - { - "epoch": 0.6684649140464592, - "grad_norm": 0.00031648215372115374, - "learning_rate": 0.000199999780551196, - "loss": 46.0, - "step": 4151 - }, - { - "epoch": 0.6686259511252466, - "grad_norm": 0.0007224585278891027, - "learning_rate": 0.00019999978044519493, - "loss": 46.0, - "step": 4152 - }, - { - "epoch": 0.6687869882040339, - "grad_norm": 0.001101408852264285, - "learning_rate": 0.00019999978033916825, - "loss": 46.0, - "step": 4153 - }, - { - "epoch": 0.6689480252828214, - "grad_norm": 0.0010674436343833804, - "learning_rate": 0.00019999978023311598, - "loss": 46.0, - "step": 4154 - }, - { - "epoch": 0.6691090623616087, - "grad_norm": 0.0031106816604733467, - "learning_rate": 0.00019999978012703813, - "loss": 46.0, - "step": 4155 - }, - { - "epoch": 0.6692700994403962, - "grad_norm": 0.0010225242003798485, - "learning_rate": 0.00019999978002093466, - "loss": 46.0, - "step": 4156 - }, - { - "epoch": 0.6694311365191835, - "grad_norm": 0.004867867100983858, - "learning_rate": 0.00019999977991480564, - "loss": 46.0, - "step": 4157 - }, - { - "epoch": 0.669592173597971, - "grad_norm": 0.0024208661634474993, - "learning_rate": 0.000199999779808651, - "loss": 46.0, - "step": 4158 - }, - { - "epoch": 0.6697532106767583, - "grad_norm": 0.0013799263397231698, - "learning_rate": 0.00019999977970247074, - "loss": 46.0, - "step": 4159 - }, - { - "epoch": 0.6699142477555458, - "grad_norm": 0.00043896023998968303, - "learning_rate": 0.00019999977959626492, - "loss": 46.0, - "step": 4160 - }, - { - "epoch": 0.6700752848343331, - "grad_norm": 0.0008262143819592893, - "learning_rate": 0.0001999997794900335, - "loss": 46.0, - "step": 4161 - }, - { - "epoch": 0.6702363219131205, - "grad_norm": 0.0014996931422501802, - "learning_rate": 0.00019999977938377645, - "loss": 46.0, - "step": 4162 - }, - { - "epoch": 0.6703973589919079, - "grad_norm": 0.0005781179643236101, - "learning_rate": 0.00019999977927749385, - "loss": 46.0, - "step": 4163 - }, - { - "epoch": 0.6705583960706952, - "grad_norm": 0.0006504025659523904, - "learning_rate": 0.00019999977917118565, - "loss": 46.0, - "step": 4164 - }, - { - "epoch": 0.6707194331494827, - "grad_norm": 0.0010935229947790504, - "learning_rate": 0.00019999977906485182, - "loss": 46.0, - "step": 4165 - }, - { - "epoch": 0.67088047022827, - "grad_norm": 0.0011264767963439226, - "learning_rate": 0.00019999977895849243, - "loss": 46.0, - "step": 4166 - }, - { - "epoch": 0.6710415073070575, - "grad_norm": 0.0012723105028271675, - "learning_rate": 0.00019999977885210742, - "loss": 46.0, - "step": 4167 - }, - { - "epoch": 0.6712025443858448, - "grad_norm": 0.0024030390195548534, - "learning_rate": 0.00019999977874569685, - "loss": 46.0, - "step": 4168 - }, - { - "epoch": 0.6713635814646323, - "grad_norm": 0.0013126195408403873, - "learning_rate": 0.00019999977863926067, - "loss": 46.0, - "step": 4169 - }, - { - "epoch": 0.6715246185434196, - "grad_norm": 0.0024808617308735847, - "learning_rate": 0.0001999997785327989, - "loss": 46.0, - "step": 4170 - }, - { - "epoch": 0.6716856556222071, - "grad_norm": 0.0005823391838930547, - "learning_rate": 0.0001999997784263115, - "loss": 46.0, - "step": 4171 - }, - { - "epoch": 0.6718466927009944, - "grad_norm": 0.0003966906515415758, - "learning_rate": 0.00019999977831979855, - "loss": 46.0, - "step": 4172 - }, - { - "epoch": 0.6720077297797818, - "grad_norm": 0.0016826341161504388, - "learning_rate": 0.00019999977821325997, - "loss": 46.0, - "step": 4173 - }, - { - "epoch": 0.6721687668585692, - "grad_norm": 0.0005524946027435362, - "learning_rate": 0.00019999977810669582, - "loss": 46.0, - "step": 4174 - }, - { - "epoch": 0.6723298039373565, - "grad_norm": 0.0005389831494539976, - "learning_rate": 0.00019999977800010606, - "loss": 46.0, - "step": 4175 - }, - { - "epoch": 0.672490841016144, - "grad_norm": 0.0015466846525669098, - "learning_rate": 0.00019999977789349072, - "loss": 46.0, - "step": 4176 - }, - { - "epoch": 0.6726518780949313, - "grad_norm": 0.0007032767753116786, - "learning_rate": 0.00019999977778684979, - "loss": 46.0, - "step": 4177 - }, - { - "epoch": 0.6728129151737188, - "grad_norm": 0.0026330510154366493, - "learning_rate": 0.00019999977768018324, - "loss": 46.0, - "step": 4178 - }, - { - "epoch": 0.6729739522525061, - "grad_norm": 0.001228445558808744, - "learning_rate": 0.0001999997775734911, - "loss": 46.0, - "step": 4179 - }, - { - "epoch": 0.6731349893312936, - "grad_norm": 0.0008239775779657066, - "learning_rate": 0.00019999977746677335, - "loss": 46.0, - "step": 4180 - }, - { - "epoch": 0.6732960264100809, - "grad_norm": 0.0013179155066609383, - "learning_rate": 0.00019999977736003007, - "loss": 46.0, - "step": 4181 - }, - { - "epoch": 0.6734570634888684, - "grad_norm": 0.0008079595281742513, - "learning_rate": 0.00019999977725326112, - "loss": 46.0, - "step": 4182 - }, - { - "epoch": 0.6736181005676557, - "grad_norm": 0.0025764591991901398, - "learning_rate": 0.00019999977714646664, - "loss": 46.0, - "step": 4183 - }, - { - "epoch": 0.673779137646443, - "grad_norm": 0.0010873764986172318, - "learning_rate": 0.00019999977703964651, - "loss": 46.0, - "step": 4184 - }, - { - "epoch": 0.6739401747252305, - "grad_norm": 0.0004788876394741237, - "learning_rate": 0.0001999997769328008, - "loss": 46.0, - "step": 4185 - }, - { - "epoch": 0.6741012118040178, - "grad_norm": 0.0034970359411090612, - "learning_rate": 0.00019999977682592953, - "loss": 46.0, - "step": 4186 - }, - { - "epoch": 0.6742622488828053, - "grad_norm": 0.0005447661969810724, - "learning_rate": 0.00019999977671903262, - "loss": 46.0, - "step": 4187 - }, - { - "epoch": 0.6744232859615926, - "grad_norm": 0.0012893053935840726, - "learning_rate": 0.00019999977661211014, - "loss": 46.0, - "step": 4188 - }, - { - "epoch": 0.6745843230403801, - "grad_norm": 0.0009679575450718403, - "learning_rate": 0.00019999977650516206, - "loss": 46.0, - "step": 4189 - }, - { - "epoch": 0.6747453601191674, - "grad_norm": 0.000922391249332577, - "learning_rate": 0.00019999977639818838, - "loss": 46.0, - "step": 4190 - }, - { - "epoch": 0.6749063971979549, - "grad_norm": 0.0014050399186089635, - "learning_rate": 0.00019999977629118912, - "loss": 46.0, - "step": 4191 - }, - { - "epoch": 0.6750674342767422, - "grad_norm": 0.002332976320758462, - "learning_rate": 0.00019999977618416424, - "loss": 46.0, - "step": 4192 - }, - { - "epoch": 0.6752284713555297, - "grad_norm": 0.0005326683167368174, - "learning_rate": 0.0001999997760771138, - "loss": 46.0, - "step": 4193 - }, - { - "epoch": 0.675389508434317, - "grad_norm": 0.001163173234090209, - "learning_rate": 0.00019999977597003773, - "loss": 46.0, - "step": 4194 - }, - { - "epoch": 0.6755505455131043, - "grad_norm": 0.00047027115942910314, - "learning_rate": 0.00019999977586293607, - "loss": 46.0, - "step": 4195 - }, - { - "epoch": 0.6757115825918918, - "grad_norm": 0.0009730842430144548, - "learning_rate": 0.00019999977575580884, - "loss": 46.0, - "step": 4196 - }, - { - "epoch": 0.6758726196706791, - "grad_norm": 0.0007298915879800916, - "learning_rate": 0.00019999977564865603, - "loss": 46.0, - "step": 4197 - }, - { - "epoch": 0.6760336567494666, - "grad_norm": 0.0015910177025943995, - "learning_rate": 0.00019999977554147758, - "loss": 46.0, - "step": 4198 - }, - { - "epoch": 0.6761946938282539, - "grad_norm": 0.00048593347310088575, - "learning_rate": 0.00019999977543427356, - "loss": 46.0, - "step": 4199 - }, - { - "epoch": 0.6763557309070414, - "grad_norm": 0.0005243184277787805, - "learning_rate": 0.00019999977532704393, - "loss": 46.0, - "step": 4200 - }, - { - "epoch": 0.6765167679858287, - "grad_norm": 0.0009467373602092266, - "learning_rate": 0.0001999997752197887, - "loss": 46.0, - "step": 4201 - }, - { - "epoch": 0.6766778050646162, - "grad_norm": 0.001184506225399673, - "learning_rate": 0.0001999997751125079, - "loss": 46.0, - "step": 4202 - }, - { - "epoch": 0.6768388421434035, - "grad_norm": 0.0005141497240401804, - "learning_rate": 0.00019999977500520147, - "loss": 46.0, - "step": 4203 - }, - { - "epoch": 0.676999879222191, - "grad_norm": 0.0008613636018708348, - "learning_rate": 0.00019999977489786947, - "loss": 46.0, - "step": 4204 - }, - { - "epoch": 0.6771609163009783, - "grad_norm": 0.0016730212373659015, - "learning_rate": 0.00019999977479051188, - "loss": 46.0, - "step": 4205 - }, - { - "epoch": 0.6773219533797656, - "grad_norm": 0.0006290310993790627, - "learning_rate": 0.0001999997746831287, - "loss": 46.0, - "step": 4206 - }, - { - "epoch": 0.6774829904585531, - "grad_norm": 0.0006873703096061945, - "learning_rate": 0.0001999997745757199, - "loss": 46.0, - "step": 4207 - }, - { - "epoch": 0.6776440275373404, - "grad_norm": 0.0006282299873419106, - "learning_rate": 0.0001999997744682855, - "loss": 46.0, - "step": 4208 - }, - { - "epoch": 0.6778050646161279, - "grad_norm": 0.0007294894312508404, - "learning_rate": 0.00019999977436082556, - "loss": 46.0, - "step": 4209 - }, - { - "epoch": 0.6779661016949152, - "grad_norm": 0.00033628210076130927, - "learning_rate": 0.00019999977425333998, - "loss": 46.0, - "step": 4210 - }, - { - "epoch": 0.6781271387737027, - "grad_norm": 0.0007842194754630327, - "learning_rate": 0.0001999997741458288, - "loss": 46.0, - "step": 4211 - }, - { - "epoch": 0.67828817585249, - "grad_norm": 0.00039297586772590876, - "learning_rate": 0.00019999977403829203, - "loss": 46.0, - "step": 4212 - }, - { - "epoch": 0.6784492129312775, - "grad_norm": 0.0003269016451667994, - "learning_rate": 0.0001999997739307297, - "loss": 46.0, - "step": 4213 - }, - { - "epoch": 0.6786102500100648, - "grad_norm": 0.0021821712143719196, - "learning_rate": 0.00019999977382314173, - "loss": 46.0, - "step": 4214 - }, - { - "epoch": 0.6787712870888523, - "grad_norm": 0.0005154683603905141, - "learning_rate": 0.0001999997737155282, - "loss": 46.0, - "step": 4215 - }, - { - "epoch": 0.6789323241676396, - "grad_norm": 0.0015087351202964783, - "learning_rate": 0.00019999977360788903, - "loss": 46.0, - "step": 4216 - }, - { - "epoch": 0.6790933612464269, - "grad_norm": 0.0030605087522417307, - "learning_rate": 0.00019999977350022431, - "loss": 46.0, - "step": 4217 - }, - { - "epoch": 0.6792543983252144, - "grad_norm": 0.0005566399777308106, - "learning_rate": 0.00019999977339253398, - "loss": 46.0, - "step": 4218 - }, - { - "epoch": 0.6794154354040017, - "grad_norm": 0.0006102929473854601, - "learning_rate": 0.00019999977328481804, - "loss": 46.0, - "step": 4219 - }, - { - "epoch": 0.6795764724827892, - "grad_norm": 0.001174689969047904, - "learning_rate": 0.00019999977317707656, - "loss": 46.0, - "step": 4220 - }, - { - "epoch": 0.6797375095615765, - "grad_norm": 0.001205551903694868, - "learning_rate": 0.0001999997730693094, - "loss": 46.0, - "step": 4221 - }, - { - "epoch": 0.679898546640364, - "grad_norm": 0.0017714441055431962, - "learning_rate": 0.0001999997729615167, - "loss": 46.0, - "step": 4222 - }, - { - "epoch": 0.6800595837191513, - "grad_norm": 0.004116755444556475, - "learning_rate": 0.00019999977285369843, - "loss": 46.0, - "step": 4223 - }, - { - "epoch": 0.6802206207979388, - "grad_norm": 0.00029588156030513346, - "learning_rate": 0.0001999997727458545, - "loss": 46.0, - "step": 4224 - }, - { - "epoch": 0.6803816578767261, - "grad_norm": 0.0010703310836106539, - "learning_rate": 0.00019999977263798503, - "loss": 46.0, - "step": 4225 - }, - { - "epoch": 0.6805426949555136, - "grad_norm": 0.0008275836589746177, - "learning_rate": 0.00019999977253008991, - "loss": 46.0, - "step": 4226 - }, - { - "epoch": 0.6807037320343009, - "grad_norm": 0.0009265264379791915, - "learning_rate": 0.00019999977242216924, - "loss": 46.0, - "step": 4227 - }, - { - "epoch": 0.6808647691130882, - "grad_norm": 0.00046720009413547814, - "learning_rate": 0.00019999977231422298, - "loss": 46.0, - "step": 4228 - }, - { - "epoch": 0.6810258061918757, - "grad_norm": 0.00045534424134530127, - "learning_rate": 0.00019999977220625109, - "loss": 46.0, - "step": 4229 - }, - { - "epoch": 0.681186843270663, - "grad_norm": 0.00221816822886467, - "learning_rate": 0.00019999977209825363, - "loss": 46.0, - "step": 4230 - }, - { - "epoch": 0.6813478803494505, - "grad_norm": 0.0007783073815517128, - "learning_rate": 0.00019999977199023058, - "loss": 46.0, - "step": 4231 - }, - { - "epoch": 0.6815089174282378, - "grad_norm": 0.0011567584006115794, - "learning_rate": 0.00019999977188218192, - "loss": 46.0, - "step": 4232 - }, - { - "epoch": 0.6816699545070253, - "grad_norm": 0.005762670189142227, - "learning_rate": 0.00019999977177410764, - "loss": 46.0, - "step": 4233 - }, - { - "epoch": 0.6818309915858126, - "grad_norm": 0.0010815223213285208, - "learning_rate": 0.0001999997716660078, - "loss": 46.0, - "step": 4234 - }, - { - "epoch": 0.6819920286646001, - "grad_norm": 0.0031720888800919056, - "learning_rate": 0.00019999977155788236, - "loss": 46.0, - "step": 4235 - }, - { - "epoch": 0.6821530657433874, - "grad_norm": 0.0006679006037302315, - "learning_rate": 0.00019999977144973135, - "loss": 46.0, - "step": 4236 - }, - { - "epoch": 0.6823141028221748, - "grad_norm": 0.0002713054418563843, - "learning_rate": 0.0001999997713415547, - "loss": 46.0, - "step": 4237 - }, - { - "epoch": 0.6824751399009622, - "grad_norm": 0.0004060633364133537, - "learning_rate": 0.00019999977123335248, - "loss": 46.0, - "step": 4238 - }, - { - "epoch": 0.6826361769797495, - "grad_norm": 0.0012862241128459573, - "learning_rate": 0.00019999977112512466, - "loss": 46.0, - "step": 4239 - }, - { - "epoch": 0.682797214058537, - "grad_norm": 0.0013272602809593081, - "learning_rate": 0.00019999977101687125, - "loss": 46.0, - "step": 4240 - }, - { - "epoch": 0.6829582511373243, - "grad_norm": 0.0010185547871515155, - "learning_rate": 0.00019999977090859222, - "loss": 46.0, - "step": 4241 - }, - { - "epoch": 0.6831192882161118, - "grad_norm": 0.0009247910347767174, - "learning_rate": 0.0001999997708002876, - "loss": 46.0, - "step": 4242 - }, - { - "epoch": 0.6832803252948991, - "grad_norm": 0.0007376632420346141, - "learning_rate": 0.0001999997706919574, - "loss": 46.0, - "step": 4243 - }, - { - "epoch": 0.6834413623736866, - "grad_norm": 0.0004836310399696231, - "learning_rate": 0.00019999977058360161, - "loss": 46.0, - "step": 4244 - }, - { - "epoch": 0.6836023994524739, - "grad_norm": 0.000720982498023659, - "learning_rate": 0.00019999977047522024, - "loss": 46.0, - "step": 4245 - }, - { - "epoch": 0.6837634365312614, - "grad_norm": 0.0019374929834157228, - "learning_rate": 0.00019999977036681322, - "loss": 46.0, - "step": 4246 - }, - { - "epoch": 0.6839244736100487, - "grad_norm": 0.0006270456360653043, - "learning_rate": 0.00019999977025838067, - "loss": 46.0, - "step": 4247 - }, - { - "epoch": 0.684085510688836, - "grad_norm": 0.0009443956078030169, - "learning_rate": 0.0001999997701499225, - "loss": 46.0, - "step": 4248 - }, - { - "epoch": 0.6842465477676235, - "grad_norm": 0.0007357310969382524, - "learning_rate": 0.00019999977004143873, - "loss": 46.0, - "step": 4249 - }, - { - "epoch": 0.6844075848464108, - "grad_norm": 0.0008445387938991189, - "learning_rate": 0.00019999976993292936, - "loss": 46.0, - "step": 4250 - }, - { - "epoch": 0.6845686219251983, - "grad_norm": 0.0009629289270378649, - "learning_rate": 0.0001999997698243944, - "loss": 46.0, - "step": 4251 - }, - { - "epoch": 0.6847296590039856, - "grad_norm": 0.0006416314863599837, - "learning_rate": 0.00019999976971583382, - "loss": 46.0, - "step": 4252 - }, - { - "epoch": 0.6848906960827731, - "grad_norm": 0.001572751789353788, - "learning_rate": 0.00019999976960724772, - "loss": 46.0, - "step": 4253 - }, - { - "epoch": 0.6850517331615604, - "grad_norm": 0.000453545042546466, - "learning_rate": 0.00019999976949863597, - "loss": 46.0, - "step": 4254 - }, - { - "epoch": 0.6852127702403479, - "grad_norm": 0.00030573667027056217, - "learning_rate": 0.0001999997693899986, - "loss": 46.0, - "step": 4255 - }, - { - "epoch": 0.6853738073191352, - "grad_norm": 0.0007804098422639072, - "learning_rate": 0.00019999976928133568, - "loss": 46.0, - "step": 4256 - }, - { - "epoch": 0.6855348443979227, - "grad_norm": 0.0004297084524296224, - "learning_rate": 0.00019999976917264715, - "loss": 46.0, - "step": 4257 - }, - { - "epoch": 0.68569588147671, - "grad_norm": 0.0005432379548437893, - "learning_rate": 0.00019999976906393303, - "loss": 46.0, - "step": 4258 - }, - { - "epoch": 0.6858569185554974, - "grad_norm": 0.0017453789478167892, - "learning_rate": 0.0001999997689551933, - "loss": 46.0, - "step": 4259 - }, - { - "epoch": 0.6860179556342848, - "grad_norm": 0.0005355547182261944, - "learning_rate": 0.000199999768846428, - "loss": 46.0, - "step": 4260 - }, - { - "epoch": 0.6861789927130721, - "grad_norm": 0.0028631852474063635, - "learning_rate": 0.0001999997687376371, - "loss": 46.0, - "step": 4261 - }, - { - "epoch": 0.6863400297918596, - "grad_norm": 0.0020064173731952906, - "learning_rate": 0.0001999997686288206, - "loss": 46.0, - "step": 4262 - }, - { - "epoch": 0.6865010668706469, - "grad_norm": 0.0022759444545954466, - "learning_rate": 0.0001999997685199785, - "loss": 46.0, - "step": 4263 - }, - { - "epoch": 0.6866621039494344, - "grad_norm": 0.0007143968250602484, - "learning_rate": 0.0001999997684111108, - "loss": 46.0, - "step": 4264 - }, - { - "epoch": 0.6868231410282217, - "grad_norm": 0.000550659082364291, - "learning_rate": 0.00019999976830221752, - "loss": 46.0, - "step": 4265 - }, - { - "epoch": 0.6869841781070092, - "grad_norm": 0.00155602116137743, - "learning_rate": 0.00019999976819329865, - "loss": 46.0, - "step": 4266 - }, - { - "epoch": 0.6871452151857965, - "grad_norm": 0.001805823645554483, - "learning_rate": 0.00019999976808435417, - "loss": 46.0, - "step": 4267 - }, - { - "epoch": 0.687306252264584, - "grad_norm": 0.0010955483885481954, - "learning_rate": 0.0001999997679753841, - "loss": 46.0, - "step": 4268 - }, - { - "epoch": 0.6874672893433713, - "grad_norm": 0.002118520438671112, - "learning_rate": 0.00019999976786638844, - "loss": 46.0, - "step": 4269 - }, - { - "epoch": 0.6876283264221587, - "grad_norm": 0.001822443213313818, - "learning_rate": 0.00019999976775736714, - "loss": 46.0, - "step": 4270 - }, - { - "epoch": 0.6877893635009461, - "grad_norm": 0.0005718372412957251, - "learning_rate": 0.0001999997676483203, - "loss": 46.0, - "step": 4271 - }, - { - "epoch": 0.6879504005797334, - "grad_norm": 0.0029179533012211323, - "learning_rate": 0.00019999976753924784, - "loss": 46.0, - "step": 4272 - }, - { - "epoch": 0.6881114376585209, - "grad_norm": 0.0024086886551231146, - "learning_rate": 0.00019999976743014983, - "loss": 46.0, - "step": 4273 - }, - { - "epoch": 0.6882724747373082, - "grad_norm": 0.0011706891236826777, - "learning_rate": 0.00019999976732102618, - "loss": 46.0, - "step": 4274 - }, - { - "epoch": 0.6884335118160957, - "grad_norm": 0.000658955832477659, - "learning_rate": 0.00019999976721187695, - "loss": 46.0, - "step": 4275 - }, - { - "epoch": 0.688594548894883, - "grad_norm": 0.0004229762125760317, - "learning_rate": 0.00019999976710270213, - "loss": 46.0, - "step": 4276 - }, - { - "epoch": 0.6887555859736705, - "grad_norm": 0.00032889386056922376, - "learning_rate": 0.0001999997669935017, - "loss": 46.0, - "step": 4277 - }, - { - "epoch": 0.6889166230524578, - "grad_norm": 0.0006168846739456058, - "learning_rate": 0.0001999997668842757, - "loss": 46.0, - "step": 4278 - }, - { - "epoch": 0.6890776601312453, - "grad_norm": 0.001164330285973847, - "learning_rate": 0.00019999976677502406, - "loss": 46.0, - "step": 4279 - }, - { - "epoch": 0.6892386972100326, - "grad_norm": 0.0009718163637444377, - "learning_rate": 0.00019999976666574686, - "loss": 46.0, - "step": 4280 - }, - { - "epoch": 0.68939973428882, - "grad_norm": 0.0008400626247748733, - "learning_rate": 0.00019999976655644405, - "loss": 46.0, - "step": 4281 - }, - { - "epoch": 0.6895607713676074, - "grad_norm": 0.00071126245893538, - "learning_rate": 0.00019999976644711565, - "loss": 46.0, - "step": 4282 - }, - { - "epoch": 0.6897218084463947, - "grad_norm": 0.00064807542366907, - "learning_rate": 0.00019999976633776166, - "loss": 46.0, - "step": 4283 - }, - { - "epoch": 0.6898828455251822, - "grad_norm": 0.0005875331116840243, - "learning_rate": 0.0001999997662283821, - "loss": 46.0, - "step": 4284 - }, - { - "epoch": 0.6900438826039695, - "grad_norm": 0.0006677934434264898, - "learning_rate": 0.00019999976611897687, - "loss": 46.0, - "step": 4285 - }, - { - "epoch": 0.690204919682757, - "grad_norm": 0.0015023647574707866, - "learning_rate": 0.0001999997660095461, - "loss": 46.0, - "step": 4286 - }, - { - "epoch": 0.6903659567615443, - "grad_norm": 0.0013992848107591271, - "learning_rate": 0.00019999976590008974, - "loss": 46.0, - "step": 4287 - }, - { - "epoch": 0.6905269938403318, - "grad_norm": 0.0016881533665582538, - "learning_rate": 0.00019999976579060779, - "loss": 46.0, - "step": 4288 - }, - { - "epoch": 0.6906880309191191, - "grad_norm": 0.0013599763624370098, - "learning_rate": 0.00019999976568110022, - "loss": 46.0, - "step": 4289 - }, - { - "epoch": 0.6908490679979065, - "grad_norm": 0.0005329299601726234, - "learning_rate": 0.00019999976557156704, - "loss": 46.0, - "step": 4290 - }, - { - "epoch": 0.6910101050766939, - "grad_norm": 0.0009675774490460753, - "learning_rate": 0.00019999976546200828, - "loss": 46.0, - "step": 4291 - }, - { - "epoch": 0.6911711421554813, - "grad_norm": 0.000509542238432914, - "learning_rate": 0.00019999976535242395, - "loss": 46.0, - "step": 4292 - }, - { - "epoch": 0.6913321792342687, - "grad_norm": 0.0023436935152858496, - "learning_rate": 0.00019999976524281402, - "loss": 46.0, - "step": 4293 - }, - { - "epoch": 0.691493216313056, - "grad_norm": 0.0007163815316744149, - "learning_rate": 0.00019999976513317846, - "loss": 46.0, - "step": 4294 - }, - { - "epoch": 0.6916542533918435, - "grad_norm": 0.002450268715620041, - "learning_rate": 0.00019999976502351735, - "loss": 46.0, - "step": 4295 - }, - { - "epoch": 0.6918152904706308, - "grad_norm": 0.0003730957396328449, - "learning_rate": 0.00019999976491383065, - "loss": 46.0, - "step": 4296 - }, - { - "epoch": 0.6919763275494183, - "grad_norm": 0.003522138111293316, - "learning_rate": 0.0001999997648041183, - "loss": 46.0, - "step": 4297 - }, - { - "epoch": 0.6921373646282056, - "grad_norm": 0.00035113058402203023, - "learning_rate": 0.0001999997646943804, - "loss": 46.0, - "step": 4298 - }, - { - "epoch": 0.6922984017069931, - "grad_norm": 0.0015195432351902127, - "learning_rate": 0.00019999976458461686, - "loss": 46.0, - "step": 4299 - }, - { - "epoch": 0.6924594387857804, - "grad_norm": 0.0033547542989253998, - "learning_rate": 0.00019999976447482775, - "loss": 46.0, - "step": 4300 - }, - { - "epoch": 0.6926204758645678, - "grad_norm": 0.0008956454694271088, - "learning_rate": 0.00019999976436501306, - "loss": 46.0, - "step": 4301 - }, - { - "epoch": 0.6927815129433552, - "grad_norm": 0.0013032873393967748, - "learning_rate": 0.00019999976425517279, - "loss": 46.0, - "step": 4302 - }, - { - "epoch": 0.6929425500221426, - "grad_norm": 0.0015084196347743273, - "learning_rate": 0.0001999997641453069, - "loss": 46.0, - "step": 4303 - }, - { - "epoch": 0.69310358710093, - "grad_norm": 0.0011208360083401203, - "learning_rate": 0.00019999976403541541, - "loss": 46.0, - "step": 4304 - }, - { - "epoch": 0.6932646241797173, - "grad_norm": 0.001271988032385707, - "learning_rate": 0.00019999976392549832, - "loss": 46.0, - "step": 4305 - }, - { - "epoch": 0.6934256612585048, - "grad_norm": 0.0007630119216628373, - "learning_rate": 0.00019999976381555564, - "loss": 46.0, - "step": 4306 - }, - { - "epoch": 0.6935866983372921, - "grad_norm": 0.0014788095140829682, - "learning_rate": 0.0001999997637055874, - "loss": 46.0, - "step": 4307 - }, - { - "epoch": 0.6937477354160796, - "grad_norm": 0.0014191108057275414, - "learning_rate": 0.00019999976359559352, - "loss": 46.0, - "step": 4308 - }, - { - "epoch": 0.6939087724948669, - "grad_norm": 0.002827615011483431, - "learning_rate": 0.00019999976348557405, - "loss": 46.0, - "step": 4309 - }, - { - "epoch": 0.6940698095736544, - "grad_norm": 0.0012578010791912675, - "learning_rate": 0.00019999976337552902, - "loss": 46.0, - "step": 4310 - }, - { - "epoch": 0.6942308466524417, - "grad_norm": 0.0002686958760023117, - "learning_rate": 0.00019999976326545834, - "loss": 46.0, - "step": 4311 - }, - { - "epoch": 0.6943918837312291, - "grad_norm": 0.0004917251062579453, - "learning_rate": 0.0001999997631553621, - "loss": 46.0, - "step": 4312 - }, - { - "epoch": 0.6945529208100165, - "grad_norm": 0.0008446917636319995, - "learning_rate": 0.0001999997630452403, - "loss": 46.0, - "step": 4313 - }, - { - "epoch": 0.6947139578888039, - "grad_norm": 0.0010952099692076445, - "learning_rate": 0.00019999976293509284, - "loss": 46.0, - "step": 4314 - }, - { - "epoch": 0.6948749949675913, - "grad_norm": 0.0010970853036269546, - "learning_rate": 0.00019999976282491982, - "loss": 46.0, - "step": 4315 - }, - { - "epoch": 0.6950360320463786, - "grad_norm": 0.0005779225029982626, - "learning_rate": 0.0001999997627147212, - "loss": 46.0, - "step": 4316 - }, - { - "epoch": 0.6951970691251661, - "grad_norm": 0.0006116090225987136, - "learning_rate": 0.000199999762604497, - "loss": 46.0, - "step": 4317 - }, - { - "epoch": 0.6953581062039534, - "grad_norm": 0.0010594524210318923, - "learning_rate": 0.00019999976249424718, - "loss": 46.0, - "step": 4318 - }, - { - "epoch": 0.6955191432827409, - "grad_norm": 0.002785651246085763, - "learning_rate": 0.00019999976238397178, - "loss": 46.0, - "step": 4319 - }, - { - "epoch": 0.6956801803615282, - "grad_norm": 0.0014585310127586126, - "learning_rate": 0.00019999976227367077, - "loss": 46.0, - "step": 4320 - }, - { - "epoch": 0.6958412174403157, - "grad_norm": 0.0005903081619180739, - "learning_rate": 0.00019999976216334418, - "loss": 46.0, - "step": 4321 - }, - { - "epoch": 0.696002254519103, - "grad_norm": 0.0004984555998817086, - "learning_rate": 0.000199999762052992, - "loss": 46.0, - "step": 4322 - }, - { - "epoch": 0.6961632915978904, - "grad_norm": 0.0017128164181485772, - "learning_rate": 0.0001999997619426142, - "loss": 46.0, - "step": 4323 - }, - { - "epoch": 0.6963243286766778, - "grad_norm": 0.002269329270347953, - "learning_rate": 0.00019999976183221084, - "loss": 46.0, - "step": 4324 - }, - { - "epoch": 0.6964853657554652, - "grad_norm": 0.0019107286352664232, - "learning_rate": 0.00019999976172178184, - "loss": 46.0, - "step": 4325 - }, - { - "epoch": 0.6966464028342526, - "grad_norm": 0.002972388407215476, - "learning_rate": 0.00019999976161132728, - "loss": 46.0, - "step": 4326 - }, - { - "epoch": 0.69680743991304, - "grad_norm": 0.0006845807074569166, - "learning_rate": 0.0001999997615008471, - "loss": 46.0, - "step": 4327 - }, - { - "epoch": 0.6969684769918274, - "grad_norm": 0.0017358340555801988, - "learning_rate": 0.00019999976139034137, - "loss": 46.0, - "step": 4328 - }, - { - "epoch": 0.6971295140706147, - "grad_norm": 0.0009827794274315238, - "learning_rate": 0.00019999976127981, - "loss": 46.0, - "step": 4329 - }, - { - "epoch": 0.6972905511494022, - "grad_norm": 0.001294991234317422, - "learning_rate": 0.00019999976116925306, - "loss": 46.0, - "step": 4330 - }, - { - "epoch": 0.6974515882281895, - "grad_norm": 0.0012318383669480681, - "learning_rate": 0.00019999976105867048, - "loss": 46.0, - "step": 4331 - }, - { - "epoch": 0.697612625306977, - "grad_norm": 0.00047702481970191, - "learning_rate": 0.00019999976094806237, - "loss": 46.0, - "step": 4332 - }, - { - "epoch": 0.6977736623857643, - "grad_norm": 0.0006607566610909998, - "learning_rate": 0.00019999976083742862, - "loss": 46.0, - "step": 4333 - }, - { - "epoch": 0.6979346994645517, - "grad_norm": 0.00367356208153069, - "learning_rate": 0.0001999997607267693, - "loss": 46.0, - "step": 4334 - }, - { - "epoch": 0.6980957365433391, - "grad_norm": 0.0009460444562137127, - "learning_rate": 0.00019999976061608438, - "loss": 46.0, - "step": 4335 - }, - { - "epoch": 0.6982567736221265, - "grad_norm": 0.002952640177682042, - "learning_rate": 0.00019999976050537387, - "loss": 46.0, - "step": 4336 - }, - { - "epoch": 0.6984178107009139, - "grad_norm": 0.0006764120189473033, - "learning_rate": 0.00019999976039463774, - "loss": 46.0, - "step": 4337 - }, - { - "epoch": 0.6985788477797013, - "grad_norm": 0.0006747061852365732, - "learning_rate": 0.00019999976028387602, - "loss": 46.0, - "step": 4338 - }, - { - "epoch": 0.6987398848584887, - "grad_norm": 0.0006493888795375824, - "learning_rate": 0.00019999976017308875, - "loss": 46.0, - "step": 4339 - }, - { - "epoch": 0.698900921937276, - "grad_norm": 0.00040915925637818873, - "learning_rate": 0.00019999976006227583, - "loss": 46.0, - "step": 4340 - }, - { - "epoch": 0.6990619590160635, - "grad_norm": 0.00402578292414546, - "learning_rate": 0.00019999975995143733, - "loss": 46.0, - "step": 4341 - }, - { - "epoch": 0.6992229960948508, - "grad_norm": 0.0024500973522663116, - "learning_rate": 0.00019999975984057327, - "loss": 46.0, - "step": 4342 - }, - { - "epoch": 0.6993840331736382, - "grad_norm": 0.0026648384518921375, - "learning_rate": 0.00019999975972968356, - "loss": 46.0, - "step": 4343 - }, - { - "epoch": 0.6995450702524256, - "grad_norm": 0.002576041966676712, - "learning_rate": 0.00019999975961876833, - "loss": 46.0, - "step": 4344 - }, - { - "epoch": 0.699706107331213, - "grad_norm": 0.0008924486464820802, - "learning_rate": 0.00019999975950782745, - "loss": 46.0, - "step": 4345 - }, - { - "epoch": 0.6998671444100004, - "grad_norm": 0.0014739540638402104, - "learning_rate": 0.00019999975939686098, - "loss": 46.0, - "step": 4346 - }, - { - "epoch": 0.7000281814887878, - "grad_norm": 0.00282071391120553, - "learning_rate": 0.00019999975928586893, - "loss": 46.0, - "step": 4347 - }, - { - "epoch": 0.7001892185675752, - "grad_norm": 0.0007176279905252159, - "learning_rate": 0.00019999975917485126, - "loss": 46.0, - "step": 4348 - }, - { - "epoch": 0.7003502556463626, - "grad_norm": 0.00041312022949568927, - "learning_rate": 0.00019999975906380798, - "loss": 46.0, - "step": 4349 - }, - { - "epoch": 0.70051129272515, - "grad_norm": 0.0025801435112953186, - "learning_rate": 0.00019999975895273914, - "loss": 46.0, - "step": 4350 - }, - { - "epoch": 0.7006723298039373, - "grad_norm": 0.001084522227756679, - "learning_rate": 0.00019999975884164473, - "loss": 46.0, - "step": 4351 - }, - { - "epoch": 0.7008333668827248, - "grad_norm": 0.004320534411817789, - "learning_rate": 0.0001999997587305247, - "loss": 46.0, - "step": 4352 - }, - { - "epoch": 0.7009944039615121, - "grad_norm": 0.00176756433211267, - "learning_rate": 0.00019999975861937906, - "loss": 46.0, - "step": 4353 - }, - { - "epoch": 0.7011554410402995, - "grad_norm": 0.0005683852941729128, - "learning_rate": 0.00019999975850820784, - "loss": 46.0, - "step": 4354 - }, - { - "epoch": 0.7013164781190869, - "grad_norm": 0.0003886757476720959, - "learning_rate": 0.000199999758397011, - "loss": 46.0, - "step": 4355 - }, - { - "epoch": 0.7014775151978743, - "grad_norm": 0.0002913382777478546, - "learning_rate": 0.00019999975828578862, - "loss": 46.0, - "step": 4356 - }, - { - "epoch": 0.7016385522766617, - "grad_norm": 0.0010053608566522598, - "learning_rate": 0.00019999975817454059, - "loss": 46.0, - "step": 4357 - }, - { - "epoch": 0.7017995893554491, - "grad_norm": 0.0011389650171622634, - "learning_rate": 0.000199999758063267, - "loss": 46.0, - "step": 4358 - }, - { - "epoch": 0.7019606264342365, - "grad_norm": 0.0009005354950204492, - "learning_rate": 0.00019999975795196778, - "loss": 46.0, - "step": 4359 - }, - { - "epoch": 0.7021216635130239, - "grad_norm": 0.0006556838634423912, - "learning_rate": 0.000199999757840643, - "loss": 46.0, - "step": 4360 - }, - { - "epoch": 0.7022827005918113, - "grad_norm": 0.0007158315274864435, - "learning_rate": 0.0001999997577292926, - "loss": 46.0, - "step": 4361 - }, - { - "epoch": 0.7024437376705986, - "grad_norm": 0.0007340789888985455, - "learning_rate": 0.00019999975761791664, - "loss": 46.0, - "step": 4362 - }, - { - "epoch": 0.7026047747493861, - "grad_norm": 0.0005704319919459522, - "learning_rate": 0.00019999975750651505, - "loss": 46.0, - "step": 4363 - }, - { - "epoch": 0.7027658118281734, - "grad_norm": 0.001266828621737659, - "learning_rate": 0.00019999975739508788, - "loss": 46.0, - "step": 4364 - }, - { - "epoch": 0.7029268489069608, - "grad_norm": 0.0019355610711500049, - "learning_rate": 0.0001999997572836351, - "loss": 46.0, - "step": 4365 - }, - { - "epoch": 0.7030878859857482, - "grad_norm": 0.00064177653985098, - "learning_rate": 0.00019999975717215675, - "loss": 46.0, - "step": 4366 - }, - { - "epoch": 0.7032489230645356, - "grad_norm": 0.001236030482687056, - "learning_rate": 0.0001999997570606528, - "loss": 46.0, - "step": 4367 - }, - { - "epoch": 0.703409960143323, - "grad_norm": 0.0017155156238004565, - "learning_rate": 0.00019999975694912325, - "loss": 46.0, - "step": 4368 - }, - { - "epoch": 0.7035709972221104, - "grad_norm": 0.0027009411714971066, - "learning_rate": 0.0001999997568375681, - "loss": 46.0, - "step": 4369 - }, - { - "epoch": 0.7037320343008978, - "grad_norm": 0.0006052309763617814, - "learning_rate": 0.00019999975672598734, - "loss": 46.0, - "step": 4370 - }, - { - "epoch": 0.7038930713796852, - "grad_norm": 0.0006586650270037353, - "learning_rate": 0.000199999756614381, - "loss": 46.0, - "step": 4371 - }, - { - "epoch": 0.7040541084584726, - "grad_norm": 0.000682335696183145, - "learning_rate": 0.00019999975650274908, - "loss": 46.0, - "step": 4372 - }, - { - "epoch": 0.70421514553726, - "grad_norm": 0.0015550417592749, - "learning_rate": 0.00019999975639109155, - "loss": 46.0, - "step": 4373 - }, - { - "epoch": 0.7043761826160474, - "grad_norm": 0.0010525945108383894, - "learning_rate": 0.00019999975627940842, - "loss": 46.0, - "step": 4374 - }, - { - "epoch": 0.7045372196948347, - "grad_norm": 0.0004901184001937509, - "learning_rate": 0.00019999975616769974, - "loss": 46.0, - "step": 4375 - }, - { - "epoch": 0.7046982567736221, - "grad_norm": 0.0007887351093813777, - "learning_rate": 0.00019999975605596541, - "loss": 46.0, - "step": 4376 - }, - { - "epoch": 0.7048592938524095, - "grad_norm": 0.00023493637854699045, - "learning_rate": 0.0001999997559442055, - "loss": 46.0, - "step": 4377 - }, - { - "epoch": 0.7050203309311969, - "grad_norm": 0.0015376863302662969, - "learning_rate": 0.00019999975583242, - "loss": 46.0, - "step": 4378 - }, - { - "epoch": 0.7051813680099843, - "grad_norm": 0.000744383898563683, - "learning_rate": 0.00019999975572060892, - "loss": 46.0, - "step": 4379 - }, - { - "epoch": 0.7053424050887717, - "grad_norm": 0.0011030557798221707, - "learning_rate": 0.00019999975560877221, - "loss": 46.0, - "step": 4380 - }, - { - "epoch": 0.7055034421675591, - "grad_norm": 0.0008488588500767946, - "learning_rate": 0.00019999975549690993, - "loss": 46.0, - "step": 4381 - }, - { - "epoch": 0.7056644792463465, - "grad_norm": 0.0008094905642792583, - "learning_rate": 0.00019999975538502205, - "loss": 46.0, - "step": 4382 - }, - { - "epoch": 0.7058255163251339, - "grad_norm": 0.0013153315521776676, - "learning_rate": 0.0001999997552731086, - "loss": 46.0, - "step": 4383 - }, - { - "epoch": 0.7059865534039212, - "grad_norm": 0.0008418290526606143, - "learning_rate": 0.0001999997551611695, - "loss": 46.0, - "step": 4384 - }, - { - "epoch": 0.7061475904827087, - "grad_norm": 0.0007125269039534032, - "learning_rate": 0.00019999975504920485, - "loss": 46.0, - "step": 4385 - }, - { - "epoch": 0.706308627561496, - "grad_norm": 0.0017784794326871634, - "learning_rate": 0.00019999975493721462, - "loss": 46.0, - "step": 4386 - }, - { - "epoch": 0.7064696646402834, - "grad_norm": 0.0009435164392925799, - "learning_rate": 0.00019999975482519876, - "loss": 46.0, - "step": 4387 - }, - { - "epoch": 0.7066307017190708, - "grad_norm": 0.0021223300136625767, - "learning_rate": 0.0001999997547131573, - "loss": 46.0, - "step": 4388 - }, - { - "epoch": 0.7067917387978582, - "grad_norm": 0.0007367211510427296, - "learning_rate": 0.00019999975460109024, - "loss": 46.0, - "step": 4389 - }, - { - "epoch": 0.7069527758766456, - "grad_norm": 0.0016931348945945501, - "learning_rate": 0.0001999997544889976, - "loss": 46.0, - "step": 4390 - }, - { - "epoch": 0.707113812955433, - "grad_norm": 0.0009905026527121663, - "learning_rate": 0.0001999997543768794, - "loss": 46.0, - "step": 4391 - }, - { - "epoch": 0.7072748500342204, - "grad_norm": 0.00047286628978326917, - "learning_rate": 0.00019999975426473557, - "loss": 46.0, - "step": 4392 - }, - { - "epoch": 0.7074358871130078, - "grad_norm": 0.0011455706553533673, - "learning_rate": 0.00019999975415256613, - "loss": 46.0, - "step": 4393 - }, - { - "epoch": 0.7075969241917952, - "grad_norm": 0.003250241745263338, - "learning_rate": 0.00019999975404037112, - "loss": 46.0, - "step": 4394 - }, - { - "epoch": 0.7077579612705825, - "grad_norm": 0.0009388535981997848, - "learning_rate": 0.0001999997539281505, - "loss": 46.0, - "step": 4395 - }, - { - "epoch": 0.7079189983493699, - "grad_norm": 0.003122034016996622, - "learning_rate": 0.0001999997538159043, - "loss": 46.0, - "step": 4396 - }, - { - "epoch": 0.7080800354281573, - "grad_norm": 0.0007923035882413387, - "learning_rate": 0.00019999975370363249, - "loss": 46.0, - "step": 4397 - }, - { - "epoch": 0.7082410725069447, - "grad_norm": 0.0013338671997189522, - "learning_rate": 0.0001999997535913351, - "loss": 46.0, - "step": 4398 - }, - { - "epoch": 0.7084021095857321, - "grad_norm": 0.0017197271808981895, - "learning_rate": 0.00019999975347901211, - "loss": 46.0, - "step": 4399 - }, - { - "epoch": 0.7085631466645195, - "grad_norm": 0.0010688776383176446, - "learning_rate": 0.00019999975336666353, - "loss": 46.0, - "step": 4400 - }, - { - "epoch": 0.7087241837433069, - "grad_norm": 0.0008712644339539111, - "learning_rate": 0.00019999975325428934, - "loss": 46.0, - "step": 4401 - }, - { - "epoch": 0.7088852208220943, - "grad_norm": 0.0006837205146439373, - "learning_rate": 0.00019999975314188956, - "loss": 46.0, - "step": 4402 - }, - { - "epoch": 0.7090462579008817, - "grad_norm": 0.0012785367434844375, - "learning_rate": 0.00019999975302946422, - "loss": 46.0, - "step": 4403 - }, - { - "epoch": 0.7092072949796691, - "grad_norm": 0.0025434347335249186, - "learning_rate": 0.00019999975291701323, - "loss": 46.0, - "step": 4404 - }, - { - "epoch": 0.7093683320584565, - "grad_norm": 0.0009659446659497917, - "learning_rate": 0.00019999975280453666, - "loss": 46.0, - "step": 4405 - }, - { - "epoch": 0.7095293691372438, - "grad_norm": 0.0008181935409083962, - "learning_rate": 0.00019999975269203453, - "loss": 46.0, - "step": 4406 - }, - { - "epoch": 0.7096904062160312, - "grad_norm": 0.0018432833021506667, - "learning_rate": 0.0001999997525795068, - "loss": 46.0, - "step": 4407 - }, - { - "epoch": 0.7098514432948186, - "grad_norm": 0.0020430530421435833, - "learning_rate": 0.00019999975246695343, - "loss": 46.0, - "step": 4408 - }, - { - "epoch": 0.710012480373606, - "grad_norm": 0.0013659343821927905, - "learning_rate": 0.00019999975235437449, - "loss": 46.0, - "step": 4409 - }, - { - "epoch": 0.7101735174523934, - "grad_norm": 0.0007254041847772896, - "learning_rate": 0.00019999975224176995, - "loss": 46.0, - "step": 4410 - }, - { - "epoch": 0.7103345545311808, - "grad_norm": 0.0003318408562336117, - "learning_rate": 0.00019999975212913983, - "loss": 46.0, - "step": 4411 - }, - { - "epoch": 0.7104955916099682, - "grad_norm": 0.0016765260370448232, - "learning_rate": 0.0001999997520164841, - "loss": 46.0, - "step": 4412 - }, - { - "epoch": 0.7106566286887556, - "grad_norm": 0.0005592864472419024, - "learning_rate": 0.00019999975190380278, - "loss": 46.0, - "step": 4413 - }, - { - "epoch": 0.710817665767543, - "grad_norm": 0.0014356225728988647, - "learning_rate": 0.00019999975179109587, - "loss": 46.0, - "step": 4414 - }, - { - "epoch": 0.7109787028463304, - "grad_norm": 0.0015646725660189986, - "learning_rate": 0.00019999975167836337, - "loss": 46.0, - "step": 4415 - }, - { - "epoch": 0.7111397399251178, - "grad_norm": 0.0006083520711399615, - "learning_rate": 0.00019999975156560526, - "loss": 46.0, - "step": 4416 - }, - { - "epoch": 0.7113007770039051, - "grad_norm": 0.0007206778391264379, - "learning_rate": 0.00019999975145282157, - "loss": 46.0, - "step": 4417 - }, - { - "epoch": 0.7114618140826925, - "grad_norm": 0.0005120691494084895, - "learning_rate": 0.0001999997513400123, - "loss": 46.0, - "step": 4418 - }, - { - "epoch": 0.7116228511614799, - "grad_norm": 0.001734325778670609, - "learning_rate": 0.0001999997512271774, - "loss": 46.0, - "step": 4419 - }, - { - "epoch": 0.7117838882402673, - "grad_norm": 0.0011700481409206986, - "learning_rate": 0.00019999975111431692, - "loss": 46.0, - "step": 4420 - }, - { - "epoch": 0.7119449253190547, - "grad_norm": 0.0006509965169243515, - "learning_rate": 0.00019999975100143082, - "loss": 46.0, - "step": 4421 - }, - { - "epoch": 0.7121059623978421, - "grad_norm": 0.001998740714043379, - "learning_rate": 0.00019999975088851916, - "loss": 46.0, - "step": 4422 - }, - { - "epoch": 0.7122669994766295, - "grad_norm": 0.00327190849930048, - "learning_rate": 0.00019999975077558192, - "loss": 46.0, - "step": 4423 - }, - { - "epoch": 0.7124280365554169, - "grad_norm": 0.0019606011919677258, - "learning_rate": 0.00019999975066261905, - "loss": 46.0, - "step": 4424 - }, - { - "epoch": 0.7125890736342043, - "grad_norm": 0.0006763917626813054, - "learning_rate": 0.00019999975054963058, - "loss": 46.0, - "step": 4425 - }, - { - "epoch": 0.7127501107129917, - "grad_norm": 0.0006627399125136435, - "learning_rate": 0.00019999975043661654, - "loss": 46.0, - "step": 4426 - }, - { - "epoch": 0.7129111477917791, - "grad_norm": 0.0005783293163403869, - "learning_rate": 0.0001999997503235769, - "loss": 46.0, - "step": 4427 - }, - { - "epoch": 0.7130721848705665, - "grad_norm": 0.001110888784751296, - "learning_rate": 0.00019999975021051165, - "loss": 46.0, - "step": 4428 - }, - { - "epoch": 0.7132332219493538, - "grad_norm": 0.001701586996205151, - "learning_rate": 0.0001999997500974208, - "loss": 46.0, - "step": 4429 - }, - { - "epoch": 0.7133942590281412, - "grad_norm": 0.001331550651229918, - "learning_rate": 0.0001999997499843044, - "loss": 46.0, - "step": 4430 - }, - { - "epoch": 0.7135552961069286, - "grad_norm": 0.0012772154295817018, - "learning_rate": 0.00019999974987116236, - "loss": 46.0, - "step": 4431 - }, - { - "epoch": 0.713716333185716, - "grad_norm": 0.0005927607417106628, - "learning_rate": 0.00019999974975799475, - "loss": 46.0, - "step": 4432 - }, - { - "epoch": 0.7138773702645034, - "grad_norm": 0.0010581662645563483, - "learning_rate": 0.00019999974964480152, - "loss": 46.0, - "step": 4433 - }, - { - "epoch": 0.7140384073432908, - "grad_norm": 0.0010330479126423597, - "learning_rate": 0.0001999997495315827, - "loss": 46.0, - "step": 4434 - }, - { - "epoch": 0.7141994444220782, - "grad_norm": 0.00033568430808372796, - "learning_rate": 0.00019999974941833833, - "loss": 46.0, - "step": 4435 - }, - { - "epoch": 0.7143604815008656, - "grad_norm": 0.00046789299813099205, - "learning_rate": 0.00019999974930506832, - "loss": 46.0, - "step": 4436 - }, - { - "epoch": 0.714521518579653, - "grad_norm": 0.0006808977341279387, - "learning_rate": 0.00019999974919177271, - "loss": 46.0, - "step": 4437 - }, - { - "epoch": 0.7146825556584404, - "grad_norm": 0.003396762302145362, - "learning_rate": 0.00019999974907845152, - "loss": 46.0, - "step": 4438 - }, - { - "epoch": 0.7148435927372278, - "grad_norm": 0.0006392574869096279, - "learning_rate": 0.00019999974896510475, - "loss": 46.0, - "step": 4439 - }, - { - "epoch": 0.7150046298160151, - "grad_norm": 0.0003603216609917581, - "learning_rate": 0.00019999974885173235, - "loss": 46.0, - "step": 4440 - }, - { - "epoch": 0.7151656668948025, - "grad_norm": 0.00169837917201221, - "learning_rate": 0.0001999997487383344, - "loss": 46.0, - "step": 4441 - }, - { - "epoch": 0.7153267039735899, - "grad_norm": 0.005692393984645605, - "learning_rate": 0.0001999997486249108, - "loss": 46.0, - "step": 4442 - }, - { - "epoch": 0.7154877410523773, - "grad_norm": 0.0006954838754609227, - "learning_rate": 0.00019999974851146165, - "loss": 46.0, - "step": 4443 - }, - { - "epoch": 0.7156487781311647, - "grad_norm": 0.0010099103674292564, - "learning_rate": 0.00019999974839798691, - "loss": 46.0, - "step": 4444 - }, - { - "epoch": 0.7158098152099521, - "grad_norm": 0.00042093818774446845, - "learning_rate": 0.00019999974828448656, - "loss": 46.0, - "step": 4445 - }, - { - "epoch": 0.7159708522887395, - "grad_norm": 0.00670892046764493, - "learning_rate": 0.00019999974817096062, - "loss": 46.0, - "step": 4446 - }, - { - "epoch": 0.7161318893675269, - "grad_norm": 0.0010251224739477038, - "learning_rate": 0.0001999997480574091, - "loss": 46.0, - "step": 4447 - }, - { - "epoch": 0.7162929264463143, - "grad_norm": 0.0010518493363633752, - "learning_rate": 0.00019999974794383194, - "loss": 46.0, - "step": 4448 - }, - { - "epoch": 0.7164539635251016, - "grad_norm": 0.000520506757311523, - "learning_rate": 0.00019999974783022921, - "loss": 46.0, - "step": 4449 - }, - { - "epoch": 0.716615000603889, - "grad_norm": 0.0029253712855279446, - "learning_rate": 0.0001999997477166009, - "loss": 46.0, - "step": 4450 - }, - { - "epoch": 0.7167760376826764, - "grad_norm": 0.00048601144226267934, - "learning_rate": 0.00019999974760294696, - "loss": 46.0, - "step": 4451 - }, - { - "epoch": 0.7169370747614638, - "grad_norm": 0.001080303336493671, - "learning_rate": 0.00019999974748926747, - "loss": 46.0, - "step": 4452 - }, - { - "epoch": 0.7170981118402512, - "grad_norm": 0.0003332775959279388, - "learning_rate": 0.00019999974737556234, - "loss": 46.0, - "step": 4453 - }, - { - "epoch": 0.7172591489190386, - "grad_norm": 0.0013118890346959233, - "learning_rate": 0.00019999974726183164, - "loss": 46.0, - "step": 4454 - }, - { - "epoch": 0.717420185997826, - "grad_norm": 0.0007557226344943047, - "learning_rate": 0.00019999974714807534, - "loss": 46.0, - "step": 4455 - }, - { - "epoch": 0.7175812230766134, - "grad_norm": 0.00036685456871055067, - "learning_rate": 0.00019999974703429344, - "loss": 46.0, - "step": 4456 - }, - { - "epoch": 0.7177422601554008, - "grad_norm": 0.001007641083560884, - "learning_rate": 0.00019999974692048596, - "loss": 46.0, - "step": 4457 - }, - { - "epoch": 0.7179032972341882, - "grad_norm": 0.0009070277446880937, - "learning_rate": 0.0001999997468066529, - "loss": 46.0, - "step": 4458 - }, - { - "epoch": 0.7180643343129756, - "grad_norm": 0.0016711081843823195, - "learning_rate": 0.00019999974669279418, - "loss": 46.0, - "step": 4459 - }, - { - "epoch": 0.7182253713917629, - "grad_norm": 0.0012451380025595427, - "learning_rate": 0.0001999997465789099, - "loss": 46.0, - "step": 4460 - }, - { - "epoch": 0.7183864084705504, - "grad_norm": 0.002102638129144907, - "learning_rate": 0.00019999974646500005, - "loss": 46.0, - "step": 4461 - }, - { - "epoch": 0.7185474455493377, - "grad_norm": 0.0018069209763780236, - "learning_rate": 0.0001999997463510646, - "loss": 46.0, - "step": 4462 - }, - { - "epoch": 0.7187084826281251, - "grad_norm": 0.0016833161935210228, - "learning_rate": 0.00019999974623710354, - "loss": 46.0, - "step": 4463 - }, - { - "epoch": 0.7188695197069125, - "grad_norm": 0.0006691787275485694, - "learning_rate": 0.00019999974612311687, - "loss": 46.0, - "step": 4464 - }, - { - "epoch": 0.7190305567856999, - "grad_norm": 0.0006645992980338633, - "learning_rate": 0.00019999974600910464, - "loss": 46.0, - "step": 4465 - }, - { - "epoch": 0.7191915938644873, - "grad_norm": 0.0024473455268889666, - "learning_rate": 0.0001999997458950668, - "loss": 46.0, - "step": 4466 - }, - { - "epoch": 0.7193526309432747, - "grad_norm": 0.004251326899975538, - "learning_rate": 0.00019999974578100335, - "loss": 46.0, - "step": 4467 - }, - { - "epoch": 0.7195136680220621, - "grad_norm": 0.001146490452811122, - "learning_rate": 0.0001999997456669143, - "loss": 46.0, - "step": 4468 - }, - { - "epoch": 0.7196747051008495, - "grad_norm": 0.0014277849113568664, - "learning_rate": 0.00019999974555279967, - "loss": 46.0, - "step": 4469 - }, - { - "epoch": 0.7198357421796369, - "grad_norm": 0.0005654064007103443, - "learning_rate": 0.00019999974543865947, - "loss": 46.0, - "step": 4470 - }, - { - "epoch": 0.7199967792584242, - "grad_norm": 0.0011332668364048004, - "learning_rate": 0.00019999974532449363, - "loss": 46.0, - "step": 4471 - }, - { - "epoch": 0.7201578163372117, - "grad_norm": 0.0005477855447679758, - "learning_rate": 0.00019999974521030223, - "loss": 46.0, - "step": 4472 - }, - { - "epoch": 0.720318853415999, - "grad_norm": 0.0022119577042758465, - "learning_rate": 0.00019999974509608522, - "loss": 46.0, - "step": 4473 - }, - { - "epoch": 0.7204798904947864, - "grad_norm": 0.00034289906034246087, - "learning_rate": 0.00019999974498184262, - "loss": 46.0, - "step": 4474 - }, - { - "epoch": 0.7206409275735738, - "grad_norm": 0.0017581023275852203, - "learning_rate": 0.00019999974486757443, - "loss": 46.0, - "step": 4475 - }, - { - "epoch": 0.7208019646523612, - "grad_norm": 0.0008935669902712107, - "learning_rate": 0.00019999974475328066, - "loss": 46.0, - "step": 4476 - }, - { - "epoch": 0.7209630017311486, - "grad_norm": 0.0005180076695978642, - "learning_rate": 0.00019999974463896124, - "loss": 46.0, - "step": 4477 - }, - { - "epoch": 0.721124038809936, - "grad_norm": 0.0009835090022534132, - "learning_rate": 0.00019999974452461627, - "loss": 46.0, - "step": 4478 - }, - { - "epoch": 0.7212850758887234, - "grad_norm": 0.0005806116969324648, - "learning_rate": 0.0001999997444102457, - "loss": 46.0, - "step": 4479 - }, - { - "epoch": 0.7214461129675108, - "grad_norm": 0.0012206181418150663, - "learning_rate": 0.00019999974429584953, - "loss": 46.0, - "step": 4480 - }, - { - "epoch": 0.7216071500462982, - "grad_norm": 0.0011013572802767158, - "learning_rate": 0.00019999974418142776, - "loss": 46.0, - "step": 4481 - }, - { - "epoch": 0.7217681871250855, - "grad_norm": 0.0010482007637619972, - "learning_rate": 0.0001999997440669804, - "loss": 46.0, - "step": 4482 - }, - { - "epoch": 0.721929224203873, - "grad_norm": 0.0012515585403889418, - "learning_rate": 0.00019999974395250748, - "loss": 46.0, - "step": 4483 - }, - { - "epoch": 0.7220902612826603, - "grad_norm": 0.0009090963285416365, - "learning_rate": 0.00019999974383800892, - "loss": 46.0, - "step": 4484 - }, - { - "epoch": 0.7222512983614477, - "grad_norm": 0.0033045061863958836, - "learning_rate": 0.00019999974372348476, - "loss": 46.0, - "step": 4485 - }, - { - "epoch": 0.7224123354402351, - "grad_norm": 0.0010728840716183186, - "learning_rate": 0.00019999974360893503, - "loss": 46.0, - "step": 4486 - }, - { - "epoch": 0.7225733725190225, - "grad_norm": 0.0017847331473603845, - "learning_rate": 0.0001999997434943597, - "loss": 46.0, - "step": 4487 - }, - { - "epoch": 0.7227344095978099, - "grad_norm": 0.0009227863629348576, - "learning_rate": 0.00019999974337975879, - "loss": 46.0, - "step": 4488 - }, - { - "epoch": 0.7228954466765973, - "grad_norm": 0.0019123065285384655, - "learning_rate": 0.00019999974326513224, - "loss": 46.0, - "step": 4489 - }, - { - "epoch": 0.7230564837553847, - "grad_norm": 0.00047635557712055743, - "learning_rate": 0.00019999974315048014, - "loss": 46.0, - "step": 4490 - }, - { - "epoch": 0.7232175208341721, - "grad_norm": 0.00041297514690086246, - "learning_rate": 0.0001999997430358024, - "loss": 46.0, - "step": 4491 - }, - { - "epoch": 0.7233785579129595, - "grad_norm": 0.0012477511772885919, - "learning_rate": 0.00019999974292109912, - "loss": 46.0, - "step": 4492 - }, - { - "epoch": 0.7235395949917468, - "grad_norm": 0.000492687860969454, - "learning_rate": 0.0001999997428063702, - "loss": 46.0, - "step": 4493 - }, - { - "epoch": 0.7237006320705343, - "grad_norm": 0.001497862278483808, - "learning_rate": 0.00019999974269161572, - "loss": 46.0, - "step": 4494 - }, - { - "epoch": 0.7238616691493216, - "grad_norm": 0.0021726367995142937, - "learning_rate": 0.00019999974257683563, - "loss": 46.0, - "step": 4495 - }, - { - "epoch": 0.724022706228109, - "grad_norm": 0.00046507263323292136, - "learning_rate": 0.00019999974246202992, - "loss": 46.0, - "step": 4496 - }, - { - "epoch": 0.7241837433068964, - "grad_norm": 0.0007227102178148925, - "learning_rate": 0.00019999974234719865, - "loss": 46.0, - "step": 4497 - }, - { - "epoch": 0.7243447803856838, - "grad_norm": 0.0007895276648923755, - "learning_rate": 0.00019999974223234177, - "loss": 46.0, - "step": 4498 - }, - { - "epoch": 0.7245058174644712, - "grad_norm": 0.0011881771497428417, - "learning_rate": 0.0001999997421174593, - "loss": 46.0, - "step": 4499 - }, - { - "epoch": 0.7246668545432586, - "grad_norm": 0.003271875437349081, - "learning_rate": 0.00019999974200255122, - "loss": 46.0, - "step": 4500 - }, - { - "epoch": 0.724827891622046, - "grad_norm": 0.0007997031207196414, - "learning_rate": 0.00019999974188761758, - "loss": 46.0, - "step": 4501 - }, - { - "epoch": 0.7249889287008333, - "grad_norm": 0.0009179948829114437, - "learning_rate": 0.00019999974177265832, - "loss": 46.0, - "step": 4502 - }, - { - "epoch": 0.7251499657796208, - "grad_norm": 0.0017261349130421877, - "learning_rate": 0.00019999974165767345, - "loss": 46.0, - "step": 4503 - }, - { - "epoch": 0.7253110028584081, - "grad_norm": 0.0012372866040095687, - "learning_rate": 0.00019999974154266302, - "loss": 46.0, - "step": 4504 - }, - { - "epoch": 0.7254720399371956, - "grad_norm": 0.0008584472816437483, - "learning_rate": 0.000199999741427627, - "loss": 46.0, - "step": 4505 - }, - { - "epoch": 0.7256330770159829, - "grad_norm": 0.0012302917893975973, - "learning_rate": 0.00019999974131256534, - "loss": 46.0, - "step": 4506 - }, - { - "epoch": 0.7257941140947703, - "grad_norm": 0.0025850438978523016, - "learning_rate": 0.00019999974119747812, - "loss": 46.0, - "step": 4507 - }, - { - "epoch": 0.7259551511735577, - "grad_norm": 0.0013695707311853766, - "learning_rate": 0.0001999997410823653, - "loss": 46.0, - "step": 4508 - }, - { - "epoch": 0.7261161882523451, - "grad_norm": 0.0006834747036918998, - "learning_rate": 0.00019999974096722686, - "loss": 46.0, - "step": 4509 - }, - { - "epoch": 0.7262772253311325, - "grad_norm": 0.000519837427418679, - "learning_rate": 0.00019999974085206285, - "loss": 46.0, - "step": 4510 - }, - { - "epoch": 0.7264382624099199, - "grad_norm": 0.0011834946926683187, - "learning_rate": 0.00019999974073687325, - "loss": 46.0, - "step": 4511 - }, - { - "epoch": 0.7265992994887073, - "grad_norm": 0.0006753116031177342, - "learning_rate": 0.00019999974062165804, - "loss": 46.0, - "step": 4512 - }, - { - "epoch": 0.7267603365674946, - "grad_norm": 0.0022536867763847113, - "learning_rate": 0.00019999974050641725, - "loss": 46.0, - "step": 4513 - }, - { - "epoch": 0.7269213736462821, - "grad_norm": 0.0021636977326124907, - "learning_rate": 0.00019999974039115086, - "loss": 46.0, - "step": 4514 - }, - { - "epoch": 0.7270824107250694, - "grad_norm": 0.00033525892649777234, - "learning_rate": 0.0001999997402758589, - "loss": 46.0, - "step": 4515 - }, - { - "epoch": 0.7272434478038569, - "grad_norm": 0.0008815217297524214, - "learning_rate": 0.00019999974016054128, - "loss": 46.0, - "step": 4516 - }, - { - "epoch": 0.7274044848826442, - "grad_norm": 0.0015479987487196922, - "learning_rate": 0.0001999997400451981, - "loss": 46.0, - "step": 4517 - }, - { - "epoch": 0.7275655219614316, - "grad_norm": 0.0007409134414047003, - "learning_rate": 0.00019999973992982932, - "loss": 46.0, - "step": 4518 - }, - { - "epoch": 0.727726559040219, - "grad_norm": 0.0018958671716973186, - "learning_rate": 0.00019999973981443497, - "loss": 46.0, - "step": 4519 - }, - { - "epoch": 0.7278875961190064, - "grad_norm": 0.0006282523390837014, - "learning_rate": 0.00019999973969901503, - "loss": 46.0, - "step": 4520 - }, - { - "epoch": 0.7280486331977938, - "grad_norm": 0.00042072057840414345, - "learning_rate": 0.00019999973958356946, - "loss": 46.0, - "step": 4521 - }, - { - "epoch": 0.7282096702765812, - "grad_norm": 0.0006787588354200125, - "learning_rate": 0.00019999973946809832, - "loss": 46.0, - "step": 4522 - }, - { - "epoch": 0.7283707073553686, - "grad_norm": 0.0011091604828834534, - "learning_rate": 0.00019999973935260154, - "loss": 46.0, - "step": 4523 - }, - { - "epoch": 0.7285317444341559, - "grad_norm": 0.0007960863877087831, - "learning_rate": 0.0001999997392370792, - "loss": 46.0, - "step": 4524 - }, - { - "epoch": 0.7286927815129434, - "grad_norm": 0.0013086351100355387, - "learning_rate": 0.00019999973912153128, - "loss": 46.0, - "step": 4525 - }, - { - "epoch": 0.7288538185917307, - "grad_norm": 0.0007039692136459053, - "learning_rate": 0.00019999973900595774, - "loss": 46.0, - "step": 4526 - }, - { - "epoch": 0.7290148556705182, - "grad_norm": 0.0005595731781795621, - "learning_rate": 0.00019999973889035864, - "loss": 46.0, - "step": 4527 - }, - { - "epoch": 0.7291758927493055, - "grad_norm": 0.0006170624401420355, - "learning_rate": 0.0001999997387747339, - "loss": 46.0, - "step": 4528 - }, - { - "epoch": 0.729336929828093, - "grad_norm": 0.0011318547185510397, - "learning_rate": 0.0001999997386590836, - "loss": 46.0, - "step": 4529 - }, - { - "epoch": 0.7294979669068803, - "grad_norm": 0.0026329183019697666, - "learning_rate": 0.00019999973854340768, - "loss": 46.0, - "step": 4530 - }, - { - "epoch": 0.7296590039856677, - "grad_norm": 0.0013854143908247352, - "learning_rate": 0.00019999973842770618, - "loss": 46.0, - "step": 4531 - }, - { - "epoch": 0.7298200410644551, - "grad_norm": 0.0020008685532957315, - "learning_rate": 0.00019999973831197907, - "loss": 46.0, - "step": 4532 - }, - { - "epoch": 0.7299810781432425, - "grad_norm": 0.0018082942115142941, - "learning_rate": 0.00019999973819622636, - "loss": 46.0, - "step": 4533 - }, - { - "epoch": 0.7301421152220299, - "grad_norm": 0.0003881430020555854, - "learning_rate": 0.0001999997380804481, - "loss": 46.0, - "step": 4534 - }, - { - "epoch": 0.7303031523008172, - "grad_norm": 0.000548550917301327, - "learning_rate": 0.0001999997379646442, - "loss": 46.0, - "step": 4535 - }, - { - "epoch": 0.7304641893796047, - "grad_norm": 0.000778438348788768, - "learning_rate": 0.00019999973784881473, - "loss": 46.0, - "step": 4536 - }, - { - "epoch": 0.730625226458392, - "grad_norm": 0.0023691642563790083, - "learning_rate": 0.00019999973773295965, - "loss": 46.0, - "step": 4537 - }, - { - "epoch": 0.7307862635371795, - "grad_norm": 0.0009080860763788223, - "learning_rate": 0.00019999973761707898, - "loss": 46.0, - "step": 4538 - }, - { - "epoch": 0.7309473006159668, - "grad_norm": 0.0006811475032009184, - "learning_rate": 0.0001999997375011727, - "loss": 46.0, - "step": 4539 - }, - { - "epoch": 0.7311083376947543, - "grad_norm": 0.0007978445501066744, - "learning_rate": 0.00019999973738524086, - "loss": 46.0, - "step": 4540 - }, - { - "epoch": 0.7312693747735416, - "grad_norm": 0.0005806766566820443, - "learning_rate": 0.0001999997372692834, - "loss": 46.0, - "step": 4541 - }, - { - "epoch": 0.731430411852329, - "grad_norm": 0.0007834543939679861, - "learning_rate": 0.00019999973715330036, - "loss": 46.0, - "step": 4542 - }, - { - "epoch": 0.7315914489311164, - "grad_norm": 0.0005286663072183728, - "learning_rate": 0.00019999973703729173, - "loss": 46.0, - "step": 4543 - }, - { - "epoch": 0.7317524860099038, - "grad_norm": 0.000779558380600065, - "learning_rate": 0.0001999997369212575, - "loss": 46.0, - "step": 4544 - }, - { - "epoch": 0.7319135230886912, - "grad_norm": 0.0010596184292808175, - "learning_rate": 0.00019999973680519766, - "loss": 46.0, - "step": 4545 - }, - { - "epoch": 0.7320745601674785, - "grad_norm": 0.005791361443698406, - "learning_rate": 0.00019999973668911224, - "loss": 46.0, - "step": 4546 - }, - { - "epoch": 0.732235597246266, - "grad_norm": 0.0008321021450683475, - "learning_rate": 0.0001999997365730012, - "loss": 46.0, - "step": 4547 - }, - { - "epoch": 0.7323966343250533, - "grad_norm": 0.0005218035657890141, - "learning_rate": 0.0001999997364568646, - "loss": 46.0, - "step": 4548 - }, - { - "epoch": 0.7325576714038408, - "grad_norm": 0.0017712480621412396, - "learning_rate": 0.00019999973634070238, - "loss": 46.0, - "step": 4549 - }, - { - "epoch": 0.7327187084826281, - "grad_norm": 0.001056362991221249, - "learning_rate": 0.00019999973622451456, - "loss": 46.0, - "step": 4550 - }, - { - "epoch": 0.7328797455614156, - "grad_norm": 0.0016243801219388843, - "learning_rate": 0.00019999973610830115, - "loss": 46.0, - "step": 4551 - }, - { - "epoch": 0.7330407826402029, - "grad_norm": 0.0028840191662311554, - "learning_rate": 0.00019999973599206218, - "loss": 46.0, - "step": 4552 - }, - { - "epoch": 0.7332018197189903, - "grad_norm": 0.0006374493823386729, - "learning_rate": 0.0001999997358757976, - "loss": 46.0, - "step": 4553 - }, - { - "epoch": 0.7333628567977777, - "grad_norm": 0.0019701439887285233, - "learning_rate": 0.0001999997357595074, - "loss": 46.0, - "step": 4554 - }, - { - "epoch": 0.733523893876565, - "grad_norm": 0.001144581357948482, - "learning_rate": 0.00019999973564319162, - "loss": 46.0, - "step": 4555 - }, - { - "epoch": 0.7336849309553525, - "grad_norm": 0.0007458243635483086, - "learning_rate": 0.00019999973552685022, - "loss": 46.0, - "step": 4556 - }, - { - "epoch": 0.7338459680341398, - "grad_norm": 0.0025849926751106977, - "learning_rate": 0.00019999973541048326, - "loss": 46.0, - "step": 4557 - }, - { - "epoch": 0.7340070051129273, - "grad_norm": 0.0012840854469686747, - "learning_rate": 0.00019999973529409072, - "loss": 46.0, - "step": 4558 - }, - { - "epoch": 0.7341680421917146, - "grad_norm": 0.0008985294844023883, - "learning_rate": 0.00019999973517767256, - "loss": 46.0, - "step": 4559 - }, - { - "epoch": 0.7343290792705021, - "grad_norm": 0.0006069798255339265, - "learning_rate": 0.0001999997350612288, - "loss": 46.0, - "step": 4560 - }, - { - "epoch": 0.7344901163492894, - "grad_norm": 0.002930633258074522, - "learning_rate": 0.00019999973494475945, - "loss": 46.0, - "step": 4561 - }, - { - "epoch": 0.7346511534280769, - "grad_norm": 0.0014089002506807446, - "learning_rate": 0.0001999997348282645, - "loss": 46.0, - "step": 4562 - }, - { - "epoch": 0.7348121905068642, - "grad_norm": 0.0005846631829626858, - "learning_rate": 0.000199999734711744, - "loss": 46.0, - "step": 4563 - }, - { - "epoch": 0.7349732275856516, - "grad_norm": 0.000707046187017113, - "learning_rate": 0.00019999973459519784, - "loss": 46.0, - "step": 4564 - }, - { - "epoch": 0.735134264664439, - "grad_norm": 0.00043157863547094166, - "learning_rate": 0.0001999997344786261, - "loss": 46.0, - "step": 4565 - }, - { - "epoch": 0.7352953017432263, - "grad_norm": 0.0039930869825184345, - "learning_rate": 0.00019999973436202878, - "loss": 46.0, - "step": 4566 - }, - { - "epoch": 0.7354563388220138, - "grad_norm": 0.00042738113552331924, - "learning_rate": 0.00019999973424540584, - "loss": 46.0, - "step": 4567 - }, - { - "epoch": 0.7356173759008011, - "grad_norm": 0.0006795520312152803, - "learning_rate": 0.00019999973412875734, - "loss": 46.0, - "step": 4568 - }, - { - "epoch": 0.7357784129795886, - "grad_norm": 0.0004120956582482904, - "learning_rate": 0.00019999973401208326, - "loss": 46.0, - "step": 4569 - }, - { - "epoch": 0.7359394500583759, - "grad_norm": 0.0004701597208622843, - "learning_rate": 0.00019999973389538353, - "loss": 46.0, - "step": 4570 - }, - { - "epoch": 0.7361004871371634, - "grad_norm": 0.0009315081988461316, - "learning_rate": 0.00019999973377865824, - "loss": 46.0, - "step": 4571 - }, - { - "epoch": 0.7362615242159507, - "grad_norm": 0.00033041753340512514, - "learning_rate": 0.00019999973366190737, - "loss": 46.0, - "step": 4572 - }, - { - "epoch": 0.7364225612947382, - "grad_norm": 0.0014328625984489918, - "learning_rate": 0.00019999973354513085, - "loss": 46.0, - "step": 4573 - }, - { - "epoch": 0.7365835983735255, - "grad_norm": 0.0002727169485297054, - "learning_rate": 0.00019999973342832878, - "loss": 46.0, - "step": 4574 - }, - { - "epoch": 0.736744635452313, - "grad_norm": 0.00032052627648226917, - "learning_rate": 0.00019999973331150112, - "loss": 46.0, - "step": 4575 - }, - { - "epoch": 0.7369056725311003, - "grad_norm": 0.0007307630148716271, - "learning_rate": 0.00019999973319464784, - "loss": 46.0, - "step": 4576 - }, - { - "epoch": 0.7370667096098876, - "grad_norm": 0.0003718679363373667, - "learning_rate": 0.00019999973307776897, - "loss": 46.0, - "step": 4577 - }, - { - "epoch": 0.7372277466886751, - "grad_norm": 0.0006524814525619149, - "learning_rate": 0.00019999973296086452, - "loss": 46.0, - "step": 4578 - }, - { - "epoch": 0.7373887837674624, - "grad_norm": 0.0005162352463230491, - "learning_rate": 0.00019999973284393448, - "loss": 46.0, - "step": 4579 - }, - { - "epoch": 0.7375498208462499, - "grad_norm": 0.0009220059146173298, - "learning_rate": 0.0001999997327269788, - "loss": 46.0, - "step": 4580 - }, - { - "epoch": 0.7377108579250372, - "grad_norm": 0.00027430258342064917, - "learning_rate": 0.00019999973260999756, - "loss": 46.0, - "step": 4581 - }, - { - "epoch": 0.7378718950038247, - "grad_norm": 0.0005880841636098921, - "learning_rate": 0.00019999973249299074, - "loss": 46.0, - "step": 4582 - }, - { - "epoch": 0.738032932082612, - "grad_norm": 0.00032640728750266135, - "learning_rate": 0.0001999997323759583, - "loss": 46.0, - "step": 4583 - }, - { - "epoch": 0.7381939691613995, - "grad_norm": 0.0015201785136014223, - "learning_rate": 0.00019999973225890027, - "loss": 46.0, - "step": 4584 - }, - { - "epoch": 0.7383550062401868, - "grad_norm": 0.00042293520527891815, - "learning_rate": 0.00019999973214181665, - "loss": 46.0, - "step": 4585 - }, - { - "epoch": 0.7385160433189742, - "grad_norm": 0.0008818499627523124, - "learning_rate": 0.00019999973202470742, - "loss": 46.0, - "step": 4586 - }, - { - "epoch": 0.7386770803977616, - "grad_norm": 0.0009171415586024523, - "learning_rate": 0.0001999997319075726, - "loss": 46.0, - "step": 4587 - }, - { - "epoch": 0.7388381174765489, - "grad_norm": 0.001969479024410248, - "learning_rate": 0.0001999997317904122, - "loss": 46.0, - "step": 4588 - }, - { - "epoch": 0.7389991545553364, - "grad_norm": 0.0025983729865401983, - "learning_rate": 0.00019999973167322618, - "loss": 46.0, - "step": 4589 - }, - { - "epoch": 0.7391601916341237, - "grad_norm": 0.0014474327908828855, - "learning_rate": 0.00019999973155601463, - "loss": 46.0, - "step": 4590 - }, - { - "epoch": 0.7393212287129112, - "grad_norm": 0.0032787180971354246, - "learning_rate": 0.0001999997314387774, - "loss": 46.0, - "step": 4591 - }, - { - "epoch": 0.7394822657916985, - "grad_norm": 0.0005762985674664378, - "learning_rate": 0.00019999973132151463, - "loss": 46.0, - "step": 4592 - }, - { - "epoch": 0.739643302870486, - "grad_norm": 0.001087381155230105, - "learning_rate": 0.00019999973120422623, - "loss": 46.0, - "step": 4593 - }, - { - "epoch": 0.7398043399492733, - "grad_norm": 0.000422496726969257, - "learning_rate": 0.00019999973108691225, - "loss": 46.0, - "step": 4594 - }, - { - "epoch": 0.7399653770280608, - "grad_norm": 0.004855965729802847, - "learning_rate": 0.0001999997309695727, - "loss": 46.0, - "step": 4595 - }, - { - "epoch": 0.7401264141068481, - "grad_norm": 0.0004002965579275042, - "learning_rate": 0.00019999973085220752, - "loss": 46.0, - "step": 4596 - }, - { - "epoch": 0.7402874511856355, - "grad_norm": 0.0005231106770224869, - "learning_rate": 0.00019999973073481675, - "loss": 46.0, - "step": 4597 - }, - { - "epoch": 0.7404484882644229, - "grad_norm": 0.000597749138250947, - "learning_rate": 0.0001999997306174004, - "loss": 46.0, - "step": 4598 - }, - { - "epoch": 0.7406095253432102, - "grad_norm": 0.00393320620059967, - "learning_rate": 0.00019999973049995845, - "loss": 46.0, - "step": 4599 - }, - { - "epoch": 0.7407705624219977, - "grad_norm": 0.001538130920380354, - "learning_rate": 0.0001999997303824909, - "loss": 46.0, - "step": 4600 - }, - { - "epoch": 0.740931599500785, - "grad_norm": 0.0006161427590996027, - "learning_rate": 0.00019999973026499777, - "loss": 46.0, - "step": 4601 - }, - { - "epoch": 0.7410926365795725, - "grad_norm": 0.0028469713870435953, - "learning_rate": 0.000199999730147479, - "loss": 46.0, - "step": 4602 - }, - { - "epoch": 0.7412536736583598, - "grad_norm": 0.0007435024017468095, - "learning_rate": 0.0001999997300299347, - "loss": 46.0, - "step": 4603 - }, - { - "epoch": 0.7414147107371473, - "grad_norm": 0.0004241475253365934, - "learning_rate": 0.00019999972991236478, - "loss": 46.0, - "step": 4604 - }, - { - "epoch": 0.7415757478159346, - "grad_norm": 0.0007312520174309611, - "learning_rate": 0.00019999972979476923, - "loss": 46.0, - "step": 4605 - }, - { - "epoch": 0.7417367848947221, - "grad_norm": 0.0013212421908974648, - "learning_rate": 0.00019999972967714812, - "loss": 46.0, - "step": 4606 - }, - { - "epoch": 0.7418978219735094, - "grad_norm": 0.0008603931055404246, - "learning_rate": 0.00019999972955950143, - "loss": 46.0, - "step": 4607 - }, - { - "epoch": 0.7420588590522967, - "grad_norm": 0.000343397434335202, - "learning_rate": 0.00019999972944182912, - "loss": 46.0, - "step": 4608 - }, - { - "epoch": 0.7422198961310842, - "grad_norm": 0.0011578669073060155, - "learning_rate": 0.00019999972932413122, - "loss": 46.0, - "step": 4609 - }, - { - "epoch": 0.7423809332098715, - "grad_norm": 0.0035870710853487253, - "learning_rate": 0.00019999972920640773, - "loss": 46.0, - "step": 4610 - }, - { - "epoch": 0.742541970288659, - "grad_norm": 0.000637476216070354, - "learning_rate": 0.00019999972908865866, - "loss": 46.0, - "step": 4611 - }, - { - "epoch": 0.7427030073674463, - "grad_norm": 0.0010959397768601775, - "learning_rate": 0.00019999972897088398, - "loss": 46.0, - "step": 4612 - }, - { - "epoch": 0.7428640444462338, - "grad_norm": 0.0009431284852325916, - "learning_rate": 0.00019999972885308367, - "loss": 46.0, - "step": 4613 - }, - { - "epoch": 0.7430250815250211, - "grad_norm": 0.00114214897621423, - "learning_rate": 0.00019999972873525781, - "loss": 46.0, - "step": 4614 - }, - { - "epoch": 0.7431861186038086, - "grad_norm": 0.0010769155342131853, - "learning_rate": 0.0001999997286174063, - "loss": 46.0, - "step": 4615 - }, - { - "epoch": 0.7433471556825959, - "grad_norm": 0.0019252679776400328, - "learning_rate": 0.00019999972849952928, - "loss": 46.0, - "step": 4616 - }, - { - "epoch": 0.7435081927613834, - "grad_norm": 0.0005160552100278437, - "learning_rate": 0.00019999972838162663, - "loss": 46.0, - "step": 4617 - }, - { - "epoch": 0.7436692298401707, - "grad_norm": 0.0005786482943221927, - "learning_rate": 0.00019999972826369836, - "loss": 46.0, - "step": 4618 - }, - { - "epoch": 0.743830266918958, - "grad_norm": 0.0003764034481719136, - "learning_rate": 0.0001999997281457445, - "loss": 46.0, - "step": 4619 - }, - { - "epoch": 0.7439913039977455, - "grad_norm": 0.0006265260744839907, - "learning_rate": 0.00019999972802776507, - "loss": 46.0, - "step": 4620 - }, - { - "epoch": 0.7441523410765328, - "grad_norm": 0.0012461300939321518, - "learning_rate": 0.00019999972790976002, - "loss": 46.0, - "step": 4621 - }, - { - "epoch": 0.7443133781553203, - "grad_norm": 0.0006371387280523777, - "learning_rate": 0.0001999997277917294, - "loss": 46.0, - "step": 4622 - }, - { - "epoch": 0.7444744152341076, - "grad_norm": 0.00048348153359256685, - "learning_rate": 0.00019999972767367318, - "loss": 46.0, - "step": 4623 - }, - { - "epoch": 0.7446354523128951, - "grad_norm": 0.0005813002935610712, - "learning_rate": 0.00019999972755559137, - "loss": 46.0, - "step": 4624 - }, - { - "epoch": 0.7447964893916824, - "grad_norm": 0.0007331267697736621, - "learning_rate": 0.00019999972743748394, - "loss": 46.0, - "step": 4625 - }, - { - "epoch": 0.7449575264704699, - "grad_norm": 0.001337914727628231, - "learning_rate": 0.00019999972731935093, - "loss": 46.0, - "step": 4626 - }, - { - "epoch": 0.7451185635492572, - "grad_norm": 0.002132835565134883, - "learning_rate": 0.00019999972720119232, - "loss": 46.0, - "step": 4627 - }, - { - "epoch": 0.7452796006280447, - "grad_norm": 0.0005022824043408036, - "learning_rate": 0.0001999997270830081, - "loss": 46.0, - "step": 4628 - }, - { - "epoch": 0.745440637706832, - "grad_norm": 0.00043734745122492313, - "learning_rate": 0.00019999972696479833, - "loss": 46.0, - "step": 4629 - }, - { - "epoch": 0.7456016747856193, - "grad_norm": 0.001717623439617455, - "learning_rate": 0.00019999972684656294, - "loss": 46.0, - "step": 4630 - }, - { - "epoch": 0.7457627118644068, - "grad_norm": 0.000820228538941592, - "learning_rate": 0.00019999972672830196, - "loss": 46.0, - "step": 4631 - }, - { - "epoch": 0.7459237489431941, - "grad_norm": 0.0009194695157930255, - "learning_rate": 0.00019999972661001537, - "loss": 46.0, - "step": 4632 - }, - { - "epoch": 0.7460847860219816, - "grad_norm": 0.001057522022165358, - "learning_rate": 0.0001999997264917032, - "loss": 46.0, - "step": 4633 - }, - { - "epoch": 0.7462458231007689, - "grad_norm": 0.00047464677481912076, - "learning_rate": 0.00019999972637336545, - "loss": 46.0, - "step": 4634 - }, - { - "epoch": 0.7464068601795564, - "grad_norm": 0.0032051538582891226, - "learning_rate": 0.00019999972625500207, - "loss": 46.0, - "step": 4635 - }, - { - "epoch": 0.7465678972583437, - "grad_norm": 0.00036359333898872137, - "learning_rate": 0.0001999997261366131, - "loss": 46.0, - "step": 4636 - }, - { - "epoch": 0.7467289343371312, - "grad_norm": 0.0029360989574342966, - "learning_rate": 0.00019999972601819857, - "loss": 46.0, - "step": 4637 - }, - { - "epoch": 0.7468899714159185, - "grad_norm": 0.0008089731563813984, - "learning_rate": 0.00019999972589975843, - "loss": 46.0, - "step": 4638 - }, - { - "epoch": 0.747051008494706, - "grad_norm": 0.001520228455774486, - "learning_rate": 0.00019999972578129267, - "loss": 46.0, - "step": 4639 - }, - { - "epoch": 0.7472120455734933, - "grad_norm": 0.0003285162674728781, - "learning_rate": 0.00019999972566280133, - "loss": 46.0, - "step": 4640 - }, - { - "epoch": 0.7473730826522806, - "grad_norm": 0.005414806772023439, - "learning_rate": 0.0001999997255442844, - "loss": 46.0, - "step": 4641 - }, - { - "epoch": 0.7475341197310681, - "grad_norm": 0.00030580884777009487, - "learning_rate": 0.00019999972542574188, - "loss": 46.0, - "step": 4642 - }, - { - "epoch": 0.7476951568098554, - "grad_norm": 0.0019031764240935445, - "learning_rate": 0.00019999972530717374, - "loss": 46.0, - "step": 4643 - }, - { - "epoch": 0.7478561938886429, - "grad_norm": 0.00041826689266599715, - "learning_rate": 0.00019999972518858002, - "loss": 46.0, - "step": 4644 - }, - { - "epoch": 0.7480172309674302, - "grad_norm": 0.00042645016219466925, - "learning_rate": 0.00019999972506996072, - "loss": 46.0, - "step": 4645 - }, - { - "epoch": 0.7481782680462177, - "grad_norm": 0.0012782086851075292, - "learning_rate": 0.00019999972495131582, - "loss": 46.0, - "step": 4646 - }, - { - "epoch": 0.748339305125005, - "grad_norm": 0.0008948395261541009, - "learning_rate": 0.00019999972483264531, - "loss": 46.0, - "step": 4647 - }, - { - "epoch": 0.7485003422037925, - "grad_norm": 0.0014121216954663396, - "learning_rate": 0.00019999972471394922, - "loss": 46.0, - "step": 4648 - }, - { - "epoch": 0.7486613792825798, - "grad_norm": 0.0008100031409412622, - "learning_rate": 0.0001999997245952275, - "loss": 46.0, - "step": 4649 - }, - { - "epoch": 0.7488224163613673, - "grad_norm": 0.0008760725613683462, - "learning_rate": 0.00019999972447648024, - "loss": 46.0, - "step": 4650 - }, - { - "epoch": 0.7489834534401546, - "grad_norm": 0.000714444147888571, - "learning_rate": 0.00019999972435770738, - "loss": 46.0, - "step": 4651 - }, - { - "epoch": 0.7491444905189419, - "grad_norm": 0.000542790221516043, - "learning_rate": 0.00019999972423890888, - "loss": 46.0, - "step": 4652 - }, - { - "epoch": 0.7493055275977294, - "grad_norm": 0.0007445058436132967, - "learning_rate": 0.0001999997241200848, - "loss": 46.0, - "step": 4653 - }, - { - "epoch": 0.7494665646765167, - "grad_norm": 0.0006571363192051649, - "learning_rate": 0.00019999972400123515, - "loss": 46.0, - "step": 4654 - }, - { - "epoch": 0.7496276017553042, - "grad_norm": 0.0005258733872324228, - "learning_rate": 0.0001999997238823599, - "loss": 46.0, - "step": 4655 - }, - { - "epoch": 0.7497886388340915, - "grad_norm": 0.0020241141319274902, - "learning_rate": 0.00019999972376345904, - "loss": 46.0, - "step": 4656 - }, - { - "epoch": 0.749949675912879, - "grad_norm": 0.001398624968715012, - "learning_rate": 0.00019999972364453256, - "loss": 46.0, - "step": 4657 - }, - { - "epoch": 0.7501107129916663, - "grad_norm": 0.0015605477383360267, - "learning_rate": 0.00019999972352558053, - "loss": 46.0, - "step": 4658 - }, - { - "epoch": 0.7502717500704538, - "grad_norm": 0.0008498406386934221, - "learning_rate": 0.0001999997234066029, - "loss": 46.0, - "step": 4659 - }, - { - "epoch": 0.7504327871492411, - "grad_norm": 0.0027355796191841364, - "learning_rate": 0.00019999972328759965, - "loss": 46.0, - "step": 4660 - }, - { - "epoch": 0.7505938242280285, - "grad_norm": 0.00039430696051567793, - "learning_rate": 0.00019999972316857086, - "loss": 46.0, - "step": 4661 - }, - { - "epoch": 0.7507548613068159, - "grad_norm": 0.0004359922604635358, - "learning_rate": 0.0001999997230495164, - "loss": 46.0, - "step": 4662 - }, - { - "epoch": 0.7509158983856032, - "grad_norm": 0.0014977410901337862, - "learning_rate": 0.0001999997229304364, - "loss": 46.0, - "step": 4663 - }, - { - "epoch": 0.7510769354643907, - "grad_norm": 0.008042525500059128, - "learning_rate": 0.0001999997228113308, - "loss": 46.0, - "step": 4664 - }, - { - "epoch": 0.751237972543178, - "grad_norm": 0.000974364927969873, - "learning_rate": 0.0001999997226921996, - "loss": 46.0, - "step": 4665 - }, - { - "epoch": 0.7513990096219655, - "grad_norm": 0.0005854789051227272, - "learning_rate": 0.0001999997225730428, - "loss": 46.0, - "step": 4666 - }, - { - "epoch": 0.7515600467007528, - "grad_norm": 0.0028308595065027475, - "learning_rate": 0.00019999972245386037, - "loss": 46.0, - "step": 4667 - }, - { - "epoch": 0.7517210837795403, - "grad_norm": 0.004498977214097977, - "learning_rate": 0.00019999972233465237, - "loss": 46.0, - "step": 4668 - }, - { - "epoch": 0.7518821208583276, - "grad_norm": 0.0010365810012444854, - "learning_rate": 0.0001999997222154188, - "loss": 46.0, - "step": 4669 - }, - { - "epoch": 0.7520431579371151, - "grad_norm": 0.002472150372341275, - "learning_rate": 0.00019999972209615963, - "loss": 46.0, - "step": 4670 - }, - { - "epoch": 0.7522041950159024, - "grad_norm": 0.0008548650075681508, - "learning_rate": 0.00019999972197687482, - "loss": 46.0, - "step": 4671 - }, - { - "epoch": 0.7523652320946898, - "grad_norm": 0.002625685650855303, - "learning_rate": 0.00019999972185756447, - "loss": 46.0, - "step": 4672 - }, - { - "epoch": 0.7525262691734772, - "grad_norm": 0.00300764967687428, - "learning_rate": 0.0001999997217382285, - "loss": 46.0, - "step": 4673 - }, - { - "epoch": 0.7526873062522645, - "grad_norm": 0.0010463440557941794, - "learning_rate": 0.00019999972161886692, - "loss": 46.0, - "step": 4674 - }, - { - "epoch": 0.752848343331052, - "grad_norm": 0.0005545472959056497, - "learning_rate": 0.00019999972149947978, - "loss": 46.0, - "step": 4675 - }, - { - "epoch": 0.7530093804098393, - "grad_norm": 0.004115791991353035, - "learning_rate": 0.00019999972138006703, - "loss": 46.0, - "step": 4676 - }, - { - "epoch": 0.7531704174886268, - "grad_norm": 0.001644573058001697, - "learning_rate": 0.0001999997212606287, - "loss": 46.0, - "step": 4677 - }, - { - "epoch": 0.7533314545674141, - "grad_norm": 0.0012830966152250767, - "learning_rate": 0.00019999972114116474, - "loss": 46.0, - "step": 4678 - }, - { - "epoch": 0.7534924916462016, - "grad_norm": 0.0034075479488819838, - "learning_rate": 0.00019999972102167522, - "loss": 46.0, - "step": 4679 - }, - { - "epoch": 0.7536535287249889, - "grad_norm": 0.001458398182876408, - "learning_rate": 0.0001999997209021601, - "loss": 46.0, - "step": 4680 - }, - { - "epoch": 0.7538145658037764, - "grad_norm": 0.0004869127878919244, - "learning_rate": 0.00019999972078261937, - "loss": 46.0, - "step": 4681 - }, - { - "epoch": 0.7539756028825637, - "grad_norm": 0.003625623881816864, - "learning_rate": 0.00019999972066305304, - "loss": 46.0, - "step": 4682 - }, - { - "epoch": 0.754136639961351, - "grad_norm": 0.0018892440712079406, - "learning_rate": 0.00019999972054346113, - "loss": 46.0, - "step": 4683 - }, - { - "epoch": 0.7542976770401385, - "grad_norm": 0.004187189042568207, - "learning_rate": 0.0001999997204238436, - "loss": 46.0, - "step": 4684 - }, - { - "epoch": 0.7544587141189258, - "grad_norm": 0.001565721700899303, - "learning_rate": 0.0001999997203042005, - "loss": 46.0, - "step": 4685 - }, - { - "epoch": 0.7546197511977133, - "grad_norm": 0.0005464204004965723, - "learning_rate": 0.00019999972018453182, - "loss": 46.0, - "step": 4686 - }, - { - "epoch": 0.7547807882765006, - "grad_norm": 0.004974219016730785, - "learning_rate": 0.0001999997200648375, - "loss": 46.0, - "step": 4687 - }, - { - "epoch": 0.7549418253552881, - "grad_norm": 0.0008043889538384974, - "learning_rate": 0.00019999971994511762, - "loss": 46.0, - "step": 4688 - }, - { - "epoch": 0.7551028624340754, - "grad_norm": 0.0011184316826984286, - "learning_rate": 0.00019999971982537215, - "loss": 46.0, - "step": 4689 - }, - { - "epoch": 0.7552638995128629, - "grad_norm": 0.0008623666362836957, - "learning_rate": 0.00019999971970560107, - "loss": 46.0, - "step": 4690 - }, - { - "epoch": 0.7554249365916502, - "grad_norm": 0.0012672202428802848, - "learning_rate": 0.0001999997195858044, - "loss": 46.0, - "step": 4691 - }, - { - "epoch": 0.7555859736704377, - "grad_norm": 0.0011607082560658455, - "learning_rate": 0.00019999971946598212, - "loss": 46.0, - "step": 4692 - }, - { - "epoch": 0.755747010749225, - "grad_norm": 0.0004718489362858236, - "learning_rate": 0.00019999971934613428, - "loss": 46.0, - "step": 4693 - }, - { - "epoch": 0.7559080478280124, - "grad_norm": 0.0014752136776223779, - "learning_rate": 0.00019999971922626082, - "loss": 46.0, - "step": 4694 - }, - { - "epoch": 0.7560690849067998, - "grad_norm": 0.0024576419964432716, - "learning_rate": 0.00019999971910636174, - "loss": 46.0, - "step": 4695 - }, - { - "epoch": 0.7562301219855871, - "grad_norm": 0.0015845508314669132, - "learning_rate": 0.0001999997189864371, - "loss": 46.0, - "step": 4696 - }, - { - "epoch": 0.7563911590643746, - "grad_norm": 0.0010749782668426633, - "learning_rate": 0.00019999971886648687, - "loss": 46.0, - "step": 4697 - }, - { - "epoch": 0.7565521961431619, - "grad_norm": 0.0005168747738935053, - "learning_rate": 0.00019999971874651103, - "loss": 46.0, - "step": 4698 - }, - { - "epoch": 0.7567132332219494, - "grad_norm": 0.0013822518521919847, - "learning_rate": 0.00019999971862650958, - "loss": 46.0, - "step": 4699 - }, - { - "epoch": 0.7568742703007367, - "grad_norm": 0.0003715934290084988, - "learning_rate": 0.00019999971850648258, - "loss": 46.0, - "step": 4700 - }, - { - "epoch": 0.7570353073795242, - "grad_norm": 0.00046432638191618025, - "learning_rate": 0.00019999971838642995, - "loss": 46.0, - "step": 4701 - }, - { - "epoch": 0.7571963444583115, - "grad_norm": 0.0005687046214006841, - "learning_rate": 0.00019999971826635174, - "loss": 46.0, - "step": 4702 - }, - { - "epoch": 0.7573573815370989, - "grad_norm": 0.0012473512906581163, - "learning_rate": 0.00019999971814624795, - "loss": 46.0, - "step": 4703 - }, - { - "epoch": 0.7575184186158863, - "grad_norm": 0.0014611040242016315, - "learning_rate": 0.0001999997180261185, - "loss": 46.0, - "step": 4704 - }, - { - "epoch": 0.7576794556946737, - "grad_norm": 0.0023170479107648134, - "learning_rate": 0.0001999997179059635, - "loss": 46.0, - "step": 4705 - }, - { - "epoch": 0.7578404927734611, - "grad_norm": 0.0008970486233010888, - "learning_rate": 0.00019999971778578293, - "loss": 46.0, - "step": 4706 - }, - { - "epoch": 0.7580015298522484, - "grad_norm": 0.0011431307066231966, - "learning_rate": 0.00019999971766557675, - "loss": 46.0, - "step": 4707 - }, - { - "epoch": 0.7581625669310359, - "grad_norm": 0.0024071645457297564, - "learning_rate": 0.00019999971754534497, - "loss": 46.0, - "step": 4708 - }, - { - "epoch": 0.7583236040098232, - "grad_norm": 0.0013848600210621953, - "learning_rate": 0.00019999971742508757, - "loss": 46.0, - "step": 4709 - }, - { - "epoch": 0.7584846410886107, - "grad_norm": 0.000974328548181802, - "learning_rate": 0.0001999997173048046, - "loss": 46.0, - "step": 4710 - }, - { - "epoch": 0.758645678167398, - "grad_norm": 0.0009354418143630028, - "learning_rate": 0.00019999971718449603, - "loss": 46.0, - "step": 4711 - }, - { - "epoch": 0.7588067152461855, - "grad_norm": 0.0008783229277469218, - "learning_rate": 0.00019999971706416187, - "loss": 46.0, - "step": 4712 - }, - { - "epoch": 0.7589677523249728, - "grad_norm": 0.0020138919353485107, - "learning_rate": 0.00019999971694380212, - "loss": 46.0, - "step": 4713 - }, - { - "epoch": 0.7591287894037602, - "grad_norm": 0.001696997438557446, - "learning_rate": 0.00019999971682341675, - "loss": 46.0, - "step": 4714 - }, - { - "epoch": 0.7592898264825476, - "grad_norm": 0.001785373780876398, - "learning_rate": 0.0001999997167030058, - "loss": 46.0, - "step": 4715 - }, - { - "epoch": 0.759450863561335, - "grad_norm": 0.001679008244536817, - "learning_rate": 0.0001999997165825693, - "loss": 46.0, - "step": 4716 - }, - { - "epoch": 0.7596119006401224, - "grad_norm": 0.0025074633304029703, - "learning_rate": 0.00019999971646210714, - "loss": 46.0, - "step": 4717 - }, - { - "epoch": 0.7597729377189097, - "grad_norm": 0.0010412584524601698, - "learning_rate": 0.0001999997163416194, - "loss": 46.0, - "step": 4718 - }, - { - "epoch": 0.7599339747976972, - "grad_norm": 0.00029251494561322033, - "learning_rate": 0.00019999971622110607, - "loss": 46.0, - "step": 4719 - }, - { - "epoch": 0.7600950118764845, - "grad_norm": 0.001483130268752575, - "learning_rate": 0.00019999971610056716, - "loss": 46.0, - "step": 4720 - }, - { - "epoch": 0.760256048955272, - "grad_norm": 0.0016655530780553818, - "learning_rate": 0.00019999971598000263, - "loss": 46.0, - "step": 4721 - }, - { - "epoch": 0.7604170860340593, - "grad_norm": 0.0029624358285218477, - "learning_rate": 0.00019999971585941254, - "loss": 46.0, - "step": 4722 - }, - { - "epoch": 0.7605781231128468, - "grad_norm": 0.0006819451809860766, - "learning_rate": 0.0001999997157387968, - "loss": 46.0, - "step": 4723 - }, - { - "epoch": 0.7607391601916341, - "grad_norm": 0.0012939812149852514, - "learning_rate": 0.00019999971561815552, - "loss": 46.0, - "step": 4724 - }, - { - "epoch": 0.7609001972704215, - "grad_norm": 0.0005127053591422737, - "learning_rate": 0.00019999971549748864, - "loss": 46.0, - "step": 4725 - }, - { - "epoch": 0.7610612343492089, - "grad_norm": 0.0016179886879399419, - "learning_rate": 0.00019999971537679612, - "loss": 46.0, - "step": 4726 - }, - { - "epoch": 0.7612222714279963, - "grad_norm": 0.0036728864070028067, - "learning_rate": 0.00019999971525607805, - "loss": 46.0, - "step": 4727 - }, - { - "epoch": 0.7613833085067837, - "grad_norm": 0.0023441568482667208, - "learning_rate": 0.00019999971513533438, - "loss": 46.0, - "step": 4728 - }, - { - "epoch": 0.761544345585571, - "grad_norm": 0.00042874025530181825, - "learning_rate": 0.0001999997150145651, - "loss": 46.0, - "step": 4729 - }, - { - "epoch": 0.7617053826643585, - "grad_norm": 0.0007069238345138729, - "learning_rate": 0.00019999971489377023, - "loss": 46.0, - "step": 4730 - }, - { - "epoch": 0.7618664197431458, - "grad_norm": 0.0003816660610027611, - "learning_rate": 0.00019999971477294975, - "loss": 46.0, - "step": 4731 - }, - { - "epoch": 0.7620274568219333, - "grad_norm": 0.0009648569393903017, - "learning_rate": 0.0001999997146521037, - "loss": 46.0, - "step": 4732 - }, - { - "epoch": 0.7621884939007206, - "grad_norm": 0.0006991874543018639, - "learning_rate": 0.00019999971453123205, - "loss": 46.0, - "step": 4733 - }, - { - "epoch": 0.7623495309795081, - "grad_norm": 0.0007834224379621446, - "learning_rate": 0.0001999997144103348, - "loss": 46.0, - "step": 4734 - }, - { - "epoch": 0.7625105680582954, - "grad_norm": 0.0010299012064933777, - "learning_rate": 0.00019999971428941195, - "loss": 46.0, - "step": 4735 - }, - { - "epoch": 0.7626716051370828, - "grad_norm": 0.0006626648246310651, - "learning_rate": 0.00019999971416846354, - "loss": 46.0, - "step": 4736 - }, - { - "epoch": 0.7628326422158702, - "grad_norm": 0.0015821504639461637, - "learning_rate": 0.0001999997140474895, - "loss": 46.0, - "step": 4737 - }, - { - "epoch": 0.7629936792946576, - "grad_norm": 0.0056922342628240585, - "learning_rate": 0.00019999971392648983, - "loss": 46.0, - "step": 4738 - }, - { - "epoch": 0.763154716373445, - "grad_norm": 0.0005951990024186671, - "learning_rate": 0.00019999971380546463, - "loss": 46.0, - "step": 4739 - }, - { - "epoch": 0.7633157534522323, - "grad_norm": 0.0007348938379436731, - "learning_rate": 0.0001999997136844138, - "loss": 46.0, - "step": 4740 - }, - { - "epoch": 0.7634767905310198, - "grad_norm": 0.000635460251942277, - "learning_rate": 0.0001999997135633374, - "loss": 46.0, - "step": 4741 - }, - { - "epoch": 0.7636378276098071, - "grad_norm": 0.0010311596561223269, - "learning_rate": 0.00019999971344223538, - "loss": 46.0, - "step": 4742 - }, - { - "epoch": 0.7637988646885946, - "grad_norm": 0.0004321200249250978, - "learning_rate": 0.0001999997133211078, - "loss": 46.0, - "step": 4743 - }, - { - "epoch": 0.7639599017673819, - "grad_norm": 0.001043844036757946, - "learning_rate": 0.0001999997131999546, - "loss": 46.0, - "step": 4744 - }, - { - "epoch": 0.7641209388461694, - "grad_norm": 0.00042273811413906515, - "learning_rate": 0.00019999971307877582, - "loss": 46.0, - "step": 4745 - }, - { - "epoch": 0.7642819759249567, - "grad_norm": 0.0007260072161443532, - "learning_rate": 0.00019999971295757142, - "loss": 46.0, - "step": 4746 - }, - { - "epoch": 0.7644430130037441, - "grad_norm": 0.0029898914508521557, - "learning_rate": 0.00019999971283634143, - "loss": 46.0, - "step": 4747 - }, - { - "epoch": 0.7646040500825315, - "grad_norm": 0.0030045099556446075, - "learning_rate": 0.00019999971271508586, - "loss": 46.0, - "step": 4748 - }, - { - "epoch": 0.7647650871613189, - "grad_norm": 0.00179480598308146, - "learning_rate": 0.00019999971259380468, - "loss": 46.0, - "step": 4749 - }, - { - "epoch": 0.7649261242401063, - "grad_norm": 0.0046968418173491955, - "learning_rate": 0.00019999971247249793, - "loss": 46.0, - "step": 4750 - }, - { - "epoch": 0.7650871613188936, - "grad_norm": 0.0012994071003049612, - "learning_rate": 0.00019999971235116557, - "loss": 46.0, - "step": 4751 - }, - { - "epoch": 0.7652481983976811, - "grad_norm": 0.0007074291352182627, - "learning_rate": 0.0001999997122298076, - "loss": 46.0, - "step": 4752 - }, - { - "epoch": 0.7654092354764684, - "grad_norm": 0.0038666988257318735, - "learning_rate": 0.00019999971210842406, - "loss": 46.0, - "step": 4753 - }, - { - "epoch": 0.7655702725552559, - "grad_norm": 0.0013941572979092598, - "learning_rate": 0.0001999997119870149, - "loss": 46.0, - "step": 4754 - }, - { - "epoch": 0.7657313096340432, - "grad_norm": 0.0005347354453988373, - "learning_rate": 0.00019999971186558018, - "loss": 46.0, - "step": 4755 - }, - { - "epoch": 0.7658923467128306, - "grad_norm": 0.0004978392389602959, - "learning_rate": 0.00019999971174411983, - "loss": 46.0, - "step": 4756 - }, - { - "epoch": 0.766053383791618, - "grad_norm": 0.0011792602017521858, - "learning_rate": 0.00019999971162263391, - "loss": 46.0, - "step": 4757 - }, - { - "epoch": 0.7662144208704054, - "grad_norm": 0.0006544819916598499, - "learning_rate": 0.0001999997115011224, - "loss": 46.0, - "step": 4758 - }, - { - "epoch": 0.7663754579491928, - "grad_norm": 0.0014281094772741199, - "learning_rate": 0.00019999971137958525, - "loss": 46.0, - "step": 4759 - }, - { - "epoch": 0.7665364950279802, - "grad_norm": 0.0017949396278709173, - "learning_rate": 0.00019999971125802255, - "loss": 46.0, - "step": 4760 - }, - { - "epoch": 0.7666975321067676, - "grad_norm": 0.00041266894550062716, - "learning_rate": 0.00019999971113643424, - "loss": 46.0, - "step": 4761 - }, - { - "epoch": 0.766858569185555, - "grad_norm": 0.0004257208784110844, - "learning_rate": 0.00019999971101482034, - "loss": 46.0, - "step": 4762 - }, - { - "epoch": 0.7670196062643424, - "grad_norm": 0.0015154016437008977, - "learning_rate": 0.00019999971089318085, - "loss": 46.0, - "step": 4763 - }, - { - "epoch": 0.7671806433431297, - "grad_norm": 0.0005804282845929265, - "learning_rate": 0.00019999971077151575, - "loss": 46.0, - "step": 4764 - }, - { - "epoch": 0.7673416804219172, - "grad_norm": 0.0006973495474085212, - "learning_rate": 0.00019999971064982509, - "loss": 46.0, - "step": 4765 - }, - { - "epoch": 0.7675027175007045, - "grad_norm": 0.0024062858428806067, - "learning_rate": 0.00019999971052810878, - "loss": 46.0, - "step": 4766 - }, - { - "epoch": 0.7676637545794919, - "grad_norm": 0.0005186183843761683, - "learning_rate": 0.0001999997104063669, - "loss": 46.0, - "step": 4767 - }, - { - "epoch": 0.7678247916582793, - "grad_norm": 0.0024316725321114063, - "learning_rate": 0.00019999971028459944, - "loss": 46.0, - "step": 4768 - }, - { - "epoch": 0.7679858287370667, - "grad_norm": 0.0034432136453688145, - "learning_rate": 0.00019999971016280638, - "loss": 46.0, - "step": 4769 - }, - { - "epoch": 0.7681468658158541, - "grad_norm": 0.0011938678799197078, - "learning_rate": 0.0001999997100409877, - "loss": 46.0, - "step": 4770 - }, - { - "epoch": 0.7683079028946415, - "grad_norm": 0.0003913584805559367, - "learning_rate": 0.00019999970991914343, - "loss": 46.0, - "step": 4771 - }, - { - "epoch": 0.7684689399734289, - "grad_norm": 0.0005460192332975566, - "learning_rate": 0.00019999970979727358, - "loss": 46.0, - "step": 4772 - }, - { - "epoch": 0.7686299770522163, - "grad_norm": 0.000703003432136029, - "learning_rate": 0.00019999970967537816, - "loss": 46.0, - "step": 4773 - }, - { - "epoch": 0.7687910141310037, - "grad_norm": 0.00302697136066854, - "learning_rate": 0.0001999997095534571, - "loss": 46.0, - "step": 4774 - }, - { - "epoch": 0.768952051209791, - "grad_norm": 0.00032928737346082926, - "learning_rate": 0.00019999970943151047, - "loss": 46.0, - "step": 4775 - }, - { - "epoch": 0.7691130882885785, - "grad_norm": 0.001323476666584611, - "learning_rate": 0.00019999970930953824, - "loss": 46.0, - "step": 4776 - }, - { - "epoch": 0.7692741253673658, - "grad_norm": 0.0007389159873127937, - "learning_rate": 0.00019999970918754042, - "loss": 46.0, - "step": 4777 - }, - { - "epoch": 0.7694351624461532, - "grad_norm": 0.0032452831510454416, - "learning_rate": 0.000199999709065517, - "loss": 46.0, - "step": 4778 - }, - { - "epoch": 0.7695961995249406, - "grad_norm": 0.000530002755112946, - "learning_rate": 0.00019999970894346797, - "loss": 46.0, - "step": 4779 - }, - { - "epoch": 0.769757236603728, - "grad_norm": 0.0011758236214518547, - "learning_rate": 0.00019999970882139337, - "loss": 46.0, - "step": 4780 - }, - { - "epoch": 0.7699182736825154, - "grad_norm": 0.0005026006838306785, - "learning_rate": 0.00019999970869929317, - "loss": 46.0, - "step": 4781 - }, - { - "epoch": 0.7700793107613028, - "grad_norm": 0.0009113785345107317, - "learning_rate": 0.00019999970857716737, - "loss": 46.0, - "step": 4782 - }, - { - "epoch": 0.7702403478400902, - "grad_norm": 0.0008467392181046307, - "learning_rate": 0.000199999708455016, - "loss": 46.0, - "step": 4783 - }, - { - "epoch": 0.7704013849188776, - "grad_norm": 0.00046924632624723017, - "learning_rate": 0.000199999708332839, - "loss": 46.0, - "step": 4784 - }, - { - "epoch": 0.770562421997665, - "grad_norm": 0.0009634041343815625, - "learning_rate": 0.0001999997082106364, - "loss": 46.0, - "step": 4785 - }, - { - "epoch": 0.7707234590764523, - "grad_norm": 0.004751934669911861, - "learning_rate": 0.00019999970808840821, - "loss": 46.0, - "step": 4786 - }, - { - "epoch": 0.7708844961552398, - "grad_norm": 0.0010547551792114973, - "learning_rate": 0.00019999970796615447, - "loss": 46.0, - "step": 4787 - }, - { - "epoch": 0.7710455332340271, - "grad_norm": 0.0007109145517461002, - "learning_rate": 0.00019999970784387506, - "loss": 46.0, - "step": 4788 - }, - { - "epoch": 0.7712065703128145, - "grad_norm": 0.0013763969764113426, - "learning_rate": 0.00019999970772157012, - "loss": 46.0, - "step": 4789 - }, - { - "epoch": 0.7713676073916019, - "grad_norm": 0.0005206355708651245, - "learning_rate": 0.00019999970759923956, - "loss": 46.0, - "step": 4790 - }, - { - "epoch": 0.7715286444703893, - "grad_norm": 0.0009797917446121573, - "learning_rate": 0.0001999997074768834, - "loss": 46.0, - "step": 4791 - }, - { - "epoch": 0.7716896815491767, - "grad_norm": 0.0005716772866435349, - "learning_rate": 0.00019999970735450165, - "loss": 46.0, - "step": 4792 - }, - { - "epoch": 0.7718507186279641, - "grad_norm": 0.0004491658473853022, - "learning_rate": 0.0001999997072320943, - "loss": 46.0, - "step": 4793 - }, - { - "epoch": 0.7720117557067515, - "grad_norm": 0.000580527528654784, - "learning_rate": 0.00019999970710966137, - "loss": 46.0, - "step": 4794 - }, - { - "epoch": 0.7721727927855389, - "grad_norm": 0.0009397721150889993, - "learning_rate": 0.00019999970698720288, - "loss": 46.0, - "step": 4795 - }, - { - "epoch": 0.7723338298643263, - "grad_norm": 0.0032832517754286528, - "learning_rate": 0.00019999970686471872, - "loss": 46.0, - "step": 4796 - }, - { - "epoch": 0.7724948669431136, - "grad_norm": 0.0010393010452389717, - "learning_rate": 0.00019999970674220902, - "loss": 46.0, - "step": 4797 - }, - { - "epoch": 0.7726559040219011, - "grad_norm": 0.001162507920525968, - "learning_rate": 0.0001999997066196737, - "loss": 46.0, - "step": 4798 - }, - { - "epoch": 0.7728169411006884, - "grad_norm": 0.0024015961680561304, - "learning_rate": 0.00019999970649711276, - "loss": 46.0, - "step": 4799 - }, - { - "epoch": 0.7729779781794758, - "grad_norm": 0.000556223327293992, - "learning_rate": 0.00019999970637452628, - "loss": 46.0, - "step": 4800 - }, - { - "epoch": 0.7731390152582632, - "grad_norm": 0.0019684734288603067, - "learning_rate": 0.00019999970625191418, - "loss": 46.0, - "step": 4801 - }, - { - "epoch": 0.7733000523370506, - "grad_norm": 0.0017118286341428757, - "learning_rate": 0.00019999970612927646, - "loss": 46.0, - "step": 4802 - }, - { - "epoch": 0.773461089415838, - "grad_norm": 0.0032506200950592756, - "learning_rate": 0.0001999997060066132, - "loss": 46.0, - "step": 4803 - }, - { - "epoch": 0.7736221264946254, - "grad_norm": 0.0006733370828442276, - "learning_rate": 0.0001999997058839243, - "loss": 46.0, - "step": 4804 - }, - { - "epoch": 0.7737831635734128, - "grad_norm": 0.0020027526188641787, - "learning_rate": 0.00019999970576120983, - "loss": 46.0, - "step": 4805 - }, - { - "epoch": 0.7739442006522002, - "grad_norm": 0.00076530990190804, - "learning_rate": 0.00019999970563846974, - "loss": 46.0, - "step": 4806 - }, - { - "epoch": 0.7741052377309876, - "grad_norm": 0.0016813938273116946, - "learning_rate": 0.0001999997055157041, - "loss": 46.0, - "step": 4807 - }, - { - "epoch": 0.774266274809775, - "grad_norm": 0.002859602915123105, - "learning_rate": 0.00019999970539291283, - "loss": 46.0, - "step": 4808 - }, - { - "epoch": 0.7744273118885623, - "grad_norm": 0.0013228071620687842, - "learning_rate": 0.00019999970527009598, - "loss": 46.0, - "step": 4809 - }, - { - "epoch": 0.7745883489673497, - "grad_norm": 0.0003916521091014147, - "learning_rate": 0.00019999970514725352, - "loss": 46.0, - "step": 4810 - }, - { - "epoch": 0.7747493860461371, - "grad_norm": 0.0007609453168697655, - "learning_rate": 0.00019999970502438546, - "loss": 46.0, - "step": 4811 - }, - { - "epoch": 0.7749104231249245, - "grad_norm": 0.002951554022729397, - "learning_rate": 0.00019999970490149183, - "loss": 46.0, - "step": 4812 - }, - { - "epoch": 0.7750714602037119, - "grad_norm": 0.0015105771599337459, - "learning_rate": 0.00019999970477857257, - "loss": 46.0, - "step": 4813 - }, - { - "epoch": 0.7752324972824993, - "grad_norm": 0.00033626353251747787, - "learning_rate": 0.00019999970465562776, - "loss": 46.0, - "step": 4814 - }, - { - "epoch": 0.7753935343612867, - "grad_norm": 0.00082097650738433, - "learning_rate": 0.00019999970453265733, - "loss": 46.0, - "step": 4815 - }, - { - "epoch": 0.7755545714400741, - "grad_norm": 0.0006431712536141276, - "learning_rate": 0.00019999970440966132, - "loss": 46.0, - "step": 4816 - }, - { - "epoch": 0.7757156085188615, - "grad_norm": 0.0008429883164353669, - "learning_rate": 0.0001999997042866397, - "loss": 46.0, - "step": 4817 - }, - { - "epoch": 0.7758766455976489, - "grad_norm": 0.0008026235736906528, - "learning_rate": 0.00019999970416359247, - "loss": 46.0, - "step": 4818 - }, - { - "epoch": 0.7760376826764362, - "grad_norm": 0.0010946061229333282, - "learning_rate": 0.00019999970404051967, - "loss": 46.0, - "step": 4819 - }, - { - "epoch": 0.7761987197552236, - "grad_norm": 0.0012417907128110528, - "learning_rate": 0.00019999970391742125, - "loss": 46.0, - "step": 4820 - }, - { - "epoch": 0.776359756834011, - "grad_norm": 0.003090334590524435, - "learning_rate": 0.00019999970379429728, - "loss": 46.0, - "step": 4821 - }, - { - "epoch": 0.7765207939127984, - "grad_norm": 0.0007975895423442125, - "learning_rate": 0.00019999970367114766, - "loss": 46.0, - "step": 4822 - }, - { - "epoch": 0.7766818309915858, - "grad_norm": 0.00222119246609509, - "learning_rate": 0.0001999997035479725, - "loss": 46.0, - "step": 4823 - }, - { - "epoch": 0.7768428680703732, - "grad_norm": 0.000647918728645891, - "learning_rate": 0.0001999997034247717, - "loss": 46.0, - "step": 4824 - }, - { - "epoch": 0.7770039051491606, - "grad_norm": 0.003519634949043393, - "learning_rate": 0.00019999970330154533, - "loss": 46.0, - "step": 4825 - }, - { - "epoch": 0.777164942227948, - "grad_norm": 0.0021936146076768637, - "learning_rate": 0.00019999970317829336, - "loss": 46.0, - "step": 4826 - }, - { - "epoch": 0.7773259793067354, - "grad_norm": 0.0012823280412703753, - "learning_rate": 0.0001999997030550158, - "loss": 46.0, - "step": 4827 - }, - { - "epoch": 0.7774870163855228, - "grad_norm": 0.0012383607681840658, - "learning_rate": 0.00019999970293171264, - "loss": 46.0, - "step": 4828 - }, - { - "epoch": 0.7776480534643102, - "grad_norm": 0.0011226102942600846, - "learning_rate": 0.00019999970280838389, - "loss": 46.0, - "step": 4829 - }, - { - "epoch": 0.7778090905430975, - "grad_norm": 0.001930180354975164, - "learning_rate": 0.00019999970268502954, - "loss": 46.0, - "step": 4830 - }, - { - "epoch": 0.7779701276218849, - "grad_norm": 0.0007477269391529262, - "learning_rate": 0.00019999970256164958, - "loss": 46.0, - "step": 4831 - }, - { - "epoch": 0.7781311647006723, - "grad_norm": 0.0005857248906977475, - "learning_rate": 0.00019999970243824407, - "loss": 46.0, - "step": 4832 - }, - { - "epoch": 0.7782922017794597, - "grad_norm": 0.0009798255050554872, - "learning_rate": 0.00019999970231481293, - "loss": 46.0, - "step": 4833 - }, - { - "epoch": 0.7784532388582471, - "grad_norm": 0.00033370786695741117, - "learning_rate": 0.0001999997021913562, - "loss": 46.0, - "step": 4834 - }, - { - "epoch": 0.7786142759370345, - "grad_norm": 0.00040524365613237023, - "learning_rate": 0.00019999970206787388, - "loss": 46.0, - "step": 4835 - }, - { - "epoch": 0.7787753130158219, - "grad_norm": 0.0015656567411497235, - "learning_rate": 0.00019999970194436596, - "loss": 46.0, - "step": 4836 - }, - { - "epoch": 0.7789363500946093, - "grad_norm": 0.0004574234480969608, - "learning_rate": 0.00019999970182083243, - "loss": 46.0, - "step": 4837 - }, - { - "epoch": 0.7790973871733967, - "grad_norm": 0.0009385333396494389, - "learning_rate": 0.00019999970169727333, - "loss": 46.0, - "step": 4838 - }, - { - "epoch": 0.7792584242521841, - "grad_norm": 0.00122968596406281, - "learning_rate": 0.00019999970157368862, - "loss": 46.0, - "step": 4839 - }, - { - "epoch": 0.7794194613309715, - "grad_norm": 0.00411304971203208, - "learning_rate": 0.00019999970145007833, - "loss": 46.0, - "step": 4840 - }, - { - "epoch": 0.7795804984097588, - "grad_norm": 0.0006165204686112702, - "learning_rate": 0.00019999970132644244, - "loss": 46.0, - "step": 4841 - }, - { - "epoch": 0.7797415354885462, - "grad_norm": 0.0006161246565170586, - "learning_rate": 0.00019999970120278095, - "loss": 46.0, - "step": 4842 - }, - { - "epoch": 0.7799025725673336, - "grad_norm": 0.0030055155511945486, - "learning_rate": 0.00019999970107909386, - "loss": 46.0, - "step": 4843 - }, - { - "epoch": 0.780063609646121, - "grad_norm": 0.0006349720642901957, - "learning_rate": 0.00019999970095538122, - "loss": 46.0, - "step": 4844 - }, - { - "epoch": 0.7802246467249084, - "grad_norm": 0.0028327363543212414, - "learning_rate": 0.00019999970083164293, - "loss": 46.0, - "step": 4845 - }, - { - "epoch": 0.7803856838036958, - "grad_norm": 0.0011354926973581314, - "learning_rate": 0.00019999970070787906, - "loss": 46.0, - "step": 4846 - }, - { - "epoch": 0.7805467208824832, - "grad_norm": 0.0043993135914206505, - "learning_rate": 0.0001999997005840896, - "loss": 46.0, - "step": 4847 - }, - { - "epoch": 0.7807077579612706, - "grad_norm": 0.0015721928793936968, - "learning_rate": 0.00019999970046027455, - "loss": 46.0, - "step": 4848 - }, - { - "epoch": 0.780868795040058, - "grad_norm": 0.0005242746556177735, - "learning_rate": 0.00019999970033643389, - "loss": 46.0, - "step": 4849 - }, - { - "epoch": 0.7810298321188454, - "grad_norm": 0.0011350411223247647, - "learning_rate": 0.00019999970021256766, - "loss": 46.0, - "step": 4850 - }, - { - "epoch": 0.7811908691976328, - "grad_norm": 0.0019493073923513293, - "learning_rate": 0.0001999997000886758, - "loss": 46.0, - "step": 4851 - }, - { - "epoch": 0.7813519062764201, - "grad_norm": 0.0007735356921330094, - "learning_rate": 0.00019999969996475838, - "loss": 46.0, - "step": 4852 - }, - { - "epoch": 0.7815129433552075, - "grad_norm": 0.0006262017996050417, - "learning_rate": 0.00019999969984081534, - "loss": 46.0, - "step": 4853 - }, - { - "epoch": 0.7816739804339949, - "grad_norm": 0.0014904163544997573, - "learning_rate": 0.00019999969971684674, - "loss": 46.0, - "step": 4854 - }, - { - "epoch": 0.7818350175127823, - "grad_norm": 0.0012866721954196692, - "learning_rate": 0.0001999996995928525, - "loss": 46.0, - "step": 4855 - }, - { - "epoch": 0.7819960545915697, - "grad_norm": 0.0006702210521325469, - "learning_rate": 0.00019999969946883267, - "loss": 46.0, - "step": 4856 - }, - { - "epoch": 0.7821570916703571, - "grad_norm": 0.0007764595211483538, - "learning_rate": 0.0001999996993447873, - "loss": 46.0, - "step": 4857 - }, - { - "epoch": 0.7823181287491445, - "grad_norm": 0.0002539651177357882, - "learning_rate": 0.00019999969922071626, - "loss": 46.0, - "step": 4858 - }, - { - "epoch": 0.7824791658279319, - "grad_norm": 0.0004988660803064704, - "learning_rate": 0.00019999969909661967, - "loss": 46.0, - "step": 4859 - }, - { - "epoch": 0.7826402029067193, - "grad_norm": 0.001338389003649354, - "learning_rate": 0.0001999996989724975, - "loss": 46.0, - "step": 4860 - }, - { - "epoch": 0.7828012399855067, - "grad_norm": 0.0012628274271264672, - "learning_rate": 0.0001999996988483497, - "loss": 46.0, - "step": 4861 - }, - { - "epoch": 0.782962277064294, - "grad_norm": 0.0005042063421569765, - "learning_rate": 0.00019999969872417633, - "loss": 46.0, - "step": 4862 - }, - { - "epoch": 0.7831233141430815, - "grad_norm": 0.004220644477754831, - "learning_rate": 0.00019999969859997734, - "loss": 46.0, - "step": 4863 - }, - { - "epoch": 0.7832843512218688, - "grad_norm": 0.00038851742283441126, - "learning_rate": 0.00019999969847575276, - "loss": 46.0, - "step": 4864 - }, - { - "epoch": 0.7834453883006562, - "grad_norm": 0.0008721654303371906, - "learning_rate": 0.0001999996983515026, - "loss": 46.0, - "step": 4865 - }, - { - "epoch": 0.7836064253794436, - "grad_norm": 0.002580776112154126, - "learning_rate": 0.00019999969822722684, - "loss": 46.0, - "step": 4866 - }, - { - "epoch": 0.783767462458231, - "grad_norm": 0.0004379845631774515, - "learning_rate": 0.00019999969810292548, - "loss": 46.0, - "step": 4867 - }, - { - "epoch": 0.7839284995370184, - "grad_norm": 0.0007844308856874704, - "learning_rate": 0.00019999969797859852, - "loss": 46.0, - "step": 4868 - }, - { - "epoch": 0.7840895366158058, - "grad_norm": 0.0023445901460945606, - "learning_rate": 0.00019999969785424598, - "loss": 46.0, - "step": 4869 - }, - { - "epoch": 0.7842505736945932, - "grad_norm": 0.001878255046904087, - "learning_rate": 0.00019999969772986785, - "loss": 46.0, - "step": 4870 - }, - { - "epoch": 0.7844116107733806, - "grad_norm": 0.0017349470872431993, - "learning_rate": 0.0001999996976054641, - "loss": 46.0, - "step": 4871 - }, - { - "epoch": 0.784572647852168, - "grad_norm": 0.0031744639854878187, - "learning_rate": 0.00019999969748103478, - "loss": 46.0, - "step": 4872 - }, - { - "epoch": 0.7847336849309553, - "grad_norm": 0.0037809982895851135, - "learning_rate": 0.00019999969735657986, - "loss": 46.0, - "step": 4873 - }, - { - "epoch": 0.7848947220097428, - "grad_norm": 0.0003046912606805563, - "learning_rate": 0.00019999969723209936, - "loss": 46.0, - "step": 4874 - }, - { - "epoch": 0.7850557590885301, - "grad_norm": 0.0007008084212429821, - "learning_rate": 0.00019999969710759322, - "loss": 46.0, - "step": 4875 - }, - { - "epoch": 0.7852167961673175, - "grad_norm": 0.0029918367508798838, - "learning_rate": 0.0001999996969830615, - "loss": 46.0, - "step": 4876 - }, - { - "epoch": 0.7853778332461049, - "grad_norm": 0.00042898106039501727, - "learning_rate": 0.0001999996968585042, - "loss": 46.0, - "step": 4877 - }, - { - "epoch": 0.7855388703248923, - "grad_norm": 0.0013238437240943313, - "learning_rate": 0.0001999996967339213, - "loss": 46.0, - "step": 4878 - }, - { - "epoch": 0.7856999074036797, - "grad_norm": 0.0015143059426918626, - "learning_rate": 0.00019999969660931282, - "loss": 46.0, - "step": 4879 - }, - { - "epoch": 0.7858609444824671, - "grad_norm": 0.004708718508481979, - "learning_rate": 0.00019999969648467874, - "loss": 46.0, - "step": 4880 - }, - { - "epoch": 0.7860219815612545, - "grad_norm": 0.0009256972698494792, - "learning_rate": 0.00019999969636001904, - "loss": 46.0, - "step": 4881 - }, - { - "epoch": 0.7861830186400419, - "grad_norm": 0.0019786139018833637, - "learning_rate": 0.00019999969623533376, - "loss": 46.0, - "step": 4882 - }, - { - "epoch": 0.7863440557188293, - "grad_norm": 0.0009576291777193546, - "learning_rate": 0.0001999996961106229, - "loss": 46.0, - "step": 4883 - }, - { - "epoch": 0.7865050927976166, - "grad_norm": 0.0004916126490570605, - "learning_rate": 0.0001999996959858864, - "loss": 46.0, - "step": 4884 - }, - { - "epoch": 0.786666129876404, - "grad_norm": 0.002730055944994092, - "learning_rate": 0.00019999969586112436, - "loss": 46.0, - "step": 4885 - }, - { - "epoch": 0.7868271669551914, - "grad_norm": 0.0011625067563727498, - "learning_rate": 0.0001999996957363367, - "loss": 46.0, - "step": 4886 - }, - { - "epoch": 0.7869882040339788, - "grad_norm": 0.002465092809870839, - "learning_rate": 0.00019999969561152345, - "loss": 46.0, - "step": 4887 - }, - { - "epoch": 0.7871492411127662, - "grad_norm": 0.0007205911679193377, - "learning_rate": 0.0001999996954866846, - "loss": 46.0, - "step": 4888 - }, - { - "epoch": 0.7873102781915536, - "grad_norm": 0.0003511893155518919, - "learning_rate": 0.00019999969536182017, - "loss": 46.0, - "step": 4889 - }, - { - "epoch": 0.787471315270341, - "grad_norm": 0.000942239654250443, - "learning_rate": 0.0001999996952369301, - "loss": 46.0, - "step": 4890 - }, - { - "epoch": 0.7876323523491284, - "grad_norm": 0.00026638482813723385, - "learning_rate": 0.0001999996951120145, - "loss": 46.0, - "step": 4891 - }, - { - "epoch": 0.7877933894279158, - "grad_norm": 0.0027480493299663067, - "learning_rate": 0.00019999969498707328, - "loss": 46.0, - "step": 4892 - }, - { - "epoch": 0.7879544265067032, - "grad_norm": 0.0018945022020488977, - "learning_rate": 0.00019999969486210643, - "loss": 46.0, - "step": 4893 - }, - { - "epoch": 0.7881154635854906, - "grad_norm": 0.001131606986746192, - "learning_rate": 0.00019999969473711402, - "loss": 46.0, - "step": 4894 - }, - { - "epoch": 0.7882765006642779, - "grad_norm": 0.0037077227607369423, - "learning_rate": 0.00019999969461209603, - "loss": 46.0, - "step": 4895 - }, - { - "epoch": 0.7884375377430654, - "grad_norm": 0.0014807177940383554, - "learning_rate": 0.00019999969448705241, - "loss": 46.0, - "step": 4896 - }, - { - "epoch": 0.7885985748218527, - "grad_norm": 0.0004393410054035485, - "learning_rate": 0.00019999969436198322, - "loss": 46.0, - "step": 4897 - }, - { - "epoch": 0.7887596119006401, - "grad_norm": 0.0011072284542024136, - "learning_rate": 0.00019999969423688843, - "loss": 46.0, - "step": 4898 - }, - { - "epoch": 0.7889206489794275, - "grad_norm": 0.001103946240618825, - "learning_rate": 0.00019999969411176803, - "loss": 46.0, - "step": 4899 - }, - { - "epoch": 0.7890816860582149, - "grad_norm": 0.0005947441677562892, - "learning_rate": 0.00019999969398662204, - "loss": 46.0, - "step": 4900 - }, - { - "epoch": 0.7892427231370023, - "grad_norm": 0.0017201130976900458, - "learning_rate": 0.00019999969386145047, - "loss": 46.0, - "step": 4901 - }, - { - "epoch": 0.7894037602157897, - "grad_norm": 0.0026060270611196756, - "learning_rate": 0.00019999969373625328, - "loss": 46.0, - "step": 4902 - }, - { - "epoch": 0.7895647972945771, - "grad_norm": 0.0015082237077876925, - "learning_rate": 0.00019999969361103053, - "loss": 46.0, - "step": 4903 - }, - { - "epoch": 0.7897258343733645, - "grad_norm": 0.0013800321612507105, - "learning_rate": 0.00019999969348578217, - "loss": 46.0, - "step": 4904 - }, - { - "epoch": 0.7898868714521519, - "grad_norm": 0.001854727161116898, - "learning_rate": 0.00019999969336050822, - "loss": 46.0, - "step": 4905 - }, - { - "epoch": 0.7900479085309392, - "grad_norm": 0.0003750800678972155, - "learning_rate": 0.00019999969323520865, - "loss": 46.0, - "step": 4906 - }, - { - "epoch": 0.7902089456097267, - "grad_norm": 0.0006828468176536262, - "learning_rate": 0.0001999996931098835, - "loss": 46.0, - "step": 4907 - }, - { - "epoch": 0.790369982688514, - "grad_norm": 0.004550055600702763, - "learning_rate": 0.00019999969298453273, - "loss": 46.0, - "step": 4908 - }, - { - "epoch": 0.7905310197673014, - "grad_norm": 0.0003666391712613404, - "learning_rate": 0.0001999996928591564, - "loss": 46.0, - "step": 4909 - }, - { - "epoch": 0.7906920568460888, - "grad_norm": 0.00104003946762532, - "learning_rate": 0.0001999996927337545, - "loss": 46.0, - "step": 4910 - }, - { - "epoch": 0.7908530939248762, - "grad_norm": 0.0006893830723129213, - "learning_rate": 0.00019999969260832697, - "loss": 46.0, - "step": 4911 - }, - { - "epoch": 0.7910141310036636, - "grad_norm": 0.0005217760917730629, - "learning_rate": 0.00019999969248287385, - "loss": 46.0, - "step": 4912 - }, - { - "epoch": 0.791175168082451, - "grad_norm": 0.004187182988971472, - "learning_rate": 0.0001999996923573951, - "loss": 46.0, - "step": 4913 - }, - { - "epoch": 0.7913362051612384, - "grad_norm": 0.00039599742740392685, - "learning_rate": 0.00019999969223189078, - "loss": 46.0, - "step": 4914 - }, - { - "epoch": 0.7914972422400257, - "grad_norm": 0.002221059752628207, - "learning_rate": 0.0001999996921063609, - "loss": 46.0, - "step": 4915 - }, - { - "epoch": 0.7916582793188132, - "grad_norm": 0.0005677093286067247, - "learning_rate": 0.0001999996919808054, - "loss": 46.0, - "step": 4916 - }, - { - "epoch": 0.7918193163976005, - "grad_norm": 0.002351069124415517, - "learning_rate": 0.0001999996918552243, - "loss": 46.0, - "step": 4917 - }, - { - "epoch": 0.791980353476388, - "grad_norm": 0.0014926479198038578, - "learning_rate": 0.00019999969172961762, - "loss": 46.0, - "step": 4918 - }, - { - "epoch": 0.7921413905551753, - "grad_norm": 0.0008763189543969929, - "learning_rate": 0.00019999969160398534, - "loss": 46.0, - "step": 4919 - }, - { - "epoch": 0.7923024276339627, - "grad_norm": 0.0007234427612274885, - "learning_rate": 0.00019999969147832744, - "loss": 46.0, - "step": 4920 - }, - { - "epoch": 0.7924634647127501, - "grad_norm": 0.0005524869775399566, - "learning_rate": 0.000199999691352644, - "loss": 46.0, - "step": 4921 - }, - { - "epoch": 0.7926245017915375, - "grad_norm": 0.003294931724667549, - "learning_rate": 0.0001999996912269349, - "loss": 46.0, - "step": 4922 - }, - { - "epoch": 0.7927855388703249, - "grad_norm": 0.0017873761244118214, - "learning_rate": 0.00019999969110120021, - "loss": 46.0, - "step": 4923 - }, - { - "epoch": 0.7929465759491123, - "grad_norm": 0.001016058144159615, - "learning_rate": 0.00019999969097543997, - "loss": 46.0, - "step": 4924 - }, - { - "epoch": 0.7931076130278997, - "grad_norm": 0.00024809749447740614, - "learning_rate": 0.00019999969084965412, - "loss": 46.0, - "step": 4925 - }, - { - "epoch": 0.793268650106687, - "grad_norm": 0.0007221294799819589, - "learning_rate": 0.00019999969072384265, - "loss": 46.0, - "step": 4926 - }, - { - "epoch": 0.7934296871854745, - "grad_norm": 0.004974100738763809, - "learning_rate": 0.00019999969059800564, - "loss": 46.0, - "step": 4927 - }, - { - "epoch": 0.7935907242642618, - "grad_norm": 0.0026517470832914114, - "learning_rate": 0.00019999969047214297, - "loss": 46.0, - "step": 4928 - }, - { - "epoch": 0.7937517613430493, - "grad_norm": 0.0012876553228124976, - "learning_rate": 0.00019999969034625474, - "loss": 46.0, - "step": 4929 - }, - { - "epoch": 0.7939127984218366, - "grad_norm": 0.0008008077275007963, - "learning_rate": 0.00019999969022034092, - "loss": 46.0, - "step": 4930 - }, - { - "epoch": 0.794073835500624, - "grad_norm": 0.0033534260001033545, - "learning_rate": 0.00019999969009440148, - "loss": 46.0, - "step": 4931 - }, - { - "epoch": 0.7942348725794114, - "grad_norm": 0.0005811078590340912, - "learning_rate": 0.00019999968996843646, - "loss": 46.0, - "step": 4932 - }, - { - "epoch": 0.7943959096581988, - "grad_norm": 0.0007451678975485265, - "learning_rate": 0.00019999968984244588, - "loss": 46.0, - "step": 4933 - }, - { - "epoch": 0.7945569467369862, - "grad_norm": 0.0032919771037995815, - "learning_rate": 0.00019999968971642966, - "loss": 46.0, - "step": 4934 - }, - { - "epoch": 0.7947179838157736, - "grad_norm": 0.0005804127431474626, - "learning_rate": 0.00019999968959038785, - "loss": 46.0, - "step": 4935 - }, - { - "epoch": 0.794879020894561, - "grad_norm": 0.00047154416097328067, - "learning_rate": 0.00019999968946432045, - "loss": 46.0, - "step": 4936 - }, - { - "epoch": 0.7950400579733483, - "grad_norm": 0.0025603254325687885, - "learning_rate": 0.00019999968933822747, - "loss": 46.0, - "step": 4937 - }, - { - "epoch": 0.7952010950521358, - "grad_norm": 0.0012943173060193658, - "learning_rate": 0.0001999996892121089, - "loss": 46.0, - "step": 4938 - }, - { - "epoch": 0.7953621321309231, - "grad_norm": 0.0006102478946559131, - "learning_rate": 0.0001999996890859647, - "loss": 46.0, - "step": 4939 - }, - { - "epoch": 0.7955231692097106, - "grad_norm": 0.0005684745847247541, - "learning_rate": 0.00019999968895979492, - "loss": 46.0, - "step": 4940 - }, - { - "epoch": 0.7956842062884979, - "grad_norm": 0.002167138736695051, - "learning_rate": 0.00019999968883359956, - "loss": 46.0, - "step": 4941 - }, - { - "epoch": 0.7958452433672853, - "grad_norm": 0.0008825754630379379, - "learning_rate": 0.0001999996887073786, - "loss": 46.0, - "step": 4942 - }, - { - "epoch": 0.7960062804460727, - "grad_norm": 0.0007869398104958236, - "learning_rate": 0.00019999968858113202, - "loss": 46.0, - "step": 4943 - }, - { - "epoch": 0.7961673175248601, - "grad_norm": 0.00489926477894187, - "learning_rate": 0.00019999968845485987, - "loss": 46.0, - "step": 4944 - }, - { - "epoch": 0.7963283546036475, - "grad_norm": 0.0006775857764296234, - "learning_rate": 0.00019999968832856214, - "loss": 46.0, - "step": 4945 - }, - { - "epoch": 0.7964893916824349, - "grad_norm": 0.0008033980848267674, - "learning_rate": 0.0001999996882022388, - "loss": 46.0, - "step": 4946 - }, - { - "epoch": 0.7966504287612223, - "grad_norm": 0.0008058302919380367, - "learning_rate": 0.00019999968807588985, - "loss": 46.0, - "step": 4947 - }, - { - "epoch": 0.7968114658400096, - "grad_norm": 0.00040172855369746685, - "learning_rate": 0.0001999996879495153, - "loss": 46.0, - "step": 4948 - }, - { - "epoch": 0.7969725029187971, - "grad_norm": 0.0006174162263050675, - "learning_rate": 0.0001999996878231152, - "loss": 46.0, - "step": 4949 - }, - { - "epoch": 0.7971335399975844, - "grad_norm": 0.0011549360351637006, - "learning_rate": 0.00019999968769668946, - "loss": 46.0, - "step": 4950 - }, - { - "epoch": 0.7972945770763719, - "grad_norm": 0.0007918311166577041, - "learning_rate": 0.00019999968757023812, - "loss": 46.0, - "step": 4951 - }, - { - "epoch": 0.7974556141551592, - "grad_norm": 0.0003489319933578372, - "learning_rate": 0.0001999996874437612, - "loss": 46.0, - "step": 4952 - }, - { - "epoch": 0.7976166512339466, - "grad_norm": 0.000532814534381032, - "learning_rate": 0.0001999996873172587, - "loss": 46.0, - "step": 4953 - }, - { - "epoch": 0.797777688312734, - "grad_norm": 0.0018908587517216802, - "learning_rate": 0.0001999996871907306, - "loss": 46.0, - "step": 4954 - }, - { - "epoch": 0.7979387253915214, - "grad_norm": 0.002615349367260933, - "learning_rate": 0.00019999968706417692, - "loss": 46.0, - "step": 4955 - }, - { - "epoch": 0.7980997624703088, - "grad_norm": 0.0007406870718114078, - "learning_rate": 0.00019999968693759762, - "loss": 46.0, - "step": 4956 - }, - { - "epoch": 0.7982607995490962, - "grad_norm": 0.0006270628655329347, - "learning_rate": 0.00019999968681099276, - "loss": 46.0, - "step": 4957 - }, - { - "epoch": 0.7984218366278836, - "grad_norm": 0.0002609911607578397, - "learning_rate": 0.00019999968668436225, - "loss": 46.0, - "step": 4958 - }, - { - "epoch": 0.7985828737066709, - "grad_norm": 0.0011190030490979552, - "learning_rate": 0.00019999968655770616, - "loss": 46.0, - "step": 4959 - }, - { - "epoch": 0.7987439107854584, - "grad_norm": 0.0007208367460407317, - "learning_rate": 0.0001999996864310245, - "loss": 46.0, - "step": 4960 - }, - { - "epoch": 0.7989049478642457, - "grad_norm": 0.0008196748676709831, - "learning_rate": 0.00019999968630431724, - "loss": 46.0, - "step": 4961 - }, - { - "epoch": 0.7990659849430332, - "grad_norm": 0.0007227154565043747, - "learning_rate": 0.00019999968617758436, - "loss": 46.0, - "step": 4962 - }, - { - "epoch": 0.7992270220218205, - "grad_norm": 0.0007980055524967611, - "learning_rate": 0.00019999968605082592, - "loss": 46.0, - "step": 4963 - }, - { - "epoch": 0.799388059100608, - "grad_norm": 0.0005577775300480425, - "learning_rate": 0.00019999968592404187, - "loss": 46.0, - "step": 4964 - }, - { - "epoch": 0.7995490961793953, - "grad_norm": 0.0010210535256192088, - "learning_rate": 0.0001999996857972322, - "loss": 46.0, - "step": 4965 - }, - { - "epoch": 0.7997101332581827, - "grad_norm": 0.00203165621496737, - "learning_rate": 0.00019999968567039697, - "loss": 46.0, - "step": 4966 - }, - { - "epoch": 0.7998711703369701, - "grad_norm": 0.0004046688845846802, - "learning_rate": 0.00019999968554353613, - "loss": 46.0, - "step": 4967 - }, - { - "epoch": 0.8000322074157574, - "grad_norm": 0.002894398057833314, - "learning_rate": 0.0001999996854166497, - "loss": 46.0, - "step": 4968 - }, - { - "epoch": 0.8001932444945449, - "grad_norm": 0.0013177957152947783, - "learning_rate": 0.00019999968528973768, - "loss": 46.0, - "step": 4969 - }, - { - "epoch": 0.8003542815733322, - "grad_norm": 0.0026697125285863876, - "learning_rate": 0.00019999968516280005, - "loss": 46.0, - "step": 4970 - }, - { - "epoch": 0.8005153186521197, - "grad_norm": 0.0017101905541494489, - "learning_rate": 0.00019999968503583686, - "loss": 46.0, - "step": 4971 - }, - { - "epoch": 0.800676355730907, - "grad_norm": 0.0006968029774725437, - "learning_rate": 0.00019999968490884803, - "loss": 46.0, - "step": 4972 - }, - { - "epoch": 0.8008373928096945, - "grad_norm": 0.0038087673019617796, - "learning_rate": 0.00019999968478183363, - "loss": 46.0, - "step": 4973 - }, - { - "epoch": 0.8009984298884818, - "grad_norm": 0.0013463938375934958, - "learning_rate": 0.00019999968465479363, - "loss": 46.0, - "step": 4974 - }, - { - "epoch": 0.8011594669672693, - "grad_norm": 0.00038374605355784297, - "learning_rate": 0.00019999968452772806, - "loss": 46.0, - "step": 4975 - }, - { - "epoch": 0.8013205040460566, - "grad_norm": 0.003065139288082719, - "learning_rate": 0.00019999968440063685, - "loss": 46.0, - "step": 4976 - }, - { - "epoch": 0.801481541124844, - "grad_norm": 0.00507735600695014, - "learning_rate": 0.00019999968427352008, - "loss": 46.0, - "step": 4977 - }, - { - "epoch": 0.8016425782036314, - "grad_norm": 0.0011967107420787215, - "learning_rate": 0.00019999968414637767, - "loss": 46.0, - "step": 4978 - }, - { - "epoch": 0.8018036152824187, - "grad_norm": 0.0011453555198386312, - "learning_rate": 0.00019999968401920973, - "loss": 46.0, - "step": 4979 - }, - { - "epoch": 0.8019646523612062, - "grad_norm": 0.0003181898209732026, - "learning_rate": 0.00019999968389201615, - "loss": 46.0, - "step": 4980 - }, - { - "epoch": 0.8021256894399935, - "grad_norm": 0.0033435707446187735, - "learning_rate": 0.000199999683764797, - "loss": 46.0, - "step": 4981 - }, - { - "epoch": 0.802286726518781, - "grad_norm": 0.0011169268982484937, - "learning_rate": 0.00019999968363755224, - "loss": 46.0, - "step": 4982 - }, - { - "epoch": 0.8024477635975683, - "grad_norm": 0.0012636369792744517, - "learning_rate": 0.0001999996835102819, - "loss": 46.0, - "step": 4983 - }, - { - "epoch": 0.8026088006763558, - "grad_norm": 0.0007384041091427207, - "learning_rate": 0.00019999968338298594, - "loss": 46.0, - "step": 4984 - }, - { - "epoch": 0.8027698377551431, - "grad_norm": 0.0012772405752912164, - "learning_rate": 0.00019999968325566442, - "loss": 46.0, - "step": 4985 - }, - { - "epoch": 0.8029308748339306, - "grad_norm": 0.0005906779551878572, - "learning_rate": 0.00019999968312831725, - "loss": 46.0, - "step": 4986 - }, - { - "epoch": 0.8030919119127179, - "grad_norm": 0.000568014511372894, - "learning_rate": 0.0001999996830009445, - "loss": 46.0, - "step": 4987 - }, - { - "epoch": 0.8032529489915053, - "grad_norm": 0.00076268520206213, - "learning_rate": 0.0001999996828735462, - "loss": 46.0, - "step": 4988 - }, - { - "epoch": 0.8034139860702927, - "grad_norm": 0.004757401067763567, - "learning_rate": 0.00019999968274612227, - "loss": 46.0, - "step": 4989 - }, - { - "epoch": 0.80357502314908, - "grad_norm": 0.0024922143202275038, - "learning_rate": 0.00019999968261867276, - "loss": 46.0, - "step": 4990 - }, - { - "epoch": 0.8037360602278675, - "grad_norm": 0.003020449075847864, - "learning_rate": 0.00019999968249119764, - "loss": 46.0, - "step": 4991 - }, - { - "epoch": 0.8038970973066548, - "grad_norm": 0.0004710409848485142, - "learning_rate": 0.00019999968236369695, - "loss": 46.0, - "step": 4992 - }, - { - "epoch": 0.8040581343854423, - "grad_norm": 0.0016607147408649325, - "learning_rate": 0.00019999968223617065, - "loss": 46.0, - "step": 4993 - }, - { - "epoch": 0.8042191714642296, - "grad_norm": 0.0007125571719370782, - "learning_rate": 0.00019999968210861874, - "loss": 46.0, - "step": 4994 - }, - { - "epoch": 0.8043802085430171, - "grad_norm": 0.0012517754221335053, - "learning_rate": 0.00019999968198104127, - "loss": 46.0, - "step": 4995 - }, - { - "epoch": 0.8045412456218044, - "grad_norm": 0.0008217499125748873, - "learning_rate": 0.00019999968185343818, - "loss": 46.0, - "step": 4996 - }, - { - "epoch": 0.8047022827005919, - "grad_norm": 0.00045296180178411305, - "learning_rate": 0.0001999996817258095, - "loss": 46.0, - "step": 4997 - }, - { - "epoch": 0.8048633197793792, - "grad_norm": 0.00023589274496771395, - "learning_rate": 0.00019999968159815524, - "loss": 46.0, - "step": 4998 - }, - { - "epoch": 0.8050243568581666, - "grad_norm": 0.0025104512460529804, - "learning_rate": 0.00019999968147047537, - "loss": 46.0, - "step": 4999 - }, - { - "epoch": 0.805185393936954, - "grad_norm": 0.0012525818310678005, - "learning_rate": 0.0001999996813427699, - "loss": 46.0, - "step": 5000 - }, - { - "epoch": 0.8053464310157413, - "grad_norm": 0.0007998804212547839, - "learning_rate": 0.00019999968121503883, - "loss": 46.0, - "step": 5001 - }, - { - "epoch": 0.8055074680945288, - "grad_norm": 0.007886652834713459, - "learning_rate": 0.0001999996810872822, - "loss": 46.0, - "step": 5002 - }, - { - "epoch": 0.8056685051733161, - "grad_norm": 0.0035920701920986176, - "learning_rate": 0.00019999968095949994, - "loss": 46.0, - "step": 5003 - }, - { - "epoch": 0.8058295422521036, - "grad_norm": 0.0019281983841210604, - "learning_rate": 0.0001999996808316921, - "loss": 46.0, - "step": 5004 - }, - { - "epoch": 0.8059905793308909, - "grad_norm": 0.0008937554666772485, - "learning_rate": 0.00019999968070385867, - "loss": 46.0, - "step": 5005 - }, - { - "epoch": 0.8061516164096784, - "grad_norm": 0.000303289940347895, - "learning_rate": 0.00019999968057599963, - "loss": 46.0, - "step": 5006 - }, - { - "epoch": 0.8063126534884657, - "grad_norm": 0.0005682788323611021, - "learning_rate": 0.000199999680448115, - "loss": 46.0, - "step": 5007 - }, - { - "epoch": 0.8064736905672532, - "grad_norm": 0.0003950601676478982, - "learning_rate": 0.0001999996803202048, - "loss": 46.0, - "step": 5008 - }, - { - "epoch": 0.8066347276460405, - "grad_norm": 0.00045897779637016356, - "learning_rate": 0.00019999968019226896, - "loss": 46.0, - "step": 5009 - }, - { - "epoch": 0.806795764724828, - "grad_norm": 0.0011327541433274746, - "learning_rate": 0.00019999968006430754, - "loss": 46.0, - "step": 5010 - }, - { - "epoch": 0.8069568018036153, - "grad_norm": 0.0008953461074270308, - "learning_rate": 0.00019999967993632054, - "loss": 46.0, - "step": 5011 - }, - { - "epoch": 0.8071178388824026, - "grad_norm": 0.0023495692294090986, - "learning_rate": 0.00019999967980830795, - "loss": 46.0, - "step": 5012 - }, - { - "epoch": 0.8072788759611901, - "grad_norm": 0.0019212618935853243, - "learning_rate": 0.00019999967968026977, - "loss": 46.0, - "step": 5013 - }, - { - "epoch": 0.8074399130399774, - "grad_norm": 0.0010068389819934964, - "learning_rate": 0.00019999967955220595, - "loss": 46.0, - "step": 5014 - }, - { - "epoch": 0.8076009501187649, - "grad_norm": 0.00038365210639312863, - "learning_rate": 0.00019999967942411657, - "loss": 46.0, - "step": 5015 - }, - { - "epoch": 0.8077619871975522, - "grad_norm": 0.0007847102242521942, - "learning_rate": 0.00019999967929600158, - "loss": 46.0, - "step": 5016 - }, - { - "epoch": 0.8079230242763397, - "grad_norm": 0.0008404904510825872, - "learning_rate": 0.00019999967916786102, - "loss": 46.0, - "step": 5017 - }, - { - "epoch": 0.808084061355127, - "grad_norm": 0.0009001229773275554, - "learning_rate": 0.00019999967903969483, - "loss": 46.0, - "step": 5018 - }, - { - "epoch": 0.8082450984339145, - "grad_norm": 0.0018883231095969677, - "learning_rate": 0.00019999967891150307, - "loss": 46.0, - "step": 5019 - }, - { - "epoch": 0.8084061355127018, - "grad_norm": 0.0006504155462607741, - "learning_rate": 0.0001999996787832857, - "loss": 46.0, - "step": 5020 - }, - { - "epoch": 0.8085671725914891, - "grad_norm": 0.0006420365534722805, - "learning_rate": 0.00019999967865504277, - "loss": 46.0, - "step": 5021 - }, - { - "epoch": 0.8087282096702766, - "grad_norm": 0.001215107156895101, - "learning_rate": 0.0001999996785267742, - "loss": 46.0, - "step": 5022 - }, - { - "epoch": 0.8088892467490639, - "grad_norm": 0.0016643713461235166, - "learning_rate": 0.00019999967839848007, - "loss": 46.0, - "step": 5023 - }, - { - "epoch": 0.8090502838278514, - "grad_norm": 0.0009740013047121465, - "learning_rate": 0.00019999967827016035, - "loss": 46.0, - "step": 5024 - }, - { - "epoch": 0.8092113209066387, - "grad_norm": 0.00035582008422352374, - "learning_rate": 0.00019999967814181502, - "loss": 46.0, - "step": 5025 - }, - { - "epoch": 0.8093723579854262, - "grad_norm": 0.0012886858312413096, - "learning_rate": 0.00019999967801344408, - "loss": 46.0, - "step": 5026 - }, - { - "epoch": 0.8095333950642135, - "grad_norm": 0.001792251132428646, - "learning_rate": 0.00019999967788504757, - "loss": 46.0, - "step": 5027 - }, - { - "epoch": 0.809694432143001, - "grad_norm": 0.0004244278825353831, - "learning_rate": 0.00019999967775662545, - "loss": 46.0, - "step": 5028 - }, - { - "epoch": 0.8098554692217883, - "grad_norm": 0.0006020162254571915, - "learning_rate": 0.0001999996776281777, - "loss": 46.0, - "step": 5029 - }, - { - "epoch": 0.8100165063005758, - "grad_norm": 0.0013522617518901825, - "learning_rate": 0.00019999967749970442, - "loss": 46.0, - "step": 5030 - }, - { - "epoch": 0.8101775433793631, - "grad_norm": 0.0005848242435604334, - "learning_rate": 0.00019999967737120553, - "loss": 46.0, - "step": 5031 - }, - { - "epoch": 0.8103385804581504, - "grad_norm": 0.0006331526674330235, - "learning_rate": 0.000199999677242681, - "loss": 46.0, - "step": 5032 - }, - { - "epoch": 0.8104996175369379, - "grad_norm": 0.0007234473014250398, - "learning_rate": 0.00019999967711413093, - "loss": 46.0, - "step": 5033 - }, - { - "epoch": 0.8106606546157252, - "grad_norm": 0.0042146965861320496, - "learning_rate": 0.00019999967698555523, - "loss": 46.0, - "step": 5034 - }, - { - "epoch": 0.8108216916945127, - "grad_norm": 0.0005461159162223339, - "learning_rate": 0.00019999967685695394, - "loss": 46.0, - "step": 5035 - }, - { - "epoch": 0.8109827287733, - "grad_norm": 0.003211408853530884, - "learning_rate": 0.00019999967672832707, - "loss": 46.0, - "step": 5036 - }, - { - "epoch": 0.8111437658520875, - "grad_norm": 0.0005885272985324264, - "learning_rate": 0.0001999996765996746, - "loss": 46.0, - "step": 5037 - }, - { - "epoch": 0.8113048029308748, - "grad_norm": 0.0016029799589887261, - "learning_rate": 0.00019999967647099656, - "loss": 46.0, - "step": 5038 - }, - { - "epoch": 0.8114658400096623, - "grad_norm": 0.0013309215428307652, - "learning_rate": 0.00019999967634229287, - "loss": 46.0, - "step": 5039 - }, - { - "epoch": 0.8116268770884496, - "grad_norm": 0.0015502702444791794, - "learning_rate": 0.00019999967621356363, - "loss": 46.0, - "step": 5040 - }, - { - "epoch": 0.8117879141672371, - "grad_norm": 0.0012823846191167831, - "learning_rate": 0.0001999996760848088, - "loss": 46.0, - "step": 5041 - }, - { - "epoch": 0.8119489512460244, - "grad_norm": 0.0007383690099231899, - "learning_rate": 0.00019999967595602834, - "loss": 46.0, - "step": 5042 - }, - { - "epoch": 0.8121099883248117, - "grad_norm": 0.00037825643084943295, - "learning_rate": 0.0001999996758272223, - "loss": 46.0, - "step": 5043 - }, - { - "epoch": 0.8122710254035992, - "grad_norm": 0.00038448069244623184, - "learning_rate": 0.00019999967569839065, - "loss": 46.0, - "step": 5044 - }, - { - "epoch": 0.8124320624823865, - "grad_norm": 0.0008973998483270407, - "learning_rate": 0.00019999967556953344, - "loss": 46.0, - "step": 5045 - }, - { - "epoch": 0.812593099561174, - "grad_norm": 0.0008637954015284777, - "learning_rate": 0.00019999967544065062, - "loss": 46.0, - "step": 5046 - }, - { - "epoch": 0.8127541366399613, - "grad_norm": 0.0038218258414417505, - "learning_rate": 0.0001999996753117422, - "loss": 46.0, - "step": 5047 - }, - { - "epoch": 0.8129151737187488, - "grad_norm": 0.0007042171200737357, - "learning_rate": 0.00019999967518280818, - "loss": 46.0, - "step": 5048 - }, - { - "epoch": 0.8130762107975361, - "grad_norm": 0.000422991142841056, - "learning_rate": 0.00019999967505384856, - "loss": 46.0, - "step": 5049 - }, - { - "epoch": 0.8132372478763236, - "grad_norm": 0.0008916970691643655, - "learning_rate": 0.0001999996749248634, - "loss": 46.0, - "step": 5050 - }, - { - "epoch": 0.8133982849551109, - "grad_norm": 0.0009910401422530413, - "learning_rate": 0.00019999967479585257, - "loss": 46.0, - "step": 5051 - }, - { - "epoch": 0.8135593220338984, - "grad_norm": 0.0027690459974110126, - "learning_rate": 0.0001999996746668162, - "loss": 46.0, - "step": 5052 - }, - { - "epoch": 0.8137203591126857, - "grad_norm": 0.0004160216776654124, - "learning_rate": 0.00019999967453775418, - "loss": 46.0, - "step": 5053 - }, - { - "epoch": 0.813881396191473, - "grad_norm": 0.004208892118185759, - "learning_rate": 0.0001999996744086666, - "loss": 46.0, - "step": 5054 - }, - { - "epoch": 0.8140424332702605, - "grad_norm": 0.00034192390739917755, - "learning_rate": 0.00019999967427955344, - "loss": 46.0, - "step": 5055 - }, - { - "epoch": 0.8142034703490478, - "grad_norm": 0.001324885874055326, - "learning_rate": 0.00019999967415041466, - "loss": 46.0, - "step": 5056 - }, - { - "epoch": 0.8143645074278353, - "grad_norm": 0.0003919968439731747, - "learning_rate": 0.0001999996740212503, - "loss": 46.0, - "step": 5057 - }, - { - "epoch": 0.8145255445066226, - "grad_norm": 0.00042274646693840623, - "learning_rate": 0.00019999967389206034, - "loss": 46.0, - "step": 5058 - }, - { - "epoch": 0.8146865815854101, - "grad_norm": 0.0005962664727121592, - "learning_rate": 0.00019999967376284477, - "loss": 46.0, - "step": 5059 - }, - { - "epoch": 0.8148476186641974, - "grad_norm": 0.001136332517489791, - "learning_rate": 0.00019999967363360362, - "loss": 46.0, - "step": 5060 - }, - { - "epoch": 0.8150086557429849, - "grad_norm": 0.0004930444993078709, - "learning_rate": 0.00019999967350433688, - "loss": 46.0, - "step": 5061 - }, - { - "epoch": 0.8151696928217722, - "grad_norm": 0.0006116154254414141, - "learning_rate": 0.00019999967337504452, - "loss": 46.0, - "step": 5062 - }, - { - "epoch": 0.8153307299005597, - "grad_norm": 0.0006569311372004449, - "learning_rate": 0.0001999996732457266, - "loss": 46.0, - "step": 5063 - }, - { - "epoch": 0.815491766979347, - "grad_norm": 0.0032229472417384386, - "learning_rate": 0.00019999967311638307, - "loss": 46.0, - "step": 5064 - }, - { - "epoch": 0.8156528040581343, - "grad_norm": 0.0067578526213765144, - "learning_rate": 0.00019999967298701393, - "loss": 46.0, - "step": 5065 - }, - { - "epoch": 0.8158138411369218, - "grad_norm": 0.001125033595599234, - "learning_rate": 0.00019999967285761922, - "loss": 46.0, - "step": 5066 - }, - { - "epoch": 0.8159748782157091, - "grad_norm": 0.0028744186274707317, - "learning_rate": 0.00019999967272819893, - "loss": 46.0, - "step": 5067 - }, - { - "epoch": 0.8161359152944966, - "grad_norm": 0.0007400520844385028, - "learning_rate": 0.000199999672598753, - "loss": 46.0, - "step": 5068 - }, - { - "epoch": 0.8162969523732839, - "grad_norm": 0.0031711275223642588, - "learning_rate": 0.0001999996724692815, - "loss": 46.0, - "step": 5069 - }, - { - "epoch": 0.8164579894520714, - "grad_norm": 0.0010434158612042665, - "learning_rate": 0.0001999996723397844, - "loss": 46.0, - "step": 5070 - }, - { - "epoch": 0.8166190265308587, - "grad_norm": 0.00046424055472016335, - "learning_rate": 0.00019999967221026173, - "loss": 46.0, - "step": 5071 - }, - { - "epoch": 0.8167800636096462, - "grad_norm": 0.0009833801304921508, - "learning_rate": 0.00019999967208071345, - "loss": 46.0, - "step": 5072 - }, - { - "epoch": 0.8169411006884335, - "grad_norm": 0.001199959428049624, - "learning_rate": 0.00019999967195113953, - "loss": 46.0, - "step": 5073 - }, - { - "epoch": 0.8171021377672208, - "grad_norm": 0.002370106056332588, - "learning_rate": 0.00019999967182154007, - "loss": 46.0, - "step": 5074 - }, - { - "epoch": 0.8172631748460083, - "grad_norm": 0.0013635419309139252, - "learning_rate": 0.00019999967169191497, - "loss": 46.0, - "step": 5075 - }, - { - "epoch": 0.8174242119247956, - "grad_norm": 0.0043939026072621346, - "learning_rate": 0.00019999967156226434, - "loss": 46.0, - "step": 5076 - }, - { - "epoch": 0.8175852490035831, - "grad_norm": 0.0003139891487080604, - "learning_rate": 0.00019999967143258807, - "loss": 46.0, - "step": 5077 - }, - { - "epoch": 0.8177462860823704, - "grad_norm": 0.000730452942661941, - "learning_rate": 0.0001999996713028862, - "loss": 46.0, - "step": 5078 - }, - { - "epoch": 0.8179073231611579, - "grad_norm": 0.0018734782934188843, - "learning_rate": 0.00019999967117315874, - "loss": 46.0, - "step": 5079 - }, - { - "epoch": 0.8180683602399452, - "grad_norm": 0.0011661278549581766, - "learning_rate": 0.0001999996710434057, - "loss": 46.0, - "step": 5080 - }, - { - "epoch": 0.8182293973187327, - "grad_norm": 0.005574526730924845, - "learning_rate": 0.00019999967091362706, - "loss": 46.0, - "step": 5081 - }, - { - "epoch": 0.81839043439752, - "grad_norm": 0.0009668830898590386, - "learning_rate": 0.00019999967078382285, - "loss": 46.0, - "step": 5082 - }, - { - "epoch": 0.8185514714763075, - "grad_norm": 0.0014443137915804982, - "learning_rate": 0.000199999670653993, - "loss": 46.0, - "step": 5083 - }, - { - "epoch": 0.8187125085550948, - "grad_norm": 0.0013937398325651884, - "learning_rate": 0.00019999967052413756, - "loss": 46.0, - "step": 5084 - }, - { - "epoch": 0.8188735456338821, - "grad_norm": 0.0007658318500034511, - "learning_rate": 0.00019999967039425657, - "loss": 46.0, - "step": 5085 - }, - { - "epoch": 0.8190345827126696, - "grad_norm": 0.001386961666867137, - "learning_rate": 0.00019999967026434993, - "loss": 46.0, - "step": 5086 - }, - { - "epoch": 0.8191956197914569, - "grad_norm": 0.004695508629083633, - "learning_rate": 0.00019999967013441773, - "loss": 46.0, - "step": 5087 - }, - { - "epoch": 0.8193566568702444, - "grad_norm": 0.0005968232289887965, - "learning_rate": 0.00019999967000445992, - "loss": 46.0, - "step": 5088 - }, - { - "epoch": 0.8195176939490317, - "grad_norm": 0.0008575798710808158, - "learning_rate": 0.00019999966987447652, - "loss": 46.0, - "step": 5089 - }, - { - "epoch": 0.8196787310278192, - "grad_norm": 0.0040957932360470295, - "learning_rate": 0.00019999966974446753, - "loss": 46.0, - "step": 5090 - }, - { - "epoch": 0.8198397681066065, - "grad_norm": 0.001956131774932146, - "learning_rate": 0.00019999966961443293, - "loss": 46.0, - "step": 5091 - }, - { - "epoch": 0.820000805185394, - "grad_norm": 0.0004973598406650126, - "learning_rate": 0.00019999966948437277, - "loss": 46.0, - "step": 5092 - }, - { - "epoch": 0.8201618422641813, - "grad_norm": 0.002371931914240122, - "learning_rate": 0.00019999966935428697, - "loss": 46.0, - "step": 5093 - }, - { - "epoch": 0.8203228793429688, - "grad_norm": 0.0005688421661034226, - "learning_rate": 0.0001999996692241756, - "loss": 46.0, - "step": 5094 - }, - { - "epoch": 0.8204839164217561, - "grad_norm": 0.004293699748814106, - "learning_rate": 0.00019999966909403863, - "loss": 46.0, - "step": 5095 - }, - { - "epoch": 0.8206449535005435, - "grad_norm": 0.0007094823522493243, - "learning_rate": 0.00019999966896387607, - "loss": 46.0, - "step": 5096 - }, - { - "epoch": 0.8208059905793309, - "grad_norm": 0.000648115819785744, - "learning_rate": 0.0001999996688336879, - "loss": 46.0, - "step": 5097 - }, - { - "epoch": 0.8209670276581182, - "grad_norm": 0.000635456875897944, - "learning_rate": 0.00019999966870347415, - "loss": 46.0, - "step": 5098 - }, - { - "epoch": 0.8211280647369057, - "grad_norm": 0.0007707527838647366, - "learning_rate": 0.00019999966857323483, - "loss": 46.0, - "step": 5099 - }, - { - "epoch": 0.821289101815693, - "grad_norm": 0.0008147604530677199, - "learning_rate": 0.00019999966844296986, - "loss": 46.0, - "step": 5100 - }, - { - "epoch": 0.8214501388944805, - "grad_norm": 0.0008703813655301929, - "learning_rate": 0.00019999966831267933, - "loss": 46.0, - "step": 5101 - }, - { - "epoch": 0.8216111759732678, - "grad_norm": 0.0010957014746963978, - "learning_rate": 0.00019999966818236322, - "loss": 46.0, - "step": 5102 - }, - { - "epoch": 0.8217722130520553, - "grad_norm": 0.001453327713534236, - "learning_rate": 0.0001999996680520215, - "loss": 46.0, - "step": 5103 - }, - { - "epoch": 0.8219332501308426, - "grad_norm": 0.0025897640734910965, - "learning_rate": 0.00019999966792165418, - "loss": 46.0, - "step": 5104 - }, - { - "epoch": 0.8220942872096301, - "grad_norm": 0.0007721189176663756, - "learning_rate": 0.00019999966779126125, - "loss": 46.0, - "step": 5105 - }, - { - "epoch": 0.8222553242884174, - "grad_norm": 0.0013284151209518313, - "learning_rate": 0.00019999966766084276, - "loss": 46.0, - "step": 5106 - }, - { - "epoch": 0.8224163613672048, - "grad_norm": 0.000944761501159519, - "learning_rate": 0.00019999966753039863, - "loss": 46.0, - "step": 5107 - }, - { - "epoch": 0.8225773984459922, - "grad_norm": 0.0005585766048170626, - "learning_rate": 0.00019999966739992894, - "loss": 46.0, - "step": 5108 - }, - { - "epoch": 0.8227384355247795, - "grad_norm": 0.0022968715056777, - "learning_rate": 0.00019999966726943363, - "loss": 46.0, - "step": 5109 - }, - { - "epoch": 0.822899472603567, - "grad_norm": 0.0046213772147893906, - "learning_rate": 0.00019999966713891274, - "loss": 46.0, - "step": 5110 - }, - { - "epoch": 0.8230605096823543, - "grad_norm": 0.004788071382790804, - "learning_rate": 0.00019999966700836626, - "loss": 46.0, - "step": 5111 - }, - { - "epoch": 0.8232215467611418, - "grad_norm": 0.000617998477537185, - "learning_rate": 0.0001999996668777942, - "loss": 46.0, - "step": 5112 - }, - { - "epoch": 0.8233825838399291, - "grad_norm": 0.0010753405513241887, - "learning_rate": 0.0001999996667471965, - "loss": 46.0, - "step": 5113 - }, - { - "epoch": 0.8235436209187166, - "grad_norm": 0.0015353562775999308, - "learning_rate": 0.00019999966661657324, - "loss": 46.0, - "step": 5114 - }, - { - "epoch": 0.8237046579975039, - "grad_norm": 0.0008767135441303253, - "learning_rate": 0.00019999966648592436, - "loss": 46.0, - "step": 5115 - }, - { - "epoch": 0.8238656950762914, - "grad_norm": 0.001422939240001142, - "learning_rate": 0.00019999966635524992, - "loss": 46.0, - "step": 5116 - }, - { - "epoch": 0.8240267321550787, - "grad_norm": 0.0006881275912746787, - "learning_rate": 0.00019999966622454986, - "loss": 46.0, - "step": 5117 - }, - { - "epoch": 0.824187769233866, - "grad_norm": 0.0009932867251336575, - "learning_rate": 0.00019999966609382422, - "loss": 46.0, - "step": 5118 - }, - { - "epoch": 0.8243488063126535, - "grad_norm": 0.0010311916703358293, - "learning_rate": 0.00019999966596307296, - "loss": 46.0, - "step": 5119 - }, - { - "epoch": 0.8245098433914408, - "grad_norm": 0.00047003134386613965, - "learning_rate": 0.00019999966583229614, - "loss": 46.0, - "step": 5120 - }, - { - "epoch": 0.8246708804702283, - "grad_norm": 0.003551816800609231, - "learning_rate": 0.0001999996657014937, - "loss": 46.0, - "step": 5121 - }, - { - "epoch": 0.8248319175490156, - "grad_norm": 0.0004784806224051863, - "learning_rate": 0.00019999966557066569, - "loss": 46.0, - "step": 5122 - }, - { - "epoch": 0.8249929546278031, - "grad_norm": 0.001361970091238618, - "learning_rate": 0.00019999966543981205, - "loss": 46.0, - "step": 5123 - }, - { - "epoch": 0.8251539917065904, - "grad_norm": 0.0008016018546186388, - "learning_rate": 0.00019999966530893283, - "loss": 46.0, - "step": 5124 - }, - { - "epoch": 0.8253150287853779, - "grad_norm": 0.0015518533764407039, - "learning_rate": 0.00019999966517802802, - "loss": 46.0, - "step": 5125 - }, - { - "epoch": 0.8254760658641652, - "grad_norm": 0.0009186487877741456, - "learning_rate": 0.00019999966504709763, - "loss": 46.0, - "step": 5126 - }, - { - "epoch": 0.8256371029429526, - "grad_norm": 0.0014622766757383943, - "learning_rate": 0.00019999966491614162, - "loss": 46.0, - "step": 5127 - }, - { - "epoch": 0.82579814002174, - "grad_norm": 0.0023140297271311283, - "learning_rate": 0.00019999966478516002, - "loss": 46.0, - "step": 5128 - }, - { - "epoch": 0.8259591771005274, - "grad_norm": 0.0023945504799485207, - "learning_rate": 0.00019999966465415284, - "loss": 46.0, - "step": 5129 - }, - { - "epoch": 0.8261202141793148, - "grad_norm": 0.00042854438652284443, - "learning_rate": 0.00019999966452312004, - "loss": 46.0, - "step": 5130 - }, - { - "epoch": 0.8262812512581021, - "grad_norm": 0.0013614867348223925, - "learning_rate": 0.00019999966439206165, - "loss": 46.0, - "step": 5131 - }, - { - "epoch": 0.8264422883368896, - "grad_norm": 0.0005885640275664628, - "learning_rate": 0.0001999996642609777, - "loss": 46.0, - "step": 5132 - }, - { - "epoch": 0.8266033254156769, - "grad_norm": 0.0010897950269281864, - "learning_rate": 0.00019999966412986812, - "loss": 46.0, - "step": 5133 - }, - { - "epoch": 0.8267643624944644, - "grad_norm": 0.0013300544815137982, - "learning_rate": 0.00019999966399873294, - "loss": 46.0, - "step": 5134 - }, - { - "epoch": 0.8269253995732517, - "grad_norm": 0.000617846439126879, - "learning_rate": 0.0001999996638675722, - "loss": 46.0, - "step": 5135 - }, - { - "epoch": 0.8270864366520392, - "grad_norm": 0.0006907997885718942, - "learning_rate": 0.00019999966373638583, - "loss": 46.0, - "step": 5136 - }, - { - "epoch": 0.8272474737308265, - "grad_norm": 0.0005879865493625402, - "learning_rate": 0.00019999966360517387, - "loss": 46.0, - "step": 5137 - }, - { - "epoch": 0.8274085108096139, - "grad_norm": 0.0007456826278939843, - "learning_rate": 0.00019999966347393632, - "loss": 46.0, - "step": 5138 - }, - { - "epoch": 0.8275695478884013, - "grad_norm": 0.0006295247003436089, - "learning_rate": 0.00019999966334267318, - "loss": 46.0, - "step": 5139 - }, - { - "epoch": 0.8277305849671887, - "grad_norm": 0.0007951277657411993, - "learning_rate": 0.00019999966321138449, - "loss": 46.0, - "step": 5140 - }, - { - "epoch": 0.8278916220459761, - "grad_norm": 0.0006441830191761255, - "learning_rate": 0.00019999966308007015, - "loss": 46.0, - "step": 5141 - }, - { - "epoch": 0.8280526591247634, - "grad_norm": 0.0011686583748087287, - "learning_rate": 0.00019999966294873022, - "loss": 46.0, - "step": 5142 - }, - { - "epoch": 0.8282136962035509, - "grad_norm": 0.0032313179690390825, - "learning_rate": 0.0001999996628173647, - "loss": 46.0, - "step": 5143 - }, - { - "epoch": 0.8283747332823382, - "grad_norm": 0.0010612113401293755, - "learning_rate": 0.00019999966268597358, - "loss": 46.0, - "step": 5144 - }, - { - "epoch": 0.8285357703611257, - "grad_norm": 0.0068147857673466206, - "learning_rate": 0.0001999996625545569, - "loss": 46.0, - "step": 5145 - }, - { - "epoch": 0.828696807439913, - "grad_norm": 0.0029785921797156334, - "learning_rate": 0.0001999996624231146, - "loss": 46.0, - "step": 5146 - }, - { - "epoch": 0.8288578445187005, - "grad_norm": 0.0004786409845110029, - "learning_rate": 0.00019999966229164668, - "loss": 46.0, - "step": 5147 - }, - { - "epoch": 0.8290188815974878, - "grad_norm": 0.0011323199141770601, - "learning_rate": 0.0001999996621601532, - "loss": 46.0, - "step": 5148 - }, - { - "epoch": 0.8291799186762752, - "grad_norm": 0.005421845242381096, - "learning_rate": 0.00019999966202863408, - "loss": 46.0, - "step": 5149 - }, - { - "epoch": 0.8293409557550626, - "grad_norm": 0.002832339145243168, - "learning_rate": 0.00019999966189708943, - "loss": 46.0, - "step": 5150 - }, - { - "epoch": 0.82950199283385, - "grad_norm": 0.00041840950143523514, - "learning_rate": 0.00019999966176551914, - "loss": 46.0, - "step": 5151 - }, - { - "epoch": 0.8296630299126374, - "grad_norm": 0.0016298888949677348, - "learning_rate": 0.00019999966163392326, - "loss": 46.0, - "step": 5152 - }, - { - "epoch": 0.8298240669914247, - "grad_norm": 0.0020185254979878664, - "learning_rate": 0.00019999966150230182, - "loss": 46.0, - "step": 5153 - }, - { - "epoch": 0.8299851040702122, - "grad_norm": 0.0010778631549328566, - "learning_rate": 0.00019999966137065477, - "loss": 46.0, - "step": 5154 - }, - { - "epoch": 0.8301461411489995, - "grad_norm": 0.0006393252406269312, - "learning_rate": 0.0001999996612389821, - "loss": 46.0, - "step": 5155 - }, - { - "epoch": 0.830307178227787, - "grad_norm": 0.0006912277895025909, - "learning_rate": 0.00019999966110728385, - "loss": 46.0, - "step": 5156 - }, - { - "epoch": 0.8304682153065743, - "grad_norm": 0.00045371902524493635, - "learning_rate": 0.00019999966097556, - "loss": 46.0, - "step": 5157 - }, - { - "epoch": 0.8306292523853618, - "grad_norm": 0.0007965494296513498, - "learning_rate": 0.00019999966084381058, - "loss": 46.0, - "step": 5158 - }, - { - "epoch": 0.8307902894641491, - "grad_norm": 0.001802053302526474, - "learning_rate": 0.00019999966071203553, - "loss": 46.0, - "step": 5159 - }, - { - "epoch": 0.8309513265429365, - "grad_norm": 0.000842803216073662, - "learning_rate": 0.00019999966058023493, - "loss": 46.0, - "step": 5160 - }, - { - "epoch": 0.8311123636217239, - "grad_norm": 0.0008711520349606872, - "learning_rate": 0.00019999966044840868, - "loss": 46.0, - "step": 5161 - }, - { - "epoch": 0.8312734007005113, - "grad_norm": 0.00036608861410059035, - "learning_rate": 0.00019999966031655685, - "loss": 46.0, - "step": 5162 - }, - { - "epoch": 0.8314344377792987, - "grad_norm": 0.0019751228392124176, - "learning_rate": 0.00019999966018467943, - "loss": 46.0, - "step": 5163 - }, - { - "epoch": 0.831595474858086, - "grad_norm": 0.0008423432591371238, - "learning_rate": 0.00019999966005277643, - "loss": 46.0, - "step": 5164 - }, - { - "epoch": 0.8317565119368735, - "grad_norm": 0.0033180860336869955, - "learning_rate": 0.00019999965992084783, - "loss": 46.0, - "step": 5165 - }, - { - "epoch": 0.8319175490156608, - "grad_norm": 0.004071468487381935, - "learning_rate": 0.00019999965978889363, - "loss": 46.0, - "step": 5166 - }, - { - "epoch": 0.8320785860944483, - "grad_norm": 0.0012572259875014424, - "learning_rate": 0.00019999965965691386, - "loss": 46.0, - "step": 5167 - }, - { - "epoch": 0.8322396231732356, - "grad_norm": 0.0006736009963788092, - "learning_rate": 0.00019999965952490848, - "loss": 46.0, - "step": 5168 - }, - { - "epoch": 0.8324006602520231, - "grad_norm": 0.004254922736436129, - "learning_rate": 0.00019999965939287745, - "loss": 46.0, - "step": 5169 - }, - { - "epoch": 0.8325616973308104, - "grad_norm": 0.0007358370930887759, - "learning_rate": 0.0001999996592608209, - "loss": 46.0, - "step": 5170 - }, - { - "epoch": 0.8327227344095978, - "grad_norm": 0.0020096038933843374, - "learning_rate": 0.00019999965912873872, - "loss": 46.0, - "step": 5171 - }, - { - "epoch": 0.8328837714883852, - "grad_norm": 0.0010486580431461334, - "learning_rate": 0.00019999965899663097, - "loss": 46.0, - "step": 5172 - }, - { - "epoch": 0.8330448085671726, - "grad_norm": 0.0010104109533131123, - "learning_rate": 0.0001999996588644976, - "loss": 46.0, - "step": 5173 - }, - { - "epoch": 0.83320584564596, - "grad_norm": 0.0009896950796246529, - "learning_rate": 0.00019999965873233864, - "loss": 46.0, - "step": 5174 - }, - { - "epoch": 0.8333668827247473, - "grad_norm": 0.0012071151286363602, - "learning_rate": 0.00019999965860015412, - "loss": 46.0, - "step": 5175 - }, - { - "epoch": 0.8335279198035348, - "grad_norm": 0.0007486878894269466, - "learning_rate": 0.00019999965846794396, - "loss": 46.0, - "step": 5176 - }, - { - "epoch": 0.8336889568823221, - "grad_norm": 0.0030823503620922565, - "learning_rate": 0.0001999996583357082, - "loss": 46.0, - "step": 5177 - }, - { - "epoch": 0.8338499939611096, - "grad_norm": 0.0012784936698153615, - "learning_rate": 0.00019999965820344687, - "loss": 46.0, - "step": 5178 - }, - { - "epoch": 0.8340110310398969, - "grad_norm": 0.000638270634226501, - "learning_rate": 0.00019999965807115992, - "loss": 46.0, - "step": 5179 - }, - { - "epoch": 0.8341720681186843, - "grad_norm": 0.0019719786942005157, - "learning_rate": 0.0001999996579388474, - "loss": 46.0, - "step": 5180 - }, - { - "epoch": 0.8343331051974717, - "grad_norm": 0.0009430918726138771, - "learning_rate": 0.00019999965780650932, - "loss": 46.0, - "step": 5181 - }, - { - "epoch": 0.8344941422762591, - "grad_norm": 0.000830064935144037, - "learning_rate": 0.00019999965767414558, - "loss": 46.0, - "step": 5182 - }, - { - "epoch": 0.8346551793550465, - "grad_norm": 0.0015963127370923758, - "learning_rate": 0.00019999965754175628, - "loss": 46.0, - "step": 5183 - }, - { - "epoch": 0.8348162164338339, - "grad_norm": 0.0006838154513388872, - "learning_rate": 0.00019999965740934137, - "loss": 46.0, - "step": 5184 - }, - { - "epoch": 0.8349772535126213, - "grad_norm": 0.0011424896074458957, - "learning_rate": 0.00019999965727690087, - "loss": 46.0, - "step": 5185 - }, - { - "epoch": 0.8351382905914086, - "grad_norm": 0.0030657697934657335, - "learning_rate": 0.00019999965714443478, - "loss": 46.0, - "step": 5186 - }, - { - "epoch": 0.8352993276701961, - "grad_norm": 0.0010062780929729342, - "learning_rate": 0.00019999965701194308, - "loss": 46.0, - "step": 5187 - }, - { - "epoch": 0.8354603647489834, - "grad_norm": 0.0032498009968549013, - "learning_rate": 0.0001999996568794258, - "loss": 46.0, - "step": 5188 - }, - { - "epoch": 0.8356214018277709, - "grad_norm": 0.001423538546077907, - "learning_rate": 0.00019999965674688292, - "loss": 46.0, - "step": 5189 - }, - { - "epoch": 0.8357824389065582, - "grad_norm": 0.0016784424660727382, - "learning_rate": 0.00019999965661431446, - "loss": 46.0, - "step": 5190 - }, - { - "epoch": 0.8359434759853456, - "grad_norm": 0.0017954377690330148, - "learning_rate": 0.00019999965648172038, - "loss": 46.0, - "step": 5191 - }, - { - "epoch": 0.836104513064133, - "grad_norm": 0.0004234558145981282, - "learning_rate": 0.00019999965634910071, - "loss": 46.0, - "step": 5192 - }, - { - "epoch": 0.8362655501429204, - "grad_norm": 0.00044127603177912533, - "learning_rate": 0.00019999965621645546, - "loss": 46.0, - "step": 5193 - }, - { - "epoch": 0.8364265872217078, - "grad_norm": 0.0005068947793915868, - "learning_rate": 0.00019999965608378462, - "loss": 46.0, - "step": 5194 - }, - { - "epoch": 0.8365876243004952, - "grad_norm": 0.0034015923738479614, - "learning_rate": 0.00019999965595108817, - "loss": 46.0, - "step": 5195 - }, - { - "epoch": 0.8367486613792826, - "grad_norm": 0.001955636776983738, - "learning_rate": 0.00019999965581836613, - "loss": 46.0, - "step": 5196 - }, - { - "epoch": 0.83690969845807, - "grad_norm": 0.0012738144723698497, - "learning_rate": 0.0001999996556856185, - "loss": 46.0, - "step": 5197 - }, - { - "epoch": 0.8370707355368574, - "grad_norm": 0.0005093156942166388, - "learning_rate": 0.00019999965555284526, - "loss": 46.0, - "step": 5198 - }, - { - "epoch": 0.8372317726156447, - "grad_norm": 0.0020549949258565903, - "learning_rate": 0.00019999965542004644, - "loss": 46.0, - "step": 5199 - }, - { - "epoch": 0.8373928096944322, - "grad_norm": 0.0015045731561258435, - "learning_rate": 0.00019999965528722202, - "loss": 46.0, - "step": 5200 - }, - { - "epoch": 0.8375538467732195, - "grad_norm": 0.0009060234297066927, - "learning_rate": 0.000199999655154372, - "loss": 46.0, - "step": 5201 - }, - { - "epoch": 0.8377148838520069, - "grad_norm": 0.0005925034638494253, - "learning_rate": 0.00019999965502149637, - "loss": 46.0, - "step": 5202 - }, - { - "epoch": 0.8378759209307943, - "grad_norm": 0.0013877975288778543, - "learning_rate": 0.00019999965488859517, - "loss": 46.0, - "step": 5203 - }, - { - "epoch": 0.8380369580095817, - "grad_norm": 0.0007647479069419205, - "learning_rate": 0.0001999996547556684, - "loss": 46.0, - "step": 5204 - }, - { - "epoch": 0.8381979950883691, - "grad_norm": 0.0012101917527616024, - "learning_rate": 0.000199999654622716, - "loss": 46.0, - "step": 5205 - }, - { - "epoch": 0.8383590321671565, - "grad_norm": 0.0013114619068801403, - "learning_rate": 0.000199999654489738, - "loss": 46.0, - "step": 5206 - }, - { - "epoch": 0.8385200692459439, - "grad_norm": 0.0016159799415618181, - "learning_rate": 0.00019999965435673443, - "loss": 46.0, - "step": 5207 - }, - { - "epoch": 0.8386811063247313, - "grad_norm": 0.0010230635525658727, - "learning_rate": 0.00019999965422370526, - "loss": 46.0, - "step": 5208 - }, - { - "epoch": 0.8388421434035187, - "grad_norm": 0.001263906480744481, - "learning_rate": 0.00019999965409065048, - "loss": 46.0, - "step": 5209 - }, - { - "epoch": 0.839003180482306, - "grad_norm": 0.002087854780256748, - "learning_rate": 0.0001999996539575701, - "loss": 46.0, - "step": 5210 - }, - { - "epoch": 0.8391642175610935, - "grad_norm": 0.0005869521992281079, - "learning_rate": 0.00019999965382446413, - "loss": 46.0, - "step": 5211 - }, - { - "epoch": 0.8393252546398808, - "grad_norm": 0.00035122825647704303, - "learning_rate": 0.00019999965369133258, - "loss": 46.0, - "step": 5212 - }, - { - "epoch": 0.8394862917186682, - "grad_norm": 0.0005226124776527286, - "learning_rate": 0.00019999965355817543, - "loss": 46.0, - "step": 5213 - }, - { - "epoch": 0.8396473287974556, - "grad_norm": 0.0019003982888534665, - "learning_rate": 0.00019999965342499268, - "loss": 46.0, - "step": 5214 - }, - { - "epoch": 0.839808365876243, - "grad_norm": 0.0009372249478474259, - "learning_rate": 0.00019999965329178435, - "loss": 46.0, - "step": 5215 - }, - { - "epoch": 0.8399694029550304, - "grad_norm": 0.002607081551104784, - "learning_rate": 0.0001999996531585504, - "loss": 46.0, - "step": 5216 - }, - { - "epoch": 0.8401304400338178, - "grad_norm": 0.0017386317485943437, - "learning_rate": 0.00019999965302529087, - "loss": 46.0, - "step": 5217 - }, - { - "epoch": 0.8402914771126052, - "grad_norm": 0.0016375096747651696, - "learning_rate": 0.00019999965289200575, - "loss": 46.0, - "step": 5218 - }, - { - "epoch": 0.8404525141913926, - "grad_norm": 0.0007120749796740711, - "learning_rate": 0.00019999965275869502, - "loss": 46.0, - "step": 5219 - }, - { - "epoch": 0.84061355127018, - "grad_norm": 0.0006274740444496274, - "learning_rate": 0.0001999996526253587, - "loss": 46.0, - "step": 5220 - }, - { - "epoch": 0.8407745883489673, - "grad_norm": 0.000432023371104151, - "learning_rate": 0.0001999996524919968, - "loss": 46.0, - "step": 5221 - }, - { - "epoch": 0.8409356254277548, - "grad_norm": 0.0038063465617597103, - "learning_rate": 0.0001999996523586093, - "loss": 46.0, - "step": 5222 - }, - { - "epoch": 0.8410966625065421, - "grad_norm": 0.00364326243288815, - "learning_rate": 0.00019999965222519618, - "loss": 46.0, - "step": 5223 - }, - { - "epoch": 0.8412576995853295, - "grad_norm": 0.0008819945505820215, - "learning_rate": 0.00019999965209175748, - "loss": 46.0, - "step": 5224 - }, - { - "epoch": 0.8414187366641169, - "grad_norm": 0.0007538118516094983, - "learning_rate": 0.0001999996519582932, - "loss": 46.0, - "step": 5225 - }, - { - "epoch": 0.8415797737429043, - "grad_norm": 0.0011087398743256927, - "learning_rate": 0.00019999965182480333, - "loss": 46.0, - "step": 5226 - }, - { - "epoch": 0.8417408108216917, - "grad_norm": 0.0004935208708047867, - "learning_rate": 0.00019999965169128781, - "loss": 46.0, - "step": 5227 - }, - { - "epoch": 0.8419018479004791, - "grad_norm": 0.0019165939884260297, - "learning_rate": 0.00019999965155774677, - "loss": 46.0, - "step": 5228 - }, - { - "epoch": 0.8420628849792665, - "grad_norm": 0.0011473592603579164, - "learning_rate": 0.0001999996514241801, - "loss": 46.0, - "step": 5229 - }, - { - "epoch": 0.8422239220580539, - "grad_norm": 0.0012515212874859571, - "learning_rate": 0.0001999996512905878, - "loss": 46.0, - "step": 5230 - }, - { - "epoch": 0.8423849591368413, - "grad_norm": 0.00712304562330246, - "learning_rate": 0.00019999965115697, - "loss": 46.0, - "step": 5231 - }, - { - "epoch": 0.8425459962156286, - "grad_norm": 0.002442289376631379, - "learning_rate": 0.00019999965102332652, - "loss": 46.0, - "step": 5232 - }, - { - "epoch": 0.842707033294416, - "grad_norm": 0.0008963527507148683, - "learning_rate": 0.00019999965088965746, - "loss": 46.0, - "step": 5233 - }, - { - "epoch": 0.8428680703732034, - "grad_norm": 0.001386677147820592, - "learning_rate": 0.00019999965075596284, - "loss": 46.0, - "step": 5234 - }, - { - "epoch": 0.8430291074519908, - "grad_norm": 0.0009650291176512837, - "learning_rate": 0.0001999996506222426, - "loss": 46.0, - "step": 5235 - }, - { - "epoch": 0.8431901445307782, - "grad_norm": 0.0004423398931976408, - "learning_rate": 0.00019999965048849675, - "loss": 46.0, - "step": 5236 - }, - { - "epoch": 0.8433511816095656, - "grad_norm": 0.00396422715857625, - "learning_rate": 0.0001999996503547253, - "loss": 46.0, - "step": 5237 - }, - { - "epoch": 0.843512218688353, - "grad_norm": 0.0008061259286478162, - "learning_rate": 0.0001999996502209283, - "loss": 46.0, - "step": 5238 - }, - { - "epoch": 0.8436732557671404, - "grad_norm": 0.004156382754445076, - "learning_rate": 0.0001999996500871057, - "loss": 46.0, - "step": 5239 - }, - { - "epoch": 0.8438342928459278, - "grad_norm": 0.0015063994796946645, - "learning_rate": 0.0001999996499532575, - "loss": 46.0, - "step": 5240 - }, - { - "epoch": 0.8439953299247152, - "grad_norm": 0.0027145070489495993, - "learning_rate": 0.00019999964981938368, - "loss": 46.0, - "step": 5241 - }, - { - "epoch": 0.8441563670035026, - "grad_norm": 0.0008000176749192178, - "learning_rate": 0.00019999964968548428, - "loss": 46.0, - "step": 5242 - }, - { - "epoch": 0.84431740408229, - "grad_norm": 0.0041970014572143555, - "learning_rate": 0.0001999996495515593, - "loss": 46.0, - "step": 5243 - }, - { - "epoch": 0.8444784411610773, - "grad_norm": 0.0006794112850911915, - "learning_rate": 0.00019999964941760869, - "loss": 46.0, - "step": 5244 - }, - { - "epoch": 0.8446394782398647, - "grad_norm": 0.0006485472549684346, - "learning_rate": 0.0001999996492836325, - "loss": 46.0, - "step": 5245 - }, - { - "epoch": 0.8448005153186521, - "grad_norm": 0.0009412890649400651, - "learning_rate": 0.00019999964914963072, - "loss": 46.0, - "step": 5246 - }, - { - "epoch": 0.8449615523974395, - "grad_norm": 0.0013822100590914488, - "learning_rate": 0.00019999964901560335, - "loss": 46.0, - "step": 5247 - }, - { - "epoch": 0.8451225894762269, - "grad_norm": 0.0021872129291296005, - "learning_rate": 0.00019999964888155037, - "loss": 46.0, - "step": 5248 - }, - { - "epoch": 0.8452836265550143, - "grad_norm": 0.0002928603789769113, - "learning_rate": 0.0001999996487474718, - "loss": 46.0, - "step": 5249 - }, - { - "epoch": 0.8454446636338017, - "grad_norm": 0.0018276500049978495, - "learning_rate": 0.00019999964861336765, - "loss": 46.0, - "step": 5250 - }, - { - "epoch": 0.8456057007125891, - "grad_norm": 0.005268497858196497, - "learning_rate": 0.0001999996484792379, - "loss": 46.0, - "step": 5251 - }, - { - "epoch": 0.8457667377913765, - "grad_norm": 0.00033274025190621614, - "learning_rate": 0.00019999964834508256, - "loss": 46.0, - "step": 5252 - }, - { - "epoch": 0.8459277748701639, - "grad_norm": 0.0003299397649243474, - "learning_rate": 0.00019999964821090162, - "loss": 46.0, - "step": 5253 - }, - { - "epoch": 0.8460888119489512, - "grad_norm": 0.0012615256709977984, - "learning_rate": 0.00019999964807669506, - "loss": 46.0, - "step": 5254 - }, - { - "epoch": 0.8462498490277386, - "grad_norm": 0.0011292611015960574, - "learning_rate": 0.00019999964794246294, - "loss": 46.0, - "step": 5255 - }, - { - "epoch": 0.846410886106526, - "grad_norm": 0.00420538242906332, - "learning_rate": 0.0001999996478082052, - "loss": 46.0, - "step": 5256 - }, - { - "epoch": 0.8465719231853134, - "grad_norm": 0.0014823495876044035, - "learning_rate": 0.00019999964767392187, - "loss": 46.0, - "step": 5257 - }, - { - "epoch": 0.8467329602641008, - "grad_norm": 0.0019098966149613261, - "learning_rate": 0.00019999964753961296, - "loss": 46.0, - "step": 5258 - }, - { - "epoch": 0.8468939973428882, - "grad_norm": 0.0024737513158470392, - "learning_rate": 0.00019999964740527847, - "loss": 46.0, - "step": 5259 - }, - { - "epoch": 0.8470550344216756, - "grad_norm": 0.0009053554385900497, - "learning_rate": 0.00019999964727091834, - "loss": 46.0, - "step": 5260 - }, - { - "epoch": 0.847216071500463, - "grad_norm": 0.0014724271604791284, - "learning_rate": 0.00019999964713653264, - "loss": 46.0, - "step": 5261 - }, - { - "epoch": 0.8473771085792504, - "grad_norm": 0.0046810791827738285, - "learning_rate": 0.00019999964700212136, - "loss": 46.0, - "step": 5262 - }, - { - "epoch": 0.8475381456580378, - "grad_norm": 0.0009719933732412755, - "learning_rate": 0.00019999964686768447, - "loss": 46.0, - "step": 5263 - }, - { - "epoch": 0.8476991827368252, - "grad_norm": 0.0030296803452074528, - "learning_rate": 0.00019999964673322196, - "loss": 46.0, - "step": 5264 - }, - { - "epoch": 0.8478602198156125, - "grad_norm": 0.0014079560060054064, - "learning_rate": 0.00019999964659873389, - "loss": 46.0, - "step": 5265 - }, - { - "epoch": 0.8480212568943999, - "grad_norm": 0.002824049210175872, - "learning_rate": 0.0001999996464642202, - "loss": 46.0, - "step": 5266 - }, - { - "epoch": 0.8481822939731873, - "grad_norm": 0.0005882881814613938, - "learning_rate": 0.00019999964632968093, - "loss": 46.0, - "step": 5267 - }, - { - "epoch": 0.8483433310519747, - "grad_norm": 0.0012592347338795662, - "learning_rate": 0.00019999964619511607, - "loss": 46.0, - "step": 5268 - }, - { - "epoch": 0.8485043681307621, - "grad_norm": 0.0007983875111676753, - "learning_rate": 0.0001999996460605256, - "loss": 46.0, - "step": 5269 - }, - { - "epoch": 0.8486654052095495, - "grad_norm": 0.002135386923328042, - "learning_rate": 0.00019999964592590957, - "loss": 46.0, - "step": 5270 - }, - { - "epoch": 0.8488264422883369, - "grad_norm": 0.0006917768623679876, - "learning_rate": 0.00019999964579126792, - "loss": 46.0, - "step": 5271 - }, - { - "epoch": 0.8489874793671243, - "grad_norm": 0.0004974614130333066, - "learning_rate": 0.0001999996456566007, - "loss": 46.0, - "step": 5272 - }, - { - "epoch": 0.8491485164459117, - "grad_norm": 0.0021558695007115602, - "learning_rate": 0.00019999964552190784, - "loss": 46.0, - "step": 5273 - }, - { - "epoch": 0.8493095535246991, - "grad_norm": 0.0007196618244051933, - "learning_rate": 0.00019999964538718938, - "loss": 46.0, - "step": 5274 - }, - { - "epoch": 0.8494705906034864, - "grad_norm": 0.001629539649002254, - "learning_rate": 0.00019999964525244535, - "loss": 46.0, - "step": 5275 - }, - { - "epoch": 0.8496316276822738, - "grad_norm": 0.0021558755543082952, - "learning_rate": 0.00019999964511767574, - "loss": 46.0, - "step": 5276 - }, - { - "epoch": 0.8497926647610612, - "grad_norm": 0.002615951234474778, - "learning_rate": 0.00019999964498288052, - "loss": 46.0, - "step": 5277 - }, - { - "epoch": 0.8499537018398486, - "grad_norm": 0.0005678863381035626, - "learning_rate": 0.0001999996448480597, - "loss": 46.0, - "step": 5278 - }, - { - "epoch": 0.850114738918636, - "grad_norm": 0.004259361419826746, - "learning_rate": 0.0001999996447132133, - "loss": 46.0, - "step": 5279 - }, - { - "epoch": 0.8502757759974234, - "grad_norm": 0.0012753693154081702, - "learning_rate": 0.0001999996445783413, - "loss": 46.0, - "step": 5280 - }, - { - "epoch": 0.8504368130762108, - "grad_norm": 0.001906612771563232, - "learning_rate": 0.0001999996444434437, - "loss": 46.0, - "step": 5281 - }, - { - "epoch": 0.8505978501549982, - "grad_norm": 0.0013399564195424318, - "learning_rate": 0.0001999996443085205, - "loss": 46.0, - "step": 5282 - }, - { - "epoch": 0.8507588872337856, - "grad_norm": 0.0012392661301419139, - "learning_rate": 0.00019999964417357174, - "loss": 46.0, - "step": 5283 - }, - { - "epoch": 0.850919924312573, - "grad_norm": 0.0004138743388466537, - "learning_rate": 0.00019999964403859735, - "loss": 46.0, - "step": 5284 - }, - { - "epoch": 0.8510809613913604, - "grad_norm": 0.0015947495121508837, - "learning_rate": 0.00019999964390359737, - "loss": 46.0, - "step": 5285 - }, - { - "epoch": 0.8512419984701477, - "grad_norm": 0.0023456921335309744, - "learning_rate": 0.00019999964376857178, - "loss": 46.0, - "step": 5286 - }, - { - "epoch": 0.8514030355489351, - "grad_norm": 0.0006762603879906237, - "learning_rate": 0.0001999996436335206, - "loss": 46.0, - "step": 5287 - }, - { - "epoch": 0.8515640726277225, - "grad_norm": 0.0011161736911162734, - "learning_rate": 0.00019999964349844387, - "loss": 46.0, - "step": 5288 - }, - { - "epoch": 0.8517251097065099, - "grad_norm": 0.0011126425815746188, - "learning_rate": 0.00019999964336334152, - "loss": 46.0, - "step": 5289 - }, - { - "epoch": 0.8518861467852973, - "grad_norm": 0.0007951778243295848, - "learning_rate": 0.00019999964322821355, - "loss": 46.0, - "step": 5290 - }, - { - "epoch": 0.8520471838640847, - "grad_norm": 0.0037739980034530163, - "learning_rate": 0.00019999964309306, - "loss": 46.0, - "step": 5291 - }, - { - "epoch": 0.8522082209428721, - "grad_norm": 0.0006727104191668332, - "learning_rate": 0.00019999964295788089, - "loss": 46.0, - "step": 5292 - }, - { - "epoch": 0.8523692580216595, - "grad_norm": 0.0008476900984533131, - "learning_rate": 0.00019999964282267616, - "loss": 46.0, - "step": 5293 - }, - { - "epoch": 0.8525302951004469, - "grad_norm": 0.0014890005113556981, - "learning_rate": 0.0001999996426874458, - "loss": 46.0, - "step": 5294 - }, - { - "epoch": 0.8526913321792343, - "grad_norm": 0.0024564142804592848, - "learning_rate": 0.00019999964255218992, - "loss": 46.0, - "step": 5295 - }, - { - "epoch": 0.8528523692580217, - "grad_norm": 0.003224824322387576, - "learning_rate": 0.00019999964241690837, - "loss": 46.0, - "step": 5296 - }, - { - "epoch": 0.853013406336809, - "grad_norm": 0.0016472613206133246, - "learning_rate": 0.00019999964228160127, - "loss": 46.0, - "step": 5297 - }, - { - "epoch": 0.8531744434155965, - "grad_norm": 0.001739516039378941, - "learning_rate": 0.00019999964214626855, - "loss": 46.0, - "step": 5298 - }, - { - "epoch": 0.8533354804943838, - "grad_norm": 0.0007754797115921974, - "learning_rate": 0.00019999964201091025, - "loss": 46.0, - "step": 5299 - }, - { - "epoch": 0.8534965175731712, - "grad_norm": 0.0010283966548740864, - "learning_rate": 0.00019999964187552633, - "loss": 46.0, - "step": 5300 - }, - { - "epoch": 0.8536575546519586, - "grad_norm": 0.0010554902255535126, - "learning_rate": 0.00019999964174011685, - "loss": 46.0, - "step": 5301 - }, - { - "epoch": 0.853818591730746, - "grad_norm": 0.0022285226732492447, - "learning_rate": 0.00019999964160468176, - "loss": 46.0, - "step": 5302 - }, - { - "epoch": 0.8539796288095334, - "grad_norm": 0.0018978987354785204, - "learning_rate": 0.00019999964146922108, - "loss": 46.0, - "step": 5303 - }, - { - "epoch": 0.8541406658883208, - "grad_norm": 0.0016012756386771798, - "learning_rate": 0.0001999996413337348, - "loss": 46.0, - "step": 5304 - }, - { - "epoch": 0.8543017029671082, - "grad_norm": 0.0010058789048343897, - "learning_rate": 0.00019999964119822293, - "loss": 46.0, - "step": 5305 - }, - { - "epoch": 0.8544627400458956, - "grad_norm": 0.002617072081193328, - "learning_rate": 0.00019999964106268546, - "loss": 46.0, - "step": 5306 - }, - { - "epoch": 0.854623777124683, - "grad_norm": 0.0012582477647811174, - "learning_rate": 0.0001999996409271224, - "loss": 46.0, - "step": 5307 - }, - { - "epoch": 0.8547848142034703, - "grad_norm": 0.0003376406675670296, - "learning_rate": 0.00019999964079153376, - "loss": 46.0, - "step": 5308 - }, - { - "epoch": 0.8549458512822578, - "grad_norm": 0.006716587580740452, - "learning_rate": 0.0001999996406559195, - "loss": 46.0, - "step": 5309 - }, - { - "epoch": 0.8551068883610451, - "grad_norm": 0.0012386812595650554, - "learning_rate": 0.00019999964052027966, - "loss": 46.0, - "step": 5310 - }, - { - "epoch": 0.8552679254398325, - "grad_norm": 0.0006290592718869448, - "learning_rate": 0.0001999996403846142, - "loss": 46.0, - "step": 5311 - }, - { - "epoch": 0.8554289625186199, - "grad_norm": 0.00058881810400635, - "learning_rate": 0.00019999964024892318, - "loss": 46.0, - "step": 5312 - }, - { - "epoch": 0.8555899995974073, - "grad_norm": 0.0008410885347984731, - "learning_rate": 0.00019999964011320652, - "loss": 46.0, - "step": 5313 - }, - { - "epoch": 0.8557510366761947, - "grad_norm": 0.0017486875876784325, - "learning_rate": 0.00019999963997746433, - "loss": 46.0, - "step": 5314 - }, - { - "epoch": 0.8559120737549821, - "grad_norm": 0.006869988515973091, - "learning_rate": 0.0001999996398416965, - "loss": 46.0, - "step": 5315 - }, - { - "epoch": 0.8560731108337695, - "grad_norm": 0.0015343126142397523, - "learning_rate": 0.0001999996397059031, - "loss": 46.0, - "step": 5316 - }, - { - "epoch": 0.8562341479125569, - "grad_norm": 0.0007613686611875892, - "learning_rate": 0.00019999963957008406, - "loss": 46.0, - "step": 5317 - }, - { - "epoch": 0.8563951849913443, - "grad_norm": 0.0009837878169491887, - "learning_rate": 0.00019999963943423947, - "loss": 46.0, - "step": 5318 - }, - { - "epoch": 0.8565562220701316, - "grad_norm": 0.0031392339151352644, - "learning_rate": 0.00019999963929836925, - "loss": 46.0, - "step": 5319 - }, - { - "epoch": 0.856717259148919, - "grad_norm": 0.0007671216153539717, - "learning_rate": 0.00019999963916247348, - "loss": 46.0, - "step": 5320 - }, - { - "epoch": 0.8568782962277064, - "grad_norm": 0.0007769123767502606, - "learning_rate": 0.00019999963902655207, - "loss": 46.0, - "step": 5321 - }, - { - "epoch": 0.8570393333064938, - "grad_norm": 0.0020357626490294933, - "learning_rate": 0.0001999996388906051, - "loss": 46.0, - "step": 5322 - }, - { - "epoch": 0.8572003703852812, - "grad_norm": 0.0008045024587772787, - "learning_rate": 0.00019999963875463251, - "loss": 46.0, - "step": 5323 - }, - { - "epoch": 0.8573614074640686, - "grad_norm": 0.001799550955183804, - "learning_rate": 0.00019999963861863437, - "loss": 46.0, - "step": 5324 - }, - { - "epoch": 0.857522444542856, - "grad_norm": 0.0007439398323185742, - "learning_rate": 0.00019999963848261058, - "loss": 46.0, - "step": 5325 - }, - { - "epoch": 0.8576834816216434, - "grad_norm": 0.0014336802996695042, - "learning_rate": 0.0001999996383465612, - "loss": 46.0, - "step": 5326 - }, - { - "epoch": 0.8578445187004308, - "grad_norm": 0.001388859935104847, - "learning_rate": 0.0001999996382104863, - "loss": 46.0, - "step": 5327 - }, - { - "epoch": 0.8580055557792181, - "grad_norm": 0.000919417361728847, - "learning_rate": 0.00019999963807438572, - "loss": 46.0, - "step": 5328 - }, - { - "epoch": 0.8581665928580056, - "grad_norm": 0.0009685653494670987, - "learning_rate": 0.00019999963793825956, - "loss": 46.0, - "step": 5329 - }, - { - "epoch": 0.8583276299367929, - "grad_norm": 0.0030787650030106306, - "learning_rate": 0.00019999963780210783, - "loss": 46.0, - "step": 5330 - }, - { - "epoch": 0.8584886670155804, - "grad_norm": 0.0017713705310598016, - "learning_rate": 0.0001999996376659305, - "loss": 46.0, - "step": 5331 - }, - { - "epoch": 0.8586497040943677, - "grad_norm": 0.0009224415989592671, - "learning_rate": 0.00019999963752972754, - "loss": 46.0, - "step": 5332 - }, - { - "epoch": 0.8588107411731551, - "grad_norm": 0.0019316660473123193, - "learning_rate": 0.00019999963739349906, - "loss": 46.0, - "step": 5333 - }, - { - "epoch": 0.8589717782519425, - "grad_norm": 0.011059368029236794, - "learning_rate": 0.0001999996372572449, - "loss": 46.0, - "step": 5334 - }, - { - "epoch": 0.8591328153307299, - "grad_norm": 0.0013488434487953782, - "learning_rate": 0.0001999996371209652, - "loss": 46.0, - "step": 5335 - }, - { - "epoch": 0.8592938524095173, - "grad_norm": 0.0007839535246603191, - "learning_rate": 0.0001999996369846599, - "loss": 46.0, - "step": 5336 - }, - { - "epoch": 0.8594548894883047, - "grad_norm": 0.0004955589538440108, - "learning_rate": 0.00019999963684832898, - "loss": 46.0, - "step": 5337 - }, - { - "epoch": 0.8596159265670921, - "grad_norm": 0.0004249998601153493, - "learning_rate": 0.00019999963671197247, - "loss": 46.0, - "step": 5338 - }, - { - "epoch": 0.8597769636458794, - "grad_norm": 0.0005931945634074509, - "learning_rate": 0.00019999963657559038, - "loss": 46.0, - "step": 5339 - }, - { - "epoch": 0.8599380007246669, - "grad_norm": 0.001101514557376504, - "learning_rate": 0.00019999963643918268, - "loss": 46.0, - "step": 5340 - }, - { - "epoch": 0.8600990378034542, - "grad_norm": 0.0017062551341950893, - "learning_rate": 0.00019999963630274942, - "loss": 46.0, - "step": 5341 - }, - { - "epoch": 0.8602600748822417, - "grad_norm": 0.0007795640267431736, - "learning_rate": 0.00019999963616629054, - "loss": 46.0, - "step": 5342 - }, - { - "epoch": 0.860421111961029, - "grad_norm": 0.0008742253994569182, - "learning_rate": 0.00019999963602980607, - "loss": 46.0, - "step": 5343 - }, - { - "epoch": 0.8605821490398164, - "grad_norm": 0.0009085464989766479, - "learning_rate": 0.000199999635893296, - "loss": 46.0, - "step": 5344 - }, - { - "epoch": 0.8607431861186038, - "grad_norm": 0.0014292245032265782, - "learning_rate": 0.00019999963575676033, - "loss": 46.0, - "step": 5345 - }, - { - "epoch": 0.8609042231973912, - "grad_norm": 0.0007941921357996762, - "learning_rate": 0.00019999963562019907, - "loss": 46.0, - "step": 5346 - }, - { - "epoch": 0.8610652602761786, - "grad_norm": 0.002296466613188386, - "learning_rate": 0.00019999963548361223, - "loss": 46.0, - "step": 5347 - }, - { - "epoch": 0.861226297354966, - "grad_norm": 0.001823195954784751, - "learning_rate": 0.00019999963534699978, - "loss": 46.0, - "step": 5348 - }, - { - "epoch": 0.8613873344337534, - "grad_norm": 0.001987638184800744, - "learning_rate": 0.00019999963521036173, - "loss": 46.0, - "step": 5349 - }, - { - "epoch": 0.8615483715125407, - "grad_norm": 0.0005175631958991289, - "learning_rate": 0.0001999996350736981, - "loss": 46.0, - "step": 5350 - }, - { - "epoch": 0.8617094085913282, - "grad_norm": 0.0008064642897807062, - "learning_rate": 0.00019999963493700886, - "loss": 46.0, - "step": 5351 - }, - { - "epoch": 0.8618704456701155, - "grad_norm": 0.0023549997713416815, - "learning_rate": 0.00019999963480029403, - "loss": 46.0, - "step": 5352 - }, - { - "epoch": 0.862031482748903, - "grad_norm": 0.0006986106163822114, - "learning_rate": 0.00019999963466355364, - "loss": 46.0, - "step": 5353 - }, - { - "epoch": 0.8621925198276903, - "grad_norm": 0.0022354733664542437, - "learning_rate": 0.0001999996345267876, - "loss": 46.0, - "step": 5354 - }, - { - "epoch": 0.8623535569064777, - "grad_norm": 0.0006340537802316248, - "learning_rate": 0.00019999963438999601, - "loss": 46.0, - "step": 5355 - }, - { - "epoch": 0.8625145939852651, - "grad_norm": 0.0009771937038749456, - "learning_rate": 0.00019999963425317878, - "loss": 46.0, - "step": 5356 - }, - { - "epoch": 0.8626756310640525, - "grad_norm": 0.0015052348608151078, - "learning_rate": 0.00019999963411633599, - "loss": 46.0, - "step": 5357 - }, - { - "epoch": 0.8628366681428399, - "grad_norm": 0.0006980052567087114, - "learning_rate": 0.0001999996339794676, - "loss": 46.0, - "step": 5358 - }, - { - "epoch": 0.8629977052216273, - "grad_norm": 0.0009572468115948141, - "learning_rate": 0.00019999963384257358, - "loss": 46.0, - "step": 5359 - }, - { - "epoch": 0.8631587423004147, - "grad_norm": 0.00037086111842654645, - "learning_rate": 0.000199999633705654, - "loss": 46.0, - "step": 5360 - }, - { - "epoch": 0.863319779379202, - "grad_norm": 0.0028319223783910275, - "learning_rate": 0.00019999963356870883, - "loss": 46.0, - "step": 5361 - }, - { - "epoch": 0.8634808164579895, - "grad_norm": 0.0009068941581062973, - "learning_rate": 0.00019999963343173805, - "loss": 46.0, - "step": 5362 - }, - { - "epoch": 0.8636418535367768, - "grad_norm": 0.0013134769396856427, - "learning_rate": 0.00019999963329474167, - "loss": 46.0, - "step": 5363 - }, - { - "epoch": 0.8638028906155643, - "grad_norm": 0.0009307610453106463, - "learning_rate": 0.00019999963315771972, - "loss": 46.0, - "step": 5364 - }, - { - "epoch": 0.8639639276943516, - "grad_norm": 0.0004538406792562455, - "learning_rate": 0.00019999963302067214, - "loss": 46.0, - "step": 5365 - }, - { - "epoch": 0.864124964773139, - "grad_norm": 0.0013267614413052797, - "learning_rate": 0.000199999632883599, - "loss": 46.0, - "step": 5366 - }, - { - "epoch": 0.8642860018519264, - "grad_norm": 0.0005370157305151224, - "learning_rate": 0.00019999963274650026, - "loss": 46.0, - "step": 5367 - }, - { - "epoch": 0.8644470389307138, - "grad_norm": 0.0004512017185334116, - "learning_rate": 0.0001999996326093759, - "loss": 46.0, - "step": 5368 - }, - { - "epoch": 0.8646080760095012, - "grad_norm": 0.0007292244699783623, - "learning_rate": 0.00019999963247222598, - "loss": 46.0, - "step": 5369 - }, - { - "epoch": 0.8647691130882886, - "grad_norm": 0.0006310438620857894, - "learning_rate": 0.00019999963233505044, - "loss": 46.0, - "step": 5370 - }, - { - "epoch": 0.864930150167076, - "grad_norm": 0.0023617143742740154, - "learning_rate": 0.00019999963219784932, - "loss": 46.0, - "step": 5371 - }, - { - "epoch": 0.8650911872458633, - "grad_norm": 0.0006030204240232706, - "learning_rate": 0.0001999996320606226, - "loss": 46.0, - "step": 5372 - }, - { - "epoch": 0.8652522243246508, - "grad_norm": 0.0018976028077304363, - "learning_rate": 0.0001999996319233703, - "loss": 46.0, - "step": 5373 - }, - { - "epoch": 0.8654132614034381, - "grad_norm": 0.002421885496005416, - "learning_rate": 0.00019999963178609238, - "loss": 46.0, - "step": 5374 - }, - { - "epoch": 0.8655742984822256, - "grad_norm": 0.0008818595670163631, - "learning_rate": 0.00019999963164878888, - "loss": 46.0, - "step": 5375 - }, - { - "epoch": 0.8657353355610129, - "grad_norm": 0.0030620465986430645, - "learning_rate": 0.00019999963151145976, - "loss": 46.0, - "step": 5376 - }, - { - "epoch": 0.8658963726398003, - "grad_norm": 0.0010807124199345708, - "learning_rate": 0.00019999963137410506, - "loss": 46.0, - "step": 5377 - }, - { - "epoch": 0.8660574097185877, - "grad_norm": 0.005061842035502195, - "learning_rate": 0.00019999963123672478, - "loss": 46.0, - "step": 5378 - }, - { - "epoch": 0.8662184467973751, - "grad_norm": 0.002218588488176465, - "learning_rate": 0.0001999996310993189, - "loss": 46.0, - "step": 5379 - }, - { - "epoch": 0.8663794838761625, - "grad_norm": 0.0028829635120928288, - "learning_rate": 0.00019999963096188741, - "loss": 46.0, - "step": 5380 - }, - { - "epoch": 0.8665405209549498, - "grad_norm": 0.003809395944699645, - "learning_rate": 0.00019999963082443034, - "loss": 46.0, - "step": 5381 - }, - { - "epoch": 0.8667015580337373, - "grad_norm": 0.0029538862872868776, - "learning_rate": 0.00019999963068694767, - "loss": 46.0, - "step": 5382 - }, - { - "epoch": 0.8668625951125246, - "grad_norm": 0.0009006448090076447, - "learning_rate": 0.0001999996305494394, - "loss": 46.0, - "step": 5383 - }, - { - "epoch": 0.8670236321913121, - "grad_norm": 0.001607679994776845, - "learning_rate": 0.00019999963041190556, - "loss": 46.0, - "step": 5384 - }, - { - "epoch": 0.8671846692700994, - "grad_norm": 0.004454975016415119, - "learning_rate": 0.00019999963027434608, - "loss": 46.0, - "step": 5385 - }, - { - "epoch": 0.8673457063488869, - "grad_norm": 0.0012642252258956432, - "learning_rate": 0.00019999963013676104, - "loss": 46.0, - "step": 5386 - }, - { - "epoch": 0.8675067434276742, - "grad_norm": 0.0029488876461982727, - "learning_rate": 0.00019999962999915042, - "loss": 46.0, - "step": 5387 - }, - { - "epoch": 0.8676677805064616, - "grad_norm": 0.00043018089490942657, - "learning_rate": 0.00019999962986151415, - "loss": 46.0, - "step": 5388 - }, - { - "epoch": 0.867828817585249, - "grad_norm": 0.002301212400197983, - "learning_rate": 0.00019999962972385232, - "loss": 46.0, - "step": 5389 - }, - { - "epoch": 0.8679898546640364, - "grad_norm": 0.0005062994314357638, - "learning_rate": 0.0001999996295861649, - "loss": 46.0, - "step": 5390 - }, - { - "epoch": 0.8681508917428238, - "grad_norm": 0.0008245017379522324, - "learning_rate": 0.00019999962944845188, - "loss": 46.0, - "step": 5391 - }, - { - "epoch": 0.8683119288216111, - "grad_norm": 0.0010551703162491322, - "learning_rate": 0.00019999962931071326, - "loss": 46.0, - "step": 5392 - }, - { - "epoch": 0.8684729659003986, - "grad_norm": 0.0013676321832463145, - "learning_rate": 0.00019999962917294903, - "loss": 46.0, - "step": 5393 - }, - { - "epoch": 0.8686340029791859, - "grad_norm": 0.005799941252917051, - "learning_rate": 0.00019999962903515924, - "loss": 46.0, - "step": 5394 - }, - { - "epoch": 0.8687950400579734, - "grad_norm": 0.0008178535499610007, - "learning_rate": 0.00019999962889734384, - "loss": 46.0, - "step": 5395 - }, - { - "epoch": 0.8689560771367607, - "grad_norm": 0.0011396018089726567, - "learning_rate": 0.00019999962875950284, - "loss": 46.0, - "step": 5396 - }, - { - "epoch": 0.8691171142155482, - "grad_norm": 0.0009169162367470562, - "learning_rate": 0.00019999962862163627, - "loss": 46.0, - "step": 5397 - }, - { - "epoch": 0.8692781512943355, - "grad_norm": 0.0007868690881878138, - "learning_rate": 0.00019999962848374407, - "loss": 46.0, - "step": 5398 - }, - { - "epoch": 0.869439188373123, - "grad_norm": 0.0017590143252164125, - "learning_rate": 0.0001999996283458263, - "loss": 46.0, - "step": 5399 - }, - { - "epoch": 0.8696002254519103, - "grad_norm": 0.0032193479128181934, - "learning_rate": 0.00019999962820788292, - "loss": 46.0, - "step": 5400 - }, - { - "epoch": 0.8697612625306977, - "grad_norm": 0.001666118623688817, - "learning_rate": 0.00019999962806991394, - "loss": 46.0, - "step": 5401 - }, - { - "epoch": 0.8699222996094851, - "grad_norm": 0.0012950766831636429, - "learning_rate": 0.00019999962793191935, - "loss": 46.0, - "step": 5402 - }, - { - "epoch": 0.8700833366882724, - "grad_norm": 0.0007219741819426417, - "learning_rate": 0.00019999962779389922, - "loss": 46.0, - "step": 5403 - }, - { - "epoch": 0.8702443737670599, - "grad_norm": 0.0006782657583244145, - "learning_rate": 0.00019999962765585345, - "loss": 46.0, - "step": 5404 - }, - { - "epoch": 0.8704054108458472, - "grad_norm": 0.0006449512438848615, - "learning_rate": 0.0001999996275177821, - "loss": 46.0, - "step": 5405 - }, - { - "epoch": 0.8705664479246347, - "grad_norm": 0.0008436316275037825, - "learning_rate": 0.00019999962737968517, - "loss": 46.0, - "step": 5406 - }, - { - "epoch": 0.870727485003422, - "grad_norm": 0.00047500242362730205, - "learning_rate": 0.0001999996272415626, - "loss": 46.0, - "step": 5407 - }, - { - "epoch": 0.8708885220822095, - "grad_norm": 0.0009098974987864494, - "learning_rate": 0.0001999996271034145, - "loss": 46.0, - "step": 5408 - }, - { - "epoch": 0.8710495591609968, - "grad_norm": 0.0013777875574305654, - "learning_rate": 0.00019999962696524076, - "loss": 46.0, - "step": 5409 - }, - { - "epoch": 0.8712105962397843, - "grad_norm": 0.0009028459899127483, - "learning_rate": 0.00019999962682704144, - "loss": 46.0, - "step": 5410 - }, - { - "epoch": 0.8713716333185716, - "grad_norm": 0.0035416826140135527, - "learning_rate": 0.0001999996266888165, - "loss": 46.0, - "step": 5411 - }, - { - "epoch": 0.871532670397359, - "grad_norm": 0.00044533328036777675, - "learning_rate": 0.000199999626550566, - "loss": 46.0, - "step": 5412 - }, - { - "epoch": 0.8716937074761464, - "grad_norm": 0.0020277979783713818, - "learning_rate": 0.0001999996264122899, - "loss": 46.0, - "step": 5413 - }, - { - "epoch": 0.8718547445549337, - "grad_norm": 0.0010008830577135086, - "learning_rate": 0.00019999962627398818, - "loss": 46.0, - "step": 5414 - }, - { - "epoch": 0.8720157816337212, - "grad_norm": 0.0005248826346360147, - "learning_rate": 0.0001999996261356609, - "loss": 46.0, - "step": 5415 - }, - { - "epoch": 0.8721768187125085, - "grad_norm": 0.000740030431188643, - "learning_rate": 0.000199999625997308, - "loss": 46.0, - "step": 5416 - }, - { - "epoch": 0.872337855791296, - "grad_norm": 0.0006334640784189105, - "learning_rate": 0.00019999962585892954, - "loss": 46.0, - "step": 5417 - }, - { - "epoch": 0.8724988928700833, - "grad_norm": 0.0008111082715913653, - "learning_rate": 0.00019999962572052544, - "loss": 46.0, - "step": 5418 - }, - { - "epoch": 0.8726599299488708, - "grad_norm": 0.0012994561111554503, - "learning_rate": 0.00019999962558209578, - "loss": 46.0, - "step": 5419 - }, - { - "epoch": 0.8728209670276581, - "grad_norm": 0.0011322868522256613, - "learning_rate": 0.00019999962544364048, - "loss": 46.0, - "step": 5420 - }, - { - "epoch": 0.8729820041064456, - "grad_norm": 0.0005961346323601902, - "learning_rate": 0.00019999962530515961, - "loss": 46.0, - "step": 5421 - }, - { - "epoch": 0.8731430411852329, - "grad_norm": 0.005132320336997509, - "learning_rate": 0.00019999962516665317, - "loss": 46.0, - "step": 5422 - }, - { - "epoch": 0.8733040782640203, - "grad_norm": 0.0015798952663317323, - "learning_rate": 0.0001999996250281211, - "loss": 46.0, - "step": 5423 - }, - { - "epoch": 0.8734651153428077, - "grad_norm": 0.0006656598416157067, - "learning_rate": 0.00019999962488956345, - "loss": 46.0, - "step": 5424 - }, - { - "epoch": 0.873626152421595, - "grad_norm": 0.001495807315222919, - "learning_rate": 0.0001999996247509802, - "loss": 46.0, - "step": 5425 - }, - { - "epoch": 0.8737871895003825, - "grad_norm": 0.0006847336771897972, - "learning_rate": 0.0001999996246123714, - "loss": 46.0, - "step": 5426 - }, - { - "epoch": 0.8739482265791698, - "grad_norm": 0.0017865067347884178, - "learning_rate": 0.00019999962447373696, - "loss": 46.0, - "step": 5427 - }, - { - "epoch": 0.8741092636579573, - "grad_norm": 0.0022510443814098835, - "learning_rate": 0.0001999996243350769, - "loss": 46.0, - "step": 5428 - }, - { - "epoch": 0.8742703007367446, - "grad_norm": 0.002816633088514209, - "learning_rate": 0.0001999996241963913, - "loss": 46.0, - "step": 5429 - }, - { - "epoch": 0.8744313378155321, - "grad_norm": 0.0013194011989980936, - "learning_rate": 0.00019999962405768006, - "loss": 46.0, - "step": 5430 - }, - { - "epoch": 0.8745923748943194, - "grad_norm": 0.0011149300262331963, - "learning_rate": 0.00019999962391894325, - "loss": 46.0, - "step": 5431 - }, - { - "epoch": 0.8747534119731069, - "grad_norm": 0.0005363540258258581, - "learning_rate": 0.00019999962378018085, - "loss": 46.0, - "step": 5432 - }, - { - "epoch": 0.8749144490518942, - "grad_norm": 0.0017888746224343777, - "learning_rate": 0.00019999962364139283, - "loss": 46.0, - "step": 5433 - }, - { - "epoch": 0.8750754861306815, - "grad_norm": 0.002010643482208252, - "learning_rate": 0.00019999962350257923, - "loss": 46.0, - "step": 5434 - }, - { - "epoch": 0.875236523209469, - "grad_norm": 0.00449001369997859, - "learning_rate": 0.00019999962336374004, - "loss": 46.0, - "step": 5435 - }, - { - "epoch": 0.8753975602882563, - "grad_norm": 0.0009560094913467765, - "learning_rate": 0.00019999962322487527, - "loss": 46.0, - "step": 5436 - }, - { - "epoch": 0.8755585973670438, - "grad_norm": 0.0014784683007746935, - "learning_rate": 0.00019999962308598487, - "loss": 46.0, - "step": 5437 - }, - { - "epoch": 0.8757196344458311, - "grad_norm": 0.0007613886846229434, - "learning_rate": 0.00019999962294706892, - "loss": 46.0, - "step": 5438 - }, - { - "epoch": 0.8758806715246186, - "grad_norm": 0.0017266601789742708, - "learning_rate": 0.00019999962280812733, - "loss": 46.0, - "step": 5439 - }, - { - "epoch": 0.8760417086034059, - "grad_norm": 0.002928278874605894, - "learning_rate": 0.00019999962266916015, - "loss": 46.0, - "step": 5440 - }, - { - "epoch": 0.8762027456821934, - "grad_norm": 0.0007385804201476276, - "learning_rate": 0.00019999962253016738, - "loss": 46.0, - "step": 5441 - }, - { - "epoch": 0.8763637827609807, - "grad_norm": 0.0007163072004914284, - "learning_rate": 0.00019999962239114906, - "loss": 46.0, - "step": 5442 - }, - { - "epoch": 0.8765248198397682, - "grad_norm": 0.00032691159867681563, - "learning_rate": 0.0001999996222521051, - "loss": 46.0, - "step": 5443 - }, - { - "epoch": 0.8766858569185555, - "grad_norm": 0.0009719034424051642, - "learning_rate": 0.00019999962211303553, - "loss": 46.0, - "step": 5444 - }, - { - "epoch": 0.8768468939973428, - "grad_norm": 0.00503349956125021, - "learning_rate": 0.0001999996219739404, - "loss": 46.0, - "step": 5445 - }, - { - "epoch": 0.8770079310761303, - "grad_norm": 0.0019561199005693197, - "learning_rate": 0.0001999996218348197, - "loss": 46.0, - "step": 5446 - }, - { - "epoch": 0.8771689681549176, - "grad_norm": 0.001731664757244289, - "learning_rate": 0.00019999962169567334, - "loss": 46.0, - "step": 5447 - }, - { - "epoch": 0.8773300052337051, - "grad_norm": 0.0007576161879114807, - "learning_rate": 0.00019999962155650144, - "loss": 46.0, - "step": 5448 - }, - { - "epoch": 0.8774910423124924, - "grad_norm": 0.0023225368931889534, - "learning_rate": 0.0001999996214173039, - "loss": 46.0, - "step": 5449 - }, - { - "epoch": 0.8776520793912799, - "grad_norm": 0.002116841496899724, - "learning_rate": 0.00019999962127808076, - "loss": 46.0, - "step": 5450 - }, - { - "epoch": 0.8778131164700672, - "grad_norm": 0.000905901484657079, - "learning_rate": 0.00019999962113883207, - "loss": 46.0, - "step": 5451 - }, - { - "epoch": 0.8779741535488547, - "grad_norm": 0.001066633383743465, - "learning_rate": 0.0001999996209995578, - "loss": 46.0, - "step": 5452 - }, - { - "epoch": 0.878135190627642, - "grad_norm": 0.003724220674484968, - "learning_rate": 0.00019999962086025787, - "loss": 46.0, - "step": 5453 - }, - { - "epoch": 0.8782962277064295, - "grad_norm": 0.0016309581696987152, - "learning_rate": 0.00019999962072093236, - "loss": 46.0, - "step": 5454 - }, - { - "epoch": 0.8784572647852168, - "grad_norm": 0.0006372524658218026, - "learning_rate": 0.0001999996205815813, - "loss": 46.0, - "step": 5455 - }, - { - "epoch": 0.8786183018640041, - "grad_norm": 0.004177498165518045, - "learning_rate": 0.0001999996204422046, - "loss": 46.0, - "step": 5456 - }, - { - "epoch": 0.8787793389427916, - "grad_norm": 0.0012889906065538526, - "learning_rate": 0.00019999962030280234, - "loss": 46.0, - "step": 5457 - }, - { - "epoch": 0.8789403760215789, - "grad_norm": 0.0012320299865677953, - "learning_rate": 0.00019999962016337442, - "loss": 46.0, - "step": 5458 - }, - { - "epoch": 0.8791014131003664, - "grad_norm": 0.002860185457393527, - "learning_rate": 0.00019999962002392098, - "loss": 46.0, - "step": 5459 - }, - { - "epoch": 0.8792624501791537, - "grad_norm": 0.0008574715466238558, - "learning_rate": 0.00019999961988444192, - "loss": 46.0, - "step": 5460 - }, - { - "epoch": 0.8794234872579412, - "grad_norm": 0.0009642100776545703, - "learning_rate": 0.00019999961974493725, - "loss": 46.0, - "step": 5461 - }, - { - "epoch": 0.8795845243367285, - "grad_norm": 0.004119659774005413, - "learning_rate": 0.00019999961960540704, - "loss": 46.0, - "step": 5462 - }, - { - "epoch": 0.879745561415516, - "grad_norm": 0.0006561786867678165, - "learning_rate": 0.00019999961946585114, - "loss": 46.0, - "step": 5463 - }, - { - "epoch": 0.8799065984943033, - "grad_norm": 0.007290414068847895, - "learning_rate": 0.00019999961932626973, - "loss": 46.0, - "step": 5464 - }, - { - "epoch": 0.8800676355730908, - "grad_norm": 0.0007360965246334672, - "learning_rate": 0.0001999996191866627, - "loss": 46.0, - "step": 5465 - }, - { - "epoch": 0.8802286726518781, - "grad_norm": 0.005771649070084095, - "learning_rate": 0.00019999961904703008, - "loss": 46.0, - "step": 5466 - }, - { - "epoch": 0.8803897097306654, - "grad_norm": 0.0018337457440793514, - "learning_rate": 0.00019999961890737185, - "loss": 46.0, - "step": 5467 - }, - { - "epoch": 0.8805507468094529, - "grad_norm": 0.0016193913761526346, - "learning_rate": 0.00019999961876768801, - "loss": 46.0, - "step": 5468 - }, - { - "epoch": 0.8807117838882402, - "grad_norm": 0.002049280796200037, - "learning_rate": 0.0001999996186279786, - "loss": 46.0, - "step": 5469 - }, - { - "epoch": 0.8808728209670277, - "grad_norm": 0.00038791593397036195, - "learning_rate": 0.0001999996184882436, - "loss": 46.0, - "step": 5470 - }, - { - "epoch": 0.881033858045815, - "grad_norm": 0.0014635240659117699, - "learning_rate": 0.000199999618348483, - "loss": 46.0, - "step": 5471 - }, - { - "epoch": 0.8811948951246025, - "grad_norm": 0.0008576675318181515, - "learning_rate": 0.00019999961820869677, - "loss": 46.0, - "step": 5472 - }, - { - "epoch": 0.8813559322033898, - "grad_norm": 0.0004542733950074762, - "learning_rate": 0.000199999618068885, - "loss": 46.0, - "step": 5473 - }, - { - "epoch": 0.8815169692821773, - "grad_norm": 0.0005807816050946712, - "learning_rate": 0.0001999996179290476, - "loss": 46.0, - "step": 5474 - }, - { - "epoch": 0.8816780063609646, - "grad_norm": 0.002323566470295191, - "learning_rate": 0.0001999996177891846, - "loss": 46.0, - "step": 5475 - }, - { - "epoch": 0.8818390434397521, - "grad_norm": 0.0022160017397254705, - "learning_rate": 0.00019999961764929601, - "loss": 46.0, - "step": 5476 - }, - { - "epoch": 0.8820000805185394, - "grad_norm": 0.002299319254234433, - "learning_rate": 0.00019999961750938187, - "loss": 46.0, - "step": 5477 - }, - { - "epoch": 0.8821611175973267, - "grad_norm": 0.0012396796373650432, - "learning_rate": 0.0001999996173694421, - "loss": 46.0, - "step": 5478 - }, - { - "epoch": 0.8823221546761142, - "grad_norm": 0.0007765289628878236, - "learning_rate": 0.00019999961722947672, - "loss": 46.0, - "step": 5479 - }, - { - "epoch": 0.8824831917549015, - "grad_norm": 0.0007036970928311348, - "learning_rate": 0.0001999996170894858, - "loss": 46.0, - "step": 5480 - }, - { - "epoch": 0.882644228833689, - "grad_norm": 0.000626065768301487, - "learning_rate": 0.00019999961694946923, - "loss": 46.0, - "step": 5481 - }, - { - "epoch": 0.8828052659124763, - "grad_norm": 0.0008546230383217335, - "learning_rate": 0.0001999996168094271, - "loss": 46.0, - "step": 5482 - }, - { - "epoch": 0.8829663029912638, - "grad_norm": 0.004829421639442444, - "learning_rate": 0.00019999961666935934, - "loss": 46.0, - "step": 5483 - }, - { - "epoch": 0.8831273400700511, - "grad_norm": 0.0008491275948472321, - "learning_rate": 0.000199999616529266, - "loss": 46.0, - "step": 5484 - }, - { - "epoch": 0.8832883771488386, - "grad_norm": 0.003391230246052146, - "learning_rate": 0.00019999961638914705, - "loss": 46.0, - "step": 5485 - }, - { - "epoch": 0.8834494142276259, - "grad_norm": 0.0014533017529174685, - "learning_rate": 0.00019999961624900253, - "loss": 46.0, - "step": 5486 - }, - { - "epoch": 0.8836104513064132, - "grad_norm": 0.0034319807309657335, - "learning_rate": 0.00019999961610883243, - "loss": 46.0, - "step": 5487 - }, - { - "epoch": 0.8837714883852007, - "grad_norm": 0.0006078967708162963, - "learning_rate": 0.00019999961596863669, - "loss": 46.0, - "step": 5488 - }, - { - "epoch": 0.883932525463988, - "grad_norm": 0.00028470889083109796, - "learning_rate": 0.0001999996158284154, - "loss": 46.0, - "step": 5489 - }, - { - "epoch": 0.8840935625427755, - "grad_norm": 0.001391082420013845, - "learning_rate": 0.0001999996156881685, - "loss": 46.0, - "step": 5490 - }, - { - "epoch": 0.8842545996215628, - "grad_norm": 0.0005989590426906943, - "learning_rate": 0.000199999615547896, - "loss": 46.0, - "step": 5491 - }, - { - "epoch": 0.8844156367003503, - "grad_norm": 0.0017267513321712613, - "learning_rate": 0.0001999996154075979, - "loss": 46.0, - "step": 5492 - }, - { - "epoch": 0.8845766737791376, - "grad_norm": 0.0006365755689330399, - "learning_rate": 0.0001999996152672742, - "loss": 46.0, - "step": 5493 - }, - { - "epoch": 0.8847377108579251, - "grad_norm": 0.0016927524702623487, - "learning_rate": 0.00019999961512692492, - "loss": 46.0, - "step": 5494 - }, - { - "epoch": 0.8848987479367124, - "grad_norm": 0.0007248780457302928, - "learning_rate": 0.00019999961498655004, - "loss": 46.0, - "step": 5495 - }, - { - "epoch": 0.8850597850154999, - "grad_norm": 0.0002985633327625692, - "learning_rate": 0.00019999961484614955, - "loss": 46.0, - "step": 5496 - }, - { - "epoch": 0.8852208220942872, - "grad_norm": 0.0013461465714499354, - "learning_rate": 0.0001999996147057235, - "loss": 46.0, - "step": 5497 - }, - { - "epoch": 0.8853818591730745, - "grad_norm": 0.0025419513694941998, - "learning_rate": 0.00019999961456527182, - "loss": 46.0, - "step": 5498 - }, - { - "epoch": 0.885542896251862, - "grad_norm": 0.00048314695595763624, - "learning_rate": 0.0001999996144247946, - "loss": 46.0, - "step": 5499 - }, - { - "epoch": 0.8857039333306493, - "grad_norm": 0.0005405222764238715, - "learning_rate": 0.00019999961428429172, - "loss": 46.0, - "step": 5500 - }, - { - "epoch": 0.8858649704094368, - "grad_norm": 0.0009830312337726355, - "learning_rate": 0.00019999961414376326, - "loss": 46.0, - "step": 5501 - }, - { - "epoch": 0.8860260074882241, - "grad_norm": 0.0010159957455471158, - "learning_rate": 0.00019999961400320925, - "loss": 46.0, - "step": 5502 - }, - { - "epoch": 0.8861870445670116, - "grad_norm": 0.002079539932310581, - "learning_rate": 0.00019999961386262959, - "loss": 46.0, - "step": 5503 - }, - { - "epoch": 0.8863480816457989, - "grad_norm": 0.0027389926835894585, - "learning_rate": 0.00019999961372202437, - "loss": 46.0, - "step": 5504 - }, - { - "epoch": 0.8865091187245864, - "grad_norm": 0.001533121452666819, - "learning_rate": 0.00019999961358139353, - "loss": 46.0, - "step": 5505 - }, - { - "epoch": 0.8866701558033737, - "grad_norm": 0.0008724723011255264, - "learning_rate": 0.0001999996134407371, - "loss": 46.0, - "step": 5506 - }, - { - "epoch": 0.8868311928821612, - "grad_norm": 0.0034404757898300886, - "learning_rate": 0.0001999996133000551, - "loss": 46.0, - "step": 5507 - }, - { - "epoch": 0.8869922299609485, - "grad_norm": 0.0017426405102014542, - "learning_rate": 0.00019999961315934748, - "loss": 46.0, - "step": 5508 - }, - { - "epoch": 0.8871532670397358, - "grad_norm": 0.000947405700571835, - "learning_rate": 0.00019999961301861427, - "loss": 46.0, - "step": 5509 - }, - { - "epoch": 0.8873143041185233, - "grad_norm": 0.0005466010188683867, - "learning_rate": 0.0001999996128778555, - "loss": 46.0, - "step": 5510 - }, - { - "epoch": 0.8874753411973106, - "grad_norm": 0.0017102526035159826, - "learning_rate": 0.0001999996127370711, - "loss": 46.0, - "step": 5511 - }, - { - "epoch": 0.8876363782760981, - "grad_norm": 0.0003407685726415366, - "learning_rate": 0.0001999996125962611, - "loss": 46.0, - "step": 5512 - }, - { - "epoch": 0.8877974153548854, - "grad_norm": 0.0015317793004214764, - "learning_rate": 0.0001999996124554255, - "loss": 46.0, - "step": 5513 - }, - { - "epoch": 0.8879584524336729, - "grad_norm": 0.001786372042261064, - "learning_rate": 0.00019999961231456433, - "loss": 46.0, - "step": 5514 - }, - { - "epoch": 0.8881194895124602, - "grad_norm": 0.0005082154530100524, - "learning_rate": 0.00019999961217367755, - "loss": 46.0, - "step": 5515 - }, - { - "epoch": 0.8882805265912477, - "grad_norm": 0.0008765160455368459, - "learning_rate": 0.00019999961203276517, - "loss": 46.0, - "step": 5516 - }, - { - "epoch": 0.888441563670035, - "grad_norm": 0.0014389300486072898, - "learning_rate": 0.0001999996118918272, - "loss": 46.0, - "step": 5517 - }, - { - "epoch": 0.8886026007488225, - "grad_norm": 0.0007574508199468255, - "learning_rate": 0.00019999961175086366, - "loss": 46.0, - "step": 5518 - }, - { - "epoch": 0.8887636378276098, - "grad_norm": 0.0010004672221839428, - "learning_rate": 0.0001999996116098745, - "loss": 46.0, - "step": 5519 - }, - { - "epoch": 0.8889246749063971, - "grad_norm": 0.0006244581891223788, - "learning_rate": 0.00019999961146885978, - "loss": 46.0, - "step": 5520 - }, - { - "epoch": 0.8890857119851846, - "grad_norm": 0.0005050482577644289, - "learning_rate": 0.00019999961132781941, - "loss": 46.0, - "step": 5521 - }, - { - "epoch": 0.8892467490639719, - "grad_norm": 0.001633846783079207, - "learning_rate": 0.00019999961118675346, - "loss": 46.0, - "step": 5522 - }, - { - "epoch": 0.8894077861427594, - "grad_norm": 0.0005150714423507452, - "learning_rate": 0.00019999961104566193, - "loss": 46.0, - "step": 5523 - }, - { - "epoch": 0.8895688232215467, - "grad_norm": 0.0013295452808961272, - "learning_rate": 0.00019999961090454483, - "loss": 46.0, - "step": 5524 - }, - { - "epoch": 0.8897298603003342, - "grad_norm": 0.0009041702724061906, - "learning_rate": 0.00019999961076340211, - "loss": 46.0, - "step": 5525 - }, - { - "epoch": 0.8898908973791215, - "grad_norm": 0.00155701267067343, - "learning_rate": 0.0001999996106222338, - "loss": 46.0, - "step": 5526 - }, - { - "epoch": 0.890051934457909, - "grad_norm": 0.0006994783179834485, - "learning_rate": 0.00019999961048103985, - "loss": 46.0, - "step": 5527 - }, - { - "epoch": 0.8902129715366963, - "grad_norm": 0.004856567829847336, - "learning_rate": 0.00019999961033982037, - "loss": 46.0, - "step": 5528 - }, - { - "epoch": 0.8903740086154838, - "grad_norm": 0.0012894722167402506, - "learning_rate": 0.00019999961019857528, - "loss": 46.0, - "step": 5529 - }, - { - "epoch": 0.8905350456942711, - "grad_norm": 0.0009183423244394362, - "learning_rate": 0.00019999961005730455, - "loss": 46.0, - "step": 5530 - }, - { - "epoch": 0.8906960827730585, - "grad_norm": 0.0011541893472895026, - "learning_rate": 0.0001999996099160083, - "loss": 46.0, - "step": 5531 - }, - { - "epoch": 0.8908571198518459, - "grad_norm": 0.0033031266648322344, - "learning_rate": 0.00019999960977468639, - "loss": 46.0, - "step": 5532 - }, - { - "epoch": 0.8910181569306332, - "grad_norm": 0.001081187860108912, - "learning_rate": 0.0001999996096333389, - "loss": 46.0, - "step": 5533 - }, - { - "epoch": 0.8911791940094207, - "grad_norm": 0.0005713963182643056, - "learning_rate": 0.00019999960949196584, - "loss": 46.0, - "step": 5534 - }, - { - "epoch": 0.891340231088208, - "grad_norm": 0.001636003260500729, - "learning_rate": 0.00019999960935056715, - "loss": 46.0, - "step": 5535 - }, - { - "epoch": 0.8915012681669955, - "grad_norm": 0.0020119203254580498, - "learning_rate": 0.00019999960920914292, - "loss": 46.0, - "step": 5536 - }, - { - "epoch": 0.8916623052457828, - "grad_norm": 0.001871225074864924, - "learning_rate": 0.00019999960906769303, - "loss": 46.0, - "step": 5537 - }, - { - "epoch": 0.8918233423245703, - "grad_norm": 0.0006526735960505903, - "learning_rate": 0.00019999960892621758, - "loss": 46.0, - "step": 5538 - }, - { - "epoch": 0.8919843794033576, - "grad_norm": 0.0018917681882157922, - "learning_rate": 0.00019999960878471653, - "loss": 46.0, - "step": 5539 - }, - { - "epoch": 0.892145416482145, - "grad_norm": 0.0007902439683675766, - "learning_rate": 0.0001999996086431899, - "loss": 46.0, - "step": 5540 - }, - { - "epoch": 0.8923064535609324, - "grad_norm": 0.002496239962056279, - "learning_rate": 0.00019999960850163766, - "loss": 46.0, - "step": 5541 - }, - { - "epoch": 0.8924674906397198, - "grad_norm": 0.0005020059179514647, - "learning_rate": 0.00019999960836005983, - "loss": 46.0, - "step": 5542 - }, - { - "epoch": 0.8926285277185072, - "grad_norm": 0.001550781773403287, - "learning_rate": 0.00019999960821845641, - "loss": 46.0, - "step": 5543 - }, - { - "epoch": 0.8927895647972945, - "grad_norm": 0.0007806345238350332, - "learning_rate": 0.00019999960807682738, - "loss": 46.0, - "step": 5544 - }, - { - "epoch": 0.892950601876082, - "grad_norm": 0.0017287826631218195, - "learning_rate": 0.00019999960793517274, - "loss": 46.0, - "step": 5545 - }, - { - "epoch": 0.8931116389548693, - "grad_norm": 0.0007485355599783361, - "learning_rate": 0.00019999960779349253, - "loss": 46.0, - "step": 5546 - }, - { - "epoch": 0.8932726760336568, - "grad_norm": 0.0016300444258376956, - "learning_rate": 0.00019999960765178674, - "loss": 46.0, - "step": 5547 - }, - { - "epoch": 0.8934337131124441, - "grad_norm": 0.00109186302870512, - "learning_rate": 0.00019999960751005533, - "loss": 46.0, - "step": 5548 - }, - { - "epoch": 0.8935947501912316, - "grad_norm": 0.0007668111938983202, - "learning_rate": 0.00019999960736829834, - "loss": 46.0, - "step": 5549 - }, - { - "epoch": 0.8937557872700189, - "grad_norm": 0.0016533834859728813, - "learning_rate": 0.00019999960722651573, - "loss": 46.0, - "step": 5550 - }, - { - "epoch": 0.8939168243488063, - "grad_norm": 0.003962287213653326, - "learning_rate": 0.00019999960708470756, - "loss": 46.0, - "step": 5551 - }, - { - "epoch": 0.8940778614275937, - "grad_norm": 0.0022235820069909096, - "learning_rate": 0.00019999960694287377, - "loss": 46.0, - "step": 5552 - }, - { - "epoch": 0.894238898506381, - "grad_norm": 0.0011650846572592854, - "learning_rate": 0.0001999996068010144, - "loss": 46.0, - "step": 5553 - }, - { - "epoch": 0.8943999355851685, - "grad_norm": 0.000557522289454937, - "learning_rate": 0.00019999960665912942, - "loss": 46.0, - "step": 5554 - }, - { - "epoch": 0.8945609726639558, - "grad_norm": 0.0014321466442197561, - "learning_rate": 0.00019999960651721885, - "loss": 46.0, - "step": 5555 - }, - { - "epoch": 0.8947220097427433, - "grad_norm": 0.0008956867386586964, - "learning_rate": 0.0001999996063752827, - "loss": 46.0, - "step": 5556 - }, - { - "epoch": 0.8948830468215306, - "grad_norm": 0.004334675148129463, - "learning_rate": 0.00019999960623332094, - "loss": 46.0, - "step": 5557 - }, - { - "epoch": 0.8950440839003181, - "grad_norm": 0.001942360308021307, - "learning_rate": 0.0001999996060913336, - "loss": 46.0, - "step": 5558 - }, - { - "epoch": 0.8952051209791054, - "grad_norm": 0.0013158803340047598, - "learning_rate": 0.00019999960594932066, - "loss": 46.0, - "step": 5559 - }, - { - "epoch": 0.8953661580578929, - "grad_norm": 0.0010415952419862151, - "learning_rate": 0.00019999960580728213, - "loss": 46.0, - "step": 5560 - }, - { - "epoch": 0.8955271951366802, - "grad_norm": 0.0019848961383104324, - "learning_rate": 0.00019999960566521798, - "loss": 46.0, - "step": 5561 - }, - { - "epoch": 0.8956882322154676, - "grad_norm": 0.005372314713895321, - "learning_rate": 0.00019999960552312827, - "loss": 46.0, - "step": 5562 - }, - { - "epoch": 0.895849269294255, - "grad_norm": 0.0013121406082063913, - "learning_rate": 0.00019999960538101292, - "loss": 46.0, - "step": 5563 - }, - { - "epoch": 0.8960103063730424, - "grad_norm": 0.0017942595295608044, - "learning_rate": 0.000199999605238872, - "loss": 46.0, - "step": 5564 - }, - { - "epoch": 0.8961713434518298, - "grad_norm": 0.0036651925183832645, - "learning_rate": 0.0001999996050967055, - "loss": 46.0, - "step": 5565 - }, - { - "epoch": 0.8963323805306171, - "grad_norm": 0.0016075974563136697, - "learning_rate": 0.00019999960495451337, - "loss": 46.0, - "step": 5566 - }, - { - "epoch": 0.8964934176094046, - "grad_norm": 0.002354175318032503, - "learning_rate": 0.00019999960481229567, - "loss": 46.0, - "step": 5567 - }, - { - "epoch": 0.8966544546881919, - "grad_norm": 0.0004906888934783638, - "learning_rate": 0.00019999960467005239, - "loss": 46.0, - "step": 5568 - }, - { - "epoch": 0.8968154917669794, - "grad_norm": 0.0006193263689056039, - "learning_rate": 0.0001999996045277835, - "loss": 46.0, - "step": 5569 - }, - { - "epoch": 0.8969765288457667, - "grad_norm": 0.004660008475184441, - "learning_rate": 0.00019999960438548902, - "loss": 46.0, - "step": 5570 - }, - { - "epoch": 0.8971375659245542, - "grad_norm": 0.0016145760891959071, - "learning_rate": 0.00019999960424316892, - "loss": 46.0, - "step": 5571 - }, - { - "epoch": 0.8972986030033415, - "grad_norm": 0.0008006156422197819, - "learning_rate": 0.00019999960410082326, - "loss": 46.0, - "step": 5572 - }, - { - "epoch": 0.8974596400821289, - "grad_norm": 0.0028087706305086613, - "learning_rate": 0.00019999960395845196, - "loss": 46.0, - "step": 5573 - }, - { - "epoch": 0.8976206771609163, - "grad_norm": 0.005286147352308035, - "learning_rate": 0.00019999960381605512, - "loss": 46.0, - "step": 5574 - }, - { - "epoch": 0.8977817142397037, - "grad_norm": 0.0018280866788700223, - "learning_rate": 0.00019999960367363264, - "loss": 46.0, - "step": 5575 - }, - { - "epoch": 0.8979427513184911, - "grad_norm": 0.001505769439972937, - "learning_rate": 0.00019999960353118458, - "loss": 46.0, - "step": 5576 - }, - { - "epoch": 0.8981037883972784, - "grad_norm": 0.0006667435518465936, - "learning_rate": 0.00019999960338871095, - "loss": 46.0, - "step": 5577 - }, - { - "epoch": 0.8982648254760659, - "grad_norm": 0.004198502283543348, - "learning_rate": 0.00019999960324621168, - "loss": 46.0, - "step": 5578 - }, - { - "epoch": 0.8984258625548532, - "grad_norm": 0.0030197720043361187, - "learning_rate": 0.00019999960310368686, - "loss": 46.0, - "step": 5579 - }, - { - "epoch": 0.8985868996336407, - "grad_norm": 0.0007644347497262061, - "learning_rate": 0.00019999960296113644, - "loss": 46.0, - "step": 5580 - }, - { - "epoch": 0.898747936712428, - "grad_norm": 0.0010789462830871344, - "learning_rate": 0.00019999960281856042, - "loss": 46.0, - "step": 5581 - }, - { - "epoch": 0.8989089737912155, - "grad_norm": 0.0008686682558618486, - "learning_rate": 0.00019999960267595877, - "loss": 46.0, - "step": 5582 - }, - { - "epoch": 0.8990700108700028, - "grad_norm": 0.0025454119313508272, - "learning_rate": 0.00019999960253333154, - "loss": 46.0, - "step": 5583 - }, - { - "epoch": 0.8992310479487902, - "grad_norm": 0.0009476811392232776, - "learning_rate": 0.00019999960239067876, - "loss": 46.0, - "step": 5584 - }, - { - "epoch": 0.8993920850275776, - "grad_norm": 0.0009671699372120202, - "learning_rate": 0.00019999960224800035, - "loss": 46.0, - "step": 5585 - }, - { - "epoch": 0.899553122106365, - "grad_norm": 0.000562049972359091, - "learning_rate": 0.00019999960210529633, - "loss": 46.0, - "step": 5586 - }, - { - "epoch": 0.8997141591851524, - "grad_norm": 0.0005062755662947893, - "learning_rate": 0.00019999960196256673, - "loss": 46.0, - "step": 5587 - }, - { - "epoch": 0.8998751962639397, - "grad_norm": 0.00045780991786159575, - "learning_rate": 0.00019999960181981154, - "loss": 46.0, - "step": 5588 - }, - { - "epoch": 0.9000362333427272, - "grad_norm": 0.0012315440690144897, - "learning_rate": 0.00019999960167703076, - "loss": 46.0, - "step": 5589 - }, - { - "epoch": 0.9001972704215145, - "grad_norm": 0.00034972140565514565, - "learning_rate": 0.00019999960153422436, - "loss": 46.0, - "step": 5590 - }, - { - "epoch": 0.900358307500302, - "grad_norm": 0.0012553875567391515, - "learning_rate": 0.0001999996013913924, - "loss": 46.0, - "step": 5591 - }, - { - "epoch": 0.9005193445790893, - "grad_norm": 0.0021845693700015545, - "learning_rate": 0.00019999960124853484, - "loss": 46.0, - "step": 5592 - }, - { - "epoch": 0.9006803816578767, - "grad_norm": 0.0016618001973256469, - "learning_rate": 0.00019999960110565166, - "loss": 46.0, - "step": 5593 - }, - { - "epoch": 0.9008414187366641, - "grad_norm": 0.0011843597749248147, - "learning_rate": 0.00019999960096274291, - "loss": 46.0, - "step": 5594 - }, - { - "epoch": 0.9010024558154515, - "grad_norm": 0.000884176348336041, - "learning_rate": 0.00019999960081980853, - "loss": 46.0, - "step": 5595 - }, - { - "epoch": 0.9011634928942389, - "grad_norm": 0.0023520884569734335, - "learning_rate": 0.0001999996006768486, - "loss": 46.0, - "step": 5596 - }, - { - "epoch": 0.9013245299730263, - "grad_norm": 0.003695164807140827, - "learning_rate": 0.00019999960053386303, - "loss": 46.0, - "step": 5597 - }, - { - "epoch": 0.9014855670518137, - "grad_norm": 0.0038136360235512257, - "learning_rate": 0.0001999996003908519, - "loss": 46.0, - "step": 5598 - }, - { - "epoch": 0.901646604130601, - "grad_norm": 0.0005589498905465007, - "learning_rate": 0.00019999960024781518, - "loss": 46.0, - "step": 5599 - }, - { - "epoch": 0.9018076412093885, - "grad_norm": 0.0005771736614406109, - "learning_rate": 0.00019999960010475283, - "loss": 46.0, - "step": 5600 - }, - { - "epoch": 0.9019686782881758, - "grad_norm": 0.0008498323149979115, - "learning_rate": 0.00019999959996166492, - "loss": 46.0, - "step": 5601 - }, - { - "epoch": 0.9021297153669633, - "grad_norm": 0.0008164996979758143, - "learning_rate": 0.0001999995998185514, - "loss": 46.0, - "step": 5602 - }, - { - "epoch": 0.9022907524457506, - "grad_norm": 0.004340397194027901, - "learning_rate": 0.00019999959967541227, - "loss": 46.0, - "step": 5603 - }, - { - "epoch": 0.902451789524538, - "grad_norm": 0.0010171361500397325, - "learning_rate": 0.00019999959953224757, - "loss": 46.0, - "step": 5604 - }, - { - "epoch": 0.9026128266033254, - "grad_norm": 0.004007955547422171, - "learning_rate": 0.0001999995993890573, - "loss": 46.0, - "step": 5605 - }, - { - "epoch": 0.9027738636821128, - "grad_norm": 0.0009484782931394875, - "learning_rate": 0.00019999959924584136, - "loss": 46.0, - "step": 5606 - }, - { - "epoch": 0.9029349007609002, - "grad_norm": 0.0010315830586478114, - "learning_rate": 0.00019999959910259988, - "loss": 46.0, - "step": 5607 - }, - { - "epoch": 0.9030959378396876, - "grad_norm": 0.002179377945140004, - "learning_rate": 0.0001999995989593328, - "loss": 46.0, - "step": 5608 - }, - { - "epoch": 0.903256974918475, - "grad_norm": 0.0018888365011662245, - "learning_rate": 0.0001999995988160401, - "loss": 46.0, - "step": 5609 - }, - { - "epoch": 0.9034180119972623, - "grad_norm": 0.0010518798371776938, - "learning_rate": 0.00019999959867272182, - "loss": 46.0, - "step": 5610 - }, - { - "epoch": 0.9035790490760498, - "grad_norm": 0.0016064199153333902, - "learning_rate": 0.00019999959852937796, - "loss": 46.0, - "step": 5611 - }, - { - "epoch": 0.9037400861548371, - "grad_norm": 0.002031848533079028, - "learning_rate": 0.0001999995983860085, - "loss": 46.0, - "step": 5612 - }, - { - "epoch": 0.9039011232336246, - "grad_norm": 0.0007423665374517441, - "learning_rate": 0.00019999959824261343, - "loss": 46.0, - "step": 5613 - }, - { - "epoch": 0.9040621603124119, - "grad_norm": 0.005528111010789871, - "learning_rate": 0.00019999959809919275, - "loss": 46.0, - "step": 5614 - }, - { - "epoch": 0.9042231973911993, - "grad_norm": 0.0009552434785291553, - "learning_rate": 0.00019999959795574652, - "loss": 46.0, - "step": 5615 - }, - { - "epoch": 0.9043842344699867, - "grad_norm": 0.0010574173647910357, - "learning_rate": 0.00019999959781227467, - "loss": 46.0, - "step": 5616 - }, - { - "epoch": 0.9045452715487741, - "grad_norm": 0.001811443711631, - "learning_rate": 0.00019999959766877723, - "loss": 46.0, - "step": 5617 - }, - { - "epoch": 0.9047063086275615, - "grad_norm": 0.001466817338950932, - "learning_rate": 0.00019999959752525418, - "loss": 46.0, - "step": 5618 - }, - { - "epoch": 0.9048673457063489, - "grad_norm": 0.0010563834803178906, - "learning_rate": 0.00019999959738170557, - "loss": 46.0, - "step": 5619 - }, - { - "epoch": 0.9050283827851363, - "grad_norm": 0.0012623857473954558, - "learning_rate": 0.00019999959723813134, - "loss": 46.0, - "step": 5620 - }, - { - "epoch": 0.9051894198639236, - "grad_norm": 0.0020953472703695297, - "learning_rate": 0.0001999995970945315, - "loss": 46.0, - "step": 5621 - }, - { - "epoch": 0.9053504569427111, - "grad_norm": 0.0009686333360150456, - "learning_rate": 0.0001999995969509061, - "loss": 46.0, - "step": 5622 - }, - { - "epoch": 0.9055114940214984, - "grad_norm": 0.001027517719194293, - "learning_rate": 0.00019999959680725509, - "loss": 46.0, - "step": 5623 - }, - { - "epoch": 0.9056725311002859, - "grad_norm": 0.0004785277706105262, - "learning_rate": 0.0001999995966635785, - "loss": 46.0, - "step": 5624 - }, - { - "epoch": 0.9058335681790732, - "grad_norm": 0.000614838267210871, - "learning_rate": 0.0001999995965198763, - "loss": 46.0, - "step": 5625 - }, - { - "epoch": 0.9059946052578606, - "grad_norm": 0.0012222366640344262, - "learning_rate": 0.00019999959637614852, - "loss": 46.0, - "step": 5626 - }, - { - "epoch": 0.906155642336648, - "grad_norm": 0.0007167730946093798, - "learning_rate": 0.0001999995962323951, - "loss": 46.0, - "step": 5627 - }, - { - "epoch": 0.9063166794154354, - "grad_norm": 0.001726824208162725, - "learning_rate": 0.00019999959608861612, - "loss": 46.0, - "step": 5628 - }, - { - "epoch": 0.9064777164942228, - "grad_norm": 0.0005700625479221344, - "learning_rate": 0.00019999959594481156, - "loss": 46.0, - "step": 5629 - }, - { - "epoch": 0.9066387535730102, - "grad_norm": 0.003727745031937957, - "learning_rate": 0.00019999959580098138, - "loss": 46.0, - "step": 5630 - }, - { - "epoch": 0.9067997906517976, - "grad_norm": 0.000594357552472502, - "learning_rate": 0.00019999959565712561, - "loss": 46.0, - "step": 5631 - }, - { - "epoch": 0.906960827730585, - "grad_norm": 0.003796704113483429, - "learning_rate": 0.00019999959551324423, - "loss": 46.0, - "step": 5632 - }, - { - "epoch": 0.9071218648093724, - "grad_norm": 0.00047868990805000067, - "learning_rate": 0.0001999995953693373, - "loss": 46.0, - "step": 5633 - }, - { - "epoch": 0.9072829018881597, - "grad_norm": 0.002626352244988084, - "learning_rate": 0.00019999959522540474, - "loss": 46.0, - "step": 5634 - }, - { - "epoch": 0.9074439389669472, - "grad_norm": 0.003925809636712074, - "learning_rate": 0.0001999995950814466, - "loss": 46.0, - "step": 5635 - }, - { - "epoch": 0.9076049760457345, - "grad_norm": 0.0020973600912839174, - "learning_rate": 0.00019999959493746284, - "loss": 46.0, - "step": 5636 - }, - { - "epoch": 0.9077660131245219, - "grad_norm": 0.001194565906189382, - "learning_rate": 0.00019999959479345352, - "loss": 46.0, - "step": 5637 - }, - { - "epoch": 0.9079270502033093, - "grad_norm": 0.0007073345477692783, - "learning_rate": 0.0001999995946494186, - "loss": 46.0, - "step": 5638 - }, - { - "epoch": 0.9080880872820967, - "grad_norm": 0.0011025781277567148, - "learning_rate": 0.00019999959450535805, - "loss": 46.0, - "step": 5639 - }, - { - "epoch": 0.9082491243608841, - "grad_norm": 0.003514037001878023, - "learning_rate": 0.00019999959436127194, - "loss": 46.0, - "step": 5640 - }, - { - "epoch": 0.9084101614396715, - "grad_norm": 0.0011931579792872071, - "learning_rate": 0.00019999959421716023, - "loss": 46.0, - "step": 5641 - }, - { - "epoch": 0.9085711985184589, - "grad_norm": 0.0006472069071605802, - "learning_rate": 0.00019999959407302292, - "loss": 46.0, - "step": 5642 - }, - { - "epoch": 0.9087322355972463, - "grad_norm": 0.0013911984860897064, - "learning_rate": 0.00019999959392886, - "loss": 46.0, - "step": 5643 - }, - { - "epoch": 0.9088932726760337, - "grad_norm": 0.0010574093321338296, - "learning_rate": 0.0001999995937846715, - "loss": 46.0, - "step": 5644 - }, - { - "epoch": 0.909054309754821, - "grad_norm": 0.002115755807608366, - "learning_rate": 0.00019999959364045742, - "loss": 46.0, - "step": 5645 - }, - { - "epoch": 0.9092153468336084, - "grad_norm": 0.001006089965812862, - "learning_rate": 0.00019999959349621774, - "loss": 46.0, - "step": 5646 - }, - { - "epoch": 0.9093763839123958, - "grad_norm": 0.0014010483864694834, - "learning_rate": 0.00019999959335195244, - "loss": 46.0, - "step": 5647 - }, - { - "epoch": 0.9095374209911832, - "grad_norm": 0.0005963574512861669, - "learning_rate": 0.00019999959320766156, - "loss": 46.0, - "step": 5648 - }, - { - "epoch": 0.9096984580699706, - "grad_norm": 0.0020547525491565466, - "learning_rate": 0.0001999995930633451, - "loss": 46.0, - "step": 5649 - }, - { - "epoch": 0.909859495148758, - "grad_norm": 0.0019808122888207436, - "learning_rate": 0.000199999592919003, - "loss": 46.0, - "step": 5650 - }, - { - "epoch": 0.9100205322275454, - "grad_norm": 0.0020298794843256474, - "learning_rate": 0.00019999959277463536, - "loss": 46.0, - "step": 5651 - }, - { - "epoch": 0.9101815693063328, - "grad_norm": 0.000573399942368269, - "learning_rate": 0.00019999959263024207, - "loss": 46.0, - "step": 5652 - }, - { - "epoch": 0.9103426063851202, - "grad_norm": 0.005184104200452566, - "learning_rate": 0.00019999959248582326, - "loss": 46.0, - "step": 5653 - }, - { - "epoch": 0.9105036434639076, - "grad_norm": 0.0014675381826236844, - "learning_rate": 0.0001999995923413788, - "loss": 46.0, - "step": 5654 - }, - { - "epoch": 0.910664680542695, - "grad_norm": 0.002199732232838869, - "learning_rate": 0.00019999959219690875, - "loss": 46.0, - "step": 5655 - }, - { - "epoch": 0.9108257176214823, - "grad_norm": 0.0011606800835579634, - "learning_rate": 0.0001999995920524131, - "loss": 46.0, - "step": 5656 - }, - { - "epoch": 0.9109867547002697, - "grad_norm": 0.004471417050808668, - "learning_rate": 0.00019999959190789192, - "loss": 46.0, - "step": 5657 - }, - { - "epoch": 0.9111477917790571, - "grad_norm": 0.0007940044160932302, - "learning_rate": 0.00019999959176334505, - "loss": 46.0, - "step": 5658 - }, - { - "epoch": 0.9113088288578445, - "grad_norm": 0.000527512573171407, - "learning_rate": 0.00019999959161877263, - "loss": 46.0, - "step": 5659 - }, - { - "epoch": 0.9114698659366319, - "grad_norm": 0.0015550669049844146, - "learning_rate": 0.00019999959147417462, - "loss": 46.0, - "step": 5660 - }, - { - "epoch": 0.9116309030154193, - "grad_norm": 0.0014338825130835176, - "learning_rate": 0.00019999959132955102, - "loss": 46.0, - "step": 5661 - }, - { - "epoch": 0.9117919400942067, - "grad_norm": 0.0022002665791660547, - "learning_rate": 0.0001999995911849018, - "loss": 46.0, - "step": 5662 - }, - { - "epoch": 0.9119529771729941, - "grad_norm": 0.0035580561961978674, - "learning_rate": 0.000199999591040227, - "loss": 46.0, - "step": 5663 - }, - { - "epoch": 0.9121140142517815, - "grad_norm": 0.0014668663498014212, - "learning_rate": 0.0001999995908955266, - "loss": 46.0, - "step": 5664 - }, - { - "epoch": 0.9122750513305689, - "grad_norm": 0.005410606972873211, - "learning_rate": 0.00019999959075080065, - "loss": 46.0, - "step": 5665 - }, - { - "epoch": 0.9124360884093563, - "grad_norm": 0.0010199042735621333, - "learning_rate": 0.00019999959060604906, - "loss": 46.0, - "step": 5666 - }, - { - "epoch": 0.9125971254881436, - "grad_norm": 0.0006429336499422789, - "learning_rate": 0.00019999959046127186, - "loss": 46.0, - "step": 5667 - }, - { - "epoch": 0.912758162566931, - "grad_norm": 0.00044360265019349754, - "learning_rate": 0.0001999995903164691, - "loss": 46.0, - "step": 5668 - }, - { - "epoch": 0.9129191996457184, - "grad_norm": 0.003370842197909951, - "learning_rate": 0.00019999959017164072, - "loss": 46.0, - "step": 5669 - }, - { - "epoch": 0.9130802367245058, - "grad_norm": 0.0008861338719725609, - "learning_rate": 0.00019999959002678675, - "loss": 46.0, - "step": 5670 - }, - { - "epoch": 0.9132412738032932, - "grad_norm": 0.0012375286314636469, - "learning_rate": 0.0001999995898819072, - "loss": 46.0, - "step": 5671 - }, - { - "epoch": 0.9134023108820806, - "grad_norm": 0.0008010775782167912, - "learning_rate": 0.00019999958973700204, - "loss": 46.0, - "step": 5672 - }, - { - "epoch": 0.913563347960868, - "grad_norm": 0.0012551102554425597, - "learning_rate": 0.0001999995895920713, - "loss": 46.0, - "step": 5673 - }, - { - "epoch": 0.9137243850396554, - "grad_norm": 0.0006155106239020824, - "learning_rate": 0.00019999958944711495, - "loss": 46.0, - "step": 5674 - }, - { - "epoch": 0.9138854221184428, - "grad_norm": 0.0007423459901474416, - "learning_rate": 0.00019999958930213302, - "loss": 46.0, - "step": 5675 - }, - { - "epoch": 0.9140464591972302, - "grad_norm": 0.0007193029741756618, - "learning_rate": 0.00019999958915712548, - "loss": 46.0, - "step": 5676 - }, - { - "epoch": 0.9142074962760176, - "grad_norm": 0.0012417606776580215, - "learning_rate": 0.00019999958901209235, - "loss": 46.0, - "step": 5677 - }, - { - "epoch": 0.914368533354805, - "grad_norm": 0.0021162887569516897, - "learning_rate": 0.00019999958886703363, - "loss": 46.0, - "step": 5678 - }, - { - "epoch": 0.9145295704335923, - "grad_norm": 0.0016738566337153316, - "learning_rate": 0.00019999958872194933, - "loss": 46.0, - "step": 5679 - }, - { - "epoch": 0.9146906075123797, - "grad_norm": 0.001967610325664282, - "learning_rate": 0.00019999958857683941, - "loss": 46.0, - "step": 5680 - }, - { - "epoch": 0.9148516445911671, - "grad_norm": 0.0006160161574371159, - "learning_rate": 0.0001999995884317039, - "loss": 46.0, - "step": 5681 - }, - { - "epoch": 0.9150126816699545, - "grad_norm": 0.000579670537263155, - "learning_rate": 0.00019999958828654282, - "loss": 46.0, - "step": 5682 - }, - { - "epoch": 0.9151737187487419, - "grad_norm": 0.0010190429165959358, - "learning_rate": 0.0001999995881413561, - "loss": 46.0, - "step": 5683 - }, - { - "epoch": 0.9153347558275293, - "grad_norm": 0.0032319920137524605, - "learning_rate": 0.00019999958799614382, - "loss": 46.0, - "step": 5684 - }, - { - "epoch": 0.9154957929063167, - "grad_norm": 0.001973667647689581, - "learning_rate": 0.00019999958785090594, - "loss": 46.0, - "step": 5685 - }, - { - "epoch": 0.9156568299851041, - "grad_norm": 0.0008971799979917705, - "learning_rate": 0.00019999958770564247, - "loss": 46.0, - "step": 5686 - }, - { - "epoch": 0.9158178670638915, - "grad_norm": 0.0017805973766371608, - "learning_rate": 0.00019999958756035337, - "loss": 46.0, - "step": 5687 - }, - { - "epoch": 0.9159789041426789, - "grad_norm": 0.0023819077759981155, - "learning_rate": 0.0001999995874150387, - "loss": 46.0, - "step": 5688 - }, - { - "epoch": 0.9161399412214662, - "grad_norm": 0.0023476211354136467, - "learning_rate": 0.00019999958726969844, - "loss": 46.0, - "step": 5689 - }, - { - "epoch": 0.9163009783002536, - "grad_norm": 0.0005144411697983742, - "learning_rate": 0.00019999958712433257, - "loss": 46.0, - "step": 5690 - }, - { - "epoch": 0.916462015379041, - "grad_norm": 0.0013660414842888713, - "learning_rate": 0.00019999958697894114, - "loss": 46.0, - "step": 5691 - }, - { - "epoch": 0.9166230524578284, - "grad_norm": 0.003989432007074356, - "learning_rate": 0.0001999995868335241, - "loss": 46.0, - "step": 5692 - }, - { - "epoch": 0.9167840895366158, - "grad_norm": 0.001444809720851481, - "learning_rate": 0.00019999958668808147, - "loss": 46.0, - "step": 5693 - }, - { - "epoch": 0.9169451266154032, - "grad_norm": 0.0006382620777003467, - "learning_rate": 0.0001999995865426132, - "loss": 46.0, - "step": 5694 - }, - { - "epoch": 0.9171061636941906, - "grad_norm": 0.005084271542727947, - "learning_rate": 0.00019999958639711936, - "loss": 46.0, - "step": 5695 - }, - { - "epoch": 0.917267200772978, - "grad_norm": 0.0015088410582393408, - "learning_rate": 0.00019999958625159994, - "loss": 46.0, - "step": 5696 - }, - { - "epoch": 0.9174282378517654, - "grad_norm": 0.0015899842837825418, - "learning_rate": 0.00019999958610605493, - "loss": 46.0, - "step": 5697 - }, - { - "epoch": 0.9175892749305528, - "grad_norm": 0.0006153786671347916, - "learning_rate": 0.0001999995859604843, - "loss": 46.0, - "step": 5698 - }, - { - "epoch": 0.9177503120093401, - "grad_norm": 0.0024292119778692722, - "learning_rate": 0.0001999995858148881, - "loss": 46.0, - "step": 5699 - }, - { - "epoch": 0.9179113490881275, - "grad_norm": 0.0004447857791092247, - "learning_rate": 0.00019999958566926628, - "loss": 46.0, - "step": 5700 - }, - { - "epoch": 0.9180723861669149, - "grad_norm": 0.0008880120585672557, - "learning_rate": 0.00019999958552361887, - "loss": 46.0, - "step": 5701 - }, - { - "epoch": 0.9182334232457023, - "grad_norm": 0.0012786429142579436, - "learning_rate": 0.0001999995853779459, - "loss": 46.0, - "step": 5702 - }, - { - "epoch": 0.9183944603244897, - "grad_norm": 0.0025664877612143755, - "learning_rate": 0.0001999995852322473, - "loss": 46.0, - "step": 5703 - }, - { - "epoch": 0.9185554974032771, - "grad_norm": 0.0007213133503682911, - "learning_rate": 0.0001999995850865231, - "loss": 46.0, - "step": 5704 - }, - { - "epoch": 0.9187165344820645, - "grad_norm": 0.0006415305542759597, - "learning_rate": 0.00019999958494077333, - "loss": 46.0, - "step": 5705 - }, - { - "epoch": 0.9188775715608519, - "grad_norm": 0.0004968970897607505, - "learning_rate": 0.00019999958479499796, - "loss": 46.0, - "step": 5706 - }, - { - "epoch": 0.9190386086396393, - "grad_norm": 0.0013987241545692086, - "learning_rate": 0.000199999584649197, - "loss": 46.0, - "step": 5707 - }, - { - "epoch": 0.9191996457184267, - "grad_norm": 0.0016386124771088362, - "learning_rate": 0.00019999958450337042, - "loss": 46.0, - "step": 5708 - }, - { - "epoch": 0.9193606827972141, - "grad_norm": 0.0006625541718676686, - "learning_rate": 0.00019999958435751826, - "loss": 46.0, - "step": 5709 - }, - { - "epoch": 0.9195217198760014, - "grad_norm": 0.0021546788047999144, - "learning_rate": 0.0001999995842116405, - "loss": 46.0, - "step": 5710 - }, - { - "epoch": 0.9196827569547888, - "grad_norm": 0.0012270796578377485, - "learning_rate": 0.00019999958406573715, - "loss": 46.0, - "step": 5711 - }, - { - "epoch": 0.9198437940335762, - "grad_norm": 0.0015551653923466802, - "learning_rate": 0.00019999958391980822, - "loss": 46.0, - "step": 5712 - }, - { - "epoch": 0.9200048311123636, - "grad_norm": 0.0005300075863488019, - "learning_rate": 0.0001999995837738537, - "loss": 46.0, - "step": 5713 - }, - { - "epoch": 0.920165868191151, - "grad_norm": 0.0013175175990909338, - "learning_rate": 0.00019999958362787354, - "loss": 46.0, - "step": 5714 - }, - { - "epoch": 0.9203269052699384, - "grad_norm": 0.0015574973076581955, - "learning_rate": 0.00019999958348186782, - "loss": 46.0, - "step": 5715 - }, - { - "epoch": 0.9204879423487258, - "grad_norm": 0.0014362430665642023, - "learning_rate": 0.00019999958333583647, - "loss": 46.0, - "step": 5716 - }, - { - "epoch": 0.9206489794275132, - "grad_norm": 0.001071386388503015, - "learning_rate": 0.00019999958318977958, - "loss": 46.0, - "step": 5717 - }, - { - "epoch": 0.9208100165063006, - "grad_norm": 0.0031662776600569487, - "learning_rate": 0.00019999958304369706, - "loss": 46.0, - "step": 5718 - }, - { - "epoch": 0.920971053585088, - "grad_norm": 0.002556638093665242, - "learning_rate": 0.00019999958289758897, - "loss": 46.0, - "step": 5719 - }, - { - "epoch": 0.9211320906638754, - "grad_norm": 0.0006898163119331002, - "learning_rate": 0.00019999958275145527, - "loss": 46.0, - "step": 5720 - }, - { - "epoch": 0.9212931277426627, - "grad_norm": 0.003467016154900193, - "learning_rate": 0.00019999958260529598, - "loss": 46.0, - "step": 5721 - }, - { - "epoch": 0.9214541648214502, - "grad_norm": 0.0011490893084555864, - "learning_rate": 0.00019999958245911107, - "loss": 46.0, - "step": 5722 - }, - { - "epoch": 0.9216152019002375, - "grad_norm": 0.001350061153061688, - "learning_rate": 0.00019999958231290058, - "loss": 46.0, - "step": 5723 - }, - { - "epoch": 0.9217762389790249, - "grad_norm": 0.002056166995316744, - "learning_rate": 0.0001999995821666645, - "loss": 46.0, - "step": 5724 - }, - { - "epoch": 0.9219372760578123, - "grad_norm": 0.004254150204360485, - "learning_rate": 0.00019999958202040284, - "loss": 46.0, - "step": 5725 - }, - { - "epoch": 0.9220983131365997, - "grad_norm": 0.0024846720043569803, - "learning_rate": 0.00019999958187411556, - "loss": 46.0, - "step": 5726 - }, - { - "epoch": 0.9222593502153871, - "grad_norm": 0.0014163716696202755, - "learning_rate": 0.0001999995817278027, - "loss": 46.0, - "step": 5727 - }, - { - "epoch": 0.9224203872941745, - "grad_norm": 0.0003627694386523217, - "learning_rate": 0.00019999958158146424, - "loss": 46.0, - "step": 5728 - }, - { - "epoch": 0.9225814243729619, - "grad_norm": 0.003675812855362892, - "learning_rate": 0.00019999958143510017, - "loss": 46.0, - "step": 5729 - }, - { - "epoch": 0.9227424614517493, - "grad_norm": 0.0007954856264404953, - "learning_rate": 0.00019999958128871055, - "loss": 46.0, - "step": 5730 - }, - { - "epoch": 0.9229034985305367, - "grad_norm": 0.0010724146850407124, - "learning_rate": 0.00019999958114229528, - "loss": 46.0, - "step": 5731 - }, - { - "epoch": 0.923064535609324, - "grad_norm": 0.0024460607673972845, - "learning_rate": 0.00019999958099585445, - "loss": 46.0, - "step": 5732 - }, - { - "epoch": 0.9232255726881115, - "grad_norm": 0.0005035282811149955, - "learning_rate": 0.000199999580849388, - "loss": 46.0, - "step": 5733 - }, - { - "epoch": 0.9233866097668988, - "grad_norm": 0.001239884877577424, - "learning_rate": 0.000199999580702896, - "loss": 46.0, - "step": 5734 - }, - { - "epoch": 0.9235476468456862, - "grad_norm": 0.004846284165978432, - "learning_rate": 0.00019999958055637836, - "loss": 46.0, - "step": 5735 - }, - { - "epoch": 0.9237086839244736, - "grad_norm": 0.0009243261883966625, - "learning_rate": 0.00019999958040983512, - "loss": 46.0, - "step": 5736 - }, - { - "epoch": 0.923869721003261, - "grad_norm": 0.0006513491389341652, - "learning_rate": 0.00019999958026326633, - "loss": 46.0, - "step": 5737 - }, - { - "epoch": 0.9240307580820484, - "grad_norm": 0.0007005998049862683, - "learning_rate": 0.00019999958011667192, - "loss": 46.0, - "step": 5738 - }, - { - "epoch": 0.9241917951608358, - "grad_norm": 0.0010139001533389091, - "learning_rate": 0.00019999957997005193, - "loss": 46.0, - "step": 5739 - }, - { - "epoch": 0.9243528322396232, - "grad_norm": 0.0007072404841892421, - "learning_rate": 0.00019999957982340632, - "loss": 46.0, - "step": 5740 - }, - { - "epoch": 0.9245138693184106, - "grad_norm": 0.0004549682780634612, - "learning_rate": 0.00019999957967673515, - "loss": 46.0, - "step": 5741 - }, - { - "epoch": 0.924674906397198, - "grad_norm": 0.00048193742986768484, - "learning_rate": 0.00019999957953003837, - "loss": 46.0, - "step": 5742 - }, - { - "epoch": 0.9248359434759853, - "grad_norm": 0.0022708552423864603, - "learning_rate": 0.00019999957938331598, - "loss": 46.0, - "step": 5743 - }, - { - "epoch": 0.9249969805547728, - "grad_norm": 0.0005292826099321246, - "learning_rate": 0.000199999579236568, - "loss": 46.0, - "step": 5744 - }, - { - "epoch": 0.9251580176335601, - "grad_norm": 0.0009936413262039423, - "learning_rate": 0.00019999957908979442, - "loss": 46.0, - "step": 5745 - }, - { - "epoch": 0.9253190547123475, - "grad_norm": 0.0023525094147771597, - "learning_rate": 0.00019999957894299526, - "loss": 46.0, - "step": 5746 - }, - { - "epoch": 0.9254800917911349, - "grad_norm": 0.0060401796363294125, - "learning_rate": 0.00019999957879617052, - "loss": 46.0, - "step": 5747 - }, - { - "epoch": 0.9256411288699223, - "grad_norm": 0.0020601237192749977, - "learning_rate": 0.00019999957864932016, - "loss": 46.0, - "step": 5748 - }, - { - "epoch": 0.9258021659487097, - "grad_norm": 0.0006590397679246962, - "learning_rate": 0.00019999957850244418, - "loss": 46.0, - "step": 5749 - }, - { - "epoch": 0.9259632030274971, - "grad_norm": 0.0006065714405849576, - "learning_rate": 0.00019999957835554265, - "loss": 46.0, - "step": 5750 - }, - { - "epoch": 0.9261242401062845, - "grad_norm": 0.000712477311026305, - "learning_rate": 0.00019999957820861553, - "loss": 46.0, - "step": 5751 - }, - { - "epoch": 0.9262852771850718, - "grad_norm": 0.00225071026943624, - "learning_rate": 0.00019999957806166277, - "loss": 46.0, - "step": 5752 - }, - { - "epoch": 0.9264463142638593, - "grad_norm": 0.00215413561090827, - "learning_rate": 0.00019999957791468444, - "loss": 46.0, - "step": 5753 - }, - { - "epoch": 0.9266073513426466, - "grad_norm": 0.0007556015043519437, - "learning_rate": 0.00019999957776768053, - "loss": 46.0, - "step": 5754 - }, - { - "epoch": 0.926768388421434, - "grad_norm": 0.002231378573924303, - "learning_rate": 0.000199999577620651, - "loss": 46.0, - "step": 5755 - }, - { - "epoch": 0.9269294255002214, - "grad_norm": 0.0008061740081757307, - "learning_rate": 0.00019999957747359587, - "loss": 46.0, - "step": 5756 - }, - { - "epoch": 0.9270904625790088, - "grad_norm": 0.00124307069927454, - "learning_rate": 0.00019999957732651517, - "loss": 46.0, - "step": 5757 - }, - { - "epoch": 0.9272514996577962, - "grad_norm": 0.0009388653561472893, - "learning_rate": 0.0001999995771794089, - "loss": 46.0, - "step": 5758 - }, - { - "epoch": 0.9274125367365836, - "grad_norm": 0.00048344730748794973, - "learning_rate": 0.000199999577032277, - "loss": 46.0, - "step": 5759 - }, - { - "epoch": 0.927573573815371, - "grad_norm": 0.0006193193839862943, - "learning_rate": 0.0001999995768851195, - "loss": 46.0, - "step": 5760 - }, - { - "epoch": 0.9277346108941584, - "grad_norm": 0.0007393594132736325, - "learning_rate": 0.0001999995767379364, - "loss": 46.0, - "step": 5761 - }, - { - "epoch": 0.9278956479729458, - "grad_norm": 0.0015328206354752183, - "learning_rate": 0.0001999995765907277, - "loss": 46.0, - "step": 5762 - }, - { - "epoch": 0.9280566850517331, - "grad_norm": 0.00216746237128973, - "learning_rate": 0.00019999957644349344, - "loss": 46.0, - "step": 5763 - }, - { - "epoch": 0.9282177221305206, - "grad_norm": 0.0024387789890170097, - "learning_rate": 0.00019999957629623357, - "loss": 46.0, - "step": 5764 - }, - { - "epoch": 0.9283787592093079, - "grad_norm": 0.00032075357739813626, - "learning_rate": 0.0001999995761489481, - "loss": 46.0, - "step": 5765 - }, - { - "epoch": 0.9285397962880954, - "grad_norm": 0.0038370888214558363, - "learning_rate": 0.00019999957600163706, - "loss": 46.0, - "step": 5766 - }, - { - "epoch": 0.9287008333668827, - "grad_norm": 0.002521386370062828, - "learning_rate": 0.00019999957585430038, - "loss": 46.0, - "step": 5767 - }, - { - "epoch": 0.9288618704456701, - "grad_norm": 0.0012193474685773253, - "learning_rate": 0.00019999957570693814, - "loss": 46.0, - "step": 5768 - }, - { - "epoch": 0.9290229075244575, - "grad_norm": 0.0020336946472525597, - "learning_rate": 0.0001999995755595503, - "loss": 46.0, - "step": 5769 - }, - { - "epoch": 0.9291839446032449, - "grad_norm": 0.0009782103588804603, - "learning_rate": 0.00019999957541213685, - "loss": 46.0, - "step": 5770 - }, - { - "epoch": 0.9293449816820323, - "grad_norm": 0.0008099670521914959, - "learning_rate": 0.00019999957526469782, - "loss": 46.0, - "step": 5771 - }, - { - "epoch": 0.9295060187608197, - "grad_norm": 0.0004197184753138572, - "learning_rate": 0.0001999995751172332, - "loss": 46.0, - "step": 5772 - }, - { - "epoch": 0.9296670558396071, - "grad_norm": 0.0006177496397867799, - "learning_rate": 0.00019999957496974298, - "loss": 46.0, - "step": 5773 - }, - { - "epoch": 0.9298280929183944, - "grad_norm": 0.0006528380909003317, - "learning_rate": 0.00019999957482222716, - "loss": 46.0, - "step": 5774 - }, - { - "epoch": 0.9299891299971819, - "grad_norm": 0.0010686871828511357, - "learning_rate": 0.00019999957467468576, - "loss": 46.0, - "step": 5775 - }, - { - "epoch": 0.9301501670759692, - "grad_norm": 0.0006162556237541139, - "learning_rate": 0.00019999957452711872, - "loss": 46.0, - "step": 5776 - }, - { - "epoch": 0.9303112041547567, - "grad_norm": 0.0004212743660900742, - "learning_rate": 0.0001999995743795261, - "loss": 46.0, - "step": 5777 - }, - { - "epoch": 0.930472241233544, - "grad_norm": 0.0004566374991554767, - "learning_rate": 0.00019999957423190795, - "loss": 46.0, - "step": 5778 - }, - { - "epoch": 0.9306332783123314, - "grad_norm": 0.000821933033876121, - "learning_rate": 0.00019999957408426414, - "loss": 46.0, - "step": 5779 - }, - { - "epoch": 0.9307943153911188, - "grad_norm": 0.0017378123011440039, - "learning_rate": 0.00019999957393659475, - "loss": 46.0, - "step": 5780 - }, - { - "epoch": 0.9309553524699062, - "grad_norm": 0.0002255280560348183, - "learning_rate": 0.00019999957378889977, - "loss": 46.0, - "step": 5781 - }, - { - "epoch": 0.9311163895486936, - "grad_norm": 0.005280837882310152, - "learning_rate": 0.00019999957364117918, - "loss": 46.0, - "step": 5782 - }, - { - "epoch": 0.931277426627481, - "grad_norm": 0.0015987369697540998, - "learning_rate": 0.00019999957349343302, - "loss": 46.0, - "step": 5783 - }, - { - "epoch": 0.9314384637062684, - "grad_norm": 0.002849208889529109, - "learning_rate": 0.00019999957334566125, - "loss": 46.0, - "step": 5784 - }, - { - "epoch": 0.9315995007850557, - "grad_norm": 0.0006868364871479571, - "learning_rate": 0.0001999995731978639, - "loss": 46.0, - "step": 5785 - }, - { - "epoch": 0.9317605378638432, - "grad_norm": 0.003278016345575452, - "learning_rate": 0.00019999957305004093, - "loss": 46.0, - "step": 5786 - }, - { - "epoch": 0.9319215749426305, - "grad_norm": 0.0007232369389384985, - "learning_rate": 0.0001999995729021924, - "loss": 46.0, - "step": 5787 - }, - { - "epoch": 0.932082612021418, - "grad_norm": 0.002182962140068412, - "learning_rate": 0.00019999957275431825, - "loss": 46.0, - "step": 5788 - }, - { - "epoch": 0.9322436491002053, - "grad_norm": 0.002265210961923003, - "learning_rate": 0.00019999957260641852, - "loss": 46.0, - "step": 5789 - }, - { - "epoch": 0.9324046861789927, - "grad_norm": 0.002262046094983816, - "learning_rate": 0.00019999957245849317, - "loss": 46.0, - "step": 5790 - }, - { - "epoch": 0.9325657232577801, - "grad_norm": 0.005041072610765696, - "learning_rate": 0.00019999957231054224, - "loss": 46.0, - "step": 5791 - }, - { - "epoch": 0.9327267603365675, - "grad_norm": 0.0015471072401851416, - "learning_rate": 0.00019999957216256572, - "loss": 46.0, - "step": 5792 - }, - { - "epoch": 0.9328877974153549, - "grad_norm": 0.0007371532265096903, - "learning_rate": 0.00019999957201456358, - "loss": 46.0, - "step": 5793 - }, - { - "epoch": 0.9330488344941423, - "grad_norm": 0.003574784379452467, - "learning_rate": 0.00019999957186653586, - "loss": 46.0, - "step": 5794 - }, - { - "epoch": 0.9332098715729297, - "grad_norm": 0.0003592149878386408, - "learning_rate": 0.00019999957171848255, - "loss": 46.0, - "step": 5795 - }, - { - "epoch": 0.933370908651717, - "grad_norm": 0.0006847637705504894, - "learning_rate": 0.00019999957157040369, - "loss": 46.0, - "step": 5796 - }, - { - "epoch": 0.9335319457305045, - "grad_norm": 0.0038793254643678665, - "learning_rate": 0.00019999957142229917, - "loss": 46.0, - "step": 5797 - }, - { - "epoch": 0.9336929828092918, - "grad_norm": 0.0005826210253871977, - "learning_rate": 0.00019999957127416908, - "loss": 46.0, - "step": 5798 - }, - { - "epoch": 0.9338540198880793, - "grad_norm": 0.001212799339555204, - "learning_rate": 0.00019999957112601337, - "loss": 46.0, - "step": 5799 - }, - { - "epoch": 0.9340150569668666, - "grad_norm": 0.0006134874420240521, - "learning_rate": 0.00019999957097783212, - "loss": 46.0, - "step": 5800 - }, - { - "epoch": 0.934176094045654, - "grad_norm": 0.0017032917821779847, - "learning_rate": 0.0001999995708296252, - "loss": 46.0, - "step": 5801 - }, - { - "epoch": 0.9343371311244414, - "grad_norm": 0.005153973586857319, - "learning_rate": 0.00019999957068139276, - "loss": 46.0, - "step": 5802 - }, - { - "epoch": 0.9344981682032288, - "grad_norm": 0.004429292399436235, - "learning_rate": 0.0001999995705331347, - "loss": 46.0, - "step": 5803 - }, - { - "epoch": 0.9346592052820162, - "grad_norm": 0.0010363658657297492, - "learning_rate": 0.00019999957038485105, - "loss": 46.0, - "step": 5804 - }, - { - "epoch": 0.9348202423608035, - "grad_norm": 0.0014482052065432072, - "learning_rate": 0.00019999957023654176, - "loss": 46.0, - "step": 5805 - }, - { - "epoch": 0.934981279439591, - "grad_norm": 0.008390652947127819, - "learning_rate": 0.00019999957008820691, - "loss": 46.0, - "step": 5806 - }, - { - "epoch": 0.9351423165183783, - "grad_norm": 0.00042950219358317554, - "learning_rate": 0.00019999956993984648, - "loss": 46.0, - "step": 5807 - }, - { - "epoch": 0.9353033535971658, - "grad_norm": 0.0008071645861491561, - "learning_rate": 0.00019999956979146043, - "loss": 46.0, - "step": 5808 - }, - { - "epoch": 0.9354643906759531, - "grad_norm": 0.0005485149449668825, - "learning_rate": 0.0001999995696430488, - "loss": 46.0, - "step": 5809 - }, - { - "epoch": 0.9356254277547406, - "grad_norm": 0.0007950183353386819, - "learning_rate": 0.00019999956949461156, - "loss": 46.0, - "step": 5810 - }, - { - "epoch": 0.9357864648335279, - "grad_norm": 0.0004549167933873832, - "learning_rate": 0.00019999956934614875, - "loss": 46.0, - "step": 5811 - }, - { - "epoch": 0.9359475019123153, - "grad_norm": 0.0013707609614357352, - "learning_rate": 0.00019999956919766033, - "loss": 46.0, - "step": 5812 - }, - { - "epoch": 0.9361085389911027, - "grad_norm": 0.0006515044951811433, - "learning_rate": 0.00019999956904914629, - "loss": 46.0, - "step": 5813 - }, - { - "epoch": 0.9362695760698901, - "grad_norm": 0.002092970535159111, - "learning_rate": 0.0001999995689006067, - "loss": 46.0, - "step": 5814 - }, - { - "epoch": 0.9364306131486775, - "grad_norm": 0.0017171568470075727, - "learning_rate": 0.00019999956875204152, - "loss": 46.0, - "step": 5815 - }, - { - "epoch": 0.9365916502274648, - "grad_norm": 0.0009057503193616867, - "learning_rate": 0.0001999995686034507, - "loss": 46.0, - "step": 5816 - }, - { - "epoch": 0.9367526873062523, - "grad_norm": 0.0009272541501559317, - "learning_rate": 0.0001999995684548343, - "loss": 46.0, - "step": 5817 - }, - { - "epoch": 0.9369137243850396, - "grad_norm": 0.001422026427462697, - "learning_rate": 0.00019999956830619233, - "loss": 46.0, - "step": 5818 - }, - { - "epoch": 0.9370747614638271, - "grad_norm": 0.0013472591526806355, - "learning_rate": 0.0001999995681575247, - "loss": 46.0, - "step": 5819 - }, - { - "epoch": 0.9372357985426144, - "grad_norm": 0.0024569458328187466, - "learning_rate": 0.00019999956800883156, - "loss": 46.0, - "step": 5820 - }, - { - "epoch": 0.9373968356214019, - "grad_norm": 0.0018863404402509332, - "learning_rate": 0.00019999956786011277, - "loss": 46.0, - "step": 5821 - }, - { - "epoch": 0.9375578727001892, - "grad_norm": 0.0006114105344749987, - "learning_rate": 0.00019999956771136841, - "loss": 46.0, - "step": 5822 - }, - { - "epoch": 0.9377189097789767, - "grad_norm": 0.0005984083982184529, - "learning_rate": 0.00019999956756259845, - "loss": 46.0, - "step": 5823 - }, - { - "epoch": 0.937879946857764, - "grad_norm": 0.0016396831488236785, - "learning_rate": 0.0001999995674138029, - "loss": 46.0, - "step": 5824 - }, - { - "epoch": 0.9380409839365514, - "grad_norm": 0.0022460962645709515, - "learning_rate": 0.00019999956726498175, - "loss": 46.0, - "step": 5825 - }, - { - "epoch": 0.9382020210153388, - "grad_norm": 0.001208878238685429, - "learning_rate": 0.000199999567116135, - "loss": 46.0, - "step": 5826 - }, - { - "epoch": 0.9383630580941261, - "grad_norm": 0.00513978349044919, - "learning_rate": 0.00019999956696726266, - "loss": 46.0, - "step": 5827 - }, - { - "epoch": 0.9385240951729136, - "grad_norm": 0.001671300269663334, - "learning_rate": 0.00019999956681836473, - "loss": 46.0, - "step": 5828 - }, - { - "epoch": 0.9386851322517009, - "grad_norm": 0.0008806994301266968, - "learning_rate": 0.0001999995666694412, - "loss": 46.0, - "step": 5829 - }, - { - "epoch": 0.9388461693304884, - "grad_norm": 0.0012135577853769064, - "learning_rate": 0.00019999956652049208, - "loss": 46.0, - "step": 5830 - }, - { - "epoch": 0.9390072064092757, - "grad_norm": 0.006969158537685871, - "learning_rate": 0.00019999956637151733, - "loss": 46.0, - "step": 5831 - }, - { - "epoch": 0.9391682434880632, - "grad_norm": 0.0003466187044978142, - "learning_rate": 0.00019999956622251703, - "loss": 46.0, - "step": 5832 - }, - { - "epoch": 0.9393292805668505, - "grad_norm": 0.0009236810728907585, - "learning_rate": 0.00019999956607349114, - "loss": 46.0, - "step": 5833 - }, - { - "epoch": 0.939490317645638, - "grad_norm": 0.0006928643560968339, - "learning_rate": 0.00019999956592443963, - "loss": 46.0, - "step": 5834 - }, - { - "epoch": 0.9396513547244253, - "grad_norm": 0.0008363567176274955, - "learning_rate": 0.0001999995657753625, - "loss": 46.0, - "step": 5835 - }, - { - "epoch": 0.9398123918032127, - "grad_norm": 0.004356647375971079, - "learning_rate": 0.00019999956562625983, - "loss": 46.0, - "step": 5836 - }, - { - "epoch": 0.9399734288820001, - "grad_norm": 0.0008948463364504278, - "learning_rate": 0.00019999956547713153, - "loss": 46.0, - "step": 5837 - }, - { - "epoch": 0.9401344659607874, - "grad_norm": 0.0028034490533173084, - "learning_rate": 0.00019999956532797765, - "loss": 46.0, - "step": 5838 - }, - { - "epoch": 0.9402955030395749, - "grad_norm": 0.004898726940155029, - "learning_rate": 0.00019999956517879818, - "loss": 46.0, - "step": 5839 - }, - { - "epoch": 0.9404565401183622, - "grad_norm": 0.0006895270198583603, - "learning_rate": 0.00019999956502959312, - "loss": 46.0, - "step": 5840 - }, - { - "epoch": 0.9406175771971497, - "grad_norm": 0.001562860794365406, - "learning_rate": 0.00019999956488036245, - "loss": 46.0, - "step": 5841 - }, - { - "epoch": 0.940778614275937, - "grad_norm": 0.000748677586670965, - "learning_rate": 0.00019999956473110616, - "loss": 46.0, - "step": 5842 - }, - { - "epoch": 0.9409396513547245, - "grad_norm": 0.0008708340465091169, - "learning_rate": 0.00019999956458182432, - "loss": 46.0, - "step": 5843 - }, - { - "epoch": 0.9411006884335118, - "grad_norm": 0.00749856187030673, - "learning_rate": 0.00019999956443251686, - "loss": 46.0, - "step": 5844 - }, - { - "epoch": 0.9412617255122993, - "grad_norm": 0.0004217370005790144, - "learning_rate": 0.00019999956428318384, - "loss": 46.0, - "step": 5845 - }, - { - "epoch": 0.9414227625910866, - "grad_norm": 0.0010512169683352113, - "learning_rate": 0.00019999956413382518, - "loss": 46.0, - "step": 5846 - }, - { - "epoch": 0.9415837996698739, - "grad_norm": 0.0006663579843007028, - "learning_rate": 0.00019999956398444093, - "loss": 46.0, - "step": 5847 - }, - { - "epoch": 0.9417448367486614, - "grad_norm": 0.0011730447877198458, - "learning_rate": 0.00019999956383503112, - "loss": 46.0, - "step": 5848 - }, - { - "epoch": 0.9419058738274487, - "grad_norm": 0.00117689638864249, - "learning_rate": 0.00019999956368559567, - "loss": 46.0, - "step": 5849 - }, - { - "epoch": 0.9420669109062362, - "grad_norm": 0.0011857007630169392, - "learning_rate": 0.00019999956353613466, - "loss": 46.0, - "step": 5850 - }, - { - "epoch": 0.9422279479850235, - "grad_norm": 0.002133960835635662, - "learning_rate": 0.00019999956338664804, - "loss": 46.0, - "step": 5851 - }, - { - "epoch": 0.942388985063811, - "grad_norm": 0.002602097112685442, - "learning_rate": 0.00019999956323713583, - "loss": 46.0, - "step": 5852 - }, - { - "epoch": 0.9425500221425983, - "grad_norm": 0.0007457935716956854, - "learning_rate": 0.00019999956308759803, - "loss": 46.0, - "step": 5853 - }, - { - "epoch": 0.9427110592213858, - "grad_norm": 0.0006414727540686727, - "learning_rate": 0.00019999956293803464, - "loss": 46.0, - "step": 5854 - }, - { - "epoch": 0.9428720963001731, - "grad_norm": 0.0017223377944901586, - "learning_rate": 0.00019999956278844561, - "loss": 46.0, - "step": 5855 - }, - { - "epoch": 0.9430331333789606, - "grad_norm": 0.0011650595115497708, - "learning_rate": 0.00019999956263883103, - "loss": 46.0, - "step": 5856 - }, - { - "epoch": 0.9431941704577479, - "grad_norm": 0.0007957100169733167, - "learning_rate": 0.00019999956248919088, - "loss": 46.0, - "step": 5857 - }, - { - "epoch": 0.9433552075365352, - "grad_norm": 0.0018951845122501254, - "learning_rate": 0.0001999995623395251, - "loss": 46.0, - "step": 5858 - }, - { - "epoch": 0.9435162446153227, - "grad_norm": 0.0031983281951397657, - "learning_rate": 0.00019999956218983372, - "loss": 46.0, - "step": 5859 - }, - { - "epoch": 0.94367728169411, - "grad_norm": 0.0008174518588930368, - "learning_rate": 0.00019999956204011675, - "loss": 46.0, - "step": 5860 - }, - { - "epoch": 0.9438383187728975, - "grad_norm": 0.0013758268905803561, - "learning_rate": 0.00019999956189037417, - "loss": 46.0, - "step": 5861 - }, - { - "epoch": 0.9439993558516848, - "grad_norm": 0.0009675536421127617, - "learning_rate": 0.00019999956174060604, - "loss": 46.0, - "step": 5862 - }, - { - "epoch": 0.9441603929304723, - "grad_norm": 0.0029827342368662357, - "learning_rate": 0.00019999956159081229, - "loss": 46.0, - "step": 5863 - }, - { - "epoch": 0.9443214300092596, - "grad_norm": 0.0030735281761735678, - "learning_rate": 0.00019999956144099295, - "loss": 46.0, - "step": 5864 - }, - { - "epoch": 0.9444824670880471, - "grad_norm": 0.0005248349043540657, - "learning_rate": 0.000199999561291148, - "loss": 46.0, - "step": 5865 - }, - { - "epoch": 0.9446435041668344, - "grad_norm": 0.001475742319598794, - "learning_rate": 0.00019999956114127745, - "loss": 46.0, - "step": 5866 - }, - { - "epoch": 0.9448045412456219, - "grad_norm": 0.0005304484511725605, - "learning_rate": 0.0001999995609913813, - "loss": 46.0, - "step": 5867 - }, - { - "epoch": 0.9449655783244092, - "grad_norm": 0.001460114843212068, - "learning_rate": 0.00019999956084145958, - "loss": 46.0, - "step": 5868 - }, - { - "epoch": 0.9451266154031965, - "grad_norm": 0.004002466797828674, - "learning_rate": 0.00019999956069151228, - "loss": 46.0, - "step": 5869 - }, - { - "epoch": 0.945287652481984, - "grad_norm": 0.0030936822295188904, - "learning_rate": 0.00019999956054153937, - "loss": 46.0, - "step": 5870 - }, - { - "epoch": 0.9454486895607713, - "grad_norm": 0.0013588797301054, - "learning_rate": 0.00019999956039154086, - "loss": 46.0, - "step": 5871 - }, - { - "epoch": 0.9456097266395588, - "grad_norm": 0.0015367834130302072, - "learning_rate": 0.00019999956024151675, - "loss": 46.0, - "step": 5872 - }, - { - "epoch": 0.9457707637183461, - "grad_norm": 0.0035890305880457163, - "learning_rate": 0.00019999956009146704, - "loss": 46.0, - "step": 5873 - }, - { - "epoch": 0.9459318007971336, - "grad_norm": 0.0009281002567149699, - "learning_rate": 0.00019999955994139175, - "loss": 46.0, - "step": 5874 - }, - { - "epoch": 0.9460928378759209, - "grad_norm": 0.0007293022354133427, - "learning_rate": 0.00019999955979129087, - "loss": 46.0, - "step": 5875 - }, - { - "epoch": 0.9462538749547084, - "grad_norm": 0.0013419538736343384, - "learning_rate": 0.0001999995596411644, - "loss": 46.0, - "step": 5876 - }, - { - "epoch": 0.9464149120334957, - "grad_norm": 0.007017700932919979, - "learning_rate": 0.0001999995594910123, - "loss": 46.0, - "step": 5877 - }, - { - "epoch": 0.9465759491122832, - "grad_norm": 0.0006878911517560482, - "learning_rate": 0.00019999955934083463, - "loss": 46.0, - "step": 5878 - }, - { - "epoch": 0.9467369861910705, - "grad_norm": 0.0011896201176568866, - "learning_rate": 0.00019999955919063135, - "loss": 46.0, - "step": 5879 - }, - { - "epoch": 0.9468980232698578, - "grad_norm": 0.0009057950810529292, - "learning_rate": 0.0001999995590404025, - "loss": 46.0, - "step": 5880 - }, - { - "epoch": 0.9470590603486453, - "grad_norm": 0.0023123540449887514, - "learning_rate": 0.00019999955889014802, - "loss": 46.0, - "step": 5881 - }, - { - "epoch": 0.9472200974274326, - "grad_norm": 0.0022262483835220337, - "learning_rate": 0.00019999955873986798, - "loss": 46.0, - "step": 5882 - }, - { - "epoch": 0.9473811345062201, - "grad_norm": 0.002448925981298089, - "learning_rate": 0.00019999955858956235, - "loss": 46.0, - "step": 5883 - }, - { - "epoch": 0.9475421715850074, - "grad_norm": 0.0006434541428461671, - "learning_rate": 0.00019999955843923108, - "loss": 46.0, - "step": 5884 - }, - { - "epoch": 0.9477032086637949, - "grad_norm": 0.0012345166178420186, - "learning_rate": 0.00019999955828887424, - "loss": 46.0, - "step": 5885 - }, - { - "epoch": 0.9478642457425822, - "grad_norm": 0.006125054322183132, - "learning_rate": 0.00019999955813849183, - "loss": 46.0, - "step": 5886 - }, - { - "epoch": 0.9480252828213697, - "grad_norm": 0.001413311343640089, - "learning_rate": 0.0001999995579880838, - "loss": 46.0, - "step": 5887 - }, - { - "epoch": 0.948186319900157, - "grad_norm": 0.0014180360594764352, - "learning_rate": 0.00019999955783765017, - "loss": 46.0, - "step": 5888 - }, - { - "epoch": 0.9483473569789445, - "grad_norm": 0.0007942916126921773, - "learning_rate": 0.00019999955768719096, - "loss": 46.0, - "step": 5889 - }, - { - "epoch": 0.9485083940577318, - "grad_norm": 0.003239106386899948, - "learning_rate": 0.00019999955753670614, - "loss": 46.0, - "step": 5890 - }, - { - "epoch": 0.9486694311365191, - "grad_norm": 0.0017419438809156418, - "learning_rate": 0.00019999955738619573, - "loss": 46.0, - "step": 5891 - }, - { - "epoch": 0.9488304682153066, - "grad_norm": 0.00036202429328113794, - "learning_rate": 0.00019999955723565976, - "loss": 46.0, - "step": 5892 - }, - { - "epoch": 0.9489915052940939, - "grad_norm": 0.0016309432685375214, - "learning_rate": 0.00019999955708509815, - "loss": 46.0, - "step": 5893 - }, - { - "epoch": 0.9491525423728814, - "grad_norm": 0.0016795072006061673, - "learning_rate": 0.00019999955693451095, - "loss": 46.0, - "step": 5894 - }, - { - "epoch": 0.9493135794516687, - "grad_norm": 0.0009731344762258232, - "learning_rate": 0.0001999995567838982, - "loss": 46.0, - "step": 5895 - }, - { - "epoch": 0.9494746165304562, - "grad_norm": 0.0026060317177325487, - "learning_rate": 0.0001999995566332598, - "loss": 46.0, - "step": 5896 - }, - { - "epoch": 0.9496356536092435, - "grad_norm": 0.0031692541670054197, - "learning_rate": 0.00019999955648259584, - "loss": 46.0, - "step": 5897 - }, - { - "epoch": 0.949796690688031, - "grad_norm": 0.0012209743726998568, - "learning_rate": 0.00019999955633190626, - "loss": 46.0, - "step": 5898 - }, - { - "epoch": 0.9499577277668183, - "grad_norm": 0.0004304127360228449, - "learning_rate": 0.00019999955618119108, - "loss": 46.0, - "step": 5899 - }, - { - "epoch": 0.9501187648456056, - "grad_norm": 0.0005454396014101803, - "learning_rate": 0.00019999955603045033, - "loss": 46.0, - "step": 5900 - }, - { - "epoch": 0.9502798019243931, - "grad_norm": 0.001358877052552998, - "learning_rate": 0.00019999955587968397, - "loss": 46.0, - "step": 5901 - }, - { - "epoch": 0.9504408390031804, - "grad_norm": 0.0015809446340426803, - "learning_rate": 0.00019999955572889202, - "loss": 46.0, - "step": 5902 - }, - { - "epoch": 0.9506018760819679, - "grad_norm": 0.0011048831511288881, - "learning_rate": 0.00019999955557807448, - "loss": 46.0, - "step": 5903 - }, - { - "epoch": 0.9507629131607552, - "grad_norm": 0.0015819414984434843, - "learning_rate": 0.00019999955542723136, - "loss": 46.0, - "step": 5904 - }, - { - "epoch": 0.9509239502395427, - "grad_norm": 0.0007353777764365077, - "learning_rate": 0.00019999955527636262, - "loss": 46.0, - "step": 5905 - }, - { - "epoch": 0.95108498731833, - "grad_norm": 0.00422245915979147, - "learning_rate": 0.00019999955512546832, - "loss": 46.0, - "step": 5906 - }, - { - "epoch": 0.9512460243971175, - "grad_norm": 0.0010974772740155458, - "learning_rate": 0.00019999955497454835, - "loss": 46.0, - "step": 5907 - }, - { - "epoch": 0.9514070614759048, - "grad_norm": 0.0035644201561808586, - "learning_rate": 0.00019999955482360285, - "loss": 46.0, - "step": 5908 - }, - { - "epoch": 0.9515680985546923, - "grad_norm": 0.0015335929347202182, - "learning_rate": 0.00019999955467263174, - "loss": 46.0, - "step": 5909 - }, - { - "epoch": 0.9517291356334796, - "grad_norm": 0.0008414181647822261, - "learning_rate": 0.00019999955452163504, - "loss": 46.0, - "step": 5910 - }, - { - "epoch": 0.951890172712267, - "grad_norm": 0.003015967318788171, - "learning_rate": 0.00019999955437061275, - "loss": 46.0, - "step": 5911 - }, - { - "epoch": 0.9520512097910544, - "grad_norm": 0.0017776868771761656, - "learning_rate": 0.00019999955421956485, - "loss": 46.0, - "step": 5912 - }, - { - "epoch": 0.9522122468698417, - "grad_norm": 0.0005025176797062159, - "learning_rate": 0.00019999955406849136, - "loss": 46.0, - "step": 5913 - }, - { - "epoch": 0.9523732839486292, - "grad_norm": 0.0004937931662425399, - "learning_rate": 0.00019999955391739228, - "loss": 46.0, - "step": 5914 - }, - { - "epoch": 0.9525343210274165, - "grad_norm": 0.00099847256205976, - "learning_rate": 0.0001999995537662676, - "loss": 46.0, - "step": 5915 - }, - { - "epoch": 0.952695358106204, - "grad_norm": 0.0013106995029374957, - "learning_rate": 0.0001999995536151173, - "loss": 46.0, - "step": 5916 - }, - { - "epoch": 0.9528563951849913, - "grad_norm": 0.002080469625070691, - "learning_rate": 0.00019999955346394145, - "loss": 46.0, - "step": 5917 - }, - { - "epoch": 0.9530174322637788, - "grad_norm": 0.0019621686078608036, - "learning_rate": 0.00019999955331274, - "loss": 46.0, - "step": 5918 - }, - { - "epoch": 0.9531784693425661, - "grad_norm": 0.0005493960343301296, - "learning_rate": 0.0001999995531615129, - "loss": 46.0, - "step": 5919 - }, - { - "epoch": 0.9533395064213536, - "grad_norm": 0.002767246449366212, - "learning_rate": 0.00019999955301026025, - "loss": 46.0, - "step": 5920 - }, - { - "epoch": 0.9535005435001409, - "grad_norm": 0.0032995629590004683, - "learning_rate": 0.00019999955285898203, - "loss": 46.0, - "step": 5921 - }, - { - "epoch": 0.9536615805789282, - "grad_norm": 0.0009536586003378034, - "learning_rate": 0.00019999955270767815, - "loss": 46.0, - "step": 5922 - }, - { - "epoch": 0.9538226176577157, - "grad_norm": 0.0013269956689327955, - "learning_rate": 0.00019999955255634873, - "loss": 46.0, - "step": 5923 - }, - { - "epoch": 0.953983654736503, - "grad_norm": 0.0009970528772100806, - "learning_rate": 0.0001999995524049937, - "loss": 46.0, - "step": 5924 - }, - { - "epoch": 0.9541446918152905, - "grad_norm": 0.0005479459068737924, - "learning_rate": 0.00019999955225361306, - "loss": 46.0, - "step": 5925 - }, - { - "epoch": 0.9543057288940778, - "grad_norm": 0.0019050290575250983, - "learning_rate": 0.00019999955210220686, - "loss": 46.0, - "step": 5926 - }, - { - "epoch": 0.9544667659728653, - "grad_norm": 0.00706638814881444, - "learning_rate": 0.00019999955195077504, - "loss": 46.0, - "step": 5927 - }, - { - "epoch": 0.9546278030516526, - "grad_norm": 0.0009241214138455689, - "learning_rate": 0.00019999955179931763, - "loss": 46.0, - "step": 5928 - }, - { - "epoch": 0.9547888401304401, - "grad_norm": 0.0005663994234055281, - "learning_rate": 0.0001999995516478346, - "loss": 46.0, - "step": 5929 - }, - { - "epoch": 0.9549498772092274, - "grad_norm": 0.0011111380299553275, - "learning_rate": 0.00019999955149632603, - "loss": 46.0, - "step": 5930 - }, - { - "epoch": 0.9551109142880149, - "grad_norm": 0.0005237525911070406, - "learning_rate": 0.00019999955134479184, - "loss": 46.0, - "step": 5931 - }, - { - "epoch": 0.9552719513668022, - "grad_norm": 0.001158655621111393, - "learning_rate": 0.00019999955119323203, - "loss": 46.0, - "step": 5932 - }, - { - "epoch": 0.9554329884455895, - "grad_norm": 0.0019604817498475313, - "learning_rate": 0.00019999955104164666, - "loss": 46.0, - "step": 5933 - }, - { - "epoch": 0.955594025524377, - "grad_norm": 0.0011811573058366776, - "learning_rate": 0.00019999955089003568, - "loss": 46.0, - "step": 5934 - }, - { - "epoch": 0.9557550626031643, - "grad_norm": 0.0034880167804658413, - "learning_rate": 0.0001999995507383991, - "loss": 46.0, - "step": 5935 - }, - { - "epoch": 0.9559160996819518, - "grad_norm": 0.0013889665715396404, - "learning_rate": 0.00019999955058673692, - "loss": 46.0, - "step": 5936 - }, - { - "epoch": 0.9560771367607391, - "grad_norm": 0.0012758998200297356, - "learning_rate": 0.00019999955043504918, - "loss": 46.0, - "step": 5937 - }, - { - "epoch": 0.9562381738395266, - "grad_norm": 0.0007953454623930156, - "learning_rate": 0.0001999995502833358, - "loss": 46.0, - "step": 5938 - }, - { - "epoch": 0.9563992109183139, - "grad_norm": 0.001341340015642345, - "learning_rate": 0.00019999955013159685, - "loss": 46.0, - "step": 5939 - }, - { - "epoch": 0.9565602479971014, - "grad_norm": 0.000832945283036679, - "learning_rate": 0.0001999995499798323, - "loss": 46.0, - "step": 5940 - }, - { - "epoch": 0.9567212850758887, - "grad_norm": 0.00111189647577703, - "learning_rate": 0.00019999954982804217, - "loss": 46.0, - "step": 5941 - }, - { - "epoch": 0.9568823221546762, - "grad_norm": 0.001660353154875338, - "learning_rate": 0.00019999954967622643, - "loss": 46.0, - "step": 5942 - }, - { - "epoch": 0.9570433592334635, - "grad_norm": 0.0005583629244938493, - "learning_rate": 0.00019999954952438508, - "loss": 46.0, - "step": 5943 - }, - { - "epoch": 0.9572043963122508, - "grad_norm": 0.0011081737466156483, - "learning_rate": 0.00019999954937251815, - "loss": 46.0, - "step": 5944 - }, - { - "epoch": 0.9573654333910383, - "grad_norm": 0.0011991882929578424, - "learning_rate": 0.00019999954922062565, - "loss": 46.0, - "step": 5945 - }, - { - "epoch": 0.9575264704698256, - "grad_norm": 0.005034707952290773, - "learning_rate": 0.00019999954906870751, - "loss": 46.0, - "step": 5946 - }, - { - "epoch": 0.9576875075486131, - "grad_norm": 0.00042172239045612514, - "learning_rate": 0.00019999954891676382, - "loss": 46.0, - "step": 5947 - }, - { - "epoch": 0.9578485446274004, - "grad_norm": 0.0019035013392567635, - "learning_rate": 0.0001999995487647945, - "loss": 46.0, - "step": 5948 - }, - { - "epoch": 0.9580095817061879, - "grad_norm": 0.0025095639284700155, - "learning_rate": 0.0001999995486127996, - "loss": 46.0, - "step": 5949 - }, - { - "epoch": 0.9581706187849752, - "grad_norm": 0.0015437612310051918, - "learning_rate": 0.00019999954846077912, - "loss": 46.0, - "step": 5950 - }, - { - "epoch": 0.9583316558637627, - "grad_norm": 0.002686878899112344, - "learning_rate": 0.00019999954830873302, - "loss": 46.0, - "step": 5951 - }, - { - "epoch": 0.95849269294255, - "grad_norm": 0.001571105094626546, - "learning_rate": 0.00019999954815666133, - "loss": 46.0, - "step": 5952 - }, - { - "epoch": 0.9586537300213374, - "grad_norm": 0.0015382863348349929, - "learning_rate": 0.00019999954800456403, - "loss": 46.0, - "step": 5953 - }, - { - "epoch": 0.9588147671001248, - "grad_norm": 0.0010662860004231334, - "learning_rate": 0.0001999995478524412, - "loss": 46.0, - "step": 5954 - }, - { - "epoch": 0.9589758041789122, - "grad_norm": 0.0037217061035335064, - "learning_rate": 0.00019999954770029272, - "loss": 46.0, - "step": 5955 - }, - { - "epoch": 0.9591368412576996, - "grad_norm": 0.0003007417544722557, - "learning_rate": 0.00019999954754811866, - "loss": 46.0, - "step": 5956 - }, - { - "epoch": 0.9592978783364869, - "grad_norm": 0.00027555067208595574, - "learning_rate": 0.000199999547395919, - "loss": 46.0, - "step": 5957 - }, - { - "epoch": 0.9594589154152744, - "grad_norm": 0.0029099839739501476, - "learning_rate": 0.00019999954724369374, - "loss": 46.0, - "step": 5958 - }, - { - "epoch": 0.9596199524940617, - "grad_norm": 0.0012330202152952552, - "learning_rate": 0.0001999995470914429, - "loss": 46.0, - "step": 5959 - }, - { - "epoch": 0.9597809895728492, - "grad_norm": 0.0026096138171851635, - "learning_rate": 0.00019999954693916645, - "loss": 46.0, - "step": 5960 - }, - { - "epoch": 0.9599420266516365, - "grad_norm": 0.0009886746993288398, - "learning_rate": 0.00019999954678686442, - "loss": 46.0, - "step": 5961 - }, - { - "epoch": 0.960103063730424, - "grad_norm": 0.0012607522075995803, - "learning_rate": 0.00019999954663453678, - "loss": 46.0, - "step": 5962 - }, - { - "epoch": 0.9602641008092113, - "grad_norm": 0.0012485279003158212, - "learning_rate": 0.00019999954648218356, - "loss": 46.0, - "step": 5963 - }, - { - "epoch": 0.9604251378879987, - "grad_norm": 0.0006192713626660407, - "learning_rate": 0.00019999954632980474, - "loss": 46.0, - "step": 5964 - }, - { - "epoch": 0.9605861749667861, - "grad_norm": 0.0023051968310028315, - "learning_rate": 0.00019999954617740028, - "loss": 46.0, - "step": 5965 - }, - { - "epoch": 0.9607472120455735, - "grad_norm": 0.0008143573650158942, - "learning_rate": 0.0001999995460249703, - "loss": 46.0, - "step": 5966 - }, - { - "epoch": 0.9609082491243609, - "grad_norm": 0.0017534703947603703, - "learning_rate": 0.0001999995458725147, - "loss": 46.0, - "step": 5967 - }, - { - "epoch": 0.9610692862031482, - "grad_norm": 0.01042917650192976, - "learning_rate": 0.00019999954572003347, - "loss": 46.0, - "step": 5968 - }, - { - "epoch": 0.9612303232819357, - "grad_norm": 0.0029092624317854643, - "learning_rate": 0.00019999954556752667, - "loss": 46.0, - "step": 5969 - }, - { - "epoch": 0.961391360360723, - "grad_norm": 0.0017549536423757672, - "learning_rate": 0.00019999954541499428, - "loss": 46.0, - "step": 5970 - }, - { - "epoch": 0.9615523974395105, - "grad_norm": 0.0007472428842447698, - "learning_rate": 0.00019999954526243632, - "loss": 46.0, - "step": 5971 - }, - { - "epoch": 0.9617134345182978, - "grad_norm": 0.0005359402857720852, - "learning_rate": 0.00019999954510985273, - "loss": 46.0, - "step": 5972 - }, - { - "epoch": 0.9618744715970853, - "grad_norm": 0.0010029306868091226, - "learning_rate": 0.00019999954495724355, - "loss": 46.0, - "step": 5973 - }, - { - "epoch": 0.9620355086758726, - "grad_norm": 0.0012263440294191241, - "learning_rate": 0.00019999954480460878, - "loss": 46.0, - "step": 5974 - }, - { - "epoch": 0.96219654575466, - "grad_norm": 0.0028613756876438856, - "learning_rate": 0.0001999995446519484, - "loss": 46.0, - "step": 5975 - }, - { - "epoch": 0.9623575828334474, - "grad_norm": 0.002050126902759075, - "learning_rate": 0.00019999954449926246, - "loss": 46.0, - "step": 5976 - }, - { - "epoch": 0.9625186199122348, - "grad_norm": 0.0005310737178660929, - "learning_rate": 0.0001999995443465509, - "loss": 46.0, - "step": 5977 - }, - { - "epoch": 0.9626796569910222, - "grad_norm": 0.0033825808204710484, - "learning_rate": 0.00019999954419381376, - "loss": 46.0, - "step": 5978 - }, - { - "epoch": 0.9628406940698095, - "grad_norm": 0.001322429277934134, - "learning_rate": 0.000199999544041051, - "loss": 46.0, - "step": 5979 - }, - { - "epoch": 0.963001731148597, - "grad_norm": 0.0008859616937115788, - "learning_rate": 0.00019999954388826268, - "loss": 46.0, - "step": 5980 - }, - { - "epoch": 0.9631627682273843, - "grad_norm": 0.0009631182183511555, - "learning_rate": 0.00019999954373544872, - "loss": 46.0, - "step": 5981 - }, - { - "epoch": 0.9633238053061718, - "grad_norm": 0.001079257926903665, - "learning_rate": 0.0001999995435826092, - "loss": 46.0, - "step": 5982 - }, - { - "epoch": 0.9634848423849591, - "grad_norm": 0.000829666038043797, - "learning_rate": 0.0001999995434297441, - "loss": 46.0, - "step": 5983 - }, - { - "epoch": 0.9636458794637466, - "grad_norm": 0.002197215799242258, - "learning_rate": 0.00019999954327685338, - "loss": 46.0, - "step": 5984 - }, - { - "epoch": 0.9638069165425339, - "grad_norm": 0.00848348904401064, - "learning_rate": 0.00019999954312393707, - "loss": 46.0, - "step": 5985 - }, - { - "epoch": 0.9639679536213213, - "grad_norm": 0.0003334719513077289, - "learning_rate": 0.00019999954297099514, - "loss": 46.0, - "step": 5986 - }, - { - "epoch": 0.9641289907001087, - "grad_norm": 0.0023209962528198957, - "learning_rate": 0.00019999954281802764, - "loss": 46.0, - "step": 5987 - }, - { - "epoch": 0.964290027778896, - "grad_norm": 0.0005076867528259754, - "learning_rate": 0.00019999954266503457, - "loss": 46.0, - "step": 5988 - }, - { - "epoch": 0.9644510648576835, - "grad_norm": 0.0031284557189792395, - "learning_rate": 0.00019999954251201585, - "loss": 46.0, - "step": 5989 - }, - { - "epoch": 0.9646121019364708, - "grad_norm": 0.0003518647572491318, - "learning_rate": 0.00019999954235897156, - "loss": 46.0, - "step": 5990 - }, - { - "epoch": 0.9647731390152583, - "grad_norm": 0.0013615258503705263, - "learning_rate": 0.0001999995422059017, - "loss": 46.0, - "step": 5991 - }, - { - "epoch": 0.9649341760940456, - "grad_norm": 0.0015308080473914742, - "learning_rate": 0.00019999954205280623, - "loss": 46.0, - "step": 5992 - }, - { - "epoch": 0.9650952131728331, - "grad_norm": 0.0005138955893926322, - "learning_rate": 0.00019999954189968514, - "loss": 46.0, - "step": 5993 - }, - { - "epoch": 0.9652562502516204, - "grad_norm": 0.0006996492738835514, - "learning_rate": 0.00019999954174653847, - "loss": 46.0, - "step": 5994 - }, - { - "epoch": 0.9654172873304079, - "grad_norm": 0.0010123642859980464, - "learning_rate": 0.00019999954159336623, - "loss": 46.0, - "step": 5995 - }, - { - "epoch": 0.9655783244091952, - "grad_norm": 0.0007726819603703916, - "learning_rate": 0.00019999954144016838, - "loss": 46.0, - "step": 5996 - }, - { - "epoch": 0.9657393614879826, - "grad_norm": 0.0010896530002355576, - "learning_rate": 0.00019999954128694492, - "loss": 46.0, - "step": 5997 - }, - { - "epoch": 0.96590039856677, - "grad_norm": 0.0035233329981565475, - "learning_rate": 0.00019999954113369587, - "loss": 46.0, - "step": 5998 - }, - { - "epoch": 0.9660614356455574, - "grad_norm": 0.0015133344568312168, - "learning_rate": 0.00019999954098042123, - "loss": 46.0, - "step": 5999 - }, - { - "epoch": 0.9662224727243448, - "grad_norm": 0.0007407511002384126, - "learning_rate": 0.000199999540827121, - "loss": 46.0, - "step": 6000 - }, - { - "epoch": 0.9663835098031321, - "grad_norm": 0.0012960054446011782, - "learning_rate": 0.00019999954067379517, - "loss": 46.0, - "step": 6001 - }, - { - "epoch": 0.9665445468819196, - "grad_norm": 0.0016822877805680037, - "learning_rate": 0.00019999954052044375, - "loss": 46.0, - "step": 6002 - }, - { - "epoch": 0.9667055839607069, - "grad_norm": 0.001962418667972088, - "learning_rate": 0.00019999954036706673, - "loss": 46.0, - "step": 6003 - }, - { - "epoch": 0.9668666210394944, - "grad_norm": 0.00287793786264956, - "learning_rate": 0.0001999995402136641, - "loss": 46.0, - "step": 6004 - }, - { - "epoch": 0.9670276581182817, - "grad_norm": 0.003555223112925887, - "learning_rate": 0.00019999954006023592, - "loss": 46.0, - "step": 6005 - }, - { - "epoch": 0.9671886951970691, - "grad_norm": 0.0007314881077036262, - "learning_rate": 0.00019999953990678212, - "loss": 46.0, - "step": 6006 - }, - { - "epoch": 0.9673497322758565, - "grad_norm": 0.0008118348196148872, - "learning_rate": 0.00019999953975330273, - "loss": 46.0, - "step": 6007 - }, - { - "epoch": 0.9675107693546439, - "grad_norm": 0.007767799776047468, - "learning_rate": 0.00019999953959979773, - "loss": 46.0, - "step": 6008 - }, - { - "epoch": 0.9676718064334313, - "grad_norm": 0.0008505704463459551, - "learning_rate": 0.00019999953944626714, - "loss": 46.0, - "step": 6009 - }, - { - "epoch": 0.9678328435122187, - "grad_norm": 0.0016857705777511, - "learning_rate": 0.00019999953929271096, - "loss": 46.0, - "step": 6010 - }, - { - "epoch": 0.9679938805910061, - "grad_norm": 0.0015252127777785063, - "learning_rate": 0.00019999953913912917, - "loss": 46.0, - "step": 6011 - }, - { - "epoch": 0.9681549176697934, - "grad_norm": 0.0004844553186558187, - "learning_rate": 0.00019999953898552182, - "loss": 46.0, - "step": 6012 - }, - { - "epoch": 0.9683159547485809, - "grad_norm": 0.0029004274401813745, - "learning_rate": 0.00019999953883188885, - "loss": 46.0, - "step": 6013 - }, - { - "epoch": 0.9684769918273682, - "grad_norm": 0.0016834705602377653, - "learning_rate": 0.0001999995386782303, - "loss": 46.0, - "step": 6014 - }, - { - "epoch": 0.9686380289061557, - "grad_norm": 0.0020192009396851063, - "learning_rate": 0.0001999995385245461, - "loss": 46.0, - "step": 6015 - }, - { - "epoch": 0.968799065984943, - "grad_norm": 0.0019935774616897106, - "learning_rate": 0.00019999953837083638, - "loss": 46.0, - "step": 6016 - }, - { - "epoch": 0.9689601030637304, - "grad_norm": 0.001451910356990993, - "learning_rate": 0.00019999953821710103, - "loss": 46.0, - "step": 6017 - }, - { - "epoch": 0.9691211401425178, - "grad_norm": 0.0007066584075801075, - "learning_rate": 0.0001999995380633401, - "loss": 46.0, - "step": 6018 - }, - { - "epoch": 0.9692821772213052, - "grad_norm": 0.0014588235644623637, - "learning_rate": 0.00019999953790955356, - "loss": 46.0, - "step": 6019 - }, - { - "epoch": 0.9694432143000926, - "grad_norm": 0.0026323446072638035, - "learning_rate": 0.00019999953775574146, - "loss": 46.0, - "step": 6020 - }, - { - "epoch": 0.96960425137888, - "grad_norm": 0.0007777338032610714, - "learning_rate": 0.00019999953760190372, - "loss": 46.0, - "step": 6021 - }, - { - "epoch": 0.9697652884576674, - "grad_norm": 0.0020895982161164284, - "learning_rate": 0.00019999953744804038, - "loss": 46.0, - "step": 6022 - }, - { - "epoch": 0.9699263255364547, - "grad_norm": 0.0007298138807527721, - "learning_rate": 0.0001999995372941515, - "loss": 46.0, - "step": 6023 - }, - { - "epoch": 0.9700873626152422, - "grad_norm": 0.001311998930759728, - "learning_rate": 0.00019999953714023696, - "loss": 46.0, - "step": 6024 - }, - { - "epoch": 0.9702483996940295, - "grad_norm": 0.0003248398134019226, - "learning_rate": 0.00019999953698629687, - "loss": 46.0, - "step": 6025 - }, - { - "epoch": 0.970409436772817, - "grad_norm": 0.004449520725756884, - "learning_rate": 0.00019999953683233116, - "loss": 46.0, - "step": 6026 - }, - { - "epoch": 0.9705704738516043, - "grad_norm": 0.002645839238539338, - "learning_rate": 0.0001999995366783399, - "loss": 46.0, - "step": 6027 - }, - { - "epoch": 0.9707315109303917, - "grad_norm": 0.002953458344563842, - "learning_rate": 0.00019999953652432298, - "loss": 46.0, - "step": 6028 - }, - { - "epoch": 0.9708925480091791, - "grad_norm": 0.00044393062125891447, - "learning_rate": 0.00019999953637028051, - "loss": 46.0, - "step": 6029 - }, - { - "epoch": 0.9710535850879665, - "grad_norm": 0.0023680441081523895, - "learning_rate": 0.00019999953621621243, - "loss": 46.0, - "step": 6030 - }, - { - "epoch": 0.9712146221667539, - "grad_norm": 0.0012556271394714713, - "learning_rate": 0.00019999953606211876, - "loss": 46.0, - "step": 6031 - }, - { - "epoch": 0.9713756592455413, - "grad_norm": 0.0029499754309654236, - "learning_rate": 0.0001999995359079995, - "loss": 46.0, - "step": 6032 - }, - { - "epoch": 0.9715366963243287, - "grad_norm": 0.003256219904869795, - "learning_rate": 0.00019999953575385463, - "loss": 46.0, - "step": 6033 - }, - { - "epoch": 0.971697733403116, - "grad_norm": 0.0016910432605072856, - "learning_rate": 0.00019999953559968417, - "loss": 46.0, - "step": 6034 - }, - { - "epoch": 0.9718587704819035, - "grad_norm": 0.002183531643822789, - "learning_rate": 0.00019999953544548813, - "loss": 46.0, - "step": 6035 - }, - { - "epoch": 0.9720198075606908, - "grad_norm": 0.0009558479650877416, - "learning_rate": 0.0001999995352912665, - "loss": 46.0, - "step": 6036 - }, - { - "epoch": 0.9721808446394783, - "grad_norm": 0.0009155339212156832, - "learning_rate": 0.00019999953513701925, - "loss": 46.0, - "step": 6037 - }, - { - "epoch": 0.9723418817182656, - "grad_norm": 0.001110175158828497, - "learning_rate": 0.0001999995349827464, - "loss": 46.0, - "step": 6038 - }, - { - "epoch": 0.972502918797053, - "grad_norm": 0.0020170703064650297, - "learning_rate": 0.000199999534828448, - "loss": 46.0, - "step": 6039 - }, - { - "epoch": 0.9726639558758404, - "grad_norm": 0.004659927915781736, - "learning_rate": 0.00019999953467412395, - "loss": 46.0, - "step": 6040 - }, - { - "epoch": 0.9728249929546278, - "grad_norm": 0.0008497110102325678, - "learning_rate": 0.00019999953451977436, - "loss": 46.0, - "step": 6041 - }, - { - "epoch": 0.9729860300334152, - "grad_norm": 0.000985593069344759, - "learning_rate": 0.00019999953436539915, - "loss": 46.0, - "step": 6042 - }, - { - "epoch": 0.9731470671122026, - "grad_norm": 0.0009190439595840871, - "learning_rate": 0.00019999953421099832, - "loss": 46.0, - "step": 6043 - }, - { - "epoch": 0.97330810419099, - "grad_norm": 0.004370598588138819, - "learning_rate": 0.00019999953405657194, - "loss": 46.0, - "step": 6044 - }, - { - "epoch": 0.9734691412697773, - "grad_norm": 0.00344197079539299, - "learning_rate": 0.00019999953390211994, - "loss": 46.0, - "step": 6045 - }, - { - "epoch": 0.9736301783485648, - "grad_norm": 0.0007090232684276998, - "learning_rate": 0.00019999953374764232, - "loss": 46.0, - "step": 6046 - }, - { - "epoch": 0.9737912154273521, - "grad_norm": 0.004273815080523491, - "learning_rate": 0.00019999953359313915, - "loss": 46.0, - "step": 6047 - }, - { - "epoch": 0.9739522525061396, - "grad_norm": 0.0013869086978957057, - "learning_rate": 0.00019999953343861036, - "loss": 46.0, - "step": 6048 - }, - { - "epoch": 0.9741132895849269, - "grad_norm": 0.002385001163929701, - "learning_rate": 0.000199999533284056, - "loss": 46.0, - "step": 6049 - }, - { - "epoch": 0.9742743266637143, - "grad_norm": 0.000777302891947329, - "learning_rate": 0.00019999953312947603, - "loss": 46.0, - "step": 6050 - }, - { - "epoch": 0.9744353637425017, - "grad_norm": 0.003631291911005974, - "learning_rate": 0.00019999953297487048, - "loss": 46.0, - "step": 6051 - }, - { - "epoch": 0.9745964008212891, - "grad_norm": 0.0011221128515899181, - "learning_rate": 0.00019999953282023929, - "loss": 46.0, - "step": 6052 - }, - { - "epoch": 0.9747574379000765, - "grad_norm": 0.002175306435674429, - "learning_rate": 0.00019999953266558253, - "loss": 46.0, - "step": 6053 - }, - { - "epoch": 0.9749184749788639, - "grad_norm": 0.002293642144650221, - "learning_rate": 0.00019999953251090017, - "loss": 46.0, - "step": 6054 - }, - { - "epoch": 0.9750795120576513, - "grad_norm": 0.0025275596417486668, - "learning_rate": 0.00019999953235619224, - "loss": 46.0, - "step": 6055 - }, - { - "epoch": 0.9752405491364387, - "grad_norm": 0.0005897557130083442, - "learning_rate": 0.0001999995322014587, - "loss": 46.0, - "step": 6056 - }, - { - "epoch": 0.9754015862152261, - "grad_norm": 0.0048878611996769905, - "learning_rate": 0.00019999953204669958, - "loss": 46.0, - "step": 6057 - }, - { - "epoch": 0.9755626232940134, - "grad_norm": 0.002488535363227129, - "learning_rate": 0.00019999953189191484, - "loss": 46.0, - "step": 6058 - }, - { - "epoch": 0.9757236603728008, - "grad_norm": 0.0006691949092783034, - "learning_rate": 0.00019999953173710454, - "loss": 46.0, - "step": 6059 - }, - { - "epoch": 0.9758846974515882, - "grad_norm": 0.003292854642495513, - "learning_rate": 0.0001999995315822686, - "loss": 46.0, - "step": 6060 - }, - { - "epoch": 0.9760457345303756, - "grad_norm": 0.0017195430118590593, - "learning_rate": 0.0001999995314274071, - "loss": 46.0, - "step": 6061 - }, - { - "epoch": 0.976206771609163, - "grad_norm": 0.0008291300036944449, - "learning_rate": 0.00019999953127251998, - "loss": 46.0, - "step": 6062 - }, - { - "epoch": 0.9763678086879504, - "grad_norm": 0.001436087884940207, - "learning_rate": 0.00019999953111760727, - "loss": 46.0, - "step": 6063 - }, - { - "epoch": 0.9765288457667378, - "grad_norm": 0.0002890973992180079, - "learning_rate": 0.00019999953096266898, - "loss": 46.0, - "step": 6064 - }, - { - "epoch": 0.9766898828455252, - "grad_norm": 0.0010119887301698327, - "learning_rate": 0.00019999953080770508, - "loss": 46.0, - "step": 6065 - }, - { - "epoch": 0.9768509199243126, - "grad_norm": 0.0005323869991116226, - "learning_rate": 0.00019999953065271558, - "loss": 46.0, - "step": 6066 - }, - { - "epoch": 0.9770119570031, - "grad_norm": 0.007950590923428535, - "learning_rate": 0.00019999953049770053, - "loss": 46.0, - "step": 6067 - }, - { - "epoch": 0.9771729940818874, - "grad_norm": 0.0006231916486285627, - "learning_rate": 0.00019999953034265984, - "loss": 46.0, - "step": 6068 - }, - { - "epoch": 0.9773340311606747, - "grad_norm": 0.002003576373681426, - "learning_rate": 0.00019999953018759356, - "loss": 46.0, - "step": 6069 - }, - { - "epoch": 0.9774950682394621, - "grad_norm": 0.0023399987258017063, - "learning_rate": 0.00019999953003250172, - "loss": 46.0, - "step": 6070 - }, - { - "epoch": 0.9776561053182495, - "grad_norm": 0.0010155565105378628, - "learning_rate": 0.00019999952987738423, - "loss": 46.0, - "step": 6071 - }, - { - "epoch": 0.9778171423970369, - "grad_norm": 0.0037375055253505707, - "learning_rate": 0.0001999995297222412, - "loss": 46.0, - "step": 6072 - }, - { - "epoch": 0.9779781794758243, - "grad_norm": 0.0005912245251238346, - "learning_rate": 0.00019999952956707253, - "loss": 46.0, - "step": 6073 - }, - { - "epoch": 0.9781392165546117, - "grad_norm": 0.0022507691755890846, - "learning_rate": 0.0001999995294118783, - "loss": 46.0, - "step": 6074 - }, - { - "epoch": 0.9783002536333991, - "grad_norm": 0.000521384528838098, - "learning_rate": 0.00019999952925665846, - "loss": 46.0, - "step": 6075 - }, - { - "epoch": 0.9784612907121865, - "grad_norm": 0.001833320944570005, - "learning_rate": 0.00019999952910141299, - "loss": 46.0, - "step": 6076 - }, - { - "epoch": 0.9786223277909739, - "grad_norm": 0.0007558513316325843, - "learning_rate": 0.00019999952894614198, - "loss": 46.0, - "step": 6077 - }, - { - "epoch": 0.9787833648697613, - "grad_norm": 0.0008772094151936471, - "learning_rate": 0.00019999952879084536, - "loss": 46.0, - "step": 6078 - }, - { - "epoch": 0.9789444019485487, - "grad_norm": 0.0007068760460242629, - "learning_rate": 0.00019999952863552313, - "loss": 46.0, - "step": 6079 - }, - { - "epoch": 0.979105439027336, - "grad_norm": 0.00278997584246099, - "learning_rate": 0.0001999995284801753, - "loss": 46.0, - "step": 6080 - }, - { - "epoch": 0.9792664761061234, - "grad_norm": 0.0016853873385116458, - "learning_rate": 0.0001999995283248019, - "loss": 46.0, - "step": 6081 - }, - { - "epoch": 0.9794275131849108, - "grad_norm": 0.00478401780128479, - "learning_rate": 0.00019999952816940293, - "loss": 46.0, - "step": 6082 - }, - { - "epoch": 0.9795885502636982, - "grad_norm": 0.003959829453378916, - "learning_rate": 0.00019999952801397832, - "loss": 46.0, - "step": 6083 - }, - { - "epoch": 0.9797495873424856, - "grad_norm": 0.0004952999297529459, - "learning_rate": 0.00019999952785852812, - "loss": 46.0, - "step": 6084 - }, - { - "epoch": 0.979910624421273, - "grad_norm": 0.0024953284300863743, - "learning_rate": 0.00019999952770305234, - "loss": 46.0, - "step": 6085 - }, - { - "epoch": 0.9800716615000604, - "grad_norm": 0.0019067783141508698, - "learning_rate": 0.00019999952754755097, - "loss": 46.0, - "step": 6086 - }, - { - "epoch": 0.9802326985788478, - "grad_norm": 0.002127291401848197, - "learning_rate": 0.00019999952739202395, - "loss": 46.0, - "step": 6087 - }, - { - "epoch": 0.9803937356576352, - "grad_norm": 0.0008091144845820963, - "learning_rate": 0.0001999995272364714, - "loss": 46.0, - "step": 6088 - }, - { - "epoch": 0.9805547727364226, - "grad_norm": 0.0011850473238155246, - "learning_rate": 0.00019999952708089325, - "loss": 46.0, - "step": 6089 - }, - { - "epoch": 0.98071580981521, - "grad_norm": 0.0023602095898240805, - "learning_rate": 0.0001999995269252895, - "loss": 46.0, - "step": 6090 - }, - { - "epoch": 0.9808768468939973, - "grad_norm": 0.0015418841503560543, - "learning_rate": 0.0001999995267696601, - "loss": 46.0, - "step": 6091 - }, - { - "epoch": 0.9810378839727847, - "grad_norm": 0.0027921677101403475, - "learning_rate": 0.00019999952661400516, - "loss": 46.0, - "step": 6092 - }, - { - "epoch": 0.9811989210515721, - "grad_norm": 0.007058043964207172, - "learning_rate": 0.00019999952645832463, - "loss": 46.0, - "step": 6093 - }, - { - "epoch": 0.9813599581303595, - "grad_norm": 0.002366888104006648, - "learning_rate": 0.00019999952630261848, - "loss": 46.0, - "step": 6094 - }, - { - "epoch": 0.9815209952091469, - "grad_norm": 0.0012653230223804712, - "learning_rate": 0.00019999952614688674, - "loss": 46.0, - "step": 6095 - }, - { - "epoch": 0.9816820322879343, - "grad_norm": 0.003651617793366313, - "learning_rate": 0.00019999952599112942, - "loss": 46.0, - "step": 6096 - }, - { - "epoch": 0.9818430693667217, - "grad_norm": 0.0010562005918473005, - "learning_rate": 0.0001999995258353465, - "loss": 46.0, - "step": 6097 - }, - { - "epoch": 0.9820041064455091, - "grad_norm": 0.001356892753392458, - "learning_rate": 0.00019999952567953798, - "loss": 46.0, - "step": 6098 - }, - { - "epoch": 0.9821651435242965, - "grad_norm": 0.0009286191780120134, - "learning_rate": 0.00019999952552370387, - "loss": 46.0, - "step": 6099 - }, - { - "epoch": 0.9823261806030839, - "grad_norm": 0.002225863514468074, - "learning_rate": 0.00019999952536784417, - "loss": 46.0, - "step": 6100 - }, - { - "epoch": 0.9824872176818713, - "grad_norm": 0.0033421157859265804, - "learning_rate": 0.00019999952521195885, - "loss": 46.0, - "step": 6101 - }, - { - "epoch": 0.9826482547606586, - "grad_norm": 0.0005790418363176286, - "learning_rate": 0.00019999952505604795, - "loss": 46.0, - "step": 6102 - }, - { - "epoch": 0.982809291839446, - "grad_norm": 0.0025347555056214333, - "learning_rate": 0.00019999952490011146, - "loss": 46.0, - "step": 6103 - }, - { - "epoch": 0.9829703289182334, - "grad_norm": 0.0011037011863663793, - "learning_rate": 0.00019999952474414939, - "loss": 46.0, - "step": 6104 - }, - { - "epoch": 0.9831313659970208, - "grad_norm": 0.001047627185471356, - "learning_rate": 0.0001999995245881617, - "loss": 46.0, - "step": 6105 - }, - { - "epoch": 0.9832924030758082, - "grad_norm": 0.0007858876488171518, - "learning_rate": 0.00019999952443214842, - "loss": 46.0, - "step": 6106 - }, - { - "epoch": 0.9834534401545956, - "grad_norm": 0.0009689973667263985, - "learning_rate": 0.00019999952427610955, - "loss": 46.0, - "step": 6107 - }, - { - "epoch": 0.983614477233383, - "grad_norm": 0.001379107590764761, - "learning_rate": 0.00019999952412004507, - "loss": 46.0, - "step": 6108 - }, - { - "epoch": 0.9837755143121704, - "grad_norm": 0.002949258778244257, - "learning_rate": 0.00019999952396395504, - "loss": 46.0, - "step": 6109 - }, - { - "epoch": 0.9839365513909578, - "grad_norm": 0.00038959752419032156, - "learning_rate": 0.00019999952380783938, - "loss": 46.0, - "step": 6110 - }, - { - "epoch": 0.9840975884697452, - "grad_norm": 0.005300094839185476, - "learning_rate": 0.00019999952365169811, - "loss": 46.0, - "step": 6111 - }, - { - "epoch": 0.9842586255485325, - "grad_norm": 0.012284980155527592, - "learning_rate": 0.0001999995234955313, - "loss": 46.0, - "step": 6112 - }, - { - "epoch": 0.98441966262732, - "grad_norm": 0.0005090790218673646, - "learning_rate": 0.00019999952333933882, - "loss": 46.0, - "step": 6113 - }, - { - "epoch": 0.9845806997061073, - "grad_norm": 0.0006898490828461945, - "learning_rate": 0.0001999995231831208, - "loss": 46.0, - "step": 6114 - }, - { - "epoch": 0.9847417367848947, - "grad_norm": 0.0010778669966384768, - "learning_rate": 0.00019999952302687717, - "loss": 46.0, - "step": 6115 - }, - { - "epoch": 0.9849027738636821, - "grad_norm": 0.002038671402260661, - "learning_rate": 0.00019999952287060792, - "loss": 46.0, - "step": 6116 - }, - { - "epoch": 0.9850638109424695, - "grad_norm": 0.0024660706985741854, - "learning_rate": 0.00019999952271431313, - "loss": 46.0, - "step": 6117 - }, - { - "epoch": 0.9852248480212569, - "grad_norm": 0.0007153709884732962, - "learning_rate": 0.0001999995225579927, - "loss": 46.0, - "step": 6118 - }, - { - "epoch": 0.9853858851000443, - "grad_norm": 0.001145454472862184, - "learning_rate": 0.0001999995224016467, - "loss": 46.0, - "step": 6119 - }, - { - "epoch": 0.9855469221788317, - "grad_norm": 0.0011187976924702525, - "learning_rate": 0.0001999995222452751, - "loss": 46.0, - "step": 6120 - }, - { - "epoch": 0.9857079592576191, - "grad_norm": 0.0016687295865267515, - "learning_rate": 0.0001999995220888779, - "loss": 46.0, - "step": 6121 - }, - { - "epoch": 0.9858689963364065, - "grad_norm": 0.0014770758571103215, - "learning_rate": 0.0001999995219324551, - "loss": 46.0, - "step": 6122 - }, - { - "epoch": 0.9860300334151938, - "grad_norm": 0.002323250286281109, - "learning_rate": 0.00019999952177600673, - "loss": 46.0, - "step": 6123 - }, - { - "epoch": 0.9861910704939812, - "grad_norm": 0.00024691029102541506, - "learning_rate": 0.00019999952161953272, - "loss": 46.0, - "step": 6124 - }, - { - "epoch": 0.9863521075727686, - "grad_norm": 0.0010923704830929637, - "learning_rate": 0.00019999952146303315, - "loss": 46.0, - "step": 6125 - }, - { - "epoch": 0.986513144651556, - "grad_norm": 0.0007815223070792854, - "learning_rate": 0.000199999521306508, - "loss": 46.0, - "step": 6126 - }, - { - "epoch": 0.9866741817303434, - "grad_norm": 0.0011243766639381647, - "learning_rate": 0.00019999952114995722, - "loss": 46.0, - "step": 6127 - }, - { - "epoch": 0.9868352188091308, - "grad_norm": 0.0031221131794154644, - "learning_rate": 0.00019999952099338084, - "loss": 46.0, - "step": 6128 - }, - { - "epoch": 0.9869962558879182, - "grad_norm": 0.0011449974263086915, - "learning_rate": 0.00019999952083677892, - "loss": 46.0, - "step": 6129 - }, - { - "epoch": 0.9871572929667056, - "grad_norm": 0.003295786678791046, - "learning_rate": 0.00019999952068015136, - "loss": 46.0, - "step": 6130 - }, - { - "epoch": 0.987318330045493, - "grad_norm": 0.0012135219294577837, - "learning_rate": 0.00019999952052349819, - "loss": 46.0, - "step": 6131 - }, - { - "epoch": 0.9874793671242804, - "grad_norm": 0.0015659809578210115, - "learning_rate": 0.00019999952036681945, - "loss": 46.0, - "step": 6132 - }, - { - "epoch": 0.9876404042030678, - "grad_norm": 0.0018343861447647214, - "learning_rate": 0.00019999952021011513, - "loss": 46.0, - "step": 6133 - }, - { - "epoch": 0.9878014412818551, - "grad_norm": 0.0014180458383634686, - "learning_rate": 0.0001999995200533852, - "loss": 46.0, - "step": 6134 - }, - { - "epoch": 0.9879624783606425, - "grad_norm": 0.0023068648297339678, - "learning_rate": 0.00019999951989662967, - "loss": 46.0, - "step": 6135 - }, - { - "epoch": 0.9881235154394299, - "grad_norm": 0.0016210369067266583, - "learning_rate": 0.00019999951973984854, - "loss": 46.0, - "step": 6136 - }, - { - "epoch": 0.9882845525182173, - "grad_norm": 0.002114700386300683, - "learning_rate": 0.00019999951958304184, - "loss": 46.0, - "step": 6137 - }, - { - "epoch": 0.9884455895970047, - "grad_norm": 0.0036070598289370537, - "learning_rate": 0.0001999995194262095, - "loss": 46.0, - "step": 6138 - }, - { - "epoch": 0.9886066266757921, - "grad_norm": 0.0006896013510413468, - "learning_rate": 0.0001999995192693516, - "loss": 46.0, - "step": 6139 - }, - { - "epoch": 0.9887676637545795, - "grad_norm": 0.000816740095615387, - "learning_rate": 0.00019999951911246812, - "loss": 46.0, - "step": 6140 - }, - { - "epoch": 0.9889287008333669, - "grad_norm": 0.0014266902580857277, - "learning_rate": 0.00019999951895555902, - "loss": 46.0, - "step": 6141 - }, - { - "epoch": 0.9890897379121543, - "grad_norm": 0.0009997619781643152, - "learning_rate": 0.00019999951879862433, - "loss": 46.0, - "step": 6142 - }, - { - "epoch": 0.9892507749909417, - "grad_norm": 0.0007714173989370465, - "learning_rate": 0.00019999951864166406, - "loss": 46.0, - "step": 6143 - }, - { - "epoch": 0.9894118120697291, - "grad_norm": 0.004053396638482809, - "learning_rate": 0.00019999951848467817, - "loss": 46.0, - "step": 6144 - }, - { - "epoch": 0.9895728491485164, - "grad_norm": 0.0006980125326663256, - "learning_rate": 0.0001999995183276667, - "loss": 46.0, - "step": 6145 - }, - { - "epoch": 0.9897338862273038, - "grad_norm": 0.0022529347334057093, - "learning_rate": 0.00019999951817062966, - "loss": 46.0, - "step": 6146 - }, - { - "epoch": 0.9898949233060912, - "grad_norm": 0.00037822689046151936, - "learning_rate": 0.00019999951801356699, - "loss": 46.0, - "step": 6147 - }, - { - "epoch": 0.9900559603848786, - "grad_norm": 0.002851964207366109, - "learning_rate": 0.00019999951785647872, - "loss": 46.0, - "step": 6148 - }, - { - "epoch": 0.990216997463666, - "grad_norm": 0.003097419859841466, - "learning_rate": 0.0001999995176993649, - "loss": 46.0, - "step": 6149 - }, - { - "epoch": 0.9903780345424534, - "grad_norm": 0.001526164822280407, - "learning_rate": 0.00019999951754222543, - "loss": 46.0, - "step": 6150 - }, - { - "epoch": 0.9905390716212408, - "grad_norm": 0.0011916793882846832, - "learning_rate": 0.0001999995173850604, - "loss": 46.0, - "step": 6151 - }, - { - "epoch": 0.9907001087000282, - "grad_norm": 0.0030071926303207874, - "learning_rate": 0.00019999951722786977, - "loss": 46.0, - "step": 6152 - }, - { - "epoch": 0.9908611457788156, - "grad_norm": 0.003600070718675852, - "learning_rate": 0.00019999951707065354, - "loss": 46.0, - "step": 6153 - }, - { - "epoch": 0.991022182857603, - "grad_norm": 0.001538835815154016, - "learning_rate": 0.0001999995169134117, - "loss": 46.0, - "step": 6154 - }, - { - "epoch": 0.9911832199363904, - "grad_norm": 0.001332353800535202, - "learning_rate": 0.00019999951675614427, - "loss": 46.0, - "step": 6155 - }, - { - "epoch": 0.9913442570151777, - "grad_norm": 0.001979838591068983, - "learning_rate": 0.00019999951659885126, - "loss": 46.0, - "step": 6156 - }, - { - "epoch": 0.9915052940939652, - "grad_norm": 0.0010774682741612196, - "learning_rate": 0.00019999951644153265, - "loss": 46.0, - "step": 6157 - }, - { - "epoch": 0.9916663311727525, - "grad_norm": 0.0015265361871570349, - "learning_rate": 0.00019999951628418846, - "loss": 46.0, - "step": 6158 - }, - { - "epoch": 0.9918273682515399, - "grad_norm": 0.0020841562654823065, - "learning_rate": 0.00019999951612681866, - "loss": 46.0, - "step": 6159 - }, - { - "epoch": 0.9919884053303273, - "grad_norm": 0.0016260124975815415, - "learning_rate": 0.00019999951596942327, - "loss": 46.0, - "step": 6160 - }, - { - "epoch": 0.9921494424091147, - "grad_norm": 0.00033598599839024246, - "learning_rate": 0.00019999951581200226, - "loss": 46.0, - "step": 6161 - }, - { - "epoch": 0.9923104794879021, - "grad_norm": 0.001430588774383068, - "learning_rate": 0.00019999951565455567, - "loss": 46.0, - "step": 6162 - }, - { - "epoch": 0.9924715165666895, - "grad_norm": 0.0015464945463463664, - "learning_rate": 0.00019999951549708352, - "loss": 46.0, - "step": 6163 - }, - { - "epoch": 0.9926325536454769, - "grad_norm": 0.0009622170473448932, - "learning_rate": 0.00019999951533958575, - "loss": 46.0, - "step": 6164 - }, - { - "epoch": 0.9927935907242642, - "grad_norm": 0.0022616719361394644, - "learning_rate": 0.0001999995151820624, - "loss": 46.0, - "step": 6165 - }, - { - "epoch": 0.9929546278030517, - "grad_norm": 0.002432753797620535, - "learning_rate": 0.00019999951502451343, - "loss": 46.0, - "step": 6166 - }, - { - "epoch": 0.993115664881839, - "grad_norm": 0.002840930363163352, - "learning_rate": 0.00019999951486693887, - "loss": 46.0, - "step": 6167 - }, - { - "epoch": 0.9932767019606265, - "grad_norm": 0.005911883432418108, - "learning_rate": 0.0001999995147093387, - "loss": 46.0, - "step": 6168 - }, - { - "epoch": 0.9934377390394138, - "grad_norm": 0.0017866501584649086, - "learning_rate": 0.00019999951455171297, - "loss": 46.0, - "step": 6169 - }, - { - "epoch": 0.9935987761182012, - "grad_norm": 0.0006273352773860097, - "learning_rate": 0.00019999951439406163, - "loss": 46.0, - "step": 6170 - }, - { - "epoch": 0.9937598131969886, - "grad_norm": 0.0024944450706243515, - "learning_rate": 0.00019999951423638467, - "loss": 46.0, - "step": 6171 - }, - { - "epoch": 0.993920850275776, - "grad_norm": 0.0007860513869673014, - "learning_rate": 0.00019999951407868215, - "loss": 46.0, - "step": 6172 - }, - { - "epoch": 0.9940818873545634, - "grad_norm": 0.00033432774944230914, - "learning_rate": 0.00019999951392095402, - "loss": 46.0, - "step": 6173 - }, - { - "epoch": 0.9942429244333508, - "grad_norm": 0.00497489282861352, - "learning_rate": 0.0001999995137632003, - "loss": 46.0, - "step": 6174 - }, - { - "epoch": 0.9944039615121382, - "grad_norm": 0.003430631710216403, - "learning_rate": 0.000199999513605421, - "loss": 46.0, - "step": 6175 - }, - { - "epoch": 0.9945649985909255, - "grad_norm": 0.00470144534483552, - "learning_rate": 0.00019999951344761607, - "loss": 46.0, - "step": 6176 - }, - { - "epoch": 0.994726035669713, - "grad_norm": 0.0005960713024251163, - "learning_rate": 0.00019999951328978556, - "loss": 46.0, - "step": 6177 - }, - { - "epoch": 0.9948870727485003, - "grad_norm": 0.00037680185050703585, - "learning_rate": 0.0001999995131319295, - "loss": 46.0, - "step": 6178 - }, - { - "epoch": 0.9950481098272878, - "grad_norm": 0.0030698818154633045, - "learning_rate": 0.00019999951297404778, - "loss": 46.0, - "step": 6179 - }, - { - "epoch": 0.9952091469060751, - "grad_norm": 0.0023581250570714474, - "learning_rate": 0.00019999951281614048, - "loss": 46.0, - "step": 6180 - }, - { - "epoch": 0.9953701839848625, - "grad_norm": 0.0025764857418835163, - "learning_rate": 0.00019999951265820763, - "loss": 46.0, - "step": 6181 - }, - { - "epoch": 0.9955312210636499, - "grad_norm": 0.003658497938886285, - "learning_rate": 0.00019999951250024913, - "loss": 46.0, - "step": 6182 - }, - { - "epoch": 0.9956922581424373, - "grad_norm": 0.005764478351920843, - "learning_rate": 0.00019999951234226507, - "loss": 46.0, - "step": 6183 - }, - { - "epoch": 0.9958532952212247, - "grad_norm": 0.000824173737782985, - "learning_rate": 0.00019999951218425542, - "loss": 46.0, - "step": 6184 - }, - { - "epoch": 0.9960143323000121, - "grad_norm": 0.0009022654267027974, - "learning_rate": 0.00019999951202622014, - "loss": 46.0, - "step": 6185 - }, - { - "epoch": 0.9961753693787995, - "grad_norm": 0.0014750818954780698, - "learning_rate": 0.0001999995118681593, - "loss": 46.0, - "step": 6186 - }, - { - "epoch": 0.9963364064575868, - "grad_norm": 0.0011005938285961747, - "learning_rate": 0.00019999951171007283, - "loss": 46.0, - "step": 6187 - }, - { - "epoch": 0.9964974435363743, - "grad_norm": 0.0008392144227400422, - "learning_rate": 0.0001999995115519608, - "loss": 46.0, - "step": 6188 - }, - { - "epoch": 0.9966584806151616, - "grad_norm": 0.004620526451617479, - "learning_rate": 0.00019999951139382317, - "loss": 46.0, - "step": 6189 - }, - { - "epoch": 0.996819517693949, - "grad_norm": 0.0014713795389980078, - "learning_rate": 0.0001999995112356599, - "loss": 46.0, - "step": 6190 - }, - { - "epoch": 0.9969805547727364, - "grad_norm": 0.006917021702975035, - "learning_rate": 0.00019999951107747108, - "loss": 46.0, - "step": 6191 - }, - { - "epoch": 0.9971415918515238, - "grad_norm": 0.0013163541443645954, - "learning_rate": 0.00019999951091925663, - "loss": 46.0, - "step": 6192 - }, - { - "epoch": 0.9973026289303112, - "grad_norm": 0.001013058121316135, - "learning_rate": 0.00019999951076101665, - "loss": 46.0, - "step": 6193 - }, - { - "epoch": 0.9974636660090986, - "grad_norm": 0.001080639660358429, - "learning_rate": 0.00019999951060275102, - "loss": 46.0, - "step": 6194 - }, - { - "epoch": 0.997624703087886, - "grad_norm": 0.002676317933946848, - "learning_rate": 0.0001999995104444598, - "loss": 46.0, - "step": 6195 - }, - { - "epoch": 0.9977857401666734, - "grad_norm": 0.003335464047268033, - "learning_rate": 0.00019999951028614298, - "loss": 46.0, - "step": 6196 - }, - { - "epoch": 0.9979467772454608, - "grad_norm": 0.0012337196385487914, - "learning_rate": 0.0001999995101278006, - "loss": 46.0, - "step": 6197 - }, - { - "epoch": 0.9981078143242481, - "grad_norm": 0.0004911215510219336, - "learning_rate": 0.0001999995099694326, - "loss": 46.0, - "step": 6198 - }, - { - "epoch": 0.9982688514030356, - "grad_norm": 0.0061832452192902565, - "learning_rate": 0.00019999950981103903, - "loss": 46.0, - "step": 6199 - }, - { - "epoch": 0.9984298884818229, - "grad_norm": 0.000253285194048658, - "learning_rate": 0.00019999950965261985, - "loss": 46.0, - "step": 6200 - }, - { - "epoch": 0.9985909255606104, - "grad_norm": 0.0027165801730006933, - "learning_rate": 0.0001999995094941751, - "loss": 46.0, - "step": 6201 - }, - { - "epoch": 0.9987519626393977, - "grad_norm": 0.006130980793386698, - "learning_rate": 0.00019999950933570468, - "loss": 46.0, - "step": 6202 - }, - { - "epoch": 0.9989129997181851, - "grad_norm": 0.0023346436209976673, - "learning_rate": 0.00019999950917720872, - "loss": 46.0, - "step": 6203 - }, - { - "epoch": 0.9990740367969725, - "grad_norm": 0.001313018728978932, - "learning_rate": 0.00019999950901868717, - "loss": 46.0, - "step": 6204 - }, - { - "epoch": 0.9992350738757599, - "grad_norm": 0.0004894621088169515, - "learning_rate": 0.00019999950886014, - "loss": 46.0, - "step": 6205 - }, - { - "epoch": 0.9993961109545473, - "grad_norm": 0.0008191371452994645, - "learning_rate": 0.00019999950870156725, - "loss": 46.0, - "step": 6206 - }, - { - "epoch": 0.9995571480333347, - "grad_norm": 0.0005024021957069635, - "learning_rate": 0.0001999995085429689, - "loss": 46.0, - "step": 6207 - }, - { - "epoch": 0.9997181851121221, - "grad_norm": 0.0012005992466583848, - "learning_rate": 0.00019999950838434498, - "loss": 46.0, - "step": 6208 - }, - { - "epoch": 0.9998792221909094, - "grad_norm": 0.0037025536876171827, - "learning_rate": 0.00019999950822569544, - "loss": 46.0, - "step": 6209 - }, - { - "epoch": 0.9998792221909094, - "eval_loss": 11.5, - "eval_runtime": 14.5868, - "eval_samples_per_second": 179.272, - "eval_steps_per_second": 89.67, - "step": 6209 - }, - { - "epoch": 1.0000805185393937, - "grad_norm": 0.003451248398050666, - "learning_rate": 0.0001999995080670203, - "loss": 46.0, - "step": 6210 - }, - { - "epoch": 1.0002415556181812, - "grad_norm": 0.0024098986759781837, - "learning_rate": 0.0001999995079083196, - "loss": 46.0, - "step": 6211 - }, - { - "epoch": 1.0004025926969684, - "grad_norm": 0.0011229664087295532, - "learning_rate": 0.00019999950774959326, - "loss": 46.0, - "step": 6212 - }, - { - "epoch": 1.0005636297757559, - "grad_norm": 0.0014348207041621208, - "learning_rate": 0.00019999950759084134, - "loss": 46.0, - "step": 6213 - }, - { - "epoch": 1.0007246668545433, - "grad_norm": 0.003249640576541424, - "learning_rate": 0.00019999950743206384, - "loss": 46.0, - "step": 6214 - }, - { - "epoch": 1.0008857039333305, - "grad_norm": 0.0010153213515877724, - "learning_rate": 0.00019999950727326074, - "loss": 46.0, - "step": 6215 - }, - { - "epoch": 1.001046741012118, - "grad_norm": 0.0006885263719595969, - "learning_rate": 0.00019999950711443204, - "loss": 46.0, - "step": 6216 - }, - { - "epoch": 1.0012077780909054, - "grad_norm": 0.0004964792169630527, - "learning_rate": 0.00019999950695557774, - "loss": 46.0, - "step": 6217 - }, - { - "epoch": 1.001368815169693, - "grad_norm": 0.0010101882508024573, - "learning_rate": 0.00019999950679669786, - "loss": 46.0, - "step": 6218 - }, - { - "epoch": 1.0015298522484801, - "grad_norm": 0.0008583376184105873, - "learning_rate": 0.00019999950663779237, - "loss": 46.0, - "step": 6219 - }, - { - "epoch": 1.0016908893272676, - "grad_norm": 0.002517750021070242, - "learning_rate": 0.00019999950647886129, - "loss": 46.0, - "step": 6220 - }, - { - "epoch": 1.001851926406055, - "grad_norm": 0.0023000449873507023, - "learning_rate": 0.00019999950631990462, - "loss": 46.0, - "step": 6221 - }, - { - "epoch": 1.0020129634848425, - "grad_norm": 0.003362311515957117, - "learning_rate": 0.00019999950616092233, - "loss": 46.0, - "step": 6222 - }, - { - "epoch": 1.0021740005636297, - "grad_norm": 0.001914225285872817, - "learning_rate": 0.0001999995060019145, - "loss": 46.0, - "step": 6223 - }, - { - "epoch": 1.0023350376424172, - "grad_norm": 0.001551188761368394, - "learning_rate": 0.00019999950584288103, - "loss": 46.0, - "step": 6224 - }, - { - "epoch": 1.0024960747212046, - "grad_norm": 0.0007930328138172626, - "learning_rate": 0.00019999950568382199, - "loss": 46.0, - "step": 6225 - }, - { - "epoch": 1.0026571117999918, - "grad_norm": 0.0013733393279835582, - "learning_rate": 0.00019999950552473733, - "loss": 46.0, - "step": 6226 - }, - { - "epoch": 1.0028181488787793, - "grad_norm": 0.0002670790418051183, - "learning_rate": 0.00019999950536562708, - "loss": 46.0, - "step": 6227 - }, - { - "epoch": 1.0029791859575667, - "grad_norm": 0.0027424772270023823, - "learning_rate": 0.00019999950520649125, - "loss": 46.0, - "step": 6228 - }, - { - "epoch": 1.0031402230363542, - "grad_norm": 0.0014997286489233375, - "learning_rate": 0.00019999950504732982, - "loss": 46.0, - "step": 6229 - }, - { - "epoch": 1.0033012601151414, - "grad_norm": 0.0002768393896985799, - "learning_rate": 0.00019999950488814282, - "loss": 46.0, - "step": 6230 - }, - { - "epoch": 1.0034622971939289, - "grad_norm": 0.0011887544533237815, - "learning_rate": 0.00019999950472893017, - "loss": 46.0, - "step": 6231 - }, - { - "epoch": 1.0036233342727163, - "grad_norm": 0.0019595029298216105, - "learning_rate": 0.00019999950456969193, - "loss": 46.0, - "step": 6232 - }, - { - "epoch": 1.0037843713515038, - "grad_norm": 0.0012800341937690973, - "learning_rate": 0.00019999950441042813, - "loss": 46.0, - "step": 6233 - }, - { - "epoch": 1.003945408430291, - "grad_norm": 0.0013854592107236385, - "learning_rate": 0.00019999950425113875, - "loss": 46.0, - "step": 6234 - }, - { - "epoch": 1.0041064455090785, - "grad_norm": 0.0010163936531171203, - "learning_rate": 0.00019999950409182375, - "loss": 46.0, - "step": 6235 - }, - { - "epoch": 1.004267482587866, - "grad_norm": 0.000986337661743164, - "learning_rate": 0.00019999950393248317, - "loss": 46.0, - "step": 6236 - }, - { - "epoch": 1.0044285196666531, - "grad_norm": 0.0013223548885434866, - "learning_rate": 0.00019999950377311697, - "loss": 46.0, - "step": 6237 - }, - { - "epoch": 1.0045895567454406, - "grad_norm": 0.005291024222970009, - "learning_rate": 0.00019999950361372518, - "loss": 46.0, - "step": 6238 - }, - { - "epoch": 1.004750593824228, - "grad_norm": 0.001994798658415675, - "learning_rate": 0.0001999995034543078, - "loss": 46.0, - "step": 6239 - }, - { - "epoch": 1.0049116309030155, - "grad_norm": 0.0061939433217048645, - "learning_rate": 0.00019999950329486484, - "loss": 46.0, - "step": 6240 - }, - { - "epoch": 1.0050726679818027, - "grad_norm": 0.0008416906930506229, - "learning_rate": 0.00019999950313539627, - "loss": 46.0, - "step": 6241 - }, - { - "epoch": 1.0052337050605902, - "grad_norm": 0.0013776666019111872, - "learning_rate": 0.00019999950297590208, - "loss": 46.0, - "step": 6242 - }, - { - "epoch": 1.0053947421393776, - "grad_norm": 0.0006404008599929512, - "learning_rate": 0.00019999950281638236, - "loss": 46.0, - "step": 6243 - }, - { - "epoch": 1.005555779218165, - "grad_norm": 0.0013202581321820617, - "learning_rate": 0.000199999502656837, - "loss": 46.0, - "step": 6244 - }, - { - "epoch": 1.0057168162969523, - "grad_norm": 0.0026587299071252346, - "learning_rate": 0.00019999950249726604, - "loss": 46.0, - "step": 6245 - }, - { - "epoch": 1.0058778533757398, - "grad_norm": 0.0008761993958614767, - "learning_rate": 0.0001999995023376695, - "loss": 46.0, - "step": 6246 - }, - { - "epoch": 1.0060388904545272, - "grad_norm": 0.0009400885901413858, - "learning_rate": 0.00019999950217804738, - "loss": 46.0, - "step": 6247 - }, - { - "epoch": 1.0061999275333144, - "grad_norm": 0.0012943897163495421, - "learning_rate": 0.00019999950201839964, - "loss": 46.0, - "step": 6248 - }, - { - "epoch": 1.006360964612102, - "grad_norm": 0.0029224518220871687, - "learning_rate": 0.0001999995018587263, - "loss": 46.0, - "step": 6249 - }, - { - "epoch": 1.0065220016908893, - "grad_norm": 0.003726367373019457, - "learning_rate": 0.0001999995016990274, - "loss": 46.0, - "step": 6250 - }, - { - "epoch": 1.0066830387696768, - "grad_norm": 0.0005758950719609857, - "learning_rate": 0.0001999995015393029, - "loss": 46.0, - "step": 6251 - }, - { - "epoch": 1.006844075848464, - "grad_norm": 0.0006697188946418464, - "learning_rate": 0.00019999950137955278, - "loss": 46.0, - "step": 6252 - }, - { - "epoch": 1.0070051129272515, - "grad_norm": 0.0005896021611988544, - "learning_rate": 0.00019999950121977708, - "loss": 46.0, - "step": 6253 - }, - { - "epoch": 1.007166150006039, - "grad_norm": 0.0008820137591101229, - "learning_rate": 0.00019999950105997576, - "loss": 46.0, - "step": 6254 - }, - { - "epoch": 1.0073271870848264, - "grad_norm": 0.008077237755060196, - "learning_rate": 0.00019999950090014888, - "loss": 46.0, - "step": 6255 - }, - { - "epoch": 1.0074882241636136, - "grad_norm": 0.001512572169303894, - "learning_rate": 0.0001999995007402964, - "loss": 46.0, - "step": 6256 - }, - { - "epoch": 1.007649261242401, - "grad_norm": 0.003671261016279459, - "learning_rate": 0.0001999995005804183, - "loss": 46.0, - "step": 6257 - }, - { - "epoch": 1.0078102983211885, - "grad_norm": 0.0007956259069032967, - "learning_rate": 0.00019999950042051462, - "loss": 46.0, - "step": 6258 - }, - { - "epoch": 1.0079713353999757, - "grad_norm": 0.0009433970553800464, - "learning_rate": 0.00019999950026058536, - "loss": 46.0, - "step": 6259 - }, - { - "epoch": 1.0081323724787632, - "grad_norm": 0.0012183671351522207, - "learning_rate": 0.00019999950010063047, - "loss": 46.0, - "step": 6260 - }, - { - "epoch": 1.0082934095575506, - "grad_norm": 0.0008284273208118975, - "learning_rate": 0.00019999949994065002, - "loss": 46.0, - "step": 6261 - }, - { - "epoch": 1.008454446636338, - "grad_norm": 0.0007555127376690507, - "learning_rate": 0.00019999949978064397, - "loss": 46.0, - "step": 6262 - }, - { - "epoch": 1.0086154837151253, - "grad_norm": 0.000943769293371588, - "learning_rate": 0.00019999949962061232, - "loss": 46.0, - "step": 6263 - }, - { - "epoch": 1.0087765207939128, - "grad_norm": 0.001409639255143702, - "learning_rate": 0.00019999949946055505, - "loss": 46.0, - "step": 6264 - }, - { - "epoch": 1.0089375578727002, - "grad_norm": 0.0004043137887492776, - "learning_rate": 0.00019999949930047222, - "loss": 46.0, - "step": 6265 - }, - { - "epoch": 1.0090985949514877, - "grad_norm": 0.00034933481947518885, - "learning_rate": 0.0001999994991403638, - "loss": 46.0, - "step": 6266 - }, - { - "epoch": 1.009259632030275, - "grad_norm": 0.0020100735127925873, - "learning_rate": 0.00019999949898022977, - "loss": 46.0, - "step": 6267 - }, - { - "epoch": 1.0094206691090624, - "grad_norm": 0.0033263566438108683, - "learning_rate": 0.00019999949882007015, - "loss": 46.0, - "step": 6268 - }, - { - "epoch": 1.0095817061878498, - "grad_norm": 0.001117824693210423, - "learning_rate": 0.0001999994986598849, - "loss": 46.0, - "step": 6269 - }, - { - "epoch": 1.009742743266637, - "grad_norm": 0.0016281973803415895, - "learning_rate": 0.00019999949849967407, - "loss": 46.0, - "step": 6270 - }, - { - "epoch": 1.0099037803454245, - "grad_norm": 0.002180148847401142, - "learning_rate": 0.00019999949833943766, - "loss": 46.0, - "step": 6271 - }, - { - "epoch": 1.010064817424212, - "grad_norm": 0.0015702710952609777, - "learning_rate": 0.0001999994981791757, - "loss": 46.0, - "step": 6272 - }, - { - "epoch": 1.0102258545029994, - "grad_norm": 0.004505687393248081, - "learning_rate": 0.00019999949801888809, - "loss": 46.0, - "step": 6273 - }, - { - "epoch": 1.0103868915817866, - "grad_norm": 0.0006167357787489891, - "learning_rate": 0.0001999994978585749, - "loss": 46.0, - "step": 6274 - }, - { - "epoch": 1.010547928660574, - "grad_norm": 0.0011231355601921678, - "learning_rate": 0.0001999994976982361, - "loss": 46.0, - "step": 6275 - }, - { - "epoch": 1.0107089657393615, - "grad_norm": 0.0024628296960145235, - "learning_rate": 0.0001999994975378717, - "loss": 46.0, - "step": 6276 - }, - { - "epoch": 1.010870002818149, - "grad_norm": 0.0019293796503916383, - "learning_rate": 0.00019999949737748172, - "loss": 46.0, - "step": 6277 - }, - { - "epoch": 1.0110310398969362, - "grad_norm": 0.00152417435310781, - "learning_rate": 0.00019999949721706615, - "loss": 46.0, - "step": 6278 - }, - { - "epoch": 1.0111920769757237, - "grad_norm": 0.0005781169165857136, - "learning_rate": 0.000199999497056625, - "loss": 46.0, - "step": 6279 - }, - { - "epoch": 1.0113531140545111, - "grad_norm": 0.0015225467504933476, - "learning_rate": 0.00019999949689615822, - "loss": 46.0, - "step": 6280 - }, - { - "epoch": 1.0115141511332983, - "grad_norm": 0.006066483445465565, - "learning_rate": 0.00019999949673566586, - "loss": 46.0, - "step": 6281 - }, - { - "epoch": 1.0116751882120858, - "grad_norm": 0.006567680276930332, - "learning_rate": 0.00019999949657514794, - "loss": 46.0, - "step": 6282 - }, - { - "epoch": 1.0118362252908732, - "grad_norm": 0.0009104766068048775, - "learning_rate": 0.00019999949641460435, - "loss": 46.0, - "step": 6283 - }, - { - "epoch": 1.0119972623696607, - "grad_norm": 0.0005526019376702607, - "learning_rate": 0.00019999949625403525, - "loss": 46.0, - "step": 6284 - }, - { - "epoch": 1.012158299448448, - "grad_norm": 0.002163332886993885, - "learning_rate": 0.00019999949609344052, - "loss": 46.0, - "step": 6285 - }, - { - "epoch": 1.0123193365272354, - "grad_norm": 0.00041490071453154087, - "learning_rate": 0.00019999949593282017, - "loss": 46.0, - "step": 6286 - }, - { - "epoch": 1.0124803736060228, - "grad_norm": 0.0006570565747097135, - "learning_rate": 0.00019999949577217426, - "loss": 46.0, - "step": 6287 - }, - { - "epoch": 1.0126414106848103, - "grad_norm": 0.0015555809950456023, - "learning_rate": 0.00019999949561150273, - "loss": 46.0, - "step": 6288 - }, - { - "epoch": 1.0128024477635975, - "grad_norm": 0.0017545541049912572, - "learning_rate": 0.00019999949545080562, - "loss": 46.0, - "step": 6289 - }, - { - "epoch": 1.012963484842385, - "grad_norm": 0.0017391914734616876, - "learning_rate": 0.0001999994952900829, - "loss": 46.0, - "step": 6290 - }, - { - "epoch": 1.0131245219211724, - "grad_norm": 0.003497850615531206, - "learning_rate": 0.0001999994951293346, - "loss": 46.0, - "step": 6291 - }, - { - "epoch": 1.0132855589999596, - "grad_norm": 0.0010639895917847753, - "learning_rate": 0.0001999994949685607, - "loss": 46.0, - "step": 6292 - }, - { - "epoch": 1.013446596078747, - "grad_norm": 0.001168062910437584, - "learning_rate": 0.00019999949480776122, - "loss": 46.0, - "step": 6293 - }, - { - "epoch": 1.0136076331575345, - "grad_norm": 0.002597469836473465, - "learning_rate": 0.00019999949464693612, - "loss": 46.0, - "step": 6294 - }, - { - "epoch": 1.013768670236322, - "grad_norm": 0.0006971464608795941, - "learning_rate": 0.00019999949448608543, - "loss": 46.0, - "step": 6295 - }, - { - "epoch": 1.0139297073151092, - "grad_norm": 0.006824319716542959, - "learning_rate": 0.00019999949432520915, - "loss": 46.0, - "step": 6296 - }, - { - "epoch": 1.0140907443938967, - "grad_norm": 0.004372299648821354, - "learning_rate": 0.0001999994941643073, - "loss": 46.0, - "step": 6297 - }, - { - "epoch": 1.0142517814726841, - "grad_norm": 0.0016161275561898947, - "learning_rate": 0.0001999994940033798, - "loss": 46.0, - "step": 6298 - }, - { - "epoch": 1.0144128185514716, - "grad_norm": 0.0012474050745368004, - "learning_rate": 0.00019999949384242674, - "loss": 46.0, - "step": 6299 - }, - { - "epoch": 1.0145738556302588, - "grad_norm": 0.0009295811760239303, - "learning_rate": 0.0001999994936814481, - "loss": 46.0, - "step": 6300 - }, - { - "epoch": 1.0147348927090463, - "grad_norm": 0.0023029143922030926, - "learning_rate": 0.00019999949352044385, - "loss": 46.0, - "step": 6301 - }, - { - "epoch": 1.0148959297878337, - "grad_norm": 0.003243709448724985, - "learning_rate": 0.000199999493359414, - "loss": 46.0, - "step": 6302 - }, - { - "epoch": 1.015056966866621, - "grad_norm": 0.0025950914714485407, - "learning_rate": 0.00019999949319835856, - "loss": 46.0, - "step": 6303 - }, - { - "epoch": 1.0152180039454084, - "grad_norm": 0.00060755864251405, - "learning_rate": 0.0001999994930372775, - "loss": 46.0, - "step": 6304 - }, - { - "epoch": 1.0153790410241958, - "grad_norm": 0.0015983114717528224, - "learning_rate": 0.0001999994928761709, - "loss": 46.0, - "step": 6305 - }, - { - "epoch": 1.0155400781029833, - "grad_norm": 0.0027972955722361803, - "learning_rate": 0.00019999949271503863, - "loss": 46.0, - "step": 6306 - }, - { - "epoch": 1.0157011151817705, - "grad_norm": 0.0010275689419358969, - "learning_rate": 0.00019999949255388084, - "loss": 46.0, - "step": 6307 - }, - { - "epoch": 1.015862152260558, - "grad_norm": 0.0013191692996770144, - "learning_rate": 0.0001999994923926974, - "loss": 46.0, - "step": 6308 - }, - { - "epoch": 1.0160231893393454, - "grad_norm": 0.0011703158961609006, - "learning_rate": 0.00019999949223148842, - "loss": 46.0, - "step": 6309 - }, - { - "epoch": 1.0161842264181327, - "grad_norm": 0.0005833222530782223, - "learning_rate": 0.0001999994920702538, - "loss": 46.0, - "step": 6310 - }, - { - "epoch": 1.0163452634969201, - "grad_norm": 0.002289395546540618, - "learning_rate": 0.00019999949190899362, - "loss": 46.0, - "step": 6311 - }, - { - "epoch": 1.0165063005757076, - "grad_norm": 0.0058857472613453865, - "learning_rate": 0.00019999949174770781, - "loss": 46.0, - "step": 6312 - }, - { - "epoch": 1.016667337654495, - "grad_norm": 0.004240898415446281, - "learning_rate": 0.00019999949158639645, - "loss": 46.0, - "step": 6313 - }, - { - "epoch": 1.0168283747332822, - "grad_norm": 0.00167121272534132, - "learning_rate": 0.00019999949142505944, - "loss": 46.0, - "step": 6314 - }, - { - "epoch": 1.0169894118120697, - "grad_norm": 0.0036721595097333193, - "learning_rate": 0.00019999949126369687, - "loss": 46.0, - "step": 6315 - }, - { - "epoch": 1.0171504488908572, - "grad_norm": 0.002000790787860751, - "learning_rate": 0.0001999994911023087, - "loss": 46.0, - "step": 6316 - }, - { - "epoch": 1.0173114859696446, - "grad_norm": 0.0015422106953337789, - "learning_rate": 0.00019999949094089492, - "loss": 46.0, - "step": 6317 - }, - { - "epoch": 1.0174725230484318, - "grad_norm": 0.0019068531692028046, - "learning_rate": 0.00019999949077945556, - "loss": 46.0, - "step": 6318 - }, - { - "epoch": 1.0176335601272193, - "grad_norm": 0.0013318938435986638, - "learning_rate": 0.0001999994906179906, - "loss": 46.0, - "step": 6319 - }, - { - "epoch": 1.0177945972060067, - "grad_norm": 0.0036504121962934732, - "learning_rate": 0.00019999949045650003, - "loss": 46.0, - "step": 6320 - }, - { - "epoch": 1.0179556342847942, - "grad_norm": 0.0029523440171033144, - "learning_rate": 0.00019999949029498391, - "loss": 46.0, - "step": 6321 - }, - { - "epoch": 1.0181166713635814, - "grad_norm": 0.003808134701102972, - "learning_rate": 0.00019999949013344215, - "loss": 46.0, - "step": 6322 - }, - { - "epoch": 1.0182777084423689, - "grad_norm": 0.0011177808046340942, - "learning_rate": 0.0001999994899718748, - "loss": 46.0, - "step": 6323 - }, - { - "epoch": 1.0184387455211563, - "grad_norm": 0.0025043736677616835, - "learning_rate": 0.0001999994898102819, - "loss": 46.0, - "step": 6324 - }, - { - "epoch": 1.0185997825999435, - "grad_norm": 0.00926585216075182, - "learning_rate": 0.00019999948964866335, - "loss": 46.0, - "step": 6325 - }, - { - "epoch": 1.018760819678731, - "grad_norm": 0.004121112637221813, - "learning_rate": 0.00019999948948701924, - "loss": 46.0, - "step": 6326 - }, - { - "epoch": 1.0189218567575185, - "grad_norm": 0.0010230483021587133, - "learning_rate": 0.00019999948932534952, - "loss": 46.0, - "step": 6327 - }, - { - "epoch": 1.019082893836306, - "grad_norm": 0.0010295668616890907, - "learning_rate": 0.0001999994891636542, - "loss": 46.0, - "step": 6328 - }, - { - "epoch": 1.0192439309150931, - "grad_norm": 0.00418697576969862, - "learning_rate": 0.00019999948900193331, - "loss": 46.0, - "step": 6329 - }, - { - "epoch": 1.0194049679938806, - "grad_norm": 0.0012874839594587684, - "learning_rate": 0.00019999948884018683, - "loss": 46.0, - "step": 6330 - }, - { - "epoch": 1.019566005072668, - "grad_norm": 0.00042142043821513653, - "learning_rate": 0.0001999994886784147, - "loss": 46.0, - "step": 6331 - }, - { - "epoch": 1.0197270421514553, - "grad_norm": 0.0008691985858604312, - "learning_rate": 0.00019999948851661702, - "loss": 46.0, - "step": 6332 - }, - { - "epoch": 1.0198880792302427, - "grad_norm": 0.004565051756799221, - "learning_rate": 0.00019999948835479377, - "loss": 46.0, - "step": 6333 - }, - { - "epoch": 1.0200491163090302, - "grad_norm": 0.0029316702857613564, - "learning_rate": 0.00019999948819294489, - "loss": 46.0, - "step": 6334 - }, - { - "epoch": 1.0202101533878176, - "grad_norm": 0.0013656109804287553, - "learning_rate": 0.0001999994880310704, - "loss": 46.0, - "step": 6335 - }, - { - "epoch": 1.0203711904666048, - "grad_norm": 0.0027131284587085247, - "learning_rate": 0.00019999948786917032, - "loss": 46.0, - "step": 6336 - }, - { - "epoch": 1.0205322275453923, - "grad_norm": 0.0009259256767109036, - "learning_rate": 0.00019999948770724467, - "loss": 46.0, - "step": 6337 - }, - { - "epoch": 1.0206932646241798, - "grad_norm": 0.0009209402487613261, - "learning_rate": 0.0001999994875452934, - "loss": 46.0, - "step": 6338 - }, - { - "epoch": 1.0208543017029672, - "grad_norm": 0.000972357636783272, - "learning_rate": 0.00019999948738331656, - "loss": 46.0, - "step": 6339 - }, - { - "epoch": 1.0210153387817544, - "grad_norm": 0.0014667310751974583, - "learning_rate": 0.0001999994872213141, - "loss": 46.0, - "step": 6340 - }, - { - "epoch": 1.0211763758605419, - "grad_norm": 0.005109541118144989, - "learning_rate": 0.00019999948705928607, - "loss": 46.0, - "step": 6341 - }, - { - "epoch": 1.0213374129393293, - "grad_norm": 0.0029329066164791584, - "learning_rate": 0.00019999948689723246, - "loss": 46.0, - "step": 6342 - }, - { - "epoch": 1.0214984500181166, - "grad_norm": 0.0017394509632140398, - "learning_rate": 0.0001999994867351532, - "loss": 46.0, - "step": 6343 - }, - { - "epoch": 1.021659487096904, - "grad_norm": 0.000741521071176976, - "learning_rate": 0.0001999994865730484, - "loss": 46.0, - "step": 6344 - }, - { - "epoch": 1.0218205241756915, - "grad_norm": 0.0040016938000917435, - "learning_rate": 0.00019999948641091794, - "loss": 46.0, - "step": 6345 - }, - { - "epoch": 1.021981561254479, - "grad_norm": 0.0010161378886550665, - "learning_rate": 0.00019999948624876195, - "loss": 46.0, - "step": 6346 - }, - { - "epoch": 1.0221425983332662, - "grad_norm": 0.0007833954878151417, - "learning_rate": 0.00019999948608658032, - "loss": 46.0, - "step": 6347 - }, - { - "epoch": 1.0223036354120536, - "grad_norm": 0.00138769019395113, - "learning_rate": 0.00019999948592437313, - "loss": 46.0, - "step": 6348 - }, - { - "epoch": 1.022464672490841, - "grad_norm": 0.0010274132946506143, - "learning_rate": 0.00019999948576214033, - "loss": 46.0, - "step": 6349 - }, - { - "epoch": 1.0226257095696285, - "grad_norm": 0.00833053793758154, - "learning_rate": 0.00019999948559988194, - "loss": 46.0, - "step": 6350 - }, - { - "epoch": 1.0227867466484157, - "grad_norm": 0.0006872293306514621, - "learning_rate": 0.00019999948543759794, - "loss": 46.0, - "step": 6351 - }, - { - "epoch": 1.0229477837272032, - "grad_norm": 0.0022577079944312572, - "learning_rate": 0.00019999948527528834, - "loss": 46.0, - "step": 6352 - }, - { - "epoch": 1.0231088208059906, - "grad_norm": 0.000995549955405295, - "learning_rate": 0.0001999994851129532, - "loss": 46.0, - "step": 6353 - }, - { - "epoch": 1.0232698578847779, - "grad_norm": 0.0005734183941967785, - "learning_rate": 0.0001999994849505924, - "loss": 46.0, - "step": 6354 - }, - { - "epoch": 1.0234308949635653, - "grad_norm": 0.0009836237877607346, - "learning_rate": 0.00019999948478820602, - "loss": 46.0, - "step": 6355 - }, - { - "epoch": 1.0235919320423528, - "grad_norm": 0.001009430387057364, - "learning_rate": 0.00019999948462579408, - "loss": 46.0, - "step": 6356 - }, - { - "epoch": 1.0237529691211402, - "grad_norm": 0.0026525987777858973, - "learning_rate": 0.00019999948446335652, - "loss": 46.0, - "step": 6357 - }, - { - "epoch": 1.0239140061999275, - "grad_norm": 0.0013297498226165771, - "learning_rate": 0.00019999948430089338, - "loss": 46.0, - "step": 6358 - }, - { - "epoch": 1.024075043278715, - "grad_norm": 0.000306474685203284, - "learning_rate": 0.00019999948413840463, - "loss": 46.0, - "step": 6359 - }, - { - "epoch": 1.0242360803575024, - "grad_norm": 0.0015022518346086144, - "learning_rate": 0.00019999948397589028, - "loss": 46.0, - "step": 6360 - }, - { - "epoch": 1.0243971174362898, - "grad_norm": 0.0032586290035396814, - "learning_rate": 0.00019999948381335035, - "loss": 46.0, - "step": 6361 - }, - { - "epoch": 1.024558154515077, - "grad_norm": 0.0025659811217337847, - "learning_rate": 0.0001999994836507848, - "loss": 46.0, - "step": 6362 - }, - { - "epoch": 1.0247191915938645, - "grad_norm": 0.0005430930759757757, - "learning_rate": 0.0001999994834881937, - "loss": 46.0, - "step": 6363 - }, - { - "epoch": 1.024880228672652, - "grad_norm": 0.0007301949081011117, - "learning_rate": 0.00019999948332557696, - "loss": 46.0, - "step": 6364 - }, - { - "epoch": 1.0250412657514392, - "grad_norm": 0.004017373081296682, - "learning_rate": 0.00019999948316293465, - "loss": 46.0, - "step": 6365 - }, - { - "epoch": 1.0252023028302266, - "grad_norm": 0.0013297304976731539, - "learning_rate": 0.00019999948300026673, - "loss": 46.0, - "step": 6366 - }, - { - "epoch": 1.025363339909014, - "grad_norm": 0.00029139453545212746, - "learning_rate": 0.00019999948283757325, - "loss": 46.0, - "step": 6367 - }, - { - "epoch": 1.0255243769878015, - "grad_norm": 0.0018476119730621576, - "learning_rate": 0.00019999948267485413, - "loss": 46.0, - "step": 6368 - }, - { - "epoch": 1.0256854140665888, - "grad_norm": 0.0031040534377098083, - "learning_rate": 0.00019999948251210944, - "loss": 46.0, - "step": 6369 - }, - { - "epoch": 1.0258464511453762, - "grad_norm": 0.0005913755740039051, - "learning_rate": 0.00019999948234933918, - "loss": 46.0, - "step": 6370 - }, - { - "epoch": 1.0260074882241637, - "grad_norm": 0.010243549011647701, - "learning_rate": 0.00019999948218654326, - "loss": 46.0, - "step": 6371 - }, - { - "epoch": 1.026168525302951, - "grad_norm": 0.006522131618112326, - "learning_rate": 0.0001999994820237218, - "loss": 46.0, - "step": 6372 - }, - { - "epoch": 1.0263295623817383, - "grad_norm": 0.0024571893736720085, - "learning_rate": 0.0001999994818608747, - "loss": 46.0, - "step": 6373 - }, - { - "epoch": 1.0264905994605258, - "grad_norm": 0.0019137188792228699, - "learning_rate": 0.00019999948169800206, - "loss": 46.0, - "step": 6374 - }, - { - "epoch": 1.0266516365393132, - "grad_norm": 0.0012045350158587098, - "learning_rate": 0.0001999994815351038, - "loss": 46.0, - "step": 6375 - }, - { - "epoch": 1.0268126736181005, - "grad_norm": 0.002175415400415659, - "learning_rate": 0.00019999948137217993, - "loss": 46.0, - "step": 6376 - }, - { - "epoch": 1.026973710696888, - "grad_norm": 0.0003425965260248631, - "learning_rate": 0.00019999948120923047, - "loss": 46.0, - "step": 6377 - }, - { - "epoch": 1.0271347477756754, - "grad_norm": 0.00037628054269589484, - "learning_rate": 0.00019999948104625542, - "loss": 46.0, - "step": 6378 - }, - { - "epoch": 1.0272957848544628, - "grad_norm": 0.002756637055426836, - "learning_rate": 0.0001999994808832548, - "loss": 46.0, - "step": 6379 - }, - { - "epoch": 1.02745682193325, - "grad_norm": 0.0026547012384980917, - "learning_rate": 0.00019999948072022856, - "loss": 46.0, - "step": 6380 - }, - { - "epoch": 1.0276178590120375, - "grad_norm": 0.0008873838814906776, - "learning_rate": 0.00019999948055717673, - "loss": 46.0, - "step": 6381 - }, - { - "epoch": 1.027778896090825, - "grad_norm": 0.0028432304970920086, - "learning_rate": 0.0001999994803940993, - "loss": 46.0, - "step": 6382 - }, - { - "epoch": 1.0279399331696124, - "grad_norm": 0.003174504265189171, - "learning_rate": 0.0001999994802309963, - "loss": 46.0, - "step": 6383 - }, - { - "epoch": 1.0281009702483996, - "grad_norm": 0.0007440876215696335, - "learning_rate": 0.00019999948006786767, - "loss": 46.0, - "step": 6384 - }, - { - "epoch": 1.028262007327187, - "grad_norm": 0.0019496644381433725, - "learning_rate": 0.00019999947990471346, - "loss": 46.0, - "step": 6385 - }, - { - "epoch": 1.0284230444059745, - "grad_norm": 0.0023042848333716393, - "learning_rate": 0.00019999947974153366, - "loss": 46.0, - "step": 6386 - }, - { - "epoch": 1.0285840814847618, - "grad_norm": 0.0005255836294963956, - "learning_rate": 0.00019999947957832824, - "loss": 46.0, - "step": 6387 - }, - { - "epoch": 1.0287451185635492, - "grad_norm": 0.002195813460275531, - "learning_rate": 0.00019999947941509727, - "loss": 46.0, - "step": 6388 - }, - { - "epoch": 1.0289061556423367, - "grad_norm": 0.0016663081478327513, - "learning_rate": 0.00019999947925184065, - "loss": 46.0, - "step": 6389 - }, - { - "epoch": 1.0290671927211241, - "grad_norm": 0.0007405616925098002, - "learning_rate": 0.00019999947908855848, - "loss": 46.0, - "step": 6390 - }, - { - "epoch": 1.0292282297999114, - "grad_norm": 0.0011065138969570398, - "learning_rate": 0.00019999947892525072, - "loss": 46.0, - "step": 6391 - }, - { - "epoch": 1.0293892668786988, - "grad_norm": 0.001951077370904386, - "learning_rate": 0.00019999947876191734, - "loss": 46.0, - "step": 6392 - }, - { - "epoch": 1.0295503039574863, - "grad_norm": 0.004220605827867985, - "learning_rate": 0.00019999947859855837, - "loss": 46.0, - "step": 6393 - }, - { - "epoch": 1.0297113410362737, - "grad_norm": 0.0007138778455555439, - "learning_rate": 0.0001999994784351738, - "loss": 46.0, - "step": 6394 - }, - { - "epoch": 1.029872378115061, - "grad_norm": 0.0004966930719092488, - "learning_rate": 0.00019999947827176363, - "loss": 46.0, - "step": 6395 - }, - { - "epoch": 1.0300334151938484, - "grad_norm": 0.0006892001256346703, - "learning_rate": 0.0001999994781083279, - "loss": 46.0, - "step": 6396 - }, - { - "epoch": 1.0301944522726358, - "grad_norm": 0.0012400030391290784, - "learning_rate": 0.00019999947794486654, - "loss": 46.0, - "step": 6397 - }, - { - "epoch": 1.030355489351423, - "grad_norm": 0.0011087305610999465, - "learning_rate": 0.0001999994777813796, - "loss": 46.0, - "step": 6398 - }, - { - "epoch": 1.0305165264302105, - "grad_norm": 0.0029914244078099728, - "learning_rate": 0.00019999947761786707, - "loss": 46.0, - "step": 6399 - }, - { - "epoch": 1.030677563508998, - "grad_norm": 0.0016571154119446874, - "learning_rate": 0.00019999947745432894, - "loss": 46.0, - "step": 6400 - }, - { - "epoch": 1.0308386005877854, - "grad_norm": 0.0011288548121228814, - "learning_rate": 0.00019999947729076523, - "loss": 46.0, - "step": 6401 - }, - { - "epoch": 1.0309996376665727, - "grad_norm": 0.0012964156921952963, - "learning_rate": 0.0001999994771271759, - "loss": 46.0, - "step": 6402 - }, - { - "epoch": 1.03116067474536, - "grad_norm": 0.003449299605563283, - "learning_rate": 0.00019999947696356095, - "loss": 46.0, - "step": 6403 - }, - { - "epoch": 1.0313217118241476, - "grad_norm": 0.0014574660453945398, - "learning_rate": 0.00019999947679992048, - "loss": 46.0, - "step": 6404 - }, - { - "epoch": 1.0314827489029348, - "grad_norm": 0.003803375642746687, - "learning_rate": 0.00019999947663625438, - "loss": 46.0, - "step": 6405 - }, - { - "epoch": 1.0316437859817222, - "grad_norm": 0.0012264088727533817, - "learning_rate": 0.00019999947647256265, - "loss": 46.0, - "step": 6406 - }, - { - "epoch": 1.0318048230605097, - "grad_norm": 0.00090862310025841, - "learning_rate": 0.00019999947630884536, - "loss": 46.0, - "step": 6407 - }, - { - "epoch": 1.0319658601392971, - "grad_norm": 0.003105464158579707, - "learning_rate": 0.00019999947614510248, - "loss": 46.0, - "step": 6408 - }, - { - "epoch": 1.0321268972180844, - "grad_norm": 0.0008733617141842842, - "learning_rate": 0.00019999947598133399, - "loss": 46.0, - "step": 6409 - }, - { - "epoch": 1.0322879342968718, - "grad_norm": 0.0036914756055921316, - "learning_rate": 0.0001999994758175399, - "loss": 46.0, - "step": 6410 - }, - { - "epoch": 1.0324489713756593, - "grad_norm": 0.001088437158614397, - "learning_rate": 0.00019999947565372024, - "loss": 46.0, - "step": 6411 - }, - { - "epoch": 1.0326100084544467, - "grad_norm": 0.003957650158554316, - "learning_rate": 0.00019999947548987498, - "loss": 46.0, - "step": 6412 - }, - { - "epoch": 1.032771045533234, - "grad_norm": 0.0009474657708778977, - "learning_rate": 0.0001999994753260041, - "loss": 46.0, - "step": 6413 - }, - { - "epoch": 1.0329320826120214, - "grad_norm": 0.0010248093167319894, - "learning_rate": 0.00019999947516210765, - "loss": 46.0, - "step": 6414 - }, - { - "epoch": 1.0330931196908089, - "grad_norm": 0.002393402624875307, - "learning_rate": 0.0001999994749981856, - "loss": 46.0, - "step": 6415 - }, - { - "epoch": 1.0332541567695963, - "grad_norm": 0.0007063332595862448, - "learning_rate": 0.00019999947483423795, - "loss": 46.0, - "step": 6416 - }, - { - "epoch": 1.0334151938483835, - "grad_norm": 0.000694070418830961, - "learning_rate": 0.00019999947467026473, - "loss": 46.0, - "step": 6417 - }, - { - "epoch": 1.033576230927171, - "grad_norm": 0.003966480027884245, - "learning_rate": 0.00019999947450626587, - "loss": 46.0, - "step": 6418 - }, - { - "epoch": 1.0337372680059584, - "grad_norm": 0.002380999969318509, - "learning_rate": 0.00019999947434224145, - "loss": 46.0, - "step": 6419 - }, - { - "epoch": 1.0338983050847457, - "grad_norm": 0.000830343400593847, - "learning_rate": 0.00019999947417819142, - "loss": 46.0, - "step": 6420 - }, - { - "epoch": 1.0340593421635331, - "grad_norm": 0.008824542164802551, - "learning_rate": 0.0001999994740141158, - "loss": 46.0, - "step": 6421 - }, - { - "epoch": 1.0342203792423206, - "grad_norm": 0.002761711599305272, - "learning_rate": 0.00019999947385001459, - "loss": 46.0, - "step": 6422 - }, - { - "epoch": 1.034381416321108, - "grad_norm": 0.00043466774513944983, - "learning_rate": 0.0001999994736858878, - "loss": 46.0, - "step": 6423 - }, - { - "epoch": 1.0345424533998953, - "grad_norm": 0.0008645984344184399, - "learning_rate": 0.00019999947352173538, - "loss": 46.0, - "step": 6424 - }, - { - "epoch": 1.0347034904786827, - "grad_norm": 0.0027858305256813765, - "learning_rate": 0.00019999947335755738, - "loss": 46.0, - "step": 6425 - }, - { - "epoch": 1.0348645275574702, - "grad_norm": 0.000993607915006578, - "learning_rate": 0.0001999994731933538, - "loss": 46.0, - "step": 6426 - }, - { - "epoch": 1.0350255646362574, - "grad_norm": 0.0009302391554228961, - "learning_rate": 0.00019999947302912457, - "loss": 46.0, - "step": 6427 - }, - { - "epoch": 1.0351866017150448, - "grad_norm": 0.0005664990749210119, - "learning_rate": 0.00019999947286486979, - "loss": 46.0, - "step": 6428 - }, - { - "epoch": 1.0353476387938323, - "grad_norm": 0.003783769905567169, - "learning_rate": 0.00019999947270058944, - "loss": 46.0, - "step": 6429 - }, - { - "epoch": 1.0355086758726197, - "grad_norm": 0.0017254615668207407, - "learning_rate": 0.00019999947253628345, - "loss": 46.0, - "step": 6430 - }, - { - "epoch": 1.035669712951407, - "grad_norm": 0.00509891239926219, - "learning_rate": 0.00019999947237195188, - "loss": 46.0, - "step": 6431 - }, - { - "epoch": 1.0358307500301944, - "grad_norm": 0.0012198666809126735, - "learning_rate": 0.00019999947220759472, - "loss": 46.0, - "step": 6432 - }, - { - "epoch": 1.0359917871089819, - "grad_norm": 0.004192420747131109, - "learning_rate": 0.00019999947204321197, - "loss": 46.0, - "step": 6433 - }, - { - "epoch": 1.0361528241877693, - "grad_norm": 0.0004590976459439844, - "learning_rate": 0.00019999947187880363, - "loss": 46.0, - "step": 6434 - }, - { - "epoch": 1.0363138612665566, - "grad_norm": 0.0010390236275270581, - "learning_rate": 0.00019999947171436965, - "loss": 46.0, - "step": 6435 - }, - { - "epoch": 1.036474898345344, - "grad_norm": 0.00484297564253211, - "learning_rate": 0.00019999947154991014, - "loss": 46.0, - "step": 6436 - }, - { - "epoch": 1.0366359354241315, - "grad_norm": 0.0010238185059279203, - "learning_rate": 0.00019999947138542502, - "loss": 46.0, - "step": 6437 - }, - { - "epoch": 1.0367969725029187, - "grad_norm": 0.00038492766907438636, - "learning_rate": 0.00019999947122091425, - "loss": 46.0, - "step": 6438 - }, - { - "epoch": 1.0369580095817061, - "grad_norm": 0.001970977522432804, - "learning_rate": 0.00019999947105637795, - "loss": 46.0, - "step": 6439 - }, - { - "epoch": 1.0371190466604936, - "grad_norm": 0.0012383307330310345, - "learning_rate": 0.00019999947089181604, - "loss": 46.0, - "step": 6440 - }, - { - "epoch": 1.037280083739281, - "grad_norm": 0.002113117603585124, - "learning_rate": 0.00019999947072722848, - "loss": 46.0, - "step": 6441 - }, - { - "epoch": 1.0374411208180683, - "grad_norm": 0.0011089416220784187, - "learning_rate": 0.0001999994705626154, - "loss": 46.0, - "step": 6442 - }, - { - "epoch": 1.0376021578968557, - "grad_norm": 0.004895965103060007, - "learning_rate": 0.0001999994703979767, - "loss": 46.0, - "step": 6443 - }, - { - "epoch": 1.0377631949756432, - "grad_norm": 0.003429856151342392, - "learning_rate": 0.00019999947023331238, - "loss": 46.0, - "step": 6444 - }, - { - "epoch": 1.0379242320544306, - "grad_norm": 0.0034076583106070757, - "learning_rate": 0.0001999994700686225, - "loss": 46.0, - "step": 6445 - }, - { - "epoch": 1.0380852691332179, - "grad_norm": 0.0024926767218858004, - "learning_rate": 0.000199999469903907, - "loss": 46.0, - "step": 6446 - }, - { - "epoch": 1.0382463062120053, - "grad_norm": 0.0011860548984259367, - "learning_rate": 0.00019999946973916593, - "loss": 46.0, - "step": 6447 - }, - { - "epoch": 1.0384073432907928, - "grad_norm": 0.0075462814420461655, - "learning_rate": 0.00019999946957439924, - "loss": 46.0, - "step": 6448 - }, - { - "epoch": 1.03856838036958, - "grad_norm": 0.0015892903320491314, - "learning_rate": 0.00019999946940960696, - "loss": 46.0, - "step": 6449 - }, - { - "epoch": 1.0387294174483674, - "grad_norm": 0.00043625105172395706, - "learning_rate": 0.00019999946924478912, - "loss": 46.0, - "step": 6450 - }, - { - "epoch": 1.038890454527155, - "grad_norm": 0.0007666315650567412, - "learning_rate": 0.00019999946907994564, - "loss": 46.0, - "step": 6451 - }, - { - "epoch": 1.0390514916059423, - "grad_norm": 0.0015255830949172378, - "learning_rate": 0.0001999994689150766, - "loss": 46.0, - "step": 6452 - }, - { - "epoch": 1.0392125286847296, - "grad_norm": 0.003198272315785289, - "learning_rate": 0.00019999946875018192, - "loss": 46.0, - "step": 6453 - }, - { - "epoch": 1.039373565763517, - "grad_norm": 0.0020768484100699425, - "learning_rate": 0.00019999946858526168, - "loss": 46.0, - "step": 6454 - }, - { - "epoch": 1.0395346028423045, - "grad_norm": 0.0009525313507765532, - "learning_rate": 0.00019999946842031585, - "loss": 46.0, - "step": 6455 - }, - { - "epoch": 1.039695639921092, - "grad_norm": 0.004989515524357557, - "learning_rate": 0.0001999994682553444, - "loss": 46.0, - "step": 6456 - }, - { - "epoch": 1.0398566769998792, - "grad_norm": 0.0013025598600506783, - "learning_rate": 0.00019999946809034737, - "loss": 46.0, - "step": 6457 - }, - { - "epoch": 1.0400177140786666, - "grad_norm": 0.000817545922473073, - "learning_rate": 0.00019999946792532476, - "loss": 46.0, - "step": 6458 - }, - { - "epoch": 1.040178751157454, - "grad_norm": 0.0007511943113058805, - "learning_rate": 0.00019999946776027655, - "loss": 46.0, - "step": 6459 - }, - { - "epoch": 1.0403397882362413, - "grad_norm": 0.00288000819273293, - "learning_rate": 0.0001999994675952027, - "loss": 46.0, - "step": 6460 - }, - { - "epoch": 1.0405008253150287, - "grad_norm": 0.0014183727325871587, - "learning_rate": 0.0001999994674301033, - "loss": 46.0, - "step": 6461 - }, - { - "epoch": 1.0406618623938162, - "grad_norm": 0.0005618046852760017, - "learning_rate": 0.0001999994672649783, - "loss": 46.0, - "step": 6462 - }, - { - "epoch": 1.0408228994726036, - "grad_norm": 0.0005620548618026078, - "learning_rate": 0.0001999994670998277, - "loss": 46.0, - "step": 6463 - }, - { - "epoch": 1.0409839365513909, - "grad_norm": 0.0006562886410392821, - "learning_rate": 0.00019999946693465153, - "loss": 46.0, - "step": 6464 - }, - { - "epoch": 1.0411449736301783, - "grad_norm": 0.0034100054763257504, - "learning_rate": 0.00019999946676944972, - "loss": 46.0, - "step": 6465 - }, - { - "epoch": 1.0413060107089658, - "grad_norm": 0.0016375224804505706, - "learning_rate": 0.00019999946660422232, - "loss": 46.0, - "step": 6466 - }, - { - "epoch": 1.0414670477877532, - "grad_norm": 0.0009322431869804859, - "learning_rate": 0.00019999946643896937, - "loss": 46.0, - "step": 6467 - }, - { - "epoch": 1.0416280848665405, - "grad_norm": 0.0007160952081903815, - "learning_rate": 0.00019999946627369077, - "loss": 46.0, - "step": 6468 - }, - { - "epoch": 1.041789121945328, - "grad_norm": 0.0005553049268200994, - "learning_rate": 0.00019999946610838664, - "loss": 46.0, - "step": 6469 - }, - { - "epoch": 1.0419501590241154, - "grad_norm": 0.0008507245220243931, - "learning_rate": 0.00019999946594305686, - "loss": 46.0, - "step": 6470 - }, - { - "epoch": 1.0421111961029026, - "grad_norm": 0.0011316745076328516, - "learning_rate": 0.0001999994657777015, - "loss": 46.0, - "step": 6471 - }, - { - "epoch": 1.04227223318169, - "grad_norm": 0.0013225803850218654, - "learning_rate": 0.00019999946561232056, - "loss": 46.0, - "step": 6472 - }, - { - "epoch": 1.0424332702604775, - "grad_norm": 0.009100942872464657, - "learning_rate": 0.000199999465446914, - "loss": 46.0, - "step": 6473 - }, - { - "epoch": 1.042594307339265, - "grad_norm": 0.0006715467316098511, - "learning_rate": 0.00019999946528148185, - "loss": 46.0, - "step": 6474 - }, - { - "epoch": 1.0427553444180522, - "grad_norm": 0.008572102524340153, - "learning_rate": 0.0001999994651160241, - "loss": 46.0, - "step": 6475 - }, - { - "epoch": 1.0429163814968396, - "grad_norm": 0.0006895264377817512, - "learning_rate": 0.0001999994649505408, - "loss": 46.0, - "step": 6476 - }, - { - "epoch": 1.043077418575627, - "grad_norm": 0.003065183525905013, - "learning_rate": 0.00019999946478503185, - "loss": 46.0, - "step": 6477 - }, - { - "epoch": 1.0432384556544145, - "grad_norm": 0.0008019735105335712, - "learning_rate": 0.00019999946461949736, - "loss": 46.0, - "step": 6478 - }, - { - "epoch": 1.0433994927332018, - "grad_norm": 0.0020395226310938597, - "learning_rate": 0.00019999946445393722, - "loss": 46.0, - "step": 6479 - }, - { - "epoch": 1.0435605298119892, - "grad_norm": 0.0023664236068725586, - "learning_rate": 0.00019999946428835152, - "loss": 46.0, - "step": 6480 - }, - { - "epoch": 1.0437215668907767, - "grad_norm": 0.0007842747145332396, - "learning_rate": 0.00019999946412274023, - "loss": 46.0, - "step": 6481 - }, - { - "epoch": 1.043882603969564, - "grad_norm": 0.0040185400284826756, - "learning_rate": 0.0001999994639571033, - "loss": 46.0, - "step": 6482 - }, - { - "epoch": 1.0440436410483513, - "grad_norm": 0.0007887196261435747, - "learning_rate": 0.00019999946379144082, - "loss": 46.0, - "step": 6483 - }, - { - "epoch": 1.0442046781271388, - "grad_norm": 0.0006416417891159654, - "learning_rate": 0.00019999946362575272, - "loss": 46.0, - "step": 6484 - }, - { - "epoch": 1.0443657152059262, - "grad_norm": 0.0019829971715807915, - "learning_rate": 0.00019999946346003906, - "loss": 46.0, - "step": 6485 - }, - { - "epoch": 1.0445267522847135, - "grad_norm": 0.0009399994742125273, - "learning_rate": 0.00019999946329429978, - "loss": 46.0, - "step": 6486 - }, - { - "epoch": 1.044687789363501, - "grad_norm": 0.0013273813528940082, - "learning_rate": 0.0001999994631285349, - "loss": 46.0, - "step": 6487 - }, - { - "epoch": 1.0448488264422884, - "grad_norm": 0.0029128033202141523, - "learning_rate": 0.00019999946296274444, - "loss": 46.0, - "step": 6488 - }, - { - "epoch": 1.0450098635210758, - "grad_norm": 0.0012755950447171926, - "learning_rate": 0.00019999946279692838, - "loss": 46.0, - "step": 6489 - }, - { - "epoch": 1.045170900599863, - "grad_norm": 0.0035470547154545784, - "learning_rate": 0.00019999946263108673, - "loss": 46.0, - "step": 6490 - }, - { - "epoch": 1.0453319376786505, - "grad_norm": 0.0008489295141771436, - "learning_rate": 0.00019999946246521946, - "loss": 46.0, - "step": 6491 - }, - { - "epoch": 1.045492974757438, - "grad_norm": 0.0044787791557610035, - "learning_rate": 0.00019999946229932664, - "loss": 46.0, - "step": 6492 - }, - { - "epoch": 1.0456540118362252, - "grad_norm": 0.0030958594288676977, - "learning_rate": 0.00019999946213340817, - "loss": 46.0, - "step": 6493 - }, - { - "epoch": 1.0458150489150126, - "grad_norm": 0.0024783099070191383, - "learning_rate": 0.00019999946196746414, - "loss": 46.0, - "step": 6494 - }, - { - "epoch": 1.0459760859938, - "grad_norm": 0.001907074823975563, - "learning_rate": 0.00019999946180149453, - "loss": 46.0, - "step": 6495 - }, - { - "epoch": 1.0461371230725875, - "grad_norm": 0.001247995998710394, - "learning_rate": 0.0001999994616354993, - "loss": 46.0, - "step": 6496 - }, - { - "epoch": 1.0462981601513748, - "grad_norm": 0.00138873013202101, - "learning_rate": 0.00019999946146947845, - "loss": 46.0, - "step": 6497 - }, - { - "epoch": 1.0464591972301622, - "grad_norm": 0.0014799445634707808, - "learning_rate": 0.00019999946130343205, - "loss": 46.0, - "step": 6498 - }, - { - "epoch": 1.0466202343089497, - "grad_norm": 0.0005235079443082213, - "learning_rate": 0.00019999946113736003, - "loss": 46.0, - "step": 6499 - }, - { - "epoch": 1.0467812713877371, - "grad_norm": 0.0013926014071330428, - "learning_rate": 0.00019999946097126246, - "loss": 46.0, - "step": 6500 - }, - { - "epoch": 1.0469423084665244, - "grad_norm": 0.002305097645148635, - "learning_rate": 0.00019999946080513924, - "loss": 46.0, - "step": 6501 - }, - { - "epoch": 1.0471033455453118, - "grad_norm": 0.0009211329161189497, - "learning_rate": 0.00019999946063899043, - "loss": 46.0, - "step": 6502 - }, - { - "epoch": 1.0472643826240993, - "grad_norm": 0.0018815496005117893, - "learning_rate": 0.00019999946047281607, - "loss": 46.0, - "step": 6503 - }, - { - "epoch": 1.0474254197028865, - "grad_norm": 0.0011471969773992896, - "learning_rate": 0.00019999946030661606, - "loss": 46.0, - "step": 6504 - }, - { - "epoch": 1.047586456781674, - "grad_norm": 0.0037697593215852976, - "learning_rate": 0.0001999994601403905, - "loss": 46.0, - "step": 6505 - }, - { - "epoch": 1.0477474938604614, - "grad_norm": 0.0010831323452293873, - "learning_rate": 0.0001999994599741393, - "loss": 46.0, - "step": 6506 - }, - { - "epoch": 1.0479085309392489, - "grad_norm": 0.0006948586087673903, - "learning_rate": 0.00019999945980786254, - "loss": 46.0, - "step": 6507 - }, - { - "epoch": 1.048069568018036, - "grad_norm": 0.0007108302670530975, - "learning_rate": 0.00019999945964156019, - "loss": 46.0, - "step": 6508 - }, - { - "epoch": 1.0482306050968235, - "grad_norm": 0.001614067587070167, - "learning_rate": 0.00019999945947523222, - "loss": 46.0, - "step": 6509 - }, - { - "epoch": 1.048391642175611, - "grad_norm": 0.001632467843592167, - "learning_rate": 0.00019999945930887869, - "loss": 46.0, - "step": 6510 - }, - { - "epoch": 1.0485526792543984, - "grad_norm": 0.0008568188641220331, - "learning_rate": 0.00019999945914249954, - "loss": 46.0, - "step": 6511 - }, - { - "epoch": 1.0487137163331857, - "grad_norm": 0.0009465326438657939, - "learning_rate": 0.0001999994589760948, - "loss": 46.0, - "step": 6512 - }, - { - "epoch": 1.0488747534119731, - "grad_norm": 0.003011822234839201, - "learning_rate": 0.00019999945880966446, - "loss": 46.0, - "step": 6513 - }, - { - "epoch": 1.0490357904907606, - "grad_norm": 0.0006395489326678216, - "learning_rate": 0.00019999945864320853, - "loss": 46.0, - "step": 6514 - }, - { - "epoch": 1.0491968275695478, - "grad_norm": 0.0012729649897664785, - "learning_rate": 0.000199999458476727, - "loss": 46.0, - "step": 6515 - }, - { - "epoch": 1.0493578646483352, - "grad_norm": 0.0007966042612679303, - "learning_rate": 0.0001999994583102199, - "loss": 46.0, - "step": 6516 - }, - { - "epoch": 1.0495189017271227, - "grad_norm": 0.0023721344769001007, - "learning_rate": 0.00019999945814368718, - "loss": 46.0, - "step": 6517 - }, - { - "epoch": 1.0496799388059102, - "grad_norm": 0.0023809236008673906, - "learning_rate": 0.00019999945797712884, - "loss": 46.0, - "step": 6518 - }, - { - "epoch": 1.0498409758846974, - "grad_norm": 0.002216779161244631, - "learning_rate": 0.00019999945781054495, - "loss": 46.0, - "step": 6519 - }, - { - "epoch": 1.0500020129634848, - "grad_norm": 0.000763561693020165, - "learning_rate": 0.00019999945764393546, - "loss": 46.0, - "step": 6520 - }, - { - "epoch": 1.0501630500422723, - "grad_norm": 0.0009227501577697694, - "learning_rate": 0.00019999945747730037, - "loss": 46.0, - "step": 6521 - }, - { - "epoch": 1.0503240871210595, - "grad_norm": 0.0016767920460551977, - "learning_rate": 0.00019999945731063968, - "loss": 46.0, - "step": 6522 - }, - { - "epoch": 1.050485124199847, - "grad_norm": 0.0015196447493508458, - "learning_rate": 0.0001999994571439534, - "loss": 46.0, - "step": 6523 - }, - { - "epoch": 1.0506461612786344, - "grad_norm": 0.00454733707010746, - "learning_rate": 0.00019999945697724152, - "loss": 46.0, - "step": 6524 - }, - { - "epoch": 1.0508071983574219, - "grad_norm": 0.0025887833908200264, - "learning_rate": 0.00019999945681050405, - "loss": 46.0, - "step": 6525 - }, - { - "epoch": 1.050968235436209, - "grad_norm": 0.0030436452943831682, - "learning_rate": 0.000199999456643741, - "loss": 46.0, - "step": 6526 - }, - { - "epoch": 1.0511292725149965, - "grad_norm": 0.001121029956266284, - "learning_rate": 0.00019999945647695234, - "loss": 46.0, - "step": 6527 - }, - { - "epoch": 1.051290309593784, - "grad_norm": 0.004655222874134779, - "learning_rate": 0.00019999945631013805, - "loss": 46.0, - "step": 6528 - }, - { - "epoch": 1.0514513466725715, - "grad_norm": 0.0009520708117634058, - "learning_rate": 0.0001999994561432982, - "loss": 46.0, - "step": 6529 - }, - { - "epoch": 1.0516123837513587, - "grad_norm": 0.0004847474046982825, - "learning_rate": 0.00019999945597643274, - "loss": 46.0, - "step": 6530 - }, - { - "epoch": 1.0517734208301461, - "grad_norm": 0.003370928578078747, - "learning_rate": 0.00019999945580954172, - "loss": 46.0, - "step": 6531 - }, - { - "epoch": 1.0519344579089336, - "grad_norm": 0.0007878663600422442, - "learning_rate": 0.00019999945564262508, - "loss": 46.0, - "step": 6532 - }, - { - "epoch": 1.052095494987721, - "grad_norm": 0.0005941825802437961, - "learning_rate": 0.00019999945547568285, - "loss": 46.0, - "step": 6533 - }, - { - "epoch": 1.0522565320665083, - "grad_norm": 0.001493097865022719, - "learning_rate": 0.00019999945530871501, - "loss": 46.0, - "step": 6534 - }, - { - "epoch": 1.0524175691452957, - "grad_norm": 0.003453702200204134, - "learning_rate": 0.00019999945514172161, - "loss": 46.0, - "step": 6535 - }, - { - "epoch": 1.0525786062240832, - "grad_norm": 0.0004432977002579719, - "learning_rate": 0.00019999945497470257, - "loss": 46.0, - "step": 6536 - }, - { - "epoch": 1.0527396433028704, - "grad_norm": 0.004447145387530327, - "learning_rate": 0.00019999945480765797, - "loss": 46.0, - "step": 6537 - }, - { - "epoch": 1.0529006803816578, - "grad_norm": 0.004706928972154856, - "learning_rate": 0.00019999945464058778, - "loss": 46.0, - "step": 6538 - }, - { - "epoch": 1.0530617174604453, - "grad_norm": 0.0007073060842230916, - "learning_rate": 0.00019999945447349198, - "loss": 46.0, - "step": 6539 - }, - { - "epoch": 1.0532227545392328, - "grad_norm": 0.001652634353376925, - "learning_rate": 0.00019999945430637057, - "loss": 46.0, - "step": 6540 - }, - { - "epoch": 1.05338379161802, - "grad_norm": 0.0024723096285015345, - "learning_rate": 0.00019999945413922362, - "loss": 46.0, - "step": 6541 - }, - { - "epoch": 1.0535448286968074, - "grad_norm": 0.007637431845068932, - "learning_rate": 0.00019999945397205103, - "loss": 46.0, - "step": 6542 - }, - { - "epoch": 1.0537058657755949, - "grad_norm": 0.005236849654465914, - "learning_rate": 0.00019999945380485285, - "loss": 46.0, - "step": 6543 - }, - { - "epoch": 1.0538669028543821, - "grad_norm": 0.0006668042042292655, - "learning_rate": 0.00019999945363762906, - "loss": 46.0, - "step": 6544 - }, - { - "epoch": 1.0540279399331696, - "grad_norm": 0.0012896248372271657, - "learning_rate": 0.0001999994534703797, - "loss": 46.0, - "step": 6545 - }, - { - "epoch": 1.054188977011957, - "grad_norm": 0.001372764934785664, - "learning_rate": 0.00019999945330310474, - "loss": 46.0, - "step": 6546 - }, - { - "epoch": 1.0543500140907445, - "grad_norm": 0.0012557193404063582, - "learning_rate": 0.00019999945313580418, - "loss": 46.0, - "step": 6547 - }, - { - "epoch": 1.0545110511695317, - "grad_norm": 0.0017768212128430605, - "learning_rate": 0.00019999945296847801, - "loss": 46.0, - "step": 6548 - }, - { - "epoch": 1.0546720882483192, - "grad_norm": 0.002252156613394618, - "learning_rate": 0.00019999945280112629, - "loss": 46.0, - "step": 6549 - }, - { - "epoch": 1.0548331253271066, - "grad_norm": 0.002895511919632554, - "learning_rate": 0.00019999945263374892, - "loss": 46.0, - "step": 6550 - }, - { - "epoch": 1.054994162405894, - "grad_norm": 0.0017416340997442603, - "learning_rate": 0.000199999452466346, - "loss": 46.0, - "step": 6551 - }, - { - "epoch": 1.0551551994846813, - "grad_norm": 0.0004678528930526227, - "learning_rate": 0.00019999945229891747, - "loss": 46.0, - "step": 6552 - }, - { - "epoch": 1.0553162365634687, - "grad_norm": 0.0014010531594976783, - "learning_rate": 0.00019999945213146334, - "loss": 46.0, - "step": 6553 - }, - { - "epoch": 1.0554772736422562, - "grad_norm": 0.00083293387433514, - "learning_rate": 0.00019999945196398362, - "loss": 46.0, - "step": 6554 - }, - { - "epoch": 1.0556383107210434, - "grad_norm": 0.002613247139379382, - "learning_rate": 0.00019999945179647832, - "loss": 46.0, - "step": 6555 - }, - { - "epoch": 1.0557993477998309, - "grad_norm": 0.0029394791927188635, - "learning_rate": 0.0001999994516289474, - "loss": 46.0, - "step": 6556 - }, - { - "epoch": 1.0559603848786183, - "grad_norm": 0.0015253908932209015, - "learning_rate": 0.0001999994514613909, - "loss": 46.0, - "step": 6557 - }, - { - "epoch": 1.0561214219574058, - "grad_norm": 0.001834549126215279, - "learning_rate": 0.00019999945129380882, - "loss": 46.0, - "step": 6558 - }, - { - "epoch": 1.056282459036193, - "grad_norm": 0.003350884886458516, - "learning_rate": 0.0001999994511262011, - "loss": 46.0, - "step": 6559 - }, - { - "epoch": 1.0564434961149805, - "grad_norm": 0.0011456571519374847, - "learning_rate": 0.00019999945095856782, - "loss": 46.0, - "step": 6560 - }, - { - "epoch": 1.056604533193768, - "grad_norm": 0.0008790491847321391, - "learning_rate": 0.00019999945079090894, - "loss": 46.0, - "step": 6561 - }, - { - "epoch": 1.0567655702725554, - "grad_norm": 0.0011808705748990178, - "learning_rate": 0.00019999945062322447, - "loss": 46.0, - "step": 6562 - }, - { - "epoch": 1.0569266073513426, - "grad_norm": 0.0038276398554444313, - "learning_rate": 0.0001999994504555144, - "loss": 46.0, - "step": 6563 - }, - { - "epoch": 1.05708764443013, - "grad_norm": 0.003473035292699933, - "learning_rate": 0.0001999994502877787, - "loss": 46.0, - "step": 6564 - }, - { - "epoch": 1.0572486815089175, - "grad_norm": 0.001024050754494965, - "learning_rate": 0.00019999945012001745, - "loss": 46.0, - "step": 6565 - }, - { - "epoch": 1.0574097185877047, - "grad_norm": 0.0014409383293241262, - "learning_rate": 0.0001999994499522306, - "loss": 46.0, - "step": 6566 - }, - { - "epoch": 1.0575707556664922, - "grad_norm": 0.0012544054770842195, - "learning_rate": 0.00019999944978441815, - "loss": 46.0, - "step": 6567 - }, - { - "epoch": 1.0577317927452796, - "grad_norm": 0.003618619404733181, - "learning_rate": 0.0001999994496165801, - "loss": 46.0, - "step": 6568 - }, - { - "epoch": 1.057892829824067, - "grad_norm": 0.001071578823029995, - "learning_rate": 0.00019999944944871646, - "loss": 46.0, - "step": 6569 - }, - { - "epoch": 1.0580538669028543, - "grad_norm": 0.0038524705450981855, - "learning_rate": 0.00019999944928082724, - "loss": 46.0, - "step": 6570 - }, - { - "epoch": 1.0582149039816418, - "grad_norm": 0.0068861437030136585, - "learning_rate": 0.0001999994491129124, - "loss": 46.0, - "step": 6571 - }, - { - "epoch": 1.0583759410604292, - "grad_norm": 0.007640148047357798, - "learning_rate": 0.00019999944894497199, - "loss": 46.0, - "step": 6572 - }, - { - "epoch": 1.0585369781392167, - "grad_norm": 0.004169008228927851, - "learning_rate": 0.00019999944877700595, - "loss": 46.0, - "step": 6573 - }, - { - "epoch": 1.0586980152180039, - "grad_norm": 0.0015938846627250314, - "learning_rate": 0.00019999944860901435, - "loss": 46.0, - "step": 6574 - }, - { - "epoch": 1.0588590522967913, - "grad_norm": 0.0011657922295853496, - "learning_rate": 0.00019999944844099714, - "loss": 46.0, - "step": 6575 - }, - { - "epoch": 1.0590200893755788, - "grad_norm": 0.0004942793166264892, - "learning_rate": 0.00019999944827295434, - "loss": 46.0, - "step": 6576 - }, - { - "epoch": 1.059181126454366, - "grad_norm": 0.0010235768277198076, - "learning_rate": 0.00019999944810488593, - "loss": 46.0, - "step": 6577 - }, - { - "epoch": 1.0593421635331535, - "grad_norm": 0.000977733638137579, - "learning_rate": 0.00019999944793679193, - "loss": 46.0, - "step": 6578 - }, - { - "epoch": 1.059503200611941, - "grad_norm": 0.0009617151226848364, - "learning_rate": 0.00019999944776867237, - "loss": 46.0, - "step": 6579 - }, - { - "epoch": 1.0596642376907284, - "grad_norm": 0.001249989727512002, - "learning_rate": 0.00019999944760052717, - "loss": 46.0, - "step": 6580 - }, - { - "epoch": 1.0598252747695156, - "grad_norm": 0.005666469223797321, - "learning_rate": 0.00019999944743235638, - "loss": 46.0, - "step": 6581 - }, - { - "epoch": 1.059986311848303, - "grad_norm": 0.001946413074620068, - "learning_rate": 0.00019999944726416003, - "loss": 46.0, - "step": 6582 - }, - { - "epoch": 1.0601473489270905, - "grad_norm": 0.004809623118489981, - "learning_rate": 0.00019999944709593807, - "loss": 46.0, - "step": 6583 - }, - { - "epoch": 1.060308386005878, - "grad_norm": 0.005720277316868305, - "learning_rate": 0.00019999944692769052, - "loss": 46.0, - "step": 6584 - }, - { - "epoch": 1.0604694230846652, - "grad_norm": 0.005022967234253883, - "learning_rate": 0.00019999944675941732, - "loss": 46.0, - "step": 6585 - }, - { - "epoch": 1.0606304601634526, - "grad_norm": 0.004009624943137169, - "learning_rate": 0.00019999944659111857, - "loss": 46.0, - "step": 6586 - }, - { - "epoch": 1.06079149724224, - "grad_norm": 0.0013781647430732846, - "learning_rate": 0.00019999944642279423, - "loss": 46.0, - "step": 6587 - }, - { - "epoch": 1.0609525343210273, - "grad_norm": 0.000837072788272053, - "learning_rate": 0.00019999944625444428, - "loss": 46.0, - "step": 6588 - }, - { - "epoch": 1.0611135713998148, - "grad_norm": 0.001662730472162366, - "learning_rate": 0.00019999944608606877, - "loss": 46.0, - "step": 6589 - }, - { - "epoch": 1.0612746084786022, - "grad_norm": 0.0010261985007673502, - "learning_rate": 0.00019999944591766764, - "loss": 46.0, - "step": 6590 - }, - { - "epoch": 1.0614356455573897, - "grad_norm": 0.0037187645211815834, - "learning_rate": 0.0001999994457492409, - "loss": 46.0, - "step": 6591 - }, - { - "epoch": 1.061596682636177, - "grad_norm": 0.00648907758295536, - "learning_rate": 0.00019999944558078857, - "loss": 46.0, - "step": 6592 - }, - { - "epoch": 1.0617577197149644, - "grad_norm": 0.003581485478207469, - "learning_rate": 0.00019999944541231068, - "loss": 46.0, - "step": 6593 - }, - { - "epoch": 1.0619187567937518, - "grad_norm": 0.0022922155912965536, - "learning_rate": 0.00019999944524380715, - "loss": 46.0, - "step": 6594 - }, - { - "epoch": 1.0620797938725393, - "grad_norm": 0.002288256539031863, - "learning_rate": 0.00019999944507527806, - "loss": 46.0, - "step": 6595 - }, - { - "epoch": 1.0622408309513265, - "grad_norm": 0.002190555213019252, - "learning_rate": 0.00019999944490672338, - "loss": 46.0, - "step": 6596 - }, - { - "epoch": 1.062401868030114, - "grad_norm": 0.0005871456232853234, - "learning_rate": 0.00019999944473814306, - "loss": 46.0, - "step": 6597 - }, - { - "epoch": 1.0625629051089014, - "grad_norm": 0.0016827057115733624, - "learning_rate": 0.00019999944456953718, - "loss": 46.0, - "step": 6598 - }, - { - "epoch": 1.0627239421876886, - "grad_norm": 0.00044768163934350014, - "learning_rate": 0.0001999994444009057, - "loss": 46.0, - "step": 6599 - }, - { - "epoch": 1.062884979266476, - "grad_norm": 0.0016417003935202956, - "learning_rate": 0.00019999944423224864, - "loss": 46.0, - "step": 6600 - }, - { - "epoch": 1.0630460163452635, - "grad_norm": 0.0014568604528903961, - "learning_rate": 0.00019999944406356597, - "loss": 46.0, - "step": 6601 - }, - { - "epoch": 1.063207053424051, - "grad_norm": 0.0007451761630363762, - "learning_rate": 0.0001999994438948577, - "loss": 46.0, - "step": 6602 - }, - { - "epoch": 1.0633680905028382, - "grad_norm": 0.00037166790571063757, - "learning_rate": 0.00019999944372612382, - "loss": 46.0, - "step": 6603 - }, - { - "epoch": 1.0635291275816257, - "grad_norm": 0.007840367034077644, - "learning_rate": 0.00019999944355736436, - "loss": 46.0, - "step": 6604 - }, - { - "epoch": 1.063690164660413, - "grad_norm": 0.0015667991247028112, - "learning_rate": 0.00019999944338857932, - "loss": 46.0, - "step": 6605 - }, - { - "epoch": 1.0638512017392006, - "grad_norm": 0.0016182404942810535, - "learning_rate": 0.00019999944321976866, - "loss": 46.0, - "step": 6606 - }, - { - "epoch": 1.0640122388179878, - "grad_norm": 0.0012526375940069556, - "learning_rate": 0.00019999944305093244, - "loss": 46.0, - "step": 6607 - }, - { - "epoch": 1.0641732758967752, - "grad_norm": 0.003042567288503051, - "learning_rate": 0.0001999994428820706, - "loss": 46.0, - "step": 6608 - }, - { - "epoch": 1.0643343129755627, - "grad_norm": 0.0016685795271769166, - "learning_rate": 0.0001999994427131832, - "loss": 46.0, - "step": 6609 - }, - { - "epoch": 1.06449535005435, - "grad_norm": 0.0008374441531486809, - "learning_rate": 0.00019999944254427013, - "loss": 46.0, - "step": 6610 - }, - { - "epoch": 1.0646563871331374, - "grad_norm": 0.0019776904955506325, - "learning_rate": 0.00019999944237533154, - "loss": 46.0, - "step": 6611 - }, - { - "epoch": 1.0648174242119248, - "grad_norm": 0.001994943479076028, - "learning_rate": 0.0001999994422063673, - "loss": 46.0, - "step": 6612 - }, - { - "epoch": 1.0649784612907123, - "grad_norm": 0.0004205632722005248, - "learning_rate": 0.0001999994420373775, - "loss": 46.0, - "step": 6613 - }, - { - "epoch": 1.0651394983694995, - "grad_norm": 0.004561276640743017, - "learning_rate": 0.0001999994418683621, - "loss": 46.0, - "step": 6614 - }, - { - "epoch": 1.065300535448287, - "grad_norm": 0.0008837788482196629, - "learning_rate": 0.0001999994416993211, - "loss": 46.0, - "step": 6615 - }, - { - "epoch": 1.0654615725270744, - "grad_norm": 0.001033691456541419, - "learning_rate": 0.0001999994415302545, - "loss": 46.0, - "step": 6616 - }, - { - "epoch": 1.0656226096058616, - "grad_norm": 0.004542914219200611, - "learning_rate": 0.00019999944136116232, - "loss": 46.0, - "step": 6617 - }, - { - "epoch": 1.065783646684649, - "grad_norm": 0.001034711254760623, - "learning_rate": 0.00019999944119204453, - "loss": 46.0, - "step": 6618 - }, - { - "epoch": 1.0659446837634365, - "grad_norm": 0.0005485193687491119, - "learning_rate": 0.00019999944102290116, - "loss": 46.0, - "step": 6619 - }, - { - "epoch": 1.066105720842224, - "grad_norm": 0.002726083155721426, - "learning_rate": 0.0001999994408537322, - "loss": 46.0, - "step": 6620 - }, - { - "epoch": 1.0662667579210112, - "grad_norm": 0.002699152799323201, - "learning_rate": 0.00019999944068453763, - "loss": 46.0, - "step": 6621 - }, - { - "epoch": 1.0664277949997987, - "grad_norm": 0.0004922068328596652, - "learning_rate": 0.00019999944051531747, - "loss": 46.0, - "step": 6622 - }, - { - "epoch": 1.0665888320785861, - "grad_norm": 0.0008032437763176858, - "learning_rate": 0.0001999994403460717, - "loss": 46.0, - "step": 6623 - }, - { - "epoch": 1.0667498691573736, - "grad_norm": 0.002333537209779024, - "learning_rate": 0.00019999944017680036, - "loss": 46.0, - "step": 6624 - }, - { - "epoch": 1.0669109062361608, - "grad_norm": 0.0003647351113613695, - "learning_rate": 0.0001999994400075034, - "loss": 46.0, - "step": 6625 - }, - { - "epoch": 1.0670719433149483, - "grad_norm": 0.0071593099273741245, - "learning_rate": 0.00019999943983818084, - "loss": 46.0, - "step": 6626 - }, - { - "epoch": 1.0672329803937357, - "grad_norm": 0.000454216351499781, - "learning_rate": 0.00019999943966883272, - "loss": 46.0, - "step": 6627 - }, - { - "epoch": 1.0673940174725232, - "grad_norm": 0.00444055488333106, - "learning_rate": 0.000199999439499459, - "loss": 46.0, - "step": 6628 - }, - { - "epoch": 1.0675550545513104, - "grad_norm": 0.0045326100662350655, - "learning_rate": 0.00019999943933005966, - "loss": 46.0, - "step": 6629 - }, - { - "epoch": 1.0677160916300978, - "grad_norm": 0.0038118334487080574, - "learning_rate": 0.00019999943916063474, - "loss": 46.0, - "step": 6630 - }, - { - "epoch": 1.0678771287088853, - "grad_norm": 0.0044481027871370316, - "learning_rate": 0.00019999943899118425, - "loss": 46.0, - "step": 6631 - }, - { - "epoch": 1.0680381657876725, - "grad_norm": 0.00202358141541481, - "learning_rate": 0.00019999943882170813, - "loss": 46.0, - "step": 6632 - }, - { - "epoch": 1.06819920286646, - "grad_norm": 0.002532506361603737, - "learning_rate": 0.00019999943865220643, - "loss": 46.0, - "step": 6633 - }, - { - "epoch": 1.0683602399452474, - "grad_norm": 0.0014133837539702654, - "learning_rate": 0.00019999943848267914, - "loss": 46.0, - "step": 6634 - }, - { - "epoch": 1.0685212770240349, - "grad_norm": 0.001062836148776114, - "learning_rate": 0.00019999943831312624, - "loss": 46.0, - "step": 6635 - }, - { - "epoch": 1.068682314102822, - "grad_norm": 0.001685763243585825, - "learning_rate": 0.00019999943814354775, - "loss": 46.0, - "step": 6636 - }, - { - "epoch": 1.0688433511816096, - "grad_norm": 0.0011892104521393776, - "learning_rate": 0.0001999994379739437, - "loss": 46.0, - "step": 6637 - }, - { - "epoch": 1.069004388260397, - "grad_norm": 0.0014539366820827127, - "learning_rate": 0.000199999437804314, - "loss": 46.0, - "step": 6638 - }, - { - "epoch": 1.0691654253391842, - "grad_norm": 0.0028371347580105066, - "learning_rate": 0.00019999943763465873, - "loss": 46.0, - "step": 6639 - }, - { - "epoch": 1.0693264624179717, - "grad_norm": 0.0009564222418703139, - "learning_rate": 0.00019999943746497786, - "loss": 46.0, - "step": 6640 - }, - { - "epoch": 1.0694874994967591, - "grad_norm": 0.002137527335435152, - "learning_rate": 0.00019999943729527139, - "loss": 46.0, - "step": 6641 - }, - { - "epoch": 1.0696485365755466, - "grad_norm": 0.0014893212355673313, - "learning_rate": 0.00019999943712553935, - "loss": 46.0, - "step": 6642 - }, - { - "epoch": 1.0698095736543338, - "grad_norm": 0.0016106904949992895, - "learning_rate": 0.0001999994369557817, - "loss": 46.0, - "step": 6643 - }, - { - "epoch": 1.0699706107331213, - "grad_norm": 0.0011139264097437263, - "learning_rate": 0.00019999943678599845, - "loss": 46.0, - "step": 6644 - }, - { - "epoch": 1.0701316478119087, - "grad_norm": 0.0014995168894529343, - "learning_rate": 0.0001999994366161896, - "loss": 46.0, - "step": 6645 - }, - { - "epoch": 1.0702926848906962, - "grad_norm": 0.0015475673135370016, - "learning_rate": 0.00019999943644635518, - "loss": 46.0, - "step": 6646 - }, - { - "epoch": 1.0704537219694834, - "grad_norm": 0.0013178804656490684, - "learning_rate": 0.00019999943627649515, - "loss": 46.0, - "step": 6647 - }, - { - "epoch": 1.0706147590482709, - "grad_norm": 0.0007449607946909964, - "learning_rate": 0.00019999943610660953, - "loss": 46.0, - "step": 6648 - }, - { - "epoch": 1.0707757961270583, - "grad_norm": 0.0007005247171036899, - "learning_rate": 0.0001999994359366983, - "loss": 46.0, - "step": 6649 - }, - { - "epoch": 1.0709368332058458, - "grad_norm": 0.001908188103698194, - "learning_rate": 0.00019999943576676148, - "loss": 46.0, - "step": 6650 - }, - { - "epoch": 1.071097870284633, - "grad_norm": 0.0014229501830413938, - "learning_rate": 0.00019999943559679908, - "loss": 46.0, - "step": 6651 - }, - { - "epoch": 1.0712589073634204, - "grad_norm": 0.0010961941443383694, - "learning_rate": 0.00019999943542681108, - "loss": 46.0, - "step": 6652 - }, - { - "epoch": 1.071419944442208, - "grad_norm": 0.0024302878882735968, - "learning_rate": 0.0001999994352567975, - "loss": 46.0, - "step": 6653 - }, - { - "epoch": 1.0715809815209951, - "grad_norm": 0.0012020883150398731, - "learning_rate": 0.00019999943508675828, - "loss": 46.0, - "step": 6654 - }, - { - "epoch": 1.0717420185997826, - "grad_norm": 0.003786248154938221, - "learning_rate": 0.0001999994349166935, - "loss": 46.0, - "step": 6655 - }, - { - "epoch": 1.07190305567857, - "grad_norm": 0.0012250854633748531, - "learning_rate": 0.00019999943474660313, - "loss": 46.0, - "step": 6656 - }, - { - "epoch": 1.0720640927573575, - "grad_norm": 0.002798636443912983, - "learning_rate": 0.00019999943457648715, - "loss": 46.0, - "step": 6657 - }, - { - "epoch": 1.0722251298361447, - "grad_norm": 0.0004034322628285736, - "learning_rate": 0.00019999943440634555, - "loss": 46.0, - "step": 6658 - }, - { - "epoch": 1.0723861669149322, - "grad_norm": 0.0023523124400526285, - "learning_rate": 0.0001999994342361784, - "loss": 46.0, - "step": 6659 - }, - { - "epoch": 1.0725472039937196, - "grad_norm": 0.0028253584168851376, - "learning_rate": 0.00019999943406598565, - "loss": 46.0, - "step": 6660 - }, - { - "epoch": 1.0727082410725068, - "grad_norm": 0.0015455724205821753, - "learning_rate": 0.00019999943389576726, - "loss": 46.0, - "step": 6661 - }, - { - "epoch": 1.0728692781512943, - "grad_norm": 0.0009458760032430291, - "learning_rate": 0.00019999943372552335, - "loss": 46.0, - "step": 6662 - }, - { - "epoch": 1.0730303152300817, - "grad_norm": 0.007920492440462112, - "learning_rate": 0.00019999943355525379, - "loss": 46.0, - "step": 6663 - }, - { - "epoch": 1.0731913523088692, - "grad_norm": 0.0004345966153778136, - "learning_rate": 0.00019999943338495866, - "loss": 46.0, - "step": 6664 - }, - { - "epoch": 1.0733523893876564, - "grad_norm": 0.005320028867572546, - "learning_rate": 0.00019999943321463793, - "loss": 46.0, - "step": 6665 - }, - { - "epoch": 1.0735134264664439, - "grad_norm": 0.0019873802084475756, - "learning_rate": 0.00019999943304429158, - "loss": 46.0, - "step": 6666 - }, - { - "epoch": 1.0736744635452313, - "grad_norm": 0.0007979539805091918, - "learning_rate": 0.00019999943287391967, - "loss": 46.0, - "step": 6667 - }, - { - "epoch": 1.0738355006240188, - "grad_norm": 0.0006145013612695038, - "learning_rate": 0.00019999943270352215, - "loss": 46.0, - "step": 6668 - }, - { - "epoch": 1.073996537702806, - "grad_norm": 0.0025720722042024136, - "learning_rate": 0.00019999943253309904, - "loss": 46.0, - "step": 6669 - }, - { - "epoch": 1.0741575747815935, - "grad_norm": 0.0013371184468269348, - "learning_rate": 0.00019999943236265034, - "loss": 46.0, - "step": 6670 - }, - { - "epoch": 1.074318611860381, - "grad_norm": 0.0010885385563597083, - "learning_rate": 0.00019999943219217603, - "loss": 46.0, - "step": 6671 - }, - { - "epoch": 1.0744796489391681, - "grad_norm": 0.003194937715306878, - "learning_rate": 0.00019999943202167613, - "loss": 46.0, - "step": 6672 - }, - { - "epoch": 1.0746406860179556, - "grad_norm": 0.0012660721549764276, - "learning_rate": 0.00019999943185115065, - "loss": 46.0, - "step": 6673 - }, - { - "epoch": 1.074801723096743, - "grad_norm": 0.0026645513717085123, - "learning_rate": 0.00019999943168059954, - "loss": 46.0, - "step": 6674 - }, - { - "epoch": 1.0749627601755305, - "grad_norm": 0.0016193187329918146, - "learning_rate": 0.00019999943151002288, - "loss": 46.0, - "step": 6675 - }, - { - "epoch": 1.0751237972543177, - "grad_norm": 0.007118883077055216, - "learning_rate": 0.00019999943133942058, - "loss": 46.0, - "step": 6676 - }, - { - "epoch": 1.0752848343331052, - "grad_norm": 0.0007365164929069579, - "learning_rate": 0.00019999943116879272, - "loss": 46.0, - "step": 6677 - }, - { - "epoch": 1.0754458714118926, - "grad_norm": 0.003612341359257698, - "learning_rate": 0.00019999943099813927, - "loss": 46.0, - "step": 6678 - }, - { - "epoch": 1.07560690849068, - "grad_norm": 0.0019475065637379885, - "learning_rate": 0.0001999994308274602, - "loss": 46.0, - "step": 6679 - }, - { - "epoch": 1.0757679455694673, - "grad_norm": 0.0010829285020008683, - "learning_rate": 0.00019999943065675556, - "loss": 46.0, - "step": 6680 - }, - { - "epoch": 1.0759289826482548, - "grad_norm": 0.003223517443984747, - "learning_rate": 0.00019999943048602527, - "loss": 46.0, - "step": 6681 - }, - { - "epoch": 1.0760900197270422, - "grad_norm": 0.0016091213328763843, - "learning_rate": 0.00019999943031526944, - "loss": 46.0, - "step": 6682 - }, - { - "epoch": 1.0762510568058294, - "grad_norm": 0.001880658557638526, - "learning_rate": 0.000199999430144488, - "loss": 46.0, - "step": 6683 - }, - { - "epoch": 1.076412093884617, - "grad_norm": 0.0012574403081089258, - "learning_rate": 0.00019999942997368098, - "loss": 46.0, - "step": 6684 - }, - { - "epoch": 1.0765731309634043, - "grad_norm": 0.0010737484553828835, - "learning_rate": 0.00019999942980284833, - "loss": 46.0, - "step": 6685 - }, - { - "epoch": 1.0767341680421918, - "grad_norm": 0.0016168591100722551, - "learning_rate": 0.0001999994296319901, - "loss": 46.0, - "step": 6686 - }, - { - "epoch": 1.076895205120979, - "grad_norm": 0.0037596020847558975, - "learning_rate": 0.0001999994294611063, - "loss": 46.0, - "step": 6687 - }, - { - "epoch": 1.0770562421997665, - "grad_norm": 0.0009737680666148663, - "learning_rate": 0.00019999942929019692, - "loss": 46.0, - "step": 6688 - }, - { - "epoch": 1.077217279278554, - "grad_norm": 0.003411609446629882, - "learning_rate": 0.00019999942911926187, - "loss": 46.0, - "step": 6689 - }, - { - "epoch": 1.0773783163573414, - "grad_norm": 0.0033264923840761185, - "learning_rate": 0.00019999942894830127, - "loss": 46.0, - "step": 6690 - }, - { - "epoch": 1.0775393534361286, - "grad_norm": 0.0007492205477319658, - "learning_rate": 0.00019999942877731508, - "loss": 46.0, - "step": 6691 - }, - { - "epoch": 1.077700390514916, - "grad_norm": 0.0008512393105775118, - "learning_rate": 0.0001999994286063033, - "loss": 46.0, - "step": 6692 - }, - { - "epoch": 1.0778614275937035, - "grad_norm": 0.008641490712761879, - "learning_rate": 0.00019999942843526588, - "loss": 46.0, - "step": 6693 - }, - { - "epoch": 1.0780224646724907, - "grad_norm": 0.002022741362452507, - "learning_rate": 0.00019999942826420293, - "loss": 46.0, - "step": 6694 - }, - { - "epoch": 1.0781835017512782, - "grad_norm": 0.0009082197211682796, - "learning_rate": 0.00019999942809311434, - "loss": 46.0, - "step": 6695 - }, - { - "epoch": 1.0783445388300656, - "grad_norm": 0.0042103007435798645, - "learning_rate": 0.00019999942792200018, - "loss": 46.0, - "step": 6696 - }, - { - "epoch": 1.078505575908853, - "grad_norm": 0.0010721717262640595, - "learning_rate": 0.00019999942775086042, - "loss": 46.0, - "step": 6697 - }, - { - "epoch": 1.0786666129876403, - "grad_norm": 0.002388992113992572, - "learning_rate": 0.00019999942757969506, - "loss": 46.0, - "step": 6698 - }, - { - "epoch": 1.0788276500664278, - "grad_norm": 0.002236271509900689, - "learning_rate": 0.0001999994274085041, - "loss": 46.0, - "step": 6699 - }, - { - "epoch": 1.0789886871452152, - "grad_norm": 0.0028281386476010084, - "learning_rate": 0.00019999942723728756, - "loss": 46.0, - "step": 6700 - }, - { - "epoch": 1.0791497242240027, - "grad_norm": 0.0015305830165743828, - "learning_rate": 0.0001999994270660454, - "loss": 46.0, - "step": 6701 - }, - { - "epoch": 1.07931076130279, - "grad_norm": 0.00285886088386178, - "learning_rate": 0.00019999942689477766, - "loss": 46.0, - "step": 6702 - }, - { - "epoch": 1.0794717983815774, - "grad_norm": 0.00805856753140688, - "learning_rate": 0.00019999942672348434, - "loss": 46.0, - "step": 6703 - }, - { - "epoch": 1.0796328354603648, - "grad_norm": 0.003864812897518277, - "learning_rate": 0.0001999994265521654, - "loss": 46.0, - "step": 6704 - }, - { - "epoch": 1.079793872539152, - "grad_norm": 0.0018149057868868113, - "learning_rate": 0.00019999942638082086, - "loss": 46.0, - "step": 6705 - }, - { - "epoch": 1.0799549096179395, - "grad_norm": 0.0015075807459652424, - "learning_rate": 0.00019999942620945076, - "loss": 46.0, - "step": 6706 - }, - { - "epoch": 1.080115946696727, - "grad_norm": 0.0005201415624469519, - "learning_rate": 0.00019999942603805506, - "loss": 46.0, - "step": 6707 - }, - { - "epoch": 1.0802769837755144, - "grad_norm": 0.0013000390026718378, - "learning_rate": 0.00019999942586663373, - "loss": 46.0, - "step": 6708 - }, - { - "epoch": 1.0804380208543016, - "grad_norm": 0.002200833521783352, - "learning_rate": 0.00019999942569518683, - "loss": 46.0, - "step": 6709 - }, - { - "epoch": 1.080599057933089, - "grad_norm": 0.009349943138659, - "learning_rate": 0.00019999942552371432, - "loss": 46.0, - "step": 6710 - }, - { - "epoch": 1.0807600950118765, - "grad_norm": 0.00045281765051186085, - "learning_rate": 0.00019999942535221625, - "loss": 46.0, - "step": 6711 - }, - { - "epoch": 1.0809211320906638, - "grad_norm": 0.005064214114099741, - "learning_rate": 0.00019999942518069257, - "loss": 46.0, - "step": 6712 - }, - { - "epoch": 1.0810821691694512, - "grad_norm": 0.0003647044359240681, - "learning_rate": 0.00019999942500914327, - "loss": 46.0, - "step": 6713 - }, - { - "epoch": 1.0812432062482387, - "grad_norm": 0.004237434361129999, - "learning_rate": 0.0001999994248375684, - "loss": 46.0, - "step": 6714 - }, - { - "epoch": 1.0814042433270261, - "grad_norm": 0.0019238648237660527, - "learning_rate": 0.00019999942466596794, - "loss": 46.0, - "step": 6715 - }, - { - "epoch": 1.0815652804058133, - "grad_norm": 0.0005576108233071864, - "learning_rate": 0.00019999942449434185, - "loss": 46.0, - "step": 6716 - }, - { - "epoch": 1.0817263174846008, - "grad_norm": 0.0017798951594159007, - "learning_rate": 0.0001999994243226902, - "loss": 46.0, - "step": 6717 - }, - { - "epoch": 1.0818873545633882, - "grad_norm": 0.004515098407864571, - "learning_rate": 0.00019999942415101294, - "loss": 46.0, - "step": 6718 - }, - { - "epoch": 1.0820483916421757, - "grad_norm": 0.0021000646520406008, - "learning_rate": 0.0001999994239793101, - "loss": 46.0, - "step": 6719 - }, - { - "epoch": 1.082209428720963, - "grad_norm": 0.0025087560061365366, - "learning_rate": 0.00019999942380758166, - "loss": 46.0, - "step": 6720 - }, - { - "epoch": 1.0823704657997504, - "grad_norm": 0.006699474528431892, - "learning_rate": 0.0001999994236358276, - "loss": 46.0, - "step": 6721 - }, - { - "epoch": 1.0825315028785378, - "grad_norm": 0.0008362880325876176, - "learning_rate": 0.00019999942346404797, - "loss": 46.0, - "step": 6722 - }, - { - "epoch": 1.0826925399573253, - "grad_norm": 0.0028973568696528673, - "learning_rate": 0.00019999942329224278, - "loss": 46.0, - "step": 6723 - }, - { - "epoch": 1.0828535770361125, - "grad_norm": 0.0021264434326440096, - "learning_rate": 0.00019999942312041194, - "loss": 46.0, - "step": 6724 - }, - { - "epoch": 1.0830146141149, - "grad_norm": 0.0006560378824360669, - "learning_rate": 0.0001999994229485555, - "loss": 46.0, - "step": 6725 - }, - { - "epoch": 1.0831756511936874, - "grad_norm": 0.0004560265224426985, - "learning_rate": 0.0001999994227766735, - "loss": 46.0, - "step": 6726 - }, - { - "epoch": 1.0833366882724746, - "grad_norm": 0.005121591035276651, - "learning_rate": 0.0001999994226047659, - "loss": 46.0, - "step": 6727 - }, - { - "epoch": 1.083497725351262, - "grad_norm": 0.0008057549712248147, - "learning_rate": 0.00019999942243283269, - "loss": 46.0, - "step": 6728 - }, - { - "epoch": 1.0836587624300495, - "grad_norm": 0.0007746375631541014, - "learning_rate": 0.0001999994222608739, - "loss": 46.0, - "step": 6729 - }, - { - "epoch": 1.083819799508837, - "grad_norm": 0.0009633756708353758, - "learning_rate": 0.00019999942208888952, - "loss": 46.0, - "step": 6730 - }, - { - "epoch": 1.0839808365876242, - "grad_norm": 0.001698309788480401, - "learning_rate": 0.00019999942191687952, - "loss": 46.0, - "step": 6731 - }, - { - "epoch": 1.0841418736664117, - "grad_norm": 0.0007028720574453473, - "learning_rate": 0.00019999942174484393, - "loss": 46.0, - "step": 6732 - }, - { - "epoch": 1.0843029107451991, - "grad_norm": 0.0017314193537458777, - "learning_rate": 0.00019999942157278278, - "loss": 46.0, - "step": 6733 - }, - { - "epoch": 1.0844639478239864, - "grad_norm": 0.004393136128783226, - "learning_rate": 0.000199999421400696, - "loss": 46.0, - "step": 6734 - }, - { - "epoch": 1.0846249849027738, - "grad_norm": 0.0031233245972543955, - "learning_rate": 0.0001999994212285836, - "loss": 46.0, - "step": 6735 - }, - { - "epoch": 1.0847860219815613, - "grad_norm": 0.0013053212314844131, - "learning_rate": 0.00019999942105644567, - "loss": 46.0, - "step": 6736 - }, - { - "epoch": 1.0849470590603487, - "grad_norm": 0.001647605444304645, - "learning_rate": 0.00019999942088428212, - "loss": 46.0, - "step": 6737 - }, - { - "epoch": 1.085108096139136, - "grad_norm": 0.0013077345211058855, - "learning_rate": 0.00019999942071209298, - "loss": 46.0, - "step": 6738 - }, - { - "epoch": 1.0852691332179234, - "grad_norm": 0.0014701293548569083, - "learning_rate": 0.00019999942053987822, - "loss": 46.0, - "step": 6739 - }, - { - "epoch": 1.0854301702967109, - "grad_norm": 0.0010987964924424887, - "learning_rate": 0.0001999994203676379, - "loss": 46.0, - "step": 6740 - }, - { - "epoch": 1.0855912073754983, - "grad_norm": 0.0034313222859054804, - "learning_rate": 0.00019999942019537195, - "loss": 46.0, - "step": 6741 - }, - { - "epoch": 1.0857522444542855, - "grad_norm": 0.00329037313349545, - "learning_rate": 0.00019999942002308044, - "loss": 46.0, - "step": 6742 - }, - { - "epoch": 1.085913281533073, - "grad_norm": 0.0015830009942874312, - "learning_rate": 0.0001999994198507633, - "loss": 46.0, - "step": 6743 - }, - { - "epoch": 1.0860743186118604, - "grad_norm": 0.00520638981834054, - "learning_rate": 0.0001999994196784206, - "loss": 46.0, - "step": 6744 - }, - { - "epoch": 1.0862353556906479, - "grad_norm": 0.0029223295859992504, - "learning_rate": 0.00019999941950605229, - "loss": 46.0, - "step": 6745 - }, - { - "epoch": 1.0863963927694351, - "grad_norm": 0.0019706296734511852, - "learning_rate": 0.00019999941933365837, - "loss": 46.0, - "step": 6746 - }, - { - "epoch": 1.0865574298482226, - "grad_norm": 0.0026354717556387186, - "learning_rate": 0.0001999994191612389, - "loss": 46.0, - "step": 6747 - }, - { - "epoch": 1.08671846692701, - "grad_norm": 0.000934434006921947, - "learning_rate": 0.00019999941898879377, - "loss": 46.0, - "step": 6748 - }, - { - "epoch": 1.0868795040057972, - "grad_norm": 0.0021225588861852884, - "learning_rate": 0.0001999994188163231, - "loss": 46.0, - "step": 6749 - }, - { - "epoch": 1.0870405410845847, - "grad_norm": 0.0012658957857638597, - "learning_rate": 0.00019999941864382682, - "loss": 46.0, - "step": 6750 - }, - { - "epoch": 1.0872015781633722, - "grad_norm": 0.0008927016751840711, - "learning_rate": 0.0001999994184713049, - "loss": 46.0, - "step": 6751 - }, - { - "epoch": 1.0873626152421596, - "grad_norm": 0.0006779024843126535, - "learning_rate": 0.00019999941829875744, - "loss": 46.0, - "step": 6752 - }, - { - "epoch": 1.0875236523209468, - "grad_norm": 0.0025173358153551817, - "learning_rate": 0.00019999941812618436, - "loss": 46.0, - "step": 6753 - }, - { - "epoch": 1.0876846893997343, - "grad_norm": 0.006518359761685133, - "learning_rate": 0.0001999994179535857, - "loss": 46.0, - "step": 6754 - }, - { - "epoch": 1.0878457264785217, - "grad_norm": 0.0003246386186219752, - "learning_rate": 0.00019999941778096143, - "loss": 46.0, - "step": 6755 - }, - { - "epoch": 1.088006763557309, - "grad_norm": 0.0023419458884745836, - "learning_rate": 0.00019999941760831159, - "loss": 46.0, - "step": 6756 - }, - { - "epoch": 1.0881678006360964, - "grad_norm": 0.0032568045426160097, - "learning_rate": 0.00019999941743563615, - "loss": 46.0, - "step": 6757 - }, - { - "epoch": 1.0883288377148839, - "grad_norm": 0.0010872224811464548, - "learning_rate": 0.0001999994172629351, - "loss": 46.0, - "step": 6758 - }, - { - "epoch": 1.0884898747936713, - "grad_norm": 0.0010463227517902851, - "learning_rate": 0.00019999941709020847, - "loss": 46.0, - "step": 6759 - }, - { - "epoch": 1.0886509118724585, - "grad_norm": 0.0005634522531181574, - "learning_rate": 0.00019999941691745623, - "loss": 46.0, - "step": 6760 - }, - { - "epoch": 1.088811948951246, - "grad_norm": 0.0021014881785959005, - "learning_rate": 0.00019999941674467842, - "loss": 46.0, - "step": 6761 - }, - { - "epoch": 1.0889729860300335, - "grad_norm": 0.001237224554643035, - "learning_rate": 0.00019999941657187497, - "loss": 46.0, - "step": 6762 - }, - { - "epoch": 1.089134023108821, - "grad_norm": 0.0007352237007580698, - "learning_rate": 0.00019999941639904596, - "loss": 46.0, - "step": 6763 - }, - { - "epoch": 1.0892950601876081, - "grad_norm": 0.0009498083963990211, - "learning_rate": 0.00019999941622619134, - "loss": 46.0, - "step": 6764 - }, - { - "epoch": 1.0894560972663956, - "grad_norm": 0.0015944220358505845, - "learning_rate": 0.00019999941605331113, - "loss": 46.0, - "step": 6765 - }, - { - "epoch": 1.089617134345183, - "grad_norm": 0.0013007746310904622, - "learning_rate": 0.00019999941588040533, - "loss": 46.0, - "step": 6766 - }, - { - "epoch": 1.0897781714239703, - "grad_norm": 0.002352488460019231, - "learning_rate": 0.00019999941570747397, - "loss": 46.0, - "step": 6767 - }, - { - "epoch": 1.0899392085027577, - "grad_norm": 0.0005662386538460851, - "learning_rate": 0.00019999941553451695, - "loss": 46.0, - "step": 6768 - }, - { - "epoch": 1.0901002455815452, - "grad_norm": 0.0014966700691729784, - "learning_rate": 0.00019999941536153436, - "loss": 46.0, - "step": 6769 - }, - { - "epoch": 1.0902612826603326, - "grad_norm": 0.0016662016278132796, - "learning_rate": 0.00019999941518852619, - "loss": 46.0, - "step": 6770 - }, - { - "epoch": 1.0904223197391198, - "grad_norm": 0.0013122496893629432, - "learning_rate": 0.00019999941501549242, - "loss": 46.0, - "step": 6771 - }, - { - "epoch": 1.0905833568179073, - "grad_norm": 0.00036764625110663474, - "learning_rate": 0.00019999941484243302, - "loss": 46.0, - "step": 6772 - }, - { - "epoch": 1.0907443938966948, - "grad_norm": 0.0008052490884438157, - "learning_rate": 0.0001999994146693481, - "loss": 46.0, - "step": 6773 - }, - { - "epoch": 1.0909054309754822, - "grad_norm": 0.0010541316587477922, - "learning_rate": 0.0001999994144962375, - "loss": 46.0, - "step": 6774 - }, - { - "epoch": 1.0910664680542694, - "grad_norm": 0.0012618504697456956, - "learning_rate": 0.00019999941432310137, - "loss": 46.0, - "step": 6775 - }, - { - "epoch": 1.0912275051330569, - "grad_norm": 0.0013316760305315256, - "learning_rate": 0.00019999941414993962, - "loss": 46.0, - "step": 6776 - }, - { - "epoch": 1.0913885422118443, - "grad_norm": 0.0008574627572670579, - "learning_rate": 0.00019999941397675226, - "loss": 46.0, - "step": 6777 - }, - { - "epoch": 1.0915495792906316, - "grad_norm": 0.0014363937079906464, - "learning_rate": 0.00019999941380353936, - "loss": 46.0, - "step": 6778 - }, - { - "epoch": 1.091710616369419, - "grad_norm": 0.008141132071614265, - "learning_rate": 0.00019999941363030082, - "loss": 46.0, - "step": 6779 - }, - { - "epoch": 1.0918716534482065, - "grad_norm": 0.0018811540212482214, - "learning_rate": 0.00019999941345703667, - "loss": 46.0, - "step": 6780 - }, - { - "epoch": 1.092032690526994, - "grad_norm": 0.0008768909028731287, - "learning_rate": 0.00019999941328374695, - "loss": 46.0, - "step": 6781 - }, - { - "epoch": 1.0921937276057812, - "grad_norm": 0.002609496470540762, - "learning_rate": 0.00019999941311043165, - "loss": 46.0, - "step": 6782 - }, - { - "epoch": 1.0923547646845686, - "grad_norm": 0.004338674712926149, - "learning_rate": 0.0001999994129370907, - "loss": 46.0, - "step": 6783 - }, - { - "epoch": 1.092515801763356, - "grad_norm": 0.0009369601611979306, - "learning_rate": 0.00019999941276372424, - "loss": 46.0, - "step": 6784 - }, - { - "epoch": 1.0926768388421435, - "grad_norm": 0.002386624226346612, - "learning_rate": 0.00019999941259033212, - "loss": 46.0, - "step": 6785 - }, - { - "epoch": 1.0928378759209307, - "grad_norm": 0.001545778359286487, - "learning_rate": 0.00019999941241691442, - "loss": 46.0, - "step": 6786 - }, - { - "epoch": 1.0929989129997182, - "grad_norm": 0.0007242248393595219, - "learning_rate": 0.00019999941224347115, - "loss": 46.0, - "step": 6787 - }, - { - "epoch": 1.0931599500785056, - "grad_norm": 0.004175287671387196, - "learning_rate": 0.00019999941207000225, - "loss": 46.0, - "step": 6788 - }, - { - "epoch": 1.0933209871572929, - "grad_norm": 0.004481961019337177, - "learning_rate": 0.00019999941189650778, - "loss": 46.0, - "step": 6789 - }, - { - "epoch": 1.0934820242360803, - "grad_norm": 0.0010569783626124263, - "learning_rate": 0.0001999994117229877, - "loss": 46.0, - "step": 6790 - }, - { - "epoch": 1.0936430613148678, - "grad_norm": 0.0016120291547849774, - "learning_rate": 0.00019999941154944203, - "loss": 46.0, - "step": 6791 - }, - { - "epoch": 1.0938040983936552, - "grad_norm": 0.001475730910897255, - "learning_rate": 0.00019999941137587075, - "loss": 46.0, - "step": 6792 - }, - { - "epoch": 1.0939651354724425, - "grad_norm": 0.0006012814701534808, - "learning_rate": 0.0001999994112022739, - "loss": 46.0, - "step": 6793 - }, - { - "epoch": 1.09412617255123, - "grad_norm": 0.001240721670910716, - "learning_rate": 0.00019999941102865146, - "loss": 46.0, - "step": 6794 - }, - { - "epoch": 1.0942872096300174, - "grad_norm": 0.0008815933251753449, - "learning_rate": 0.0001999994108550034, - "loss": 46.0, - "step": 6795 - }, - { - "epoch": 1.0944482467088048, - "grad_norm": 0.0003538366872817278, - "learning_rate": 0.00019999941068132976, - "loss": 46.0, - "step": 6796 - }, - { - "epoch": 1.094609283787592, - "grad_norm": 0.002052916446700692, - "learning_rate": 0.0001999994105076305, - "loss": 46.0, - "step": 6797 - }, - { - "epoch": 1.0947703208663795, - "grad_norm": 0.0032214089296758175, - "learning_rate": 0.0001999994103339057, - "loss": 46.0, - "step": 6798 - }, - { - "epoch": 1.094931357945167, - "grad_norm": 0.0043851411901414394, - "learning_rate": 0.00019999941016015526, - "loss": 46.0, - "step": 6799 - }, - { - "epoch": 1.0950923950239542, - "grad_norm": 0.0014077771920710802, - "learning_rate": 0.00019999940998637926, - "loss": 46.0, - "step": 6800 - }, - { - "epoch": 1.0952534321027416, - "grad_norm": 0.004380984231829643, - "learning_rate": 0.0001999994098125776, - "loss": 46.0, - "step": 6801 - }, - { - "epoch": 1.095414469181529, - "grad_norm": 0.00826328620314598, - "learning_rate": 0.0001999994096387504, - "loss": 46.0, - "step": 6802 - }, - { - "epoch": 1.0955755062603165, - "grad_norm": 0.0007693120278418064, - "learning_rate": 0.00019999940946489758, - "loss": 46.0, - "step": 6803 - }, - { - "epoch": 1.0957365433391038, - "grad_norm": 0.0010625033173710108, - "learning_rate": 0.0001999994092910192, - "loss": 46.0, - "step": 6804 - }, - { - "epoch": 1.0958975804178912, - "grad_norm": 0.0011092345230281353, - "learning_rate": 0.0001999994091171152, - "loss": 46.0, - "step": 6805 - }, - { - "epoch": 1.0960586174966787, - "grad_norm": 0.0014680151361972094, - "learning_rate": 0.00019999940894318562, - "loss": 46.0, - "step": 6806 - }, - { - "epoch": 1.0962196545754659, - "grad_norm": 0.001505075255408883, - "learning_rate": 0.00019999940876923043, - "loss": 46.0, - "step": 6807 - }, - { - "epoch": 1.0963806916542533, - "grad_norm": 0.0010777050629258156, - "learning_rate": 0.00019999940859524967, - "loss": 46.0, - "step": 6808 - }, - { - "epoch": 1.0965417287330408, - "grad_norm": 0.0018195834709331393, - "learning_rate": 0.00019999940842124327, - "loss": 46.0, - "step": 6809 - }, - { - "epoch": 1.0967027658118282, - "grad_norm": 0.0023834342136979103, - "learning_rate": 0.00019999940824721134, - "loss": 46.0, - "step": 6810 - }, - { - "epoch": 1.0968638028906155, - "grad_norm": 0.0006886071059852839, - "learning_rate": 0.00019999940807315377, - "loss": 46.0, - "step": 6811 - }, - { - "epoch": 1.097024839969403, - "grad_norm": 0.0017052574548870325, - "learning_rate": 0.00019999940789907058, - "loss": 46.0, - "step": 6812 - }, - { - "epoch": 1.0971858770481904, - "grad_norm": 0.0003902724711224437, - "learning_rate": 0.00019999940772496183, - "loss": 46.0, - "step": 6813 - }, - { - "epoch": 1.0973469141269778, - "grad_norm": 0.0005600210279226303, - "learning_rate": 0.0001999994075508275, - "loss": 46.0, - "step": 6814 - }, - { - "epoch": 1.097507951205765, - "grad_norm": 0.0019528912380337715, - "learning_rate": 0.00019999940737666755, - "loss": 46.0, - "step": 6815 - }, - { - "epoch": 1.0976689882845525, - "grad_norm": 0.0006542624323628843, - "learning_rate": 0.00019999940720248201, - "loss": 46.0, - "step": 6816 - }, - { - "epoch": 1.09783002536334, - "grad_norm": 0.0009030955261550844, - "learning_rate": 0.00019999940702827086, - "loss": 46.0, - "step": 6817 - }, - { - "epoch": 1.0979910624421274, - "grad_norm": 0.0014593598898500204, - "learning_rate": 0.00019999940685403415, - "loss": 46.0, - "step": 6818 - }, - { - "epoch": 1.0981520995209146, - "grad_norm": 0.000376840413082391, - "learning_rate": 0.00019999940667977183, - "loss": 46.0, - "step": 6819 - }, - { - "epoch": 1.098313136599702, - "grad_norm": 0.001374902785755694, - "learning_rate": 0.00019999940650548392, - "loss": 46.0, - "step": 6820 - }, - { - "epoch": 1.0984741736784895, - "grad_norm": 0.0014474052004516125, - "learning_rate": 0.0001999994063311704, - "loss": 46.0, - "step": 6821 - }, - { - "epoch": 1.0986352107572768, - "grad_norm": 0.0009384119184687734, - "learning_rate": 0.0001999994061568313, - "loss": 46.0, - "step": 6822 - }, - { - "epoch": 1.0987962478360642, - "grad_norm": 0.006101029459387064, - "learning_rate": 0.0001999994059824666, - "loss": 46.0, - "step": 6823 - }, - { - "epoch": 1.0989572849148517, - "grad_norm": 0.00048114280798472464, - "learning_rate": 0.0001999994058080763, - "loss": 46.0, - "step": 6824 - }, - { - "epoch": 1.0991183219936391, - "grad_norm": 0.0008684517233632505, - "learning_rate": 0.00019999940563366042, - "loss": 46.0, - "step": 6825 - }, - { - "epoch": 1.0992793590724264, - "grad_norm": 0.0012686484260484576, - "learning_rate": 0.00019999940545921893, - "loss": 46.0, - "step": 6826 - }, - { - "epoch": 1.0994403961512138, - "grad_norm": 0.002097642980515957, - "learning_rate": 0.00019999940528475185, - "loss": 46.0, - "step": 6827 - }, - { - "epoch": 1.0996014332300013, - "grad_norm": 0.0007745048496872187, - "learning_rate": 0.00019999940511025916, - "loss": 46.0, - "step": 6828 - }, - { - "epoch": 1.0997624703087885, - "grad_norm": 0.0015851400094106793, - "learning_rate": 0.00019999940493574088, - "loss": 46.0, - "step": 6829 - }, - { - "epoch": 1.099923507387576, - "grad_norm": 0.0006307436851784587, - "learning_rate": 0.00019999940476119705, - "loss": 46.0, - "step": 6830 - }, - { - "epoch": 1.1000845444663634, - "grad_norm": 0.003988225478678942, - "learning_rate": 0.00019999940458662757, - "loss": 46.0, - "step": 6831 - }, - { - "epoch": 1.1002455815451508, - "grad_norm": 0.002111879177391529, - "learning_rate": 0.00019999940441203253, - "loss": 46.0, - "step": 6832 - }, - { - "epoch": 1.100406618623938, - "grad_norm": 0.0018911371007561684, - "learning_rate": 0.0001999994042374119, - "loss": 46.0, - "step": 6833 - }, - { - "epoch": 1.1005676557027255, - "grad_norm": 0.0010132085299119353, - "learning_rate": 0.00019999940406276566, - "loss": 46.0, - "step": 6834 - }, - { - "epoch": 1.100728692781513, - "grad_norm": 0.0006456273840740323, - "learning_rate": 0.0001999994038880938, - "loss": 46.0, - "step": 6835 - }, - { - "epoch": 1.1008897298603004, - "grad_norm": 0.000746123434510082, - "learning_rate": 0.0001999994037133964, - "loss": 46.0, - "step": 6836 - }, - { - "epoch": 1.1010507669390877, - "grad_norm": 0.0006802022689953446, - "learning_rate": 0.00019999940353867336, - "loss": 46.0, - "step": 6837 - }, - { - "epoch": 1.101211804017875, - "grad_norm": 0.0008651096140965819, - "learning_rate": 0.00019999940336392472, - "loss": 46.0, - "step": 6838 - }, - { - "epoch": 1.1013728410966626, - "grad_norm": 0.003311104141175747, - "learning_rate": 0.00019999940318915052, - "loss": 46.0, - "step": 6839 - }, - { - "epoch": 1.10153387817545, - "grad_norm": 0.007221200969070196, - "learning_rate": 0.0001999994030143507, - "loss": 46.0, - "step": 6840 - }, - { - "epoch": 1.1016949152542372, - "grad_norm": 0.003303400706499815, - "learning_rate": 0.0001999994028395253, - "loss": 46.0, - "step": 6841 - }, - { - "epoch": 1.1018559523330247, - "grad_norm": 0.0015620229532942176, - "learning_rate": 0.0001999994026646743, - "loss": 46.0, - "step": 6842 - }, - { - "epoch": 1.1020169894118121, - "grad_norm": 0.001295104855671525, - "learning_rate": 0.00019999940248979772, - "loss": 46.0, - "step": 6843 - }, - { - "epoch": 1.1021780264905994, - "grad_norm": 0.0012015128741040826, - "learning_rate": 0.0001999994023148955, - "loss": 46.0, - "step": 6844 - }, - { - "epoch": 1.1023390635693868, - "grad_norm": 0.003155094338580966, - "learning_rate": 0.00019999940213996772, - "loss": 46.0, - "step": 6845 - }, - { - "epoch": 1.1025001006481743, - "grad_norm": 0.001283306279219687, - "learning_rate": 0.00019999940196501435, - "loss": 46.0, - "step": 6846 - }, - { - "epoch": 1.1026611377269617, - "grad_norm": 0.0008306552772410214, - "learning_rate": 0.00019999940179003537, - "loss": 46.0, - "step": 6847 - }, - { - "epoch": 1.102822174805749, - "grad_norm": 0.004457590635865927, - "learning_rate": 0.0001999994016150308, - "loss": 46.0, - "step": 6848 - }, - { - "epoch": 1.1029832118845364, - "grad_norm": 0.0037536320742219687, - "learning_rate": 0.00019999940144000067, - "loss": 46.0, - "step": 6849 - }, - { - "epoch": 1.1031442489633239, - "grad_norm": 0.0015843348810449243, - "learning_rate": 0.0001999994012649449, - "loss": 46.0, - "step": 6850 - }, - { - "epoch": 1.103305286042111, - "grad_norm": 0.0005735429003834724, - "learning_rate": 0.00019999940108986355, - "loss": 46.0, - "step": 6851 - }, - { - "epoch": 1.1034663231208985, - "grad_norm": 0.0034495049621909857, - "learning_rate": 0.0001999994009147566, - "loss": 46.0, - "step": 6852 - }, - { - "epoch": 1.103627360199686, - "grad_norm": 0.0008431568858213723, - "learning_rate": 0.00019999940073962407, - "loss": 46.0, - "step": 6853 - }, - { - "epoch": 1.1037883972784734, - "grad_norm": 0.0005970880738459527, - "learning_rate": 0.00019999940056446592, - "loss": 46.0, - "step": 6854 - }, - { - "epoch": 1.1039494343572607, - "grad_norm": 0.0007126206764951348, - "learning_rate": 0.0001999994003892822, - "loss": 46.0, - "step": 6855 - }, - { - "epoch": 1.1041104714360481, - "grad_norm": 0.000985718797892332, - "learning_rate": 0.00019999940021407287, - "loss": 46.0, - "step": 6856 - }, - { - "epoch": 1.1042715085148356, - "grad_norm": 0.0038014438468962908, - "learning_rate": 0.00019999940003883793, - "loss": 46.0, - "step": 6857 - }, - { - "epoch": 1.104432545593623, - "grad_norm": 0.0004968709545210004, - "learning_rate": 0.0001999993998635774, - "loss": 46.0, - "step": 6858 - }, - { - "epoch": 1.1045935826724103, - "grad_norm": 0.0015719138318672776, - "learning_rate": 0.0001999993996882913, - "loss": 46.0, - "step": 6859 - }, - { - "epoch": 1.1047546197511977, - "grad_norm": 0.006736638955771923, - "learning_rate": 0.0001999993995129796, - "loss": 46.0, - "step": 6860 - }, - { - "epoch": 1.1049156568299852, - "grad_norm": 0.0026190783828496933, - "learning_rate": 0.00019999939933764232, - "loss": 46.0, - "step": 6861 - }, - { - "epoch": 1.1050766939087726, - "grad_norm": 0.0017144321464002132, - "learning_rate": 0.00019999939916227942, - "loss": 46.0, - "step": 6862 - }, - { - "epoch": 1.1052377309875598, - "grad_norm": 0.0010487454710528255, - "learning_rate": 0.00019999939898689094, - "loss": 46.0, - "step": 6863 - }, - { - "epoch": 1.1053987680663473, - "grad_norm": 0.0012597967870533466, - "learning_rate": 0.00019999939881147684, - "loss": 46.0, - "step": 6864 - }, - { - "epoch": 1.1055598051451347, - "grad_norm": 0.0018494793912395835, - "learning_rate": 0.00019999939863603715, - "loss": 46.0, - "step": 6865 - }, - { - "epoch": 1.105720842223922, - "grad_norm": 0.0009795259684324265, - "learning_rate": 0.0001999993984605719, - "loss": 46.0, - "step": 6866 - }, - { - "epoch": 1.1058818793027094, - "grad_norm": 0.0018543446203693748, - "learning_rate": 0.00019999939828508102, - "loss": 46.0, - "step": 6867 - }, - { - "epoch": 1.1060429163814969, - "grad_norm": 0.0024540466256439686, - "learning_rate": 0.00019999939810956457, - "loss": 46.0, - "step": 6868 - }, - { - "epoch": 1.1062039534602843, - "grad_norm": 0.001838721102103591, - "learning_rate": 0.00019999939793402248, - "loss": 46.0, - "step": 6869 - }, - { - "epoch": 1.1063649905390716, - "grad_norm": 0.0008247812511399388, - "learning_rate": 0.00019999939775845486, - "loss": 46.0, - "step": 6870 - }, - { - "epoch": 1.106526027617859, - "grad_norm": 0.005626663099974394, - "learning_rate": 0.00019999939758286162, - "loss": 46.0, - "step": 6871 - }, - { - "epoch": 1.1066870646966465, - "grad_norm": 0.003019805531948805, - "learning_rate": 0.00019999939740724277, - "loss": 46.0, - "step": 6872 - }, - { - "epoch": 1.1068481017754337, - "grad_norm": 0.0013774039689451456, - "learning_rate": 0.00019999939723159833, - "loss": 46.0, - "step": 6873 - }, - { - "epoch": 1.1070091388542211, - "grad_norm": 0.0008389906142838299, - "learning_rate": 0.0001999993970559283, - "loss": 46.0, - "step": 6874 - }, - { - "epoch": 1.1071701759330086, - "grad_norm": 0.002184239448979497, - "learning_rate": 0.0001999993968802327, - "loss": 46.0, - "step": 6875 - }, - { - "epoch": 1.107331213011796, - "grad_norm": 0.002110743895173073, - "learning_rate": 0.00019999939670451147, - "loss": 46.0, - "step": 6876 - }, - { - "epoch": 1.1074922500905833, - "grad_norm": 0.0015071170637384057, - "learning_rate": 0.00019999939652876462, - "loss": 46.0, - "step": 6877 - }, - { - "epoch": 1.1076532871693707, - "grad_norm": 0.0038081109523773193, - "learning_rate": 0.00019999939635299222, - "loss": 46.0, - "step": 6878 - }, - { - "epoch": 1.1078143242481582, - "grad_norm": 0.002234517829492688, - "learning_rate": 0.0001999993961771942, - "loss": 46.0, - "step": 6879 - }, - { - "epoch": 1.1079753613269456, - "grad_norm": 0.0027877003885805607, - "learning_rate": 0.00019999939600137063, - "loss": 46.0, - "step": 6880 - }, - { - "epoch": 1.1081363984057329, - "grad_norm": 0.0018064118921756744, - "learning_rate": 0.00019999939582552144, - "loss": 46.0, - "step": 6881 - }, - { - "epoch": 1.1082974354845203, - "grad_norm": 0.0010408146772533655, - "learning_rate": 0.00019999939564964664, - "loss": 46.0, - "step": 6882 - }, - { - "epoch": 1.1084584725633078, - "grad_norm": 0.006879028398543596, - "learning_rate": 0.00019999939547374627, - "loss": 46.0, - "step": 6883 - }, - { - "epoch": 1.108619509642095, - "grad_norm": 0.0004829927929677069, - "learning_rate": 0.00019999939529782027, - "loss": 46.0, - "step": 6884 - }, - { - "epoch": 1.1087805467208824, - "grad_norm": 0.0015737295616418123, - "learning_rate": 0.0001999993951218687, - "loss": 46.0, - "step": 6885 - }, - { - "epoch": 1.10894158379967, - "grad_norm": 0.0008924181456677616, - "learning_rate": 0.00019999939494589155, - "loss": 46.0, - "step": 6886 - }, - { - "epoch": 1.1091026208784573, - "grad_norm": 0.0016155537450686097, - "learning_rate": 0.00019999939476988878, - "loss": 46.0, - "step": 6887 - }, - { - "epoch": 1.1092636579572446, - "grad_norm": 0.0012315240455791354, - "learning_rate": 0.00019999939459386043, - "loss": 46.0, - "step": 6888 - }, - { - "epoch": 1.109424695036032, - "grad_norm": 0.0035185578744858503, - "learning_rate": 0.00019999939441780646, - "loss": 46.0, - "step": 6889 - }, - { - "epoch": 1.1095857321148195, - "grad_norm": 0.004599418491125107, - "learning_rate": 0.0001999993942417269, - "loss": 46.0, - "step": 6890 - }, - { - "epoch": 1.109746769193607, - "grad_norm": 0.0008205798803828657, - "learning_rate": 0.0001999993940656218, - "loss": 46.0, - "step": 6891 - }, - { - "epoch": 1.1099078062723942, - "grad_norm": 0.0006457548006437719, - "learning_rate": 0.00019999939388949106, - "loss": 46.0, - "step": 6892 - }, - { - "epoch": 1.1100688433511816, - "grad_norm": 0.0024489983916282654, - "learning_rate": 0.00019999939371333471, - "loss": 46.0, - "step": 6893 - }, - { - "epoch": 1.110229880429969, - "grad_norm": 0.002080045873299241, - "learning_rate": 0.00019999939353715278, - "loss": 46.0, - "step": 6894 - }, - { - "epoch": 1.1103909175087563, - "grad_norm": 0.0023526570294052362, - "learning_rate": 0.0001999993933609453, - "loss": 46.0, - "step": 6895 - }, - { - "epoch": 1.1105519545875437, - "grad_norm": 0.0025399865116924047, - "learning_rate": 0.00019999939318471216, - "loss": 46.0, - "step": 6896 - }, - { - "epoch": 1.1107129916663312, - "grad_norm": 0.002852516481652856, - "learning_rate": 0.00019999939300845344, - "loss": 46.0, - "step": 6897 - }, - { - "epoch": 1.1108740287451186, - "grad_norm": 0.0005394486943259835, - "learning_rate": 0.00019999939283216916, - "loss": 46.0, - "step": 6898 - }, - { - "epoch": 1.1110350658239059, - "grad_norm": 0.0025807165075093508, - "learning_rate": 0.00019999939265585926, - "loss": 46.0, - "step": 6899 - }, - { - "epoch": 1.1111961029026933, - "grad_norm": 0.0040620192885398865, - "learning_rate": 0.00019999939247952376, - "loss": 46.0, - "step": 6900 - }, - { - "epoch": 1.1113571399814808, - "grad_norm": 0.0039823055267333984, - "learning_rate": 0.00019999939230316266, - "loss": 46.0, - "step": 6901 - }, - { - "epoch": 1.111518177060268, - "grad_norm": 0.0021288138814270496, - "learning_rate": 0.00019999939212677598, - "loss": 46.0, - "step": 6902 - }, - { - "epoch": 1.1116792141390555, - "grad_norm": 0.0011207035277038813, - "learning_rate": 0.00019999939195036373, - "loss": 46.0, - "step": 6903 - }, - { - "epoch": 1.111840251217843, - "grad_norm": 0.008520621806383133, - "learning_rate": 0.00019999939177392585, - "loss": 46.0, - "step": 6904 - }, - { - "epoch": 1.1120012882966304, - "grad_norm": 0.001497647725045681, - "learning_rate": 0.00019999939159746238, - "loss": 46.0, - "step": 6905 - }, - { - "epoch": 1.1121623253754176, - "grad_norm": 0.001006730948574841, - "learning_rate": 0.0001999993914209733, - "loss": 46.0, - "step": 6906 - }, - { - "epoch": 1.112323362454205, - "grad_norm": 0.0009130086400546134, - "learning_rate": 0.00019999939124445865, - "loss": 46.0, - "step": 6907 - }, - { - "epoch": 1.1124843995329925, - "grad_norm": 0.001340411719866097, - "learning_rate": 0.0001999993910679184, - "loss": 46.0, - "step": 6908 - }, - { - "epoch": 1.11264543661178, - "grad_norm": 0.0021440873388201, - "learning_rate": 0.00019999939089135256, - "loss": 46.0, - "step": 6909 - }, - { - "epoch": 1.1128064736905672, - "grad_norm": 0.00844606477767229, - "learning_rate": 0.00019999939071476113, - "loss": 46.0, - "step": 6910 - }, - { - "epoch": 1.1129675107693546, - "grad_norm": 0.001317870570346713, - "learning_rate": 0.00019999939053814408, - "loss": 46.0, - "step": 6911 - }, - { - "epoch": 1.113128547848142, - "grad_norm": 0.0011348029365763068, - "learning_rate": 0.00019999939036150145, - "loss": 46.0, - "step": 6912 - }, - { - "epoch": 1.1132895849269295, - "grad_norm": 0.00458654435351491, - "learning_rate": 0.00019999939018483322, - "loss": 46.0, - "step": 6913 - }, - { - "epoch": 1.1134506220057168, - "grad_norm": 0.005441383924335241, - "learning_rate": 0.0001999993900081394, - "loss": 46.0, - "step": 6914 - }, - { - "epoch": 1.1136116590845042, - "grad_norm": 0.0005340529605746269, - "learning_rate": 0.00019999938983142, - "loss": 46.0, - "step": 6915 - }, - { - "epoch": 1.1137726961632917, - "grad_norm": 0.001655799918808043, - "learning_rate": 0.00019999938965467497, - "loss": 46.0, - "step": 6916 - }, - { - "epoch": 1.113933733242079, - "grad_norm": 0.0025167756248265505, - "learning_rate": 0.00019999938947790438, - "loss": 46.0, - "step": 6917 - }, - { - "epoch": 1.1140947703208663, - "grad_norm": 0.0007257751422002912, - "learning_rate": 0.0001999993893011082, - "loss": 46.0, - "step": 6918 - }, - { - "epoch": 1.1142558073996538, - "grad_norm": 0.0004469645209610462, - "learning_rate": 0.0001999993891242864, - "loss": 46.0, - "step": 6919 - }, - { - "epoch": 1.1144168444784412, - "grad_norm": 0.0010013979626819491, - "learning_rate": 0.000199999388947439, - "loss": 46.0, - "step": 6920 - }, - { - "epoch": 1.1145778815572285, - "grad_norm": 0.002204634714871645, - "learning_rate": 0.000199999388770566, - "loss": 46.0, - "step": 6921 - }, - { - "epoch": 1.114738918636016, - "grad_norm": 0.0015005613677203655, - "learning_rate": 0.00019999938859366744, - "loss": 46.0, - "step": 6922 - }, - { - "epoch": 1.1148999557148034, - "grad_norm": 0.006144202779978514, - "learning_rate": 0.00019999938841674326, - "loss": 46.0, - "step": 6923 - }, - { - "epoch": 1.1150609927935906, - "grad_norm": 0.0006316326325759292, - "learning_rate": 0.00019999938823979353, - "loss": 46.0, - "step": 6924 - }, - { - "epoch": 1.115222029872378, - "grad_norm": 0.0026485207490622997, - "learning_rate": 0.00019999938806281815, - "loss": 46.0, - "step": 6925 - }, - { - "epoch": 1.1153830669511655, - "grad_norm": 0.0012750618625432253, - "learning_rate": 0.0001999993878858172, - "loss": 46.0, - "step": 6926 - }, - { - "epoch": 1.115544104029953, - "grad_norm": 0.006311360280960798, - "learning_rate": 0.00019999938770879063, - "loss": 46.0, - "step": 6927 - }, - { - "epoch": 1.1157051411087402, - "grad_norm": 0.012786568142473698, - "learning_rate": 0.0001999993875317385, - "loss": 46.0, - "step": 6928 - }, - { - "epoch": 1.1158661781875276, - "grad_norm": 0.0030675893649458885, - "learning_rate": 0.00019999938735466074, - "loss": 46.0, - "step": 6929 - }, - { - "epoch": 1.116027215266315, - "grad_norm": 0.0030196455772966146, - "learning_rate": 0.00019999938717755742, - "loss": 46.0, - "step": 6930 - }, - { - "epoch": 1.1161882523451025, - "grad_norm": 0.0015942202880978584, - "learning_rate": 0.0001999993870004285, - "loss": 46.0, - "step": 6931 - }, - { - "epoch": 1.1163492894238898, - "grad_norm": 0.006608026567846537, - "learning_rate": 0.00019999938682327395, - "loss": 46.0, - "step": 6932 - }, - { - "epoch": 1.1165103265026772, - "grad_norm": 0.0030436262022703886, - "learning_rate": 0.00019999938664609385, - "loss": 46.0, - "step": 6933 - }, - { - "epoch": 1.1166713635814647, - "grad_norm": 0.002232971368357539, - "learning_rate": 0.00019999938646888814, - "loss": 46.0, - "step": 6934 - }, - { - "epoch": 1.1168324006602521, - "grad_norm": 0.000834618229418993, - "learning_rate": 0.00019999938629165683, - "loss": 46.0, - "step": 6935 - }, - { - "epoch": 1.1169934377390394, - "grad_norm": 0.0016175888013094664, - "learning_rate": 0.00019999938611439991, - "loss": 46.0, - "step": 6936 - }, - { - "epoch": 1.1171544748178268, - "grad_norm": 0.0007204286521300673, - "learning_rate": 0.00019999938593711744, - "loss": 46.0, - "step": 6937 - }, - { - "epoch": 1.1173155118966143, - "grad_norm": 0.0012183092767372727, - "learning_rate": 0.00019999938575980934, - "loss": 46.0, - "step": 6938 - }, - { - "epoch": 1.1174765489754015, - "grad_norm": 0.0014345013769343495, - "learning_rate": 0.00019999938558247564, - "loss": 46.0, - "step": 6939 - }, - { - "epoch": 1.117637586054189, - "grad_norm": 0.0010583841940388083, - "learning_rate": 0.00019999938540511637, - "loss": 46.0, - "step": 6940 - }, - { - "epoch": 1.1177986231329764, - "grad_norm": 0.016156338155269623, - "learning_rate": 0.0001999993852277315, - "loss": 46.0, - "step": 6941 - }, - { - "epoch": 1.1179596602117639, - "grad_norm": 0.0037791484501212835, - "learning_rate": 0.00019999938505032102, - "loss": 46.0, - "step": 6942 - }, - { - "epoch": 1.118120697290551, - "grad_norm": 0.0006212688167579472, - "learning_rate": 0.00019999938487288494, - "loss": 46.0, - "step": 6943 - }, - { - "epoch": 1.1182817343693385, - "grad_norm": 0.0011253413977101445, - "learning_rate": 0.0001999993846954233, - "loss": 46.0, - "step": 6944 - }, - { - "epoch": 1.118442771448126, - "grad_norm": 0.0038602829445153475, - "learning_rate": 0.00019999938451793604, - "loss": 46.0, - "step": 6945 - }, - { - "epoch": 1.1186038085269132, - "grad_norm": 0.0014489177847281098, - "learning_rate": 0.00019999938434042317, - "loss": 46.0, - "step": 6946 - }, - { - "epoch": 1.1187648456057007, - "grad_norm": 0.0029896770138293505, - "learning_rate": 0.00019999938416288474, - "loss": 46.0, - "step": 6947 - }, - { - "epoch": 1.1189258826844881, - "grad_norm": 0.0008623116300441325, - "learning_rate": 0.0001999993839853207, - "loss": 46.0, - "step": 6948 - }, - { - "epoch": 1.1190869197632756, - "grad_norm": 0.0012256938498467207, - "learning_rate": 0.00019999938380773106, - "loss": 46.0, - "step": 6949 - }, - { - "epoch": 1.1192479568420628, - "grad_norm": 0.0012182240607216954, - "learning_rate": 0.00019999938363011584, - "loss": 46.0, - "step": 6950 - }, - { - "epoch": 1.1194089939208502, - "grad_norm": 0.01165693812072277, - "learning_rate": 0.000199999383452475, - "loss": 46.0, - "step": 6951 - }, - { - "epoch": 1.1195700309996377, - "grad_norm": 0.002061246894299984, - "learning_rate": 0.0001999993832748086, - "loss": 46.0, - "step": 6952 - }, - { - "epoch": 1.1197310680784252, - "grad_norm": 0.006831257604062557, - "learning_rate": 0.00019999938309711658, - "loss": 46.0, - "step": 6953 - }, - { - "epoch": 1.1198921051572124, - "grad_norm": 0.0019761386793106794, - "learning_rate": 0.00019999938291939898, - "loss": 46.0, - "step": 6954 - }, - { - "epoch": 1.1200531422359998, - "grad_norm": 0.0024278834462165833, - "learning_rate": 0.00019999938274165577, - "loss": 46.0, - "step": 6955 - }, - { - "epoch": 1.1202141793147873, - "grad_norm": 0.00209654844366014, - "learning_rate": 0.00019999938256388698, - "loss": 46.0, - "step": 6956 - }, - { - "epoch": 1.1203752163935747, - "grad_norm": 0.0024988569784909487, - "learning_rate": 0.00019999938238609256, - "loss": 46.0, - "step": 6957 - }, - { - "epoch": 1.120536253472362, - "grad_norm": 0.0006901687593199313, - "learning_rate": 0.0001999993822082726, - "loss": 46.0, - "step": 6958 - }, - { - "epoch": 1.1206972905511494, - "grad_norm": 0.0005171674420125782, - "learning_rate": 0.000199999382030427, - "loss": 46.0, - "step": 6959 - }, - { - "epoch": 1.1208583276299369, - "grad_norm": 0.0021077613346278667, - "learning_rate": 0.00019999938185255584, - "loss": 46.0, - "step": 6960 - }, - { - "epoch": 1.121019364708724, - "grad_norm": 0.00047611480113118887, - "learning_rate": 0.00019999938167465908, - "loss": 46.0, - "step": 6961 - }, - { - "epoch": 1.1211804017875115, - "grad_norm": 0.0010797953000292182, - "learning_rate": 0.0001999993814967367, - "loss": 46.0, - "step": 6962 - }, - { - "epoch": 1.121341438866299, - "grad_norm": 0.0005051333573646843, - "learning_rate": 0.0001999993813187887, - "loss": 46.0, - "step": 6963 - }, - { - "epoch": 1.1215024759450865, - "grad_norm": 0.0012195693561807275, - "learning_rate": 0.0001999993811408152, - "loss": 46.0, - "step": 6964 - }, - { - "epoch": 1.1216635130238737, - "grad_norm": 0.0005467704613693058, - "learning_rate": 0.00019999938096281606, - "loss": 46.0, - "step": 6965 - }, - { - "epoch": 1.1218245501026611, - "grad_norm": 0.0033836483489722013, - "learning_rate": 0.0001999993807847913, - "loss": 46.0, - "step": 6966 - }, - { - "epoch": 1.1219855871814486, - "grad_norm": 0.001822745194658637, - "learning_rate": 0.00019999938060674094, - "loss": 46.0, - "step": 6967 - }, - { - "epoch": 1.1221466242602358, - "grad_norm": 0.0010443114442750812, - "learning_rate": 0.00019999938042866502, - "loss": 46.0, - "step": 6968 - }, - { - "epoch": 1.1223076613390233, - "grad_norm": 0.0022754084784537554, - "learning_rate": 0.00019999938025056348, - "loss": 46.0, - "step": 6969 - }, - { - "epoch": 1.1224686984178107, - "grad_norm": 0.002585018053650856, - "learning_rate": 0.00019999938007243638, - "loss": 46.0, - "step": 6970 - }, - { - "epoch": 1.1226297354965982, - "grad_norm": 0.002244913950562477, - "learning_rate": 0.00019999937989428364, - "loss": 46.0, - "step": 6971 - }, - { - "epoch": 1.1227907725753854, - "grad_norm": 0.0019119534408673644, - "learning_rate": 0.00019999937971610534, - "loss": 46.0, - "step": 6972 - }, - { - "epoch": 1.1229518096541729, - "grad_norm": 0.0038164029829204082, - "learning_rate": 0.00019999937953790143, - "loss": 46.0, - "step": 6973 - }, - { - "epoch": 1.1231128467329603, - "grad_norm": 0.0006349575123749673, - "learning_rate": 0.00019999937935967192, - "loss": 46.0, - "step": 6974 - }, - { - "epoch": 1.1232738838117478, - "grad_norm": 0.006171821616590023, - "learning_rate": 0.00019999937918141684, - "loss": 46.0, - "step": 6975 - }, - { - "epoch": 1.123434920890535, - "grad_norm": 0.0037995383609086275, - "learning_rate": 0.00019999937900313613, - "loss": 46.0, - "step": 6976 - }, - { - "epoch": 1.1235959579693224, - "grad_norm": 0.0012308359146118164, - "learning_rate": 0.00019999937882482987, - "loss": 46.0, - "step": 6977 - }, - { - "epoch": 1.1237569950481099, - "grad_norm": 0.0010234408546239138, - "learning_rate": 0.000199999378646498, - "loss": 46.0, - "step": 6978 - }, - { - "epoch": 1.1239180321268971, - "grad_norm": 0.0017657250864431262, - "learning_rate": 0.0001999993784681405, - "loss": 46.0, - "step": 6979 - }, - { - "epoch": 1.1240790692056846, - "grad_norm": 0.0010053877485916018, - "learning_rate": 0.00019999937828975742, - "loss": 46.0, - "step": 6980 - }, - { - "epoch": 1.124240106284472, - "grad_norm": 0.0008249666425399482, - "learning_rate": 0.00019999937811134876, - "loss": 46.0, - "step": 6981 - }, - { - "epoch": 1.1244011433632595, - "grad_norm": 0.0029670277144759893, - "learning_rate": 0.0001999993779329145, - "loss": 46.0, - "step": 6982 - }, - { - "epoch": 1.1245621804420467, - "grad_norm": 0.005452977493405342, - "learning_rate": 0.00019999937775445466, - "loss": 46.0, - "step": 6983 - }, - { - "epoch": 1.1247232175208342, - "grad_norm": 0.0006747141596861184, - "learning_rate": 0.0001999993775759692, - "loss": 46.0, - "step": 6984 - }, - { - "epoch": 1.1248842545996216, - "grad_norm": 0.002479668240994215, - "learning_rate": 0.00019999937739745817, - "loss": 46.0, - "step": 6985 - }, - { - "epoch": 1.125045291678409, - "grad_norm": 0.003732510609552264, - "learning_rate": 0.0001999993772189215, - "loss": 46.0, - "step": 6986 - }, - { - "epoch": 1.1252063287571963, - "grad_norm": 0.0010793304536491632, - "learning_rate": 0.0001999993770403593, - "loss": 46.0, - "step": 6987 - }, - { - "epoch": 1.1253673658359837, - "grad_norm": 0.0019333201926201582, - "learning_rate": 0.00019999937686177147, - "loss": 46.0, - "step": 6988 - }, - { - "epoch": 1.1255284029147712, - "grad_norm": 0.0018508994253352284, - "learning_rate": 0.00019999937668315805, - "loss": 46.0, - "step": 6989 - }, - { - "epoch": 1.1256894399935584, - "grad_norm": 0.00103022123221308, - "learning_rate": 0.00019999937650451904, - "loss": 46.0, - "step": 6990 - }, - { - "epoch": 1.1258504770723459, - "grad_norm": 0.004638159181922674, - "learning_rate": 0.0001999993763258544, - "loss": 46.0, - "step": 6991 - }, - { - "epoch": 1.1260115141511333, - "grad_norm": 0.0008935858495533466, - "learning_rate": 0.0001999993761471642, - "loss": 46.0, - "step": 6992 - }, - { - "epoch": 1.1261725512299208, - "grad_norm": 0.0008927615126594901, - "learning_rate": 0.0001999993759684484, - "loss": 46.0, - "step": 6993 - }, - { - "epoch": 1.126333588308708, - "grad_norm": 0.0019390768138691783, - "learning_rate": 0.000199999375789707, - "loss": 46.0, - "step": 6994 - }, - { - "epoch": 1.1264946253874955, - "grad_norm": 0.0005231575923971832, - "learning_rate": 0.00019999937561094, - "loss": 46.0, - "step": 6995 - }, - { - "epoch": 1.126655662466283, - "grad_norm": 0.0029506096616387367, - "learning_rate": 0.00019999937543214741, - "loss": 46.0, - "step": 6996 - }, - { - "epoch": 1.1268166995450701, - "grad_norm": 0.007539860438555479, - "learning_rate": 0.00019999937525332924, - "loss": 46.0, - "step": 6997 - }, - { - "epoch": 1.1269777366238576, - "grad_norm": 0.0010391726391389966, - "learning_rate": 0.00019999937507448546, - "loss": 46.0, - "step": 6998 - }, - { - "epoch": 1.127138773702645, - "grad_norm": 0.0033676750026643276, - "learning_rate": 0.00019999937489561612, - "loss": 46.0, - "step": 6999 - }, - { - "epoch": 1.1272998107814325, - "grad_norm": 0.0012933469843119383, - "learning_rate": 0.00019999937471672113, - "loss": 46.0, - "step": 7000 - }, - { - "epoch": 1.12746084786022, - "grad_norm": 0.0030426557641476393, - "learning_rate": 0.0001999993745378006, - "loss": 46.0, - "step": 7001 - }, - { - "epoch": 1.1276218849390072, - "grad_norm": 0.001418073894456029, - "learning_rate": 0.0001999993743588544, - "loss": 46.0, - "step": 7002 - }, - { - "epoch": 1.1277829220177946, - "grad_norm": 0.0005312525318004191, - "learning_rate": 0.00019999937417988268, - "loss": 46.0, - "step": 7003 - }, - { - "epoch": 1.127943959096582, - "grad_norm": 0.00363905425183475, - "learning_rate": 0.00019999937400088532, - "loss": 46.0, - "step": 7004 - }, - { - "epoch": 1.1281049961753693, - "grad_norm": 0.004000222310423851, - "learning_rate": 0.0001999993738218624, - "loss": 46.0, - "step": 7005 - }, - { - "epoch": 1.1282660332541568, - "grad_norm": 0.0005363834789022803, - "learning_rate": 0.00019999937364281384, - "loss": 46.0, - "step": 7006 - }, - { - "epoch": 1.1284270703329442, - "grad_norm": 0.001692510792054236, - "learning_rate": 0.00019999937346373974, - "loss": 46.0, - "step": 7007 - }, - { - "epoch": 1.1285881074117317, - "grad_norm": 0.0005582827143371105, - "learning_rate": 0.00019999937328464, - "loss": 46.0, - "step": 7008 - }, - { - "epoch": 1.1287491444905189, - "grad_norm": 0.0021540625020861626, - "learning_rate": 0.0001999993731055147, - "loss": 46.0, - "step": 7009 - }, - { - "epoch": 1.1289101815693063, - "grad_norm": 0.00135741604026407, - "learning_rate": 0.0001999993729263638, - "loss": 46.0, - "step": 7010 - }, - { - "epoch": 1.1290712186480938, - "grad_norm": 0.002207091310992837, - "learning_rate": 0.00019999937274718728, - "loss": 46.0, - "step": 7011 - }, - { - "epoch": 1.129232255726881, - "grad_norm": 0.0011262692278251052, - "learning_rate": 0.00019999937256798516, - "loss": 46.0, - "step": 7012 - }, - { - "epoch": 1.1293932928056685, - "grad_norm": 0.004497850779443979, - "learning_rate": 0.00019999937238875747, - "loss": 46.0, - "step": 7013 - }, - { - "epoch": 1.129554329884456, - "grad_norm": 0.0009169720578938723, - "learning_rate": 0.0001999993722095042, - "loss": 46.0, - "step": 7014 - }, - { - "epoch": 1.1297153669632434, - "grad_norm": 0.0029274439439177513, - "learning_rate": 0.0001999993720302253, - "loss": 46.0, - "step": 7015 - }, - { - "epoch": 1.1298764040420306, - "grad_norm": 0.0007946903351694345, - "learning_rate": 0.00019999937185092082, - "loss": 46.0, - "step": 7016 - }, - { - "epoch": 1.130037441120818, - "grad_norm": 0.002871324075385928, - "learning_rate": 0.00019999937167159074, - "loss": 46.0, - "step": 7017 - }, - { - "epoch": 1.1301984781996055, - "grad_norm": 0.0010288202902302146, - "learning_rate": 0.00019999937149223505, - "loss": 46.0, - "step": 7018 - }, - { - "epoch": 1.1303595152783927, - "grad_norm": 0.008500954136252403, - "learning_rate": 0.00019999937131285383, - "loss": 46.0, - "step": 7019 - }, - { - "epoch": 1.1305205523571802, - "grad_norm": 0.006231991108506918, - "learning_rate": 0.00019999937113344697, - "loss": 46.0, - "step": 7020 - }, - { - "epoch": 1.1306815894359676, - "grad_norm": 0.0017418725183233619, - "learning_rate": 0.00019999937095401452, - "loss": 46.0, - "step": 7021 - }, - { - "epoch": 1.130842626514755, - "grad_norm": 0.0019019623287022114, - "learning_rate": 0.00019999937077455645, - "loss": 46.0, - "step": 7022 - }, - { - "epoch": 1.1310036635935423, - "grad_norm": 0.009048969484865665, - "learning_rate": 0.0001999993705950728, - "loss": 46.0, - "step": 7023 - }, - { - "epoch": 1.1311647006723298, - "grad_norm": 0.000814581464510411, - "learning_rate": 0.00019999937041556359, - "loss": 46.0, - "step": 7024 - }, - { - "epoch": 1.1313257377511172, - "grad_norm": 0.0012518613366410136, - "learning_rate": 0.00019999937023602876, - "loss": 46.0, - "step": 7025 - }, - { - "epoch": 1.1314867748299047, - "grad_norm": 0.0014614618849009275, - "learning_rate": 0.00019999937005646832, - "loss": 46.0, - "step": 7026 - }, - { - "epoch": 1.131647811908692, - "grad_norm": 0.007246515713632107, - "learning_rate": 0.0001999993698768823, - "loss": 46.0, - "step": 7027 - }, - { - "epoch": 1.1318088489874794, - "grad_norm": 0.001126777264289558, - "learning_rate": 0.00019999936969727067, - "loss": 46.0, - "step": 7028 - }, - { - "epoch": 1.1319698860662668, - "grad_norm": 0.0008017787477001548, - "learning_rate": 0.00019999936951763347, - "loss": 46.0, - "step": 7029 - }, - { - "epoch": 1.1321309231450543, - "grad_norm": 0.0003722316469065845, - "learning_rate": 0.00019999936933797066, - "loss": 46.0, - "step": 7030 - }, - { - "epoch": 1.1322919602238415, - "grad_norm": 0.0008829387370496988, - "learning_rate": 0.00019999936915828225, - "loss": 46.0, - "step": 7031 - }, - { - "epoch": 1.132452997302629, - "grad_norm": 0.000955459545366466, - "learning_rate": 0.0001999993689785683, - "loss": 46.0, - "step": 7032 - }, - { - "epoch": 1.1326140343814164, - "grad_norm": 0.0025489337276667356, - "learning_rate": 0.00019999936879882868, - "loss": 46.0, - "step": 7033 - }, - { - "epoch": 1.1327750714602036, - "grad_norm": 0.0011475490173324943, - "learning_rate": 0.0001999993686190635, - "loss": 46.0, - "step": 7034 - }, - { - "epoch": 1.132936108538991, - "grad_norm": 0.009986408054828644, - "learning_rate": 0.0001999993684392727, - "loss": 46.0, - "step": 7035 - }, - { - "epoch": 1.1330971456177785, - "grad_norm": 0.0021582748740911484, - "learning_rate": 0.00019999936825945634, - "loss": 46.0, - "step": 7036 - }, - { - "epoch": 1.133258182696566, - "grad_norm": 0.0006016548722982407, - "learning_rate": 0.00019999936807961436, - "loss": 46.0, - "step": 7037 - }, - { - "epoch": 1.1334192197753532, - "grad_norm": 0.0005165237234905362, - "learning_rate": 0.0001999993678997468, - "loss": 46.0, - "step": 7038 - }, - { - "epoch": 1.1335802568541407, - "grad_norm": 0.0011950712651014328, - "learning_rate": 0.00019999936771985364, - "loss": 46.0, - "step": 7039 - }, - { - "epoch": 1.133741293932928, - "grad_norm": 0.005373006220906973, - "learning_rate": 0.0001999993675399349, - "loss": 46.0, - "step": 7040 - }, - { - "epoch": 1.1339023310117153, - "grad_norm": 0.0016636315267533064, - "learning_rate": 0.00019999936735999057, - "loss": 46.0, - "step": 7041 - }, - { - "epoch": 1.1340633680905028, - "grad_norm": 0.002621646737679839, - "learning_rate": 0.00019999936718002062, - "loss": 46.0, - "step": 7042 - }, - { - "epoch": 1.1342244051692902, - "grad_norm": 0.0009070023079402745, - "learning_rate": 0.0001999993670000251, - "loss": 46.0, - "step": 7043 - }, - { - "epoch": 1.1343854422480777, - "grad_norm": 0.0005544809391722083, - "learning_rate": 0.00019999936682000392, - "loss": 46.0, - "step": 7044 - }, - { - "epoch": 1.134546479326865, - "grad_norm": 0.000758998969104141, - "learning_rate": 0.0001999993666399572, - "loss": 46.0, - "step": 7045 - }, - { - "epoch": 1.1347075164056524, - "grad_norm": 0.0022243887651711702, - "learning_rate": 0.0001999993664598849, - "loss": 46.0, - "step": 7046 - }, - { - "epoch": 1.1348685534844398, - "grad_norm": 0.0008686042856425047, - "learning_rate": 0.00019999936627978699, - "loss": 46.0, - "step": 7047 - }, - { - "epoch": 1.1350295905632273, - "grad_norm": 0.0031492654234170914, - "learning_rate": 0.00019999936609966346, - "loss": 46.0, - "step": 7048 - }, - { - "epoch": 1.1351906276420145, - "grad_norm": 0.0041769687086343765, - "learning_rate": 0.00019999936591951436, - "loss": 46.0, - "step": 7049 - }, - { - "epoch": 1.135351664720802, - "grad_norm": 0.0015751132741570473, - "learning_rate": 0.00019999936573933966, - "loss": 46.0, - "step": 7050 - }, - { - "epoch": 1.1355127017995894, - "grad_norm": 0.0004279755230527371, - "learning_rate": 0.00019999936555913935, - "loss": 46.0, - "step": 7051 - }, - { - "epoch": 1.1356737388783769, - "grad_norm": 0.002107194159179926, - "learning_rate": 0.00019999936537891348, - "loss": 46.0, - "step": 7052 - }, - { - "epoch": 1.135834775957164, - "grad_norm": 0.0016004648059606552, - "learning_rate": 0.00019999936519866197, - "loss": 46.0, - "step": 7053 - }, - { - "epoch": 1.1359958130359515, - "grad_norm": 0.0026183025911450386, - "learning_rate": 0.00019999936501838492, - "loss": 46.0, - "step": 7054 - }, - { - "epoch": 1.136156850114739, - "grad_norm": 0.0004843698407057673, - "learning_rate": 0.00019999936483808224, - "loss": 46.0, - "step": 7055 - }, - { - "epoch": 1.1363178871935262, - "grad_norm": 0.0010027765529230237, - "learning_rate": 0.00019999936465775397, - "loss": 46.0, - "step": 7056 - }, - { - "epoch": 1.1364789242723137, - "grad_norm": 0.0016240050317719579, - "learning_rate": 0.0001999993644774001, - "loss": 46.0, - "step": 7057 - }, - { - "epoch": 1.1366399613511011, - "grad_norm": 0.0015592457493767142, - "learning_rate": 0.00019999936429702063, - "loss": 46.0, - "step": 7058 - }, - { - "epoch": 1.1368009984298886, - "grad_norm": 0.0026613944210112095, - "learning_rate": 0.00019999936411661562, - "loss": 46.0, - "step": 7059 - }, - { - "epoch": 1.1369620355086758, - "grad_norm": 0.0009260620572604239, - "learning_rate": 0.00019999936393618497, - "loss": 46.0, - "step": 7060 - }, - { - "epoch": 1.1371230725874633, - "grad_norm": 0.005152855534106493, - "learning_rate": 0.0001999993637557287, - "loss": 46.0, - "step": 7061 - }, - { - "epoch": 1.1372841096662507, - "grad_norm": 0.0038004093803465366, - "learning_rate": 0.0001999993635752469, - "loss": 46.0, - "step": 7062 - }, - { - "epoch": 1.137445146745038, - "grad_norm": 0.0005911357002332807, - "learning_rate": 0.00019999936339473945, - "loss": 46.0, - "step": 7063 - }, - { - "epoch": 1.1376061838238254, - "grad_norm": 0.0011656349524855614, - "learning_rate": 0.00019999936321420643, - "loss": 46.0, - "step": 7064 - }, - { - "epoch": 1.1377672209026128, - "grad_norm": 0.002185005694627762, - "learning_rate": 0.00019999936303364781, - "loss": 46.0, - "step": 7065 - }, - { - "epoch": 1.1379282579814003, - "grad_norm": 0.0018050329526886344, - "learning_rate": 0.0001999993628530636, - "loss": 46.0, - "step": 7066 - }, - { - "epoch": 1.1380892950601875, - "grad_norm": 0.0029668938368558884, - "learning_rate": 0.00019999936267245378, - "loss": 46.0, - "step": 7067 - }, - { - "epoch": 1.138250332138975, - "grad_norm": 0.0016116938786581159, - "learning_rate": 0.00019999936249181838, - "loss": 46.0, - "step": 7068 - }, - { - "epoch": 1.1384113692177624, - "grad_norm": 0.001741871703416109, - "learning_rate": 0.0001999993623111574, - "loss": 46.0, - "step": 7069 - }, - { - "epoch": 1.1385724062965499, - "grad_norm": 0.0011630411026999354, - "learning_rate": 0.00019999936213047076, - "loss": 46.0, - "step": 7070 - }, - { - "epoch": 1.138733443375337, - "grad_norm": 0.0006004149909131229, - "learning_rate": 0.0001999993619497586, - "loss": 46.0, - "step": 7071 - }, - { - "epoch": 1.1388944804541246, - "grad_norm": 0.0016764452448114753, - "learning_rate": 0.00019999936176902082, - "loss": 46.0, - "step": 7072 - }, - { - "epoch": 1.139055517532912, - "grad_norm": 0.0013854493154212832, - "learning_rate": 0.00019999936158825743, - "loss": 46.0, - "step": 7073 - }, - { - "epoch": 1.1392165546116995, - "grad_norm": 0.0014358782209455967, - "learning_rate": 0.00019999936140746845, - "loss": 46.0, - "step": 7074 - }, - { - "epoch": 1.1393775916904867, - "grad_norm": 0.005022861994802952, - "learning_rate": 0.0001999993612266539, - "loss": 46.0, - "step": 7075 - }, - { - "epoch": 1.1395386287692741, - "grad_norm": 0.000553262943867594, - "learning_rate": 0.00019999936104581374, - "loss": 46.0, - "step": 7076 - }, - { - "epoch": 1.1396996658480616, - "grad_norm": 0.005023543257266283, - "learning_rate": 0.00019999936086494797, - "loss": 46.0, - "step": 7077 - }, - { - "epoch": 1.1398607029268488, - "grad_norm": 0.0012056485284119844, - "learning_rate": 0.00019999936068405662, - "loss": 46.0, - "step": 7078 - }, - { - "epoch": 1.1400217400056363, - "grad_norm": 0.0006060278392396867, - "learning_rate": 0.00019999936050313968, - "loss": 46.0, - "step": 7079 - }, - { - "epoch": 1.1401827770844237, - "grad_norm": 0.0006432574591599405, - "learning_rate": 0.00019999936032219712, - "loss": 46.0, - "step": 7080 - }, - { - "epoch": 1.1403438141632112, - "grad_norm": 0.0017074752831831574, - "learning_rate": 0.000199999360141229, - "loss": 46.0, - "step": 7081 - }, - { - "epoch": 1.1405048512419984, - "grad_norm": 0.002766041085124016, - "learning_rate": 0.00019999935996023528, - "loss": 46.0, - "step": 7082 - }, - { - "epoch": 1.1406658883207859, - "grad_norm": 0.0030352489557117224, - "learning_rate": 0.00019999935977921593, - "loss": 46.0, - "step": 7083 - }, - { - "epoch": 1.1408269253995733, - "grad_norm": 0.0029198937118053436, - "learning_rate": 0.00019999935959817103, - "loss": 46.0, - "step": 7084 - }, - { - "epoch": 1.1409879624783605, - "grad_norm": 0.003776973346248269, - "learning_rate": 0.0001999993594171005, - "loss": 46.0, - "step": 7085 - }, - { - "epoch": 1.141148999557148, - "grad_norm": 0.003045264631509781, - "learning_rate": 0.0001999993592360044, - "loss": 46.0, - "step": 7086 - }, - { - "epoch": 1.1413100366359354, - "grad_norm": 0.00043938314775004983, - "learning_rate": 0.0001999993590548827, - "loss": 46.0, - "step": 7087 - }, - { - "epoch": 1.141471073714723, - "grad_norm": 0.0018584945937618613, - "learning_rate": 0.0001999993588737354, - "loss": 46.0, - "step": 7088 - }, - { - "epoch": 1.1416321107935101, - "grad_norm": 0.002093725837767124, - "learning_rate": 0.0001999993586925625, - "loss": 46.0, - "step": 7089 - }, - { - "epoch": 1.1417931478722976, - "grad_norm": 0.0020268792286515236, - "learning_rate": 0.000199999358511364, - "loss": 46.0, - "step": 7090 - }, - { - "epoch": 1.141954184951085, - "grad_norm": 0.0018076930427923799, - "learning_rate": 0.00019999935833013991, - "loss": 46.0, - "step": 7091 - }, - { - "epoch": 1.1421152220298723, - "grad_norm": 0.005321699660271406, - "learning_rate": 0.00019999935814889026, - "loss": 46.0, - "step": 7092 - }, - { - "epoch": 1.1422762591086597, - "grad_norm": 0.0018107375362887979, - "learning_rate": 0.00019999935796761496, - "loss": 46.0, - "step": 7093 - }, - { - "epoch": 1.1424372961874472, - "grad_norm": 0.0012942444300279021, - "learning_rate": 0.0001999993577863141, - "loss": 46.0, - "step": 7094 - }, - { - "epoch": 1.1425983332662346, - "grad_norm": 0.0019107672851532698, - "learning_rate": 0.00019999935760498766, - "loss": 46.0, - "step": 7095 - }, - { - "epoch": 1.142759370345022, - "grad_norm": 0.003010336309671402, - "learning_rate": 0.00019999935742363558, - "loss": 46.0, - "step": 7096 - }, - { - "epoch": 1.1429204074238093, - "grad_norm": 0.0010296377586200833, - "learning_rate": 0.00019999935724225796, - "loss": 46.0, - "step": 7097 - }, - { - "epoch": 1.1430814445025967, - "grad_norm": 0.0009530838578939438, - "learning_rate": 0.0001999993570608547, - "loss": 46.0, - "step": 7098 - }, - { - "epoch": 1.1432424815813842, - "grad_norm": 0.0004894350422546268, - "learning_rate": 0.00019999935687942585, - "loss": 46.0, - "step": 7099 - }, - { - "epoch": 1.1434035186601714, - "grad_norm": 0.0013363274047151208, - "learning_rate": 0.00019999935669797142, - "loss": 46.0, - "step": 7100 - }, - { - "epoch": 1.1435645557389589, - "grad_norm": 0.005797158926725388, - "learning_rate": 0.0001999993565164914, - "loss": 46.0, - "step": 7101 - }, - { - "epoch": 1.1437255928177463, - "grad_norm": 0.0009479834116064012, - "learning_rate": 0.00019999935633498576, - "loss": 46.0, - "step": 7102 - }, - { - "epoch": 1.1438866298965338, - "grad_norm": 0.0013979620998725295, - "learning_rate": 0.00019999935615345454, - "loss": 46.0, - "step": 7103 - }, - { - "epoch": 1.144047666975321, - "grad_norm": 0.0022883708588778973, - "learning_rate": 0.00019999935597189773, - "loss": 46.0, - "step": 7104 - }, - { - "epoch": 1.1442087040541085, - "grad_norm": 0.0005104238516651094, - "learning_rate": 0.00019999935579031533, - "loss": 46.0, - "step": 7105 - }, - { - "epoch": 1.144369741132896, - "grad_norm": 0.003547649597749114, - "learning_rate": 0.00019999935560870732, - "loss": 46.0, - "step": 7106 - }, - { - "epoch": 1.1445307782116831, - "grad_norm": 0.002930952003225684, - "learning_rate": 0.00019999935542707373, - "loss": 46.0, - "step": 7107 - }, - { - "epoch": 1.1446918152904706, - "grad_norm": 0.0011777014005929232, - "learning_rate": 0.00019999935524541454, - "loss": 46.0, - "step": 7108 - }, - { - "epoch": 1.144852852369258, - "grad_norm": 0.0031634350307285786, - "learning_rate": 0.00019999935506372974, - "loss": 46.0, - "step": 7109 - }, - { - "epoch": 1.1450138894480455, - "grad_norm": 0.0040549724362790585, - "learning_rate": 0.00019999935488201938, - "loss": 46.0, - "step": 7110 - }, - { - "epoch": 1.1451749265268327, - "grad_norm": 0.0045827715657651424, - "learning_rate": 0.00019999935470028338, - "loss": 46.0, - "step": 7111 - }, - { - "epoch": 1.1453359636056202, - "grad_norm": 0.003957042470574379, - "learning_rate": 0.00019999935451852182, - "loss": 46.0, - "step": 7112 - }, - { - "epoch": 1.1454970006844076, - "grad_norm": 0.0020988809410482645, - "learning_rate": 0.00019999935433673467, - "loss": 46.0, - "step": 7113 - }, - { - "epoch": 1.1456580377631949, - "grad_norm": 0.0010748854838311672, - "learning_rate": 0.00019999935415492188, - "loss": 46.0, - "step": 7114 - }, - { - "epoch": 1.1458190748419823, - "grad_norm": 0.002551599871367216, - "learning_rate": 0.00019999935397308353, - "loss": 46.0, - "step": 7115 - }, - { - "epoch": 1.1459801119207698, - "grad_norm": 0.0006531869294121861, - "learning_rate": 0.00019999935379121957, - "loss": 46.0, - "step": 7116 - }, - { - "epoch": 1.1461411489995572, - "grad_norm": 0.0010318257845938206, - "learning_rate": 0.00019999935360933004, - "loss": 46.0, - "step": 7117 - }, - { - "epoch": 1.1463021860783444, - "grad_norm": 0.003948194440454245, - "learning_rate": 0.0001999993534274149, - "loss": 46.0, - "step": 7118 - }, - { - "epoch": 1.146463223157132, - "grad_norm": 0.002598825842142105, - "learning_rate": 0.00019999935324547415, - "loss": 46.0, - "step": 7119 - }, - { - "epoch": 1.1466242602359193, - "grad_norm": 0.0009227597038261592, - "learning_rate": 0.00019999935306350784, - "loss": 46.0, - "step": 7120 - }, - { - "epoch": 1.1467852973147068, - "grad_norm": 0.0015254870522767305, - "learning_rate": 0.0001999993528815159, - "loss": 46.0, - "step": 7121 - }, - { - "epoch": 1.146946334393494, - "grad_norm": 0.002448051003739238, - "learning_rate": 0.0001999993526994984, - "loss": 46.0, - "step": 7122 - }, - { - "epoch": 1.1471073714722815, - "grad_norm": 0.0005201072199270129, - "learning_rate": 0.00019999935251745527, - "loss": 46.0, - "step": 7123 - }, - { - "epoch": 1.147268408551069, - "grad_norm": 0.0027441829442977905, - "learning_rate": 0.00019999935233538655, - "loss": 46.0, - "step": 7124 - }, - { - "epoch": 1.1474294456298564, - "grad_norm": 0.0030863042920827866, - "learning_rate": 0.00019999935215329225, - "loss": 46.0, - "step": 7125 - }, - { - "epoch": 1.1475904827086436, - "grad_norm": 0.0007674030493944883, - "learning_rate": 0.0001999993519711724, - "loss": 46.0, - "step": 7126 - }, - { - "epoch": 1.147751519787431, - "grad_norm": 0.0026222362648695707, - "learning_rate": 0.00019999935178902688, - "loss": 46.0, - "step": 7127 - }, - { - "epoch": 1.1479125568662185, - "grad_norm": 0.003919719718396664, - "learning_rate": 0.0001999993516068558, - "loss": 46.0, - "step": 7128 - }, - { - "epoch": 1.1480735939450057, - "grad_norm": 0.00328479940071702, - "learning_rate": 0.00019999935142465912, - "loss": 46.0, - "step": 7129 - }, - { - "epoch": 1.1482346310237932, - "grad_norm": 0.0028343668673187494, - "learning_rate": 0.00019999935124243682, - "loss": 46.0, - "step": 7130 - }, - { - "epoch": 1.1483956681025806, - "grad_norm": 0.0006880885921418667, - "learning_rate": 0.00019999935106018897, - "loss": 46.0, - "step": 7131 - }, - { - "epoch": 1.148556705181368, - "grad_norm": 0.005528413224965334, - "learning_rate": 0.0001999993508779155, - "loss": 46.0, - "step": 7132 - }, - { - "epoch": 1.1487177422601553, - "grad_norm": 0.0032945945858955383, - "learning_rate": 0.00019999935069561645, - "loss": 46.0, - "step": 7133 - }, - { - "epoch": 1.1488787793389428, - "grad_norm": 0.00171490793582052, - "learning_rate": 0.00019999935051329178, - "loss": 46.0, - "step": 7134 - }, - { - "epoch": 1.1490398164177302, - "grad_norm": 0.001397947664372623, - "learning_rate": 0.00019999935033094155, - "loss": 46.0, - "step": 7135 - }, - { - "epoch": 1.1492008534965175, - "grad_norm": 0.0019568291027098894, - "learning_rate": 0.0001999993501485657, - "loss": 46.0, - "step": 7136 - }, - { - "epoch": 1.149361890575305, - "grad_norm": 0.0018681793007999659, - "learning_rate": 0.00019999934996616425, - "loss": 46.0, - "step": 7137 - }, - { - "epoch": 1.1495229276540924, - "grad_norm": 0.002650015288963914, - "learning_rate": 0.00019999934978373723, - "loss": 46.0, - "step": 7138 - }, - { - "epoch": 1.1496839647328798, - "grad_norm": 0.0010102004744112492, - "learning_rate": 0.0001999993496012846, - "loss": 46.0, - "step": 7139 - }, - { - "epoch": 1.149845001811667, - "grad_norm": 0.002710662316530943, - "learning_rate": 0.00019999934941880638, - "loss": 46.0, - "step": 7140 - }, - { - "epoch": 1.1500060388904545, - "grad_norm": 0.0019491378916427493, - "learning_rate": 0.00019999934923630255, - "loss": 46.0, - "step": 7141 - }, - { - "epoch": 1.150167075969242, - "grad_norm": 0.0015951680252328515, - "learning_rate": 0.00019999934905377313, - "loss": 46.0, - "step": 7142 - }, - { - "epoch": 1.1503281130480294, - "grad_norm": 0.0041846781969070435, - "learning_rate": 0.00019999934887121815, - "loss": 46.0, - "step": 7143 - }, - { - "epoch": 1.1504891501268166, - "grad_norm": 0.0010823012562468648, - "learning_rate": 0.00019999934868863753, - "loss": 46.0, - "step": 7144 - }, - { - "epoch": 1.150650187205604, - "grad_norm": 0.001921929302625358, - "learning_rate": 0.00019999934850603132, - "loss": 46.0, - "step": 7145 - }, - { - "epoch": 1.1508112242843915, - "grad_norm": 0.0019734189845621586, - "learning_rate": 0.00019999934832339955, - "loss": 46.0, - "step": 7146 - }, - { - "epoch": 1.150972261363179, - "grad_norm": 0.004689333029091358, - "learning_rate": 0.00019999934814074214, - "loss": 46.0, - "step": 7147 - }, - { - "epoch": 1.1511332984419662, - "grad_norm": 0.006085149943828583, - "learning_rate": 0.00019999934795805917, - "loss": 46.0, - "step": 7148 - }, - { - "epoch": 1.1512943355207537, - "grad_norm": 0.0030720606446266174, - "learning_rate": 0.0001999993477753506, - "loss": 46.0, - "step": 7149 - }, - { - "epoch": 1.1514553725995411, - "grad_norm": 0.0005983941373415291, - "learning_rate": 0.0001999993475926164, - "loss": 46.0, - "step": 7150 - }, - { - "epoch": 1.1516164096783283, - "grad_norm": 0.0017330769915133715, - "learning_rate": 0.00019999934740985665, - "loss": 46.0, - "step": 7151 - }, - { - "epoch": 1.1517774467571158, - "grad_norm": 0.0025837167631834745, - "learning_rate": 0.0001999993472270713, - "loss": 46.0, - "step": 7152 - }, - { - "epoch": 1.1519384838359032, - "grad_norm": 0.0020570303313434124, - "learning_rate": 0.00019999934704426035, - "loss": 46.0, - "step": 7153 - }, - { - "epoch": 1.1520995209146907, - "grad_norm": 0.004520559683442116, - "learning_rate": 0.0001999993468614238, - "loss": 46.0, - "step": 7154 - }, - { - "epoch": 1.152260557993478, - "grad_norm": 0.0005783848464488983, - "learning_rate": 0.00019999934667856164, - "loss": 46.0, - "step": 7155 - }, - { - "epoch": 1.1524215950722654, - "grad_norm": 0.001902230316773057, - "learning_rate": 0.00019999934649567392, - "loss": 46.0, - "step": 7156 - }, - { - "epoch": 1.1525826321510528, - "grad_norm": 0.0016362016322091222, - "learning_rate": 0.00019999934631276058, - "loss": 46.0, - "step": 7157 - }, - { - "epoch": 1.15274366922984, - "grad_norm": 0.0024809706956148148, - "learning_rate": 0.00019999934612982166, - "loss": 46.0, - "step": 7158 - }, - { - "epoch": 1.1529047063086275, - "grad_norm": 0.0056978934444487095, - "learning_rate": 0.00019999934594685712, - "loss": 46.0, - "step": 7159 - }, - { - "epoch": 1.153065743387415, - "grad_norm": 0.0004316475533414632, - "learning_rate": 0.000199999345763867, - "loss": 46.0, - "step": 7160 - }, - { - "epoch": 1.1532267804662024, - "grad_norm": 0.001528278342448175, - "learning_rate": 0.0001999993455808513, - "loss": 46.0, - "step": 7161 - }, - { - "epoch": 1.1533878175449896, - "grad_norm": 0.0010144361294806004, - "learning_rate": 0.00019999934539781, - "loss": 46.0, - "step": 7162 - }, - { - "epoch": 1.153548854623777, - "grad_norm": 0.001706136972643435, - "learning_rate": 0.0001999993452147431, - "loss": 46.0, - "step": 7163 - }, - { - "epoch": 1.1537098917025645, - "grad_norm": 0.0011096884263679385, - "learning_rate": 0.0001999993450316506, - "loss": 46.0, - "step": 7164 - }, - { - "epoch": 1.153870928781352, - "grad_norm": 0.008691283874213696, - "learning_rate": 0.0001999993448485325, - "loss": 46.0, - "step": 7165 - }, - { - "epoch": 1.1540319658601392, - "grad_norm": 0.0010033799335360527, - "learning_rate": 0.0001999993446653888, - "loss": 46.0, - "step": 7166 - }, - { - "epoch": 1.1541930029389267, - "grad_norm": 0.0005838305805809796, - "learning_rate": 0.00019999934448221954, - "loss": 46.0, - "step": 7167 - }, - { - "epoch": 1.1543540400177141, - "grad_norm": 0.001546303159557283, - "learning_rate": 0.00019999934429902467, - "loss": 46.0, - "step": 7168 - }, - { - "epoch": 1.1545150770965016, - "grad_norm": 0.0004625291330739856, - "learning_rate": 0.0001999993441158042, - "loss": 46.0, - "step": 7169 - }, - { - "epoch": 1.1546761141752888, - "grad_norm": 0.0023863117676228285, - "learning_rate": 0.00019999934393255813, - "loss": 46.0, - "step": 7170 - }, - { - "epoch": 1.1548371512540763, - "grad_norm": 0.0021304620895534754, - "learning_rate": 0.00019999934374928646, - "loss": 46.0, - "step": 7171 - }, - { - "epoch": 1.1549981883328637, - "grad_norm": 0.0034188157878816128, - "learning_rate": 0.0001999993435659892, - "loss": 46.0, - "step": 7172 - }, - { - "epoch": 1.155159225411651, - "grad_norm": 0.002695410279557109, - "learning_rate": 0.00019999934338266637, - "loss": 46.0, - "step": 7173 - }, - { - "epoch": 1.1553202624904384, - "grad_norm": 0.004395594354718924, - "learning_rate": 0.00019999934319931794, - "loss": 46.0, - "step": 7174 - }, - { - "epoch": 1.1554812995692259, - "grad_norm": 0.0012124684872105718, - "learning_rate": 0.0001999993430159439, - "loss": 46.0, - "step": 7175 - }, - { - "epoch": 1.1556423366480133, - "grad_norm": 0.005230535753071308, - "learning_rate": 0.00019999934283254428, - "loss": 46.0, - "step": 7176 - }, - { - "epoch": 1.1558033737268005, - "grad_norm": 0.0009849924826994538, - "learning_rate": 0.00019999934264911903, - "loss": 46.0, - "step": 7177 - }, - { - "epoch": 1.155964410805588, - "grad_norm": 0.0028686292935162783, - "learning_rate": 0.00019999934246566823, - "loss": 46.0, - "step": 7178 - }, - { - "epoch": 1.1561254478843754, - "grad_norm": 0.0017365794628858566, - "learning_rate": 0.0001999993422821918, - "loss": 46.0, - "step": 7179 - }, - { - "epoch": 1.1562864849631627, - "grad_norm": 0.0004623793647624552, - "learning_rate": 0.00019999934209868978, - "loss": 46.0, - "step": 7180 - }, - { - "epoch": 1.1564475220419501, - "grad_norm": 0.0009670014842413366, - "learning_rate": 0.0001999993419151622, - "loss": 46.0, - "step": 7181 - }, - { - "epoch": 1.1566085591207376, - "grad_norm": 0.0014378369087353349, - "learning_rate": 0.00019999934173160896, - "loss": 46.0, - "step": 7182 - }, - { - "epoch": 1.156769596199525, - "grad_norm": 0.0014132558135315776, - "learning_rate": 0.00019999934154803017, - "loss": 46.0, - "step": 7183 - }, - { - "epoch": 1.1569306332783122, - "grad_norm": 0.002175344852730632, - "learning_rate": 0.0001999993413644258, - "loss": 46.0, - "step": 7184 - }, - { - "epoch": 1.1570916703570997, - "grad_norm": 0.0045136939734220505, - "learning_rate": 0.0001999993411807958, - "loss": 46.0, - "step": 7185 - }, - { - "epoch": 1.1572527074358872, - "grad_norm": 0.009081362746655941, - "learning_rate": 0.00019999934099714021, - "loss": 46.0, - "step": 7186 - }, - { - "epoch": 1.1574137445146744, - "grad_norm": 0.0009936243295669556, - "learning_rate": 0.00019999934081345905, - "loss": 46.0, - "step": 7187 - }, - { - "epoch": 1.1575747815934618, - "grad_norm": 0.002974365372210741, - "learning_rate": 0.00019999934062975226, - "loss": 46.0, - "step": 7188 - }, - { - "epoch": 1.1577358186722493, - "grad_norm": 0.005104478448629379, - "learning_rate": 0.00019999934044601992, - "loss": 46.0, - "step": 7189 - }, - { - "epoch": 1.1578968557510367, - "grad_norm": 0.0020864761900156736, - "learning_rate": 0.00019999934026226196, - "loss": 46.0, - "step": 7190 - }, - { - "epoch": 1.1580578928298242, - "grad_norm": 0.001229654997587204, - "learning_rate": 0.0001999993400784784, - "loss": 46.0, - "step": 7191 - }, - { - "epoch": 1.1582189299086114, - "grad_norm": 0.001190318027511239, - "learning_rate": 0.00019999933989466926, - "loss": 46.0, - "step": 7192 - }, - { - "epoch": 1.1583799669873989, - "grad_norm": 0.0005623722681775689, - "learning_rate": 0.00019999933971083454, - "loss": 46.0, - "step": 7193 - }, - { - "epoch": 1.1585410040661863, - "grad_norm": 0.002388324122875929, - "learning_rate": 0.00019999933952697418, - "loss": 46.0, - "step": 7194 - }, - { - "epoch": 1.1587020411449735, - "grad_norm": 0.005184921436011791, - "learning_rate": 0.00019999933934308826, - "loss": 46.0, - "step": 7195 - }, - { - "epoch": 1.158863078223761, - "grad_norm": 0.0021305829286575317, - "learning_rate": 0.00019999933915917673, - "loss": 46.0, - "step": 7196 - }, - { - "epoch": 1.1590241153025485, - "grad_norm": 0.003159014508128166, - "learning_rate": 0.0001999993389752396, - "loss": 46.0, - "step": 7197 - }, - { - "epoch": 1.159185152381336, - "grad_norm": 0.0020916624926030636, - "learning_rate": 0.0001999993387912769, - "loss": 46.0, - "step": 7198 - }, - { - "epoch": 1.1593461894601231, - "grad_norm": 0.0012887873454019427, - "learning_rate": 0.00019999933860728858, - "loss": 46.0, - "step": 7199 - }, - { - "epoch": 1.1595072265389106, - "grad_norm": 0.0020860384684056044, - "learning_rate": 0.00019999933842327467, - "loss": 46.0, - "step": 7200 - }, - { - "epoch": 1.159668263617698, - "grad_norm": 0.0009818424005061388, - "learning_rate": 0.00019999933823923514, - "loss": 46.0, - "step": 7201 - }, - { - "epoch": 1.1598293006964853, - "grad_norm": 0.0026219182182103395, - "learning_rate": 0.0001999993380551701, - "loss": 46.0, - "step": 7202 - }, - { - "epoch": 1.1599903377752727, - "grad_norm": 0.0016805967316031456, - "learning_rate": 0.0001999993378710794, - "loss": 46.0, - "step": 7203 - }, - { - "epoch": 1.1601513748540602, - "grad_norm": 0.0015912859234958887, - "learning_rate": 0.0001999993376869631, - "loss": 46.0, - "step": 7204 - }, - { - "epoch": 1.1603124119328476, - "grad_norm": 0.001784482505172491, - "learning_rate": 0.00019999933750282123, - "loss": 46.0, - "step": 7205 - }, - { - "epoch": 1.1604734490116348, - "grad_norm": 0.0005728021496906877, - "learning_rate": 0.00019999933731865374, - "loss": 46.0, - "step": 7206 - }, - { - "epoch": 1.1606344860904223, - "grad_norm": 0.0021490806248039007, - "learning_rate": 0.0001999993371344607, - "loss": 46.0, - "step": 7207 - }, - { - "epoch": 1.1607955231692098, - "grad_norm": 0.0029135493095964193, - "learning_rate": 0.00019999933695024203, - "loss": 46.0, - "step": 7208 - }, - { - "epoch": 1.160956560247997, - "grad_norm": 0.0021653245203197002, - "learning_rate": 0.00019999933676599776, - "loss": 46.0, - "step": 7209 - }, - { - "epoch": 1.1611175973267844, - "grad_norm": 0.0006895381957292557, - "learning_rate": 0.00019999933658172792, - "loss": 46.0, - "step": 7210 - }, - { - "epoch": 1.1612786344055719, - "grad_norm": 0.0009092371328733861, - "learning_rate": 0.00019999933639743245, - "loss": 46.0, - "step": 7211 - }, - { - "epoch": 1.1614396714843593, - "grad_norm": 0.006001030560582876, - "learning_rate": 0.0001999993362131114, - "loss": 46.0, - "step": 7212 - }, - { - "epoch": 1.1616007085631468, - "grad_norm": 0.000743579410482198, - "learning_rate": 0.00019999933602876478, - "loss": 46.0, - "step": 7213 - }, - { - "epoch": 1.161761745641934, - "grad_norm": 0.0032386414241045713, - "learning_rate": 0.00019999933584439257, - "loss": 46.0, - "step": 7214 - }, - { - "epoch": 1.1619227827207215, - "grad_norm": 0.0024503853637725115, - "learning_rate": 0.00019999933565999475, - "loss": 46.0, - "step": 7215 - }, - { - "epoch": 1.162083819799509, - "grad_norm": 0.0004362843174021691, - "learning_rate": 0.0001999993354755713, - "loss": 46.0, - "step": 7216 - }, - { - "epoch": 1.1622448568782962, - "grad_norm": 0.0035724483896046877, - "learning_rate": 0.00019999933529112228, - "loss": 46.0, - "step": 7217 - }, - { - "epoch": 1.1624058939570836, - "grad_norm": 0.0008450191235169768, - "learning_rate": 0.00019999933510664767, - "loss": 46.0, - "step": 7218 - }, - { - "epoch": 1.162566931035871, - "grad_norm": 0.0011962229618802667, - "learning_rate": 0.0001999993349221475, - "loss": 46.0, - "step": 7219 - }, - { - "epoch": 1.1627279681146585, - "grad_norm": 0.0015914092073217034, - "learning_rate": 0.00019999933473762168, - "loss": 46.0, - "step": 7220 - }, - { - "epoch": 1.1628890051934457, - "grad_norm": 0.0011281126644462347, - "learning_rate": 0.00019999933455307027, - "loss": 46.0, - "step": 7221 - }, - { - "epoch": 1.1630500422722332, - "grad_norm": 0.002893853234127164, - "learning_rate": 0.0001999993343684933, - "loss": 46.0, - "step": 7222 - }, - { - "epoch": 1.1632110793510206, - "grad_norm": 0.0015901165315881371, - "learning_rate": 0.0001999993341838907, - "loss": 46.0, - "step": 7223 - }, - { - "epoch": 1.1633721164298079, - "grad_norm": 0.0025290148332715034, - "learning_rate": 0.00019999933399926254, - "loss": 46.0, - "step": 7224 - }, - { - "epoch": 1.1635331535085953, - "grad_norm": 0.0034997931215912104, - "learning_rate": 0.00019999933381460876, - "loss": 46.0, - "step": 7225 - }, - { - "epoch": 1.1636941905873828, - "grad_norm": 0.005680668167769909, - "learning_rate": 0.0001999993336299294, - "loss": 46.0, - "step": 7226 - }, - { - "epoch": 1.1638552276661702, - "grad_norm": 0.0025335336104035378, - "learning_rate": 0.00019999933344522444, - "loss": 46.0, - "step": 7227 - }, - { - "epoch": 1.1640162647449575, - "grad_norm": 0.0023252523969858885, - "learning_rate": 0.00019999933326049387, - "loss": 46.0, - "step": 7228 - }, - { - "epoch": 1.164177301823745, - "grad_norm": 0.0052711511962115765, - "learning_rate": 0.00019999933307573772, - "loss": 46.0, - "step": 7229 - }, - { - "epoch": 1.1643383389025324, - "grad_norm": 0.003125605173408985, - "learning_rate": 0.00019999933289095598, - "loss": 46.0, - "step": 7230 - }, - { - "epoch": 1.1644993759813196, - "grad_norm": 0.0004743531462736428, - "learning_rate": 0.00019999933270614862, - "loss": 46.0, - "step": 7231 - }, - { - "epoch": 1.164660413060107, - "grad_norm": 0.004543558228760958, - "learning_rate": 0.00019999933252131568, - "loss": 46.0, - "step": 7232 - }, - { - "epoch": 1.1648214501388945, - "grad_norm": 0.006632626056671143, - "learning_rate": 0.00019999933233645715, - "loss": 46.0, - "step": 7233 - }, - { - "epoch": 1.164982487217682, - "grad_norm": 0.00105772924143821, - "learning_rate": 0.00019999933215157306, - "loss": 46.0, - "step": 7234 - }, - { - "epoch": 1.1651435242964692, - "grad_norm": 0.0014500149991363287, - "learning_rate": 0.00019999933196666333, - "loss": 46.0, - "step": 7235 - }, - { - "epoch": 1.1653045613752566, - "grad_norm": 0.0018124900525435805, - "learning_rate": 0.000199999331781728, - "loss": 46.0, - "step": 7236 - }, - { - "epoch": 1.165465598454044, - "grad_norm": 0.0007351732929237187, - "learning_rate": 0.0001999993315967671, - "loss": 46.0, - "step": 7237 - }, - { - "epoch": 1.1656266355328315, - "grad_norm": 0.0017234667902812362, - "learning_rate": 0.0001999993314117806, - "loss": 46.0, - "step": 7238 - }, - { - "epoch": 1.1657876726116188, - "grad_norm": 0.0007107029086910188, - "learning_rate": 0.0001999993312267685, - "loss": 46.0, - "step": 7239 - }, - { - "epoch": 1.1659487096904062, - "grad_norm": 0.0015769059536978602, - "learning_rate": 0.0001999993310417308, - "loss": 46.0, - "step": 7240 - }, - { - "epoch": 1.1661097467691937, - "grad_norm": 0.0030417274683713913, - "learning_rate": 0.00019999933085666752, - "loss": 46.0, - "step": 7241 - }, - { - "epoch": 1.166270783847981, - "grad_norm": 0.001748350914567709, - "learning_rate": 0.00019999933067157863, - "loss": 46.0, - "step": 7242 - }, - { - "epoch": 1.1664318209267683, - "grad_norm": 0.0006359772523865104, - "learning_rate": 0.00019999933048646414, - "loss": 46.0, - "step": 7243 - }, - { - "epoch": 1.1665928580055558, - "grad_norm": 0.0017812632722780108, - "learning_rate": 0.00019999933030132407, - "loss": 46.0, - "step": 7244 - }, - { - "epoch": 1.1667538950843432, - "grad_norm": 0.0012820129049941897, - "learning_rate": 0.00019999933011615842, - "loss": 46.0, - "step": 7245 - }, - { - "epoch": 1.1669149321631305, - "grad_norm": 0.004732340574264526, - "learning_rate": 0.00019999932993096717, - "loss": 46.0, - "step": 7246 - }, - { - "epoch": 1.167075969241918, - "grad_norm": 0.0010660496773198247, - "learning_rate": 0.0001999993297457503, - "loss": 46.0, - "step": 7247 - }, - { - "epoch": 1.1672370063207054, - "grad_norm": 0.001341055380180478, - "learning_rate": 0.00019999932956050786, - "loss": 46.0, - "step": 7248 - }, - { - "epoch": 1.1673980433994928, - "grad_norm": 0.0010324451141059399, - "learning_rate": 0.0001999993293752398, - "loss": 46.0, - "step": 7249 - }, - { - "epoch": 1.16755908047828, - "grad_norm": 0.000381359423045069, - "learning_rate": 0.00019999932918994618, - "loss": 46.0, - "step": 7250 - }, - { - "epoch": 1.1677201175570675, - "grad_norm": 0.0008992409566417336, - "learning_rate": 0.00019999932900462692, - "loss": 46.0, - "step": 7251 - }, - { - "epoch": 1.167881154635855, - "grad_norm": 0.001845250721089542, - "learning_rate": 0.00019999932881928213, - "loss": 46.0, - "step": 7252 - }, - { - "epoch": 1.1680421917146422, - "grad_norm": 0.003021238837391138, - "learning_rate": 0.0001999993286339117, - "loss": 46.0, - "step": 7253 - }, - { - "epoch": 1.1682032287934296, - "grad_norm": 0.005578834097832441, - "learning_rate": 0.00019999932844851567, - "loss": 46.0, - "step": 7254 - }, - { - "epoch": 1.168364265872217, - "grad_norm": 0.0012006013421341777, - "learning_rate": 0.00019999932826309406, - "loss": 46.0, - "step": 7255 - }, - { - "epoch": 1.1685253029510045, - "grad_norm": 0.00130279955919832, - "learning_rate": 0.00019999932807764683, - "loss": 46.0, - "step": 7256 - }, - { - "epoch": 1.1686863400297918, - "grad_norm": 0.0006026290939189494, - "learning_rate": 0.00019999932789217405, - "loss": 46.0, - "step": 7257 - }, - { - "epoch": 1.1688473771085792, - "grad_norm": 0.004377065226435661, - "learning_rate": 0.00019999932770667562, - "loss": 46.0, - "step": 7258 - }, - { - "epoch": 1.1690084141873667, - "grad_norm": 0.002834476064890623, - "learning_rate": 0.00019999932752115166, - "loss": 46.0, - "step": 7259 - }, - { - "epoch": 1.1691694512661541, - "grad_norm": 0.004933305084705353, - "learning_rate": 0.00019999932733560206, - "loss": 46.0, - "step": 7260 - }, - { - "epoch": 1.1693304883449414, - "grad_norm": 0.0021575321443378925, - "learning_rate": 0.00019999932715002687, - "loss": 46.0, - "step": 7261 - }, - { - "epoch": 1.1694915254237288, - "grad_norm": 0.001254245056770742, - "learning_rate": 0.00019999932696442612, - "loss": 46.0, - "step": 7262 - }, - { - "epoch": 1.1696525625025163, - "grad_norm": 0.0013567038113251328, - "learning_rate": 0.0001999993267787997, - "loss": 46.0, - "step": 7263 - }, - { - "epoch": 1.1698135995813037, - "grad_norm": 0.0012087170034646988, - "learning_rate": 0.00019999932659314778, - "loss": 46.0, - "step": 7264 - }, - { - "epoch": 1.169974636660091, - "grad_norm": 0.002291240496560931, - "learning_rate": 0.00019999932640747022, - "loss": 46.0, - "step": 7265 - }, - { - "epoch": 1.1701356737388784, - "grad_norm": 0.005932828411459923, - "learning_rate": 0.00019999932622176704, - "loss": 46.0, - "step": 7266 - }, - { - "epoch": 1.1702967108176658, - "grad_norm": 0.0006304119015112519, - "learning_rate": 0.0001999993260360383, - "loss": 46.0, - "step": 7267 - }, - { - "epoch": 1.170457747896453, - "grad_norm": 0.0004513065214268863, - "learning_rate": 0.00019999932585028395, - "loss": 46.0, - "step": 7268 - }, - { - "epoch": 1.1706187849752405, - "grad_norm": 0.0024564098566770554, - "learning_rate": 0.000199999325664504, - "loss": 46.0, - "step": 7269 - }, - { - "epoch": 1.170779822054028, - "grad_norm": 0.0038533704355359077, - "learning_rate": 0.00019999932547869848, - "loss": 46.0, - "step": 7270 - }, - { - "epoch": 1.1709408591328154, - "grad_norm": 0.005282533820718527, - "learning_rate": 0.00019999932529286734, - "loss": 46.0, - "step": 7271 - }, - { - "epoch": 1.1711018962116027, - "grad_norm": 0.0011273275595158339, - "learning_rate": 0.00019999932510701062, - "loss": 46.0, - "step": 7272 - }, - { - "epoch": 1.17126293329039, - "grad_norm": 0.0006987282540649176, - "learning_rate": 0.00019999932492112833, - "loss": 46.0, - "step": 7273 - }, - { - "epoch": 1.1714239703691776, - "grad_norm": 0.0004023484652861953, - "learning_rate": 0.0001999993247352204, - "loss": 46.0, - "step": 7274 - }, - { - "epoch": 1.1715850074479648, - "grad_norm": 0.0016293387161567807, - "learning_rate": 0.00019999932454928688, - "loss": 46.0, - "step": 7275 - }, - { - "epoch": 1.1717460445267522, - "grad_norm": 0.0005998879205435514, - "learning_rate": 0.0001999993243633278, - "loss": 46.0, - "step": 7276 - }, - { - "epoch": 1.1719070816055397, - "grad_norm": 0.0014990271301940084, - "learning_rate": 0.00019999932417734309, - "loss": 46.0, - "step": 7277 - }, - { - "epoch": 1.1720681186843271, - "grad_norm": 0.0009858167031779885, - "learning_rate": 0.00019999932399133278, - "loss": 46.0, - "step": 7278 - }, - { - "epoch": 1.1722291557631144, - "grad_norm": 0.0030931327491998672, - "learning_rate": 0.00019999932380529691, - "loss": 46.0, - "step": 7279 - }, - { - "epoch": 1.1723901928419018, - "grad_norm": 0.0014890697784721851, - "learning_rate": 0.0001999993236192354, - "loss": 46.0, - "step": 7280 - }, - { - "epoch": 1.1725512299206893, - "grad_norm": 0.00184484978672117, - "learning_rate": 0.00019999932343314837, - "loss": 46.0, - "step": 7281 - }, - { - "epoch": 1.1727122669994765, - "grad_norm": 0.0006240683142095804, - "learning_rate": 0.00019999932324703569, - "loss": 46.0, - "step": 7282 - }, - { - "epoch": 1.172873304078264, - "grad_norm": 0.002089295070618391, - "learning_rate": 0.00019999932306089742, - "loss": 46.0, - "step": 7283 - }, - { - "epoch": 1.1730343411570514, - "grad_norm": 0.002121787518262863, - "learning_rate": 0.00019999932287473356, - "loss": 46.0, - "step": 7284 - }, - { - "epoch": 1.1731953782358389, - "grad_norm": 0.008654619567096233, - "learning_rate": 0.00019999932268854412, - "loss": 46.0, - "step": 7285 - }, - { - "epoch": 1.1733564153146263, - "grad_norm": 0.0018087526550516486, - "learning_rate": 0.00019999932250232906, - "loss": 46.0, - "step": 7286 - }, - { - "epoch": 1.1735174523934135, - "grad_norm": 0.0009565085638314486, - "learning_rate": 0.0001999993223160884, - "loss": 46.0, - "step": 7287 - }, - { - "epoch": 1.173678489472201, - "grad_norm": 0.0021637678146362305, - "learning_rate": 0.00019999932212982216, - "loss": 46.0, - "step": 7288 - }, - { - "epoch": 1.1738395265509884, - "grad_norm": 0.0012099015293642879, - "learning_rate": 0.00019999932194353031, - "loss": 46.0, - "step": 7289 - }, - { - "epoch": 1.1740005636297757, - "grad_norm": 0.002240993082523346, - "learning_rate": 0.0001999993217572129, - "loss": 46.0, - "step": 7290 - }, - { - "epoch": 1.1741616007085631, - "grad_norm": 0.0015738093061372638, - "learning_rate": 0.0001999993215708699, - "loss": 46.0, - "step": 7291 - }, - { - "epoch": 1.1743226377873506, - "grad_norm": 0.0020281930919736624, - "learning_rate": 0.00019999932138450128, - "loss": 46.0, - "step": 7292 - }, - { - "epoch": 1.174483674866138, - "grad_norm": 0.000978005351498723, - "learning_rate": 0.00019999932119810706, - "loss": 46.0, - "step": 7293 - }, - { - "epoch": 1.1746447119449253, - "grad_norm": 0.0012880610302090645, - "learning_rate": 0.00019999932101168722, - "loss": 46.0, - "step": 7294 - }, - { - "epoch": 1.1748057490237127, - "grad_norm": 0.0026429451536387205, - "learning_rate": 0.00019999932082524186, - "loss": 46.0, - "step": 7295 - }, - { - "epoch": 1.1749667861025002, - "grad_norm": 0.0023867327254265547, - "learning_rate": 0.00019999932063877084, - "loss": 46.0, - "step": 7296 - }, - { - "epoch": 1.1751278231812874, - "grad_norm": 0.0013893722789362073, - "learning_rate": 0.00019999932045227427, - "loss": 46.0, - "step": 7297 - }, - { - "epoch": 1.1752888602600748, - "grad_norm": 0.002012244425714016, - "learning_rate": 0.0001999993202657521, - "loss": 46.0, - "step": 7298 - }, - { - "epoch": 1.1754498973388623, - "grad_norm": 0.0033517684787511826, - "learning_rate": 0.0001999993200792043, - "loss": 46.0, - "step": 7299 - }, - { - "epoch": 1.1756109344176497, - "grad_norm": 0.0014964157016947865, - "learning_rate": 0.00019999931989263093, - "loss": 46.0, - "step": 7300 - }, - { - "epoch": 1.175771971496437, - "grad_norm": 0.0009307727450504899, - "learning_rate": 0.00019999931970603196, - "loss": 46.0, - "step": 7301 - }, - { - "epoch": 1.1759330085752244, - "grad_norm": 0.0026985756121575832, - "learning_rate": 0.00019999931951940737, - "loss": 46.0, - "step": 7302 - }, - { - "epoch": 1.1760940456540119, - "grad_norm": 0.005229425150901079, - "learning_rate": 0.00019999931933275723, - "loss": 46.0, - "step": 7303 - }, - { - "epoch": 1.176255082732799, - "grad_norm": 0.005166003946214914, - "learning_rate": 0.00019999931914608146, - "loss": 46.0, - "step": 7304 - }, - { - "epoch": 1.1764161198115866, - "grad_norm": 0.0017078024102374911, - "learning_rate": 0.00019999931895938011, - "loss": 46.0, - "step": 7305 - }, - { - "epoch": 1.176577156890374, - "grad_norm": 0.0006303245900198817, - "learning_rate": 0.00019999931877265318, - "loss": 46.0, - "step": 7306 - }, - { - "epoch": 1.1767381939691615, - "grad_norm": 0.00028924617799930274, - "learning_rate": 0.00019999931858590063, - "loss": 46.0, - "step": 7307 - }, - { - "epoch": 1.176899231047949, - "grad_norm": 0.002286311471834779, - "learning_rate": 0.00019999931839912252, - "loss": 46.0, - "step": 7308 - }, - { - "epoch": 1.1770602681267361, - "grad_norm": 0.0020275702700018883, - "learning_rate": 0.0001999993182123188, - "loss": 46.0, - "step": 7309 - }, - { - "epoch": 1.1772213052055236, - "grad_norm": 0.0018592944834381342, - "learning_rate": 0.00019999931802548945, - "loss": 46.0, - "step": 7310 - }, - { - "epoch": 1.177382342284311, - "grad_norm": 0.0011300251353532076, - "learning_rate": 0.00019999931783863456, - "loss": 46.0, - "step": 7311 - }, - { - "epoch": 1.1775433793630983, - "grad_norm": 0.0021666372194886208, - "learning_rate": 0.00019999931765175404, - "loss": 46.0, - "step": 7312 - }, - { - "epoch": 1.1777044164418857, - "grad_norm": 0.0016806612256914377, - "learning_rate": 0.00019999931746484791, - "loss": 46.0, - "step": 7313 - }, - { - "epoch": 1.1778654535206732, - "grad_norm": 0.001958418870344758, - "learning_rate": 0.00019999931727791623, - "loss": 46.0, - "step": 7314 - }, - { - "epoch": 1.1780264905994606, - "grad_norm": 0.0034716171212494373, - "learning_rate": 0.00019999931709095893, - "loss": 46.0, - "step": 7315 - }, - { - "epoch": 1.1781875276782479, - "grad_norm": 0.0005778723862022161, - "learning_rate": 0.00019999931690397604, - "loss": 46.0, - "step": 7316 - }, - { - "epoch": 1.1783485647570353, - "grad_norm": 0.005233352072536945, - "learning_rate": 0.00019999931671696753, - "loss": 46.0, - "step": 7317 - }, - { - "epoch": 1.1785096018358228, - "grad_norm": 0.002933028619736433, - "learning_rate": 0.00019999931652993347, - "loss": 46.0, - "step": 7318 - }, - { - "epoch": 1.17867063891461, - "grad_norm": 0.0032525649294257164, - "learning_rate": 0.00019999931634287382, - "loss": 46.0, - "step": 7319 - }, - { - "epoch": 1.1788316759933974, - "grad_norm": 0.0003374084772076458, - "learning_rate": 0.00019999931615578853, - "loss": 46.0, - "step": 7320 - }, - { - "epoch": 1.178992713072185, - "grad_norm": 0.002870365511626005, - "learning_rate": 0.00019999931596867768, - "loss": 46.0, - "step": 7321 - }, - { - "epoch": 1.1791537501509723, - "grad_norm": 0.0010719697456806898, - "learning_rate": 0.0001999993157815412, - "loss": 46.0, - "step": 7322 - }, - { - "epoch": 1.1793147872297596, - "grad_norm": 0.001004177494905889, - "learning_rate": 0.00019999931559437916, - "loss": 46.0, - "step": 7323 - }, - { - "epoch": 1.179475824308547, - "grad_norm": 0.0014371376018971205, - "learning_rate": 0.00019999931540719152, - "loss": 46.0, - "step": 7324 - }, - { - "epoch": 1.1796368613873345, - "grad_norm": 0.0006791423074901104, - "learning_rate": 0.00019999931521997826, - "loss": 46.0, - "step": 7325 - }, - { - "epoch": 1.1797978984661217, - "grad_norm": 0.0012079504085704684, - "learning_rate": 0.00019999931503273942, - "loss": 46.0, - "step": 7326 - }, - { - "epoch": 1.1799589355449092, - "grad_norm": 0.005674366839230061, - "learning_rate": 0.00019999931484547502, - "loss": 46.0, - "step": 7327 - }, - { - "epoch": 1.1801199726236966, - "grad_norm": 0.0011005571577697992, - "learning_rate": 0.00019999931465818497, - "loss": 46.0, - "step": 7328 - }, - { - "epoch": 1.180281009702484, - "grad_norm": 0.007835574448108673, - "learning_rate": 0.00019999931447086934, - "loss": 46.0, - "step": 7329 - }, - { - "epoch": 1.1804420467812713, - "grad_norm": 0.0007141850655898452, - "learning_rate": 0.00019999931428352815, - "loss": 46.0, - "step": 7330 - }, - { - "epoch": 1.1806030838600587, - "grad_norm": 0.0016291745705530047, - "learning_rate": 0.00019999931409616132, - "loss": 46.0, - "step": 7331 - }, - { - "epoch": 1.1807641209388462, - "grad_norm": 0.000596243015024811, - "learning_rate": 0.00019999931390876893, - "loss": 46.0, - "step": 7332 - }, - { - "epoch": 1.1809251580176336, - "grad_norm": 0.0007783645414747298, - "learning_rate": 0.00019999931372135092, - "loss": 46.0, - "step": 7333 - }, - { - "epoch": 1.1810861950964209, - "grad_norm": 0.0017039704835042357, - "learning_rate": 0.00019999931353390733, - "loss": 46.0, - "step": 7334 - }, - { - "epoch": 1.1812472321752083, - "grad_norm": 0.006421853322535753, - "learning_rate": 0.00019999931334643815, - "loss": 46.0, - "step": 7335 - }, - { - "epoch": 1.1814082692539958, - "grad_norm": 0.0016386555507779121, - "learning_rate": 0.00019999931315894335, - "loss": 46.0, - "step": 7336 - }, - { - "epoch": 1.1815693063327832, - "grad_norm": 0.004061900544911623, - "learning_rate": 0.00019999931297142297, - "loss": 46.0, - "step": 7337 - }, - { - "epoch": 1.1817303434115705, - "grad_norm": 0.001283218851312995, - "learning_rate": 0.000199999312783877, - "loss": 46.0, - "step": 7338 - }, - { - "epoch": 1.181891380490358, - "grad_norm": 0.0006205645040608943, - "learning_rate": 0.00019999931259630542, - "loss": 46.0, - "step": 7339 - }, - { - "epoch": 1.1820524175691454, - "grad_norm": 0.0011197656858712435, - "learning_rate": 0.00019999931240870827, - "loss": 46.0, - "step": 7340 - }, - { - "epoch": 1.1822134546479326, - "grad_norm": 0.004171561449766159, - "learning_rate": 0.00019999931222108551, - "loss": 46.0, - "step": 7341 - }, - { - "epoch": 1.18237449172672, - "grad_norm": 0.0030213803984224796, - "learning_rate": 0.00019999931203343717, - "loss": 46.0, - "step": 7342 - }, - { - "epoch": 1.1825355288055075, - "grad_norm": 0.002057740930467844, - "learning_rate": 0.0001999993118457632, - "loss": 46.0, - "step": 7343 - }, - { - "epoch": 1.182696565884295, - "grad_norm": 0.0021582413464784622, - "learning_rate": 0.00019999931165806366, - "loss": 46.0, - "step": 7344 - }, - { - "epoch": 1.1828576029630822, - "grad_norm": 0.004215786699205637, - "learning_rate": 0.00019999931147033856, - "loss": 46.0, - "step": 7345 - }, - { - "epoch": 1.1830186400418696, - "grad_norm": 0.0018000471172854304, - "learning_rate": 0.0001999993112825878, - "loss": 46.0, - "step": 7346 - }, - { - "epoch": 1.183179677120657, - "grad_norm": 0.0007439761538989842, - "learning_rate": 0.00019999931109481147, - "loss": 46.0, - "step": 7347 - }, - { - "epoch": 1.1833407141994443, - "grad_norm": 0.004136794712394476, - "learning_rate": 0.00019999931090700955, - "loss": 46.0, - "step": 7348 - }, - { - "epoch": 1.1835017512782318, - "grad_norm": 0.002294244011864066, - "learning_rate": 0.00019999931071918204, - "loss": 46.0, - "step": 7349 - }, - { - "epoch": 1.1836627883570192, - "grad_norm": 0.0035066576674580574, - "learning_rate": 0.00019999931053132892, - "loss": 46.0, - "step": 7350 - }, - { - "epoch": 1.1838238254358067, - "grad_norm": 0.000524053000845015, - "learning_rate": 0.00019999931034345023, - "loss": 46.0, - "step": 7351 - }, - { - "epoch": 1.183984862514594, - "grad_norm": 0.004031005315482616, - "learning_rate": 0.0001999993101555459, - "loss": 46.0, - "step": 7352 - }, - { - "epoch": 1.1841458995933813, - "grad_norm": 0.0007816303404979408, - "learning_rate": 0.00019999930996761602, - "loss": 46.0, - "step": 7353 - }, - { - "epoch": 1.1843069366721688, - "grad_norm": 0.0023815478198230267, - "learning_rate": 0.00019999930977966055, - "loss": 46.0, - "step": 7354 - }, - { - "epoch": 1.1844679737509562, - "grad_norm": 0.00274737854488194, - "learning_rate": 0.00019999930959167946, - "loss": 46.0, - "step": 7355 - }, - { - "epoch": 1.1846290108297435, - "grad_norm": 0.0018352791666984558, - "learning_rate": 0.00019999930940367276, - "loss": 46.0, - "step": 7356 - }, - { - "epoch": 1.184790047908531, - "grad_norm": 0.004598895087838173, - "learning_rate": 0.00019999930921564053, - "loss": 46.0, - "step": 7357 - }, - { - "epoch": 1.1849510849873184, - "grad_norm": 0.0040381369180977345, - "learning_rate": 0.00019999930902758262, - "loss": 46.0, - "step": 7358 - }, - { - "epoch": 1.1851121220661058, - "grad_norm": 0.0026830604765564203, - "learning_rate": 0.0001999993088394992, - "loss": 46.0, - "step": 7359 - }, - { - "epoch": 1.185273159144893, - "grad_norm": 0.004803386516869068, - "learning_rate": 0.00019999930865139014, - "loss": 46.0, - "step": 7360 - }, - { - "epoch": 1.1854341962236805, - "grad_norm": 0.002542213536798954, - "learning_rate": 0.00019999930846325547, - "loss": 46.0, - "step": 7361 - }, - { - "epoch": 1.185595233302468, - "grad_norm": 0.0015223032096400857, - "learning_rate": 0.00019999930827509522, - "loss": 46.0, - "step": 7362 - }, - { - "epoch": 1.1857562703812552, - "grad_norm": 0.004237472545355558, - "learning_rate": 0.00019999930808690939, - "loss": 46.0, - "step": 7363 - }, - { - "epoch": 1.1859173074600426, - "grad_norm": 0.0011035024654120207, - "learning_rate": 0.00019999930789869796, - "loss": 46.0, - "step": 7364 - }, - { - "epoch": 1.18607834453883, - "grad_norm": 0.001071646111086011, - "learning_rate": 0.00019999930771046095, - "loss": 46.0, - "step": 7365 - }, - { - "epoch": 1.1862393816176175, - "grad_norm": 0.0038553241174668074, - "learning_rate": 0.0001999993075221983, - "loss": 46.0, - "step": 7366 - }, - { - "epoch": 1.1864004186964048, - "grad_norm": 0.005944760516285896, - "learning_rate": 0.00019999930733391008, - "loss": 46.0, - "step": 7367 - }, - { - "epoch": 1.1865614557751922, - "grad_norm": 0.0004066654946655035, - "learning_rate": 0.00019999930714559628, - "loss": 46.0, - "step": 7368 - }, - { - "epoch": 1.1867224928539797, - "grad_norm": 0.0027252298314124346, - "learning_rate": 0.00019999930695725686, - "loss": 46.0, - "step": 7369 - }, - { - "epoch": 1.186883529932767, - "grad_norm": 0.0009907943895086646, - "learning_rate": 0.00019999930676889186, - "loss": 46.0, - "step": 7370 - }, - { - "epoch": 1.1870445670115544, - "grad_norm": 0.006962188985198736, - "learning_rate": 0.00019999930658050127, - "loss": 46.0, - "step": 7371 - }, - { - "epoch": 1.1872056040903418, - "grad_norm": 0.0016975963953882456, - "learning_rate": 0.00019999930639208506, - "loss": 46.0, - "step": 7372 - }, - { - "epoch": 1.1873666411691293, - "grad_norm": 0.00544242188334465, - "learning_rate": 0.0001999993062036433, - "loss": 46.0, - "step": 7373 - }, - { - "epoch": 1.1875276782479165, - "grad_norm": 0.0008141516009345651, - "learning_rate": 0.0001999993060151759, - "loss": 46.0, - "step": 7374 - }, - { - "epoch": 1.187688715326704, - "grad_norm": 0.0009726479183882475, - "learning_rate": 0.0001999993058266829, - "loss": 46.0, - "step": 7375 - }, - { - "epoch": 1.1878497524054914, - "grad_norm": 0.002043060725554824, - "learning_rate": 0.00019999930563816435, - "loss": 46.0, - "step": 7376 - }, - { - "epoch": 1.1880107894842789, - "grad_norm": 0.0012031319784000516, - "learning_rate": 0.00019999930544962018, - "loss": 46.0, - "step": 7377 - }, - { - "epoch": 1.188171826563066, - "grad_norm": 0.002612357959151268, - "learning_rate": 0.00019999930526105043, - "loss": 46.0, - "step": 7378 - }, - { - "epoch": 1.1883328636418535, - "grad_norm": 0.0018331792671233416, - "learning_rate": 0.00019999930507245508, - "loss": 46.0, - "step": 7379 - }, - { - "epoch": 1.188493900720641, - "grad_norm": 0.00260573229752481, - "learning_rate": 0.00019999930488383413, - "loss": 46.0, - "step": 7380 - }, - { - "epoch": 1.1886549377994284, - "grad_norm": 0.0032797090243548155, - "learning_rate": 0.00019999930469518759, - "loss": 46.0, - "step": 7381 - }, - { - "epoch": 1.1888159748782157, - "grad_norm": 0.0003878821444232017, - "learning_rate": 0.00019999930450651543, - "loss": 46.0, - "step": 7382 - }, - { - "epoch": 1.1889770119570031, - "grad_norm": 0.005170104093849659, - "learning_rate": 0.0001999993043178177, - "loss": 46.0, - "step": 7383 - }, - { - "epoch": 1.1891380490357906, - "grad_norm": 0.003059866139665246, - "learning_rate": 0.00019999930412909435, - "loss": 46.0, - "step": 7384 - }, - { - "epoch": 1.1892990861145778, - "grad_norm": 0.0032380837947130203, - "learning_rate": 0.00019999930394034543, - "loss": 46.0, - "step": 7385 - }, - { - "epoch": 1.1894601231933652, - "grad_norm": 0.000769248406868428, - "learning_rate": 0.00019999930375157093, - "loss": 46.0, - "step": 7386 - }, - { - "epoch": 1.1896211602721527, - "grad_norm": 0.0011289390968158841, - "learning_rate": 0.0001999993035627708, - "loss": 46.0, - "step": 7387 - }, - { - "epoch": 1.1897821973509402, - "grad_norm": 0.0006276924978010356, - "learning_rate": 0.0001999993033739451, - "loss": 46.0, - "step": 7388 - }, - { - "epoch": 1.1899432344297274, - "grad_norm": 0.0010052825091406703, - "learning_rate": 0.00019999930318509378, - "loss": 46.0, - "step": 7389 - }, - { - "epoch": 1.1901042715085148, - "grad_norm": 0.004562370013445616, - "learning_rate": 0.0001999993029962169, - "loss": 46.0, - "step": 7390 - }, - { - "epoch": 1.1902653085873023, - "grad_norm": 0.0004512219747994095, - "learning_rate": 0.0001999993028073144, - "loss": 46.0, - "step": 7391 - }, - { - "epoch": 1.1904263456660895, - "grad_norm": 0.0041924635879695415, - "learning_rate": 0.00019999930261838632, - "loss": 46.0, - "step": 7392 - }, - { - "epoch": 1.190587382744877, - "grad_norm": 0.000982915167696774, - "learning_rate": 0.00019999930242943265, - "loss": 46.0, - "step": 7393 - }, - { - "epoch": 1.1907484198236644, - "grad_norm": 0.0014111737255007029, - "learning_rate": 0.0001999993022404534, - "loss": 46.0, - "step": 7394 - }, - { - "epoch": 1.1909094569024519, - "grad_norm": 0.004768189042806625, - "learning_rate": 0.0001999993020514485, - "loss": 46.0, - "step": 7395 - }, - { - "epoch": 1.191070493981239, - "grad_norm": 0.002425622660666704, - "learning_rate": 0.00019999930186241803, - "loss": 46.0, - "step": 7396 - }, - { - "epoch": 1.1912315310600265, - "grad_norm": 0.0008161734440363944, - "learning_rate": 0.00019999930167336198, - "loss": 46.0, - "step": 7397 - }, - { - "epoch": 1.191392568138814, - "grad_norm": 0.0012945233611389995, - "learning_rate": 0.0001999993014842803, - "loss": 46.0, - "step": 7398 - }, - { - "epoch": 1.1915536052176012, - "grad_norm": 0.001209920272231102, - "learning_rate": 0.00019999930129517305, - "loss": 46.0, - "step": 7399 - }, - { - "epoch": 1.1917146422963887, - "grad_norm": 0.0005322758224792778, - "learning_rate": 0.00019999930110604021, - "loss": 46.0, - "step": 7400 - }, - { - "epoch": 1.1918756793751761, - "grad_norm": 0.00047942172386683524, - "learning_rate": 0.00019999930091688177, - "loss": 46.0, - "step": 7401 - }, - { - "epoch": 1.1920367164539636, - "grad_norm": 0.0017445423873141408, - "learning_rate": 0.00019999930072769776, - "loss": 46.0, - "step": 7402 - }, - { - "epoch": 1.192197753532751, - "grad_norm": 0.0020476365461945534, - "learning_rate": 0.00019999930053848813, - "loss": 46.0, - "step": 7403 - }, - { - "epoch": 1.1923587906115383, - "grad_norm": 0.0036957187112420797, - "learning_rate": 0.0001999993003492529, - "loss": 46.0, - "step": 7404 - }, - { - "epoch": 1.1925198276903257, - "grad_norm": 0.00220651482231915, - "learning_rate": 0.00019999930015999207, - "loss": 46.0, - "step": 7405 - }, - { - "epoch": 1.1926808647691132, - "grad_norm": 0.0008695185533724725, - "learning_rate": 0.00019999929997070566, - "loss": 46.0, - "step": 7406 - }, - { - "epoch": 1.1928419018479004, - "grad_norm": 0.0010108186397701502, - "learning_rate": 0.00019999929978139366, - "loss": 46.0, - "step": 7407 - }, - { - "epoch": 1.1930029389266879, - "grad_norm": 0.0006435916875489056, - "learning_rate": 0.00019999929959205605, - "loss": 46.0, - "step": 7408 - }, - { - "epoch": 1.1931639760054753, - "grad_norm": 0.0014818136114627123, - "learning_rate": 0.00019999929940269287, - "loss": 46.0, - "step": 7409 - }, - { - "epoch": 1.1933250130842628, - "grad_norm": 0.0004252234648447484, - "learning_rate": 0.00019999929921330406, - "loss": 46.0, - "step": 7410 - }, - { - "epoch": 1.19348605016305, - "grad_norm": 0.0009287706343457103, - "learning_rate": 0.0001999992990238897, - "loss": 46.0, - "step": 7411 - }, - { - "epoch": 1.1936470872418374, - "grad_norm": 0.000723625416867435, - "learning_rate": 0.0001999992988344497, - "loss": 46.0, - "step": 7412 - }, - { - "epoch": 1.1938081243206249, - "grad_norm": 0.0014388399431481957, - "learning_rate": 0.00019999929864498412, - "loss": 46.0, - "step": 7413 - }, - { - "epoch": 1.1939691613994121, - "grad_norm": 0.00482554268091917, - "learning_rate": 0.00019999929845549295, - "loss": 46.0, - "step": 7414 - }, - { - "epoch": 1.1941301984781996, - "grad_norm": 0.0002463750133756548, - "learning_rate": 0.00019999929826597617, - "loss": 46.0, - "step": 7415 - }, - { - "epoch": 1.194291235556987, - "grad_norm": 0.0011618523858487606, - "learning_rate": 0.00019999929807643384, - "loss": 46.0, - "step": 7416 - }, - { - "epoch": 1.1944522726357745, - "grad_norm": 0.002354543888941407, - "learning_rate": 0.00019999929788686588, - "loss": 46.0, - "step": 7417 - }, - { - "epoch": 1.1946133097145617, - "grad_norm": 0.000962183577939868, - "learning_rate": 0.00019999929769727232, - "loss": 46.0, - "step": 7418 - }, - { - "epoch": 1.1947743467933492, - "grad_norm": 0.001150438329204917, - "learning_rate": 0.0001999992975076532, - "loss": 46.0, - "step": 7419 - }, - { - "epoch": 1.1949353838721366, - "grad_norm": 0.001018354669213295, - "learning_rate": 0.00019999929731800845, - "loss": 46.0, - "step": 7420 - }, - { - "epoch": 1.1950964209509238, - "grad_norm": 0.0009027085034176707, - "learning_rate": 0.00019999929712833812, - "loss": 46.0, - "step": 7421 - }, - { - "epoch": 1.1952574580297113, - "grad_norm": 0.006388049107044935, - "learning_rate": 0.00019999929693864218, - "loss": 46.0, - "step": 7422 - }, - { - "epoch": 1.1954184951084987, - "grad_norm": 0.0009071163949556649, - "learning_rate": 0.00019999929674892065, - "loss": 46.0, - "step": 7423 - }, - { - "epoch": 1.1955795321872862, - "grad_norm": 0.0011035699862986803, - "learning_rate": 0.00019999929655917356, - "loss": 46.0, - "step": 7424 - }, - { - "epoch": 1.1957405692660734, - "grad_norm": 0.0010314702522009611, - "learning_rate": 0.00019999929636940086, - "loss": 46.0, - "step": 7425 - }, - { - "epoch": 1.1959016063448609, - "grad_norm": 0.0004092316667083651, - "learning_rate": 0.00019999929617960254, - "loss": 46.0, - "step": 7426 - }, - { - "epoch": 1.1960626434236483, - "grad_norm": 0.0012313921470195055, - "learning_rate": 0.00019999929598977863, - "loss": 46.0, - "step": 7427 - }, - { - "epoch": 1.1962236805024358, - "grad_norm": 0.0015070174122229218, - "learning_rate": 0.00019999929579992914, - "loss": 46.0, - "step": 7428 - }, - { - "epoch": 1.196384717581223, - "grad_norm": 0.0010475274175405502, - "learning_rate": 0.00019999929561005403, - "loss": 46.0, - "step": 7429 - }, - { - "epoch": 1.1965457546600105, - "grad_norm": 0.004691485781222582, - "learning_rate": 0.00019999929542015337, - "loss": 46.0, - "step": 7430 - }, - { - "epoch": 1.196706791738798, - "grad_norm": 0.0021805621217936277, - "learning_rate": 0.00019999929523022706, - "loss": 46.0, - "step": 7431 - }, - { - "epoch": 1.1968678288175854, - "grad_norm": 0.0036309207789599895, - "learning_rate": 0.0001999992950402752, - "loss": 46.0, - "step": 7432 - }, - { - "epoch": 1.1970288658963726, - "grad_norm": 0.0011829808354377747, - "learning_rate": 0.00019999929485029774, - "loss": 46.0, - "step": 7433 - }, - { - "epoch": 1.19718990297516, - "grad_norm": 0.0009272071765735745, - "learning_rate": 0.00019999929466029467, - "loss": 46.0, - "step": 7434 - }, - { - "epoch": 1.1973509400539475, - "grad_norm": 0.0074990191496908665, - "learning_rate": 0.000199999294470266, - "loss": 46.0, - "step": 7435 - }, - { - "epoch": 1.1975119771327347, - "grad_norm": 0.0015586653025820851, - "learning_rate": 0.00019999929428021177, - "loss": 46.0, - "step": 7436 - }, - { - "epoch": 1.1976730142115222, - "grad_norm": 0.0021267388947308064, - "learning_rate": 0.0001999992940901319, - "loss": 46.0, - "step": 7437 - }, - { - "epoch": 1.1978340512903096, - "grad_norm": 0.0005111548234708607, - "learning_rate": 0.00019999929390002646, - "loss": 46.0, - "step": 7438 - }, - { - "epoch": 1.197995088369097, - "grad_norm": 0.0037622665986418724, - "learning_rate": 0.00019999929370989543, - "loss": 46.0, - "step": 7439 - }, - { - "epoch": 1.1981561254478843, - "grad_norm": 0.0017674292903393507, - "learning_rate": 0.0001999992935197388, - "loss": 46.0, - "step": 7440 - }, - { - "epoch": 1.1983171625266718, - "grad_norm": 0.001636980683542788, - "learning_rate": 0.00019999929332955657, - "loss": 46.0, - "step": 7441 - }, - { - "epoch": 1.1984781996054592, - "grad_norm": 0.0010594683699309826, - "learning_rate": 0.00019999929313934875, - "loss": 46.0, - "step": 7442 - }, - { - "epoch": 1.1986392366842464, - "grad_norm": 0.0013347334461286664, - "learning_rate": 0.00019999929294911532, - "loss": 46.0, - "step": 7443 - }, - { - "epoch": 1.1988002737630339, - "grad_norm": 0.004444236867129803, - "learning_rate": 0.00019999929275885632, - "loss": 46.0, - "step": 7444 - }, - { - "epoch": 1.1989613108418213, - "grad_norm": 0.0015611716080456972, - "learning_rate": 0.00019999929256857174, - "loss": 46.0, - "step": 7445 - }, - { - "epoch": 1.1991223479206088, - "grad_norm": 0.0004436145245563239, - "learning_rate": 0.00019999929237826152, - "loss": 46.0, - "step": 7446 - }, - { - "epoch": 1.199283384999396, - "grad_norm": 0.004913871642202139, - "learning_rate": 0.00019999929218792573, - "loss": 46.0, - "step": 7447 - }, - { - "epoch": 1.1994444220781835, - "grad_norm": 0.0017446422716602683, - "learning_rate": 0.00019999929199756436, - "loss": 46.0, - "step": 7448 - }, - { - "epoch": 1.199605459156971, - "grad_norm": 0.0016749318456277251, - "learning_rate": 0.00019999929180717735, - "loss": 46.0, - "step": 7449 - }, - { - "epoch": 1.1997664962357584, - "grad_norm": 0.0005602969904430211, - "learning_rate": 0.00019999929161676477, - "loss": 46.0, - "step": 7450 - }, - { - "epoch": 1.1999275333145456, - "grad_norm": 0.0004954435862600803, - "learning_rate": 0.00019999929142632661, - "loss": 46.0, - "step": 7451 - }, - { - "epoch": 1.200088570393333, - "grad_norm": 0.0007596751675009727, - "learning_rate": 0.00019999929123586284, - "loss": 46.0, - "step": 7452 - }, - { - "epoch": 1.2002496074721205, - "grad_norm": 0.001637642621062696, - "learning_rate": 0.00019999929104537348, - "loss": 46.0, - "step": 7453 - }, - { - "epoch": 1.200410644550908, - "grad_norm": 0.0042334916070103645, - "learning_rate": 0.0001999992908548585, - "loss": 46.0, - "step": 7454 - }, - { - "epoch": 1.2005716816296952, - "grad_norm": 0.0008722651982679963, - "learning_rate": 0.00019999929066431794, - "loss": 46.0, - "step": 7455 - }, - { - "epoch": 1.2007327187084826, - "grad_norm": 0.0017842648085206747, - "learning_rate": 0.00019999929047375182, - "loss": 46.0, - "step": 7456 - }, - { - "epoch": 1.20089375578727, - "grad_norm": 0.002939926227554679, - "learning_rate": 0.00019999929028316008, - "loss": 46.0, - "step": 7457 - }, - { - "epoch": 1.2010547928660573, - "grad_norm": 0.001383136142976582, - "learning_rate": 0.00019999929009254275, - "loss": 46.0, - "step": 7458 - }, - { - "epoch": 1.2012158299448448, - "grad_norm": 0.0012861231807619333, - "learning_rate": 0.00019999928990189982, - "loss": 46.0, - "step": 7459 - }, - { - "epoch": 1.2013768670236322, - "grad_norm": 0.0017660737503319979, - "learning_rate": 0.00019999928971123126, - "loss": 46.0, - "step": 7460 - }, - { - "epoch": 1.2015379041024197, - "grad_norm": 0.0038430211134254932, - "learning_rate": 0.00019999928952053715, - "loss": 46.0, - "step": 7461 - }, - { - "epoch": 1.201698941181207, - "grad_norm": 0.001807059976272285, - "learning_rate": 0.00019999928932981745, - "loss": 46.0, - "step": 7462 - }, - { - "epoch": 1.2018599782599944, - "grad_norm": 0.0013017336605116725, - "learning_rate": 0.00019999928913907214, - "loss": 46.0, - "step": 7463 - }, - { - "epoch": 1.2020210153387818, - "grad_norm": 0.0018817775417119265, - "learning_rate": 0.00019999928894830123, - "loss": 46.0, - "step": 7464 - }, - { - "epoch": 1.202182052417569, - "grad_norm": 0.0006017991690896451, - "learning_rate": 0.00019999928875750474, - "loss": 46.0, - "step": 7465 - }, - { - "epoch": 1.2023430894963565, - "grad_norm": 0.0017661134479567409, - "learning_rate": 0.00019999928856668264, - "loss": 46.0, - "step": 7466 - }, - { - "epoch": 1.202504126575144, - "grad_norm": 0.0006214601453393698, - "learning_rate": 0.00019999928837583495, - "loss": 46.0, - "step": 7467 - }, - { - "epoch": 1.2026651636539314, - "grad_norm": 0.0011267195222899318, - "learning_rate": 0.00019999928818496167, - "loss": 46.0, - "step": 7468 - }, - { - "epoch": 1.2028262007327186, - "grad_norm": 0.0017872502794489264, - "learning_rate": 0.00019999928799406278, - "loss": 46.0, - "step": 7469 - }, - { - "epoch": 1.202987237811506, - "grad_norm": 0.0035053330939263105, - "learning_rate": 0.00019999928780313833, - "loss": 46.0, - "step": 7470 - }, - { - "epoch": 1.2031482748902935, - "grad_norm": 0.0019433449488133192, - "learning_rate": 0.00019999928761218824, - "loss": 46.0, - "step": 7471 - }, - { - "epoch": 1.203309311969081, - "grad_norm": 0.012096031568944454, - "learning_rate": 0.00019999928742121258, - "loss": 46.0, - "step": 7472 - }, - { - "epoch": 1.2034703490478682, - "grad_norm": 0.002600267995148897, - "learning_rate": 0.00019999928723021132, - "loss": 46.0, - "step": 7473 - }, - { - "epoch": 1.2036313861266557, - "grad_norm": 0.003844389459118247, - "learning_rate": 0.00019999928703918446, - "loss": 46.0, - "step": 7474 - }, - { - "epoch": 1.203792423205443, - "grad_norm": 0.0008677352452650666, - "learning_rate": 0.00019999928684813205, - "loss": 46.0, - "step": 7475 - }, - { - "epoch": 1.2039534602842306, - "grad_norm": 0.0035268752835690975, - "learning_rate": 0.00019999928665705402, - "loss": 46.0, - "step": 7476 - }, - { - "epoch": 1.2041144973630178, - "grad_norm": 0.0038812810089439154, - "learning_rate": 0.00019999928646595037, - "loss": 46.0, - "step": 7477 - }, - { - "epoch": 1.2042755344418052, - "grad_norm": 0.004013581667095423, - "learning_rate": 0.00019999928627482114, - "loss": 46.0, - "step": 7478 - }, - { - "epoch": 1.2044365715205927, - "grad_norm": 0.0026490387972444296, - "learning_rate": 0.00019999928608366633, - "loss": 46.0, - "step": 7479 - }, - { - "epoch": 1.20459760859938, - "grad_norm": 0.003590575885027647, - "learning_rate": 0.0001999992858924859, - "loss": 46.0, - "step": 7480 - }, - { - "epoch": 1.2047586456781674, - "grad_norm": 0.0010156849166378379, - "learning_rate": 0.00019999928570127988, - "loss": 46.0, - "step": 7481 - }, - { - "epoch": 1.2049196827569548, - "grad_norm": 0.0028317461255937815, - "learning_rate": 0.00019999928551004827, - "loss": 46.0, - "step": 7482 - }, - { - "epoch": 1.2050807198357423, - "grad_norm": 0.001909225364215672, - "learning_rate": 0.00019999928531879108, - "loss": 46.0, - "step": 7483 - }, - { - "epoch": 1.2052417569145295, - "grad_norm": 0.0010485837701708078, - "learning_rate": 0.00019999928512750827, - "loss": 46.0, - "step": 7484 - }, - { - "epoch": 1.205402793993317, - "grad_norm": 0.0007255824166350067, - "learning_rate": 0.00019999928493619987, - "loss": 46.0, - "step": 7485 - }, - { - "epoch": 1.2055638310721044, - "grad_norm": 0.0006895649712532759, - "learning_rate": 0.00019999928474486586, - "loss": 46.0, - "step": 7486 - }, - { - "epoch": 1.2057248681508916, - "grad_norm": 0.000973891990724951, - "learning_rate": 0.00019999928455350632, - "loss": 46.0, - "step": 7487 - }, - { - "epoch": 1.205885905229679, - "grad_norm": 0.0014355878811329603, - "learning_rate": 0.0001999992843621211, - "loss": 46.0, - "step": 7488 - }, - { - "epoch": 1.2060469423084665, - "grad_norm": 0.0020541890989989042, - "learning_rate": 0.00019999928417071034, - "loss": 46.0, - "step": 7489 - }, - { - "epoch": 1.206207979387254, - "grad_norm": 0.0005340231582522392, - "learning_rate": 0.00019999928397927398, - "loss": 46.0, - "step": 7490 - }, - { - "epoch": 1.2063690164660412, - "grad_norm": 0.005234366282820702, - "learning_rate": 0.000199999283787812, - "loss": 46.0, - "step": 7491 - }, - { - "epoch": 1.2065300535448287, - "grad_norm": 0.0008825716795399785, - "learning_rate": 0.00019999928359632448, - "loss": 46.0, - "step": 7492 - }, - { - "epoch": 1.2066910906236161, - "grad_norm": 0.0014141829451546073, - "learning_rate": 0.0001999992834048113, - "loss": 46.0, - "step": 7493 - }, - { - "epoch": 1.2068521277024034, - "grad_norm": 0.0009230491705238819, - "learning_rate": 0.00019999928321327257, - "loss": 46.0, - "step": 7494 - }, - { - "epoch": 1.2070131647811908, - "grad_norm": 0.0006315942155197263, - "learning_rate": 0.00019999928302170825, - "loss": 46.0, - "step": 7495 - }, - { - "epoch": 1.2071742018599783, - "grad_norm": 0.0007494970341213048, - "learning_rate": 0.00019999928283011831, - "loss": 46.0, - "step": 7496 - }, - { - "epoch": 1.2073352389387657, - "grad_norm": 0.009130037389695644, - "learning_rate": 0.0001999992826385028, - "loss": 46.0, - "step": 7497 - }, - { - "epoch": 1.2074962760175532, - "grad_norm": 0.0030808583833277225, - "learning_rate": 0.00019999928244686163, - "loss": 46.0, - "step": 7498 - }, - { - "epoch": 1.2076573130963404, - "grad_norm": 0.0030400161631405354, - "learning_rate": 0.00019999928225519493, - "loss": 46.0, - "step": 7499 - }, - { - "epoch": 1.2078183501751278, - "grad_norm": 0.000541490619070828, - "learning_rate": 0.0001999992820635026, - "loss": 46.0, - "step": 7500 - }, - { - "epoch": 1.2079793872539153, - "grad_norm": 0.002253288635984063, - "learning_rate": 0.00019999928187178472, - "loss": 46.0, - "step": 7501 - }, - { - "epoch": 1.2081404243327025, - "grad_norm": 0.0013256544480100274, - "learning_rate": 0.0001999992816800412, - "loss": 46.0, - "step": 7502 - }, - { - "epoch": 1.20830146141149, - "grad_norm": 0.0010246033780276775, - "learning_rate": 0.0001999992814882721, - "loss": 46.0, - "step": 7503 - }, - { - "epoch": 1.2084624984902774, - "grad_norm": 0.005492016673088074, - "learning_rate": 0.0001999992812964774, - "loss": 46.0, - "step": 7504 - }, - { - "epoch": 1.2086235355690649, - "grad_norm": 0.006982710212469101, - "learning_rate": 0.00019999928110465712, - "loss": 46.0, - "step": 7505 - }, - { - "epoch": 1.208784572647852, - "grad_norm": 0.0042646704241633415, - "learning_rate": 0.00019999928091281124, - "loss": 46.0, - "step": 7506 - }, - { - "epoch": 1.2089456097266396, - "grad_norm": 0.0019437269074842334, - "learning_rate": 0.0001999992807209398, - "loss": 46.0, - "step": 7507 - }, - { - "epoch": 1.209106646805427, - "grad_norm": 0.003257865086197853, - "learning_rate": 0.0001999992805290427, - "loss": 46.0, - "step": 7508 - }, - { - "epoch": 1.2092676838842142, - "grad_norm": 0.008707267232239246, - "learning_rate": 0.00019999928033712005, - "loss": 46.0, - "step": 7509 - }, - { - "epoch": 1.2094287209630017, - "grad_norm": 0.0007214149227365851, - "learning_rate": 0.00019999928014517178, - "loss": 46.0, - "step": 7510 - }, - { - "epoch": 1.2095897580417891, - "grad_norm": 0.004151150584220886, - "learning_rate": 0.0001999992799531979, - "loss": 46.0, - "step": 7511 - }, - { - "epoch": 1.2097507951205766, - "grad_norm": 0.0015474956016987562, - "learning_rate": 0.00019999927976119847, - "loss": 46.0, - "step": 7512 - }, - { - "epoch": 1.2099118321993638, - "grad_norm": 0.0008899067179299891, - "learning_rate": 0.00019999927956917342, - "loss": 46.0, - "step": 7513 - }, - { - "epoch": 1.2100728692781513, - "grad_norm": 0.0036538224667310715, - "learning_rate": 0.0001999992793771228, - "loss": 46.0, - "step": 7514 - }, - { - "epoch": 1.2102339063569387, - "grad_norm": 0.0029800725169479847, - "learning_rate": 0.00019999927918504655, - "loss": 46.0, - "step": 7515 - }, - { - "epoch": 1.210394943435726, - "grad_norm": 0.002335120690986514, - "learning_rate": 0.00019999927899294473, - "loss": 46.0, - "step": 7516 - }, - { - "epoch": 1.2105559805145134, - "grad_norm": 0.0017036811914294958, - "learning_rate": 0.00019999927880081733, - "loss": 46.0, - "step": 7517 - }, - { - "epoch": 1.2107170175933009, - "grad_norm": 0.0012495525879785419, - "learning_rate": 0.00019999927860866426, - "loss": 46.0, - "step": 7518 - }, - { - "epoch": 1.2108780546720883, - "grad_norm": 0.0018980697495862842, - "learning_rate": 0.0001999992784164857, - "loss": 46.0, - "step": 7519 - }, - { - "epoch": 1.2110390917508758, - "grad_norm": 0.0017072854097932577, - "learning_rate": 0.00019999927822428147, - "loss": 46.0, - "step": 7520 - }, - { - "epoch": 1.211200128829663, - "grad_norm": 0.0014214892871677876, - "learning_rate": 0.00019999927803205167, - "loss": 46.0, - "step": 7521 - }, - { - "epoch": 1.2113611659084504, - "grad_norm": 0.011226499453186989, - "learning_rate": 0.00019999927783979628, - "loss": 46.0, - "step": 7522 - }, - { - "epoch": 1.211522202987238, - "grad_norm": 0.00080280308611691, - "learning_rate": 0.0001999992776475153, - "loss": 46.0, - "step": 7523 - }, - { - "epoch": 1.2116832400660251, - "grad_norm": 0.0008106543682515621, - "learning_rate": 0.0001999992774552087, - "loss": 46.0, - "step": 7524 - }, - { - "epoch": 1.2118442771448126, - "grad_norm": 0.0005572764202952385, - "learning_rate": 0.00019999927726287652, - "loss": 46.0, - "step": 7525 - }, - { - "epoch": 1.2120053142236, - "grad_norm": 0.0012713458854705095, - "learning_rate": 0.00019999927707051876, - "loss": 46.0, - "step": 7526 - }, - { - "epoch": 1.2121663513023875, - "grad_norm": 0.0009874645620584488, - "learning_rate": 0.00019999927687813538, - "loss": 46.0, - "step": 7527 - }, - { - "epoch": 1.2123273883811747, - "grad_norm": 0.0040589929558336735, - "learning_rate": 0.00019999927668572643, - "loss": 46.0, - "step": 7528 - }, - { - "epoch": 1.2124884254599622, - "grad_norm": 0.0011651463573798537, - "learning_rate": 0.00019999927649329185, - "loss": 46.0, - "step": 7529 - }, - { - "epoch": 1.2126494625387496, - "grad_norm": 0.0027236214373260736, - "learning_rate": 0.00019999927630083168, - "loss": 46.0, - "step": 7530 - }, - { - "epoch": 1.2128104996175368, - "grad_norm": 0.0005804926622658968, - "learning_rate": 0.00019999927610834595, - "loss": 46.0, - "step": 7531 - }, - { - "epoch": 1.2129715366963243, - "grad_norm": 0.0013138545909896493, - "learning_rate": 0.0001999992759158346, - "loss": 46.0, - "step": 7532 - }, - { - "epoch": 1.2131325737751117, - "grad_norm": 0.0020083794370293617, - "learning_rate": 0.00019999927572329768, - "loss": 46.0, - "step": 7533 - }, - { - "epoch": 1.2132936108538992, - "grad_norm": 0.001530150417238474, - "learning_rate": 0.00019999927553073513, - "loss": 46.0, - "step": 7534 - }, - { - "epoch": 1.2134546479326864, - "grad_norm": 0.0006744688143953681, - "learning_rate": 0.000199999275338147, - "loss": 46.0, - "step": 7535 - }, - { - "epoch": 1.2136156850114739, - "grad_norm": 0.0009230764699168503, - "learning_rate": 0.0001999992751455333, - "loss": 46.0, - "step": 7536 - }, - { - "epoch": 1.2137767220902613, - "grad_norm": 0.005693175829946995, - "learning_rate": 0.00019999927495289397, - "loss": 46.0, - "step": 7537 - }, - { - "epoch": 1.2139377591690486, - "grad_norm": 0.004383837804198265, - "learning_rate": 0.00019999927476022905, - "loss": 46.0, - "step": 7538 - }, - { - "epoch": 1.214098796247836, - "grad_norm": 0.001445006811991334, - "learning_rate": 0.00019999927456753854, - "loss": 46.0, - "step": 7539 - }, - { - "epoch": 1.2142598333266235, - "grad_norm": 0.0013964000390842557, - "learning_rate": 0.00019999927437482247, - "loss": 46.0, - "step": 7540 - }, - { - "epoch": 1.214420870405411, - "grad_norm": 0.0019224357092753053, - "learning_rate": 0.00019999927418208076, - "loss": 46.0, - "step": 7541 - }, - { - "epoch": 1.2145819074841981, - "grad_norm": 0.0012270859442651272, - "learning_rate": 0.00019999927398931346, - "loss": 46.0, - "step": 7542 - }, - { - "epoch": 1.2147429445629856, - "grad_norm": 0.0035568969324231148, - "learning_rate": 0.00019999927379652058, - "loss": 46.0, - "step": 7543 - }, - { - "epoch": 1.214903981641773, - "grad_norm": 0.004302465356886387, - "learning_rate": 0.0001999992736037021, - "loss": 46.0, - "step": 7544 - }, - { - "epoch": 1.2150650187205605, - "grad_norm": 0.0010684158187359571, - "learning_rate": 0.00019999927341085805, - "loss": 46.0, - "step": 7545 - }, - { - "epoch": 1.2152260557993477, - "grad_norm": 0.009326261468231678, - "learning_rate": 0.00019999927321798837, - "loss": 46.0, - "step": 7546 - }, - { - "epoch": 1.2153870928781352, - "grad_norm": 0.0024467259645462036, - "learning_rate": 0.00019999927302509311, - "loss": 46.0, - "step": 7547 - }, - { - "epoch": 1.2155481299569226, - "grad_norm": 0.0045942021533846855, - "learning_rate": 0.00019999927283217224, - "loss": 46.0, - "step": 7548 - }, - { - "epoch": 1.21570916703571, - "grad_norm": 0.004820413421839476, - "learning_rate": 0.00019999927263922578, - "loss": 46.0, - "step": 7549 - }, - { - "epoch": 1.2158702041144973, - "grad_norm": 0.001074828440323472, - "learning_rate": 0.00019999927244625373, - "loss": 46.0, - "step": 7550 - }, - { - "epoch": 1.2160312411932848, - "grad_norm": 0.003381298389285803, - "learning_rate": 0.0001999992722532561, - "loss": 46.0, - "step": 7551 - }, - { - "epoch": 1.2161922782720722, - "grad_norm": 0.010321678593754768, - "learning_rate": 0.00019999927206023287, - "loss": 46.0, - "step": 7552 - }, - { - "epoch": 1.2163533153508594, - "grad_norm": 0.0007929296116344631, - "learning_rate": 0.00019999927186718403, - "loss": 46.0, - "step": 7553 - }, - { - "epoch": 1.216514352429647, - "grad_norm": 0.006196086760610342, - "learning_rate": 0.0001999992716741096, - "loss": 46.0, - "step": 7554 - }, - { - "epoch": 1.2166753895084343, - "grad_norm": 0.0011012848699465394, - "learning_rate": 0.0001999992714810096, - "loss": 46.0, - "step": 7555 - }, - { - "epoch": 1.2168364265872218, - "grad_norm": 0.0007859840989112854, - "learning_rate": 0.00019999927128788397, - "loss": 46.0, - "step": 7556 - }, - { - "epoch": 1.216997463666009, - "grad_norm": 0.005725369323045015, - "learning_rate": 0.00019999927109473276, - "loss": 46.0, - "step": 7557 - }, - { - "epoch": 1.2171585007447965, - "grad_norm": 0.0012831080239266157, - "learning_rate": 0.00019999927090155596, - "loss": 46.0, - "step": 7558 - }, - { - "epoch": 1.217319537823584, - "grad_norm": 0.0032234350219368935, - "learning_rate": 0.00019999927070835357, - "loss": 46.0, - "step": 7559 - }, - { - "epoch": 1.2174805749023712, - "grad_norm": 0.0037518665194511414, - "learning_rate": 0.00019999927051512554, - "loss": 46.0, - "step": 7560 - }, - { - "epoch": 1.2176416119811586, - "grad_norm": 0.00048648836673237383, - "learning_rate": 0.00019999927032187198, - "loss": 46.0, - "step": 7561 - }, - { - "epoch": 1.217802649059946, - "grad_norm": 0.008244657889008522, - "learning_rate": 0.00019999927012859277, - "loss": 46.0, - "step": 7562 - }, - { - "epoch": 1.2179636861387335, - "grad_norm": 0.002935724565759301, - "learning_rate": 0.000199999269935288, - "loss": 46.0, - "step": 7563 - }, - { - "epoch": 1.2181247232175207, - "grad_norm": 0.0009815345983952284, - "learning_rate": 0.00019999926974195763, - "loss": 46.0, - "step": 7564 - }, - { - "epoch": 1.2182857602963082, - "grad_norm": 0.0011572600342333317, - "learning_rate": 0.00019999926954860167, - "loss": 46.0, - "step": 7565 - }, - { - "epoch": 1.2184467973750956, - "grad_norm": 0.0006085620843805373, - "learning_rate": 0.00019999926935522012, - "loss": 46.0, - "step": 7566 - }, - { - "epoch": 1.218607834453883, - "grad_norm": 0.010725236497819424, - "learning_rate": 0.00019999926916181295, - "loss": 46.0, - "step": 7567 - }, - { - "epoch": 1.2187688715326703, - "grad_norm": 0.0013549436116591096, - "learning_rate": 0.0001999992689683802, - "loss": 46.0, - "step": 7568 - }, - { - "epoch": 1.2189299086114578, - "grad_norm": 0.0008937516831792891, - "learning_rate": 0.00019999926877492185, - "loss": 46.0, - "step": 7569 - }, - { - "epoch": 1.2190909456902452, - "grad_norm": 0.0013728990452364087, - "learning_rate": 0.00019999926858143793, - "loss": 46.0, - "step": 7570 - }, - { - "epoch": 1.2192519827690327, - "grad_norm": 0.0009747871663421392, - "learning_rate": 0.00019999926838792838, - "loss": 46.0, - "step": 7571 - }, - { - "epoch": 1.21941301984782, - "grad_norm": 0.0011147890472784638, - "learning_rate": 0.00019999926819439326, - "loss": 46.0, - "step": 7572 - }, - { - "epoch": 1.2195740569266074, - "grad_norm": 0.0007510284194722772, - "learning_rate": 0.0001999992680008325, - "loss": 46.0, - "step": 7573 - }, - { - "epoch": 1.2197350940053948, - "grad_norm": 0.0020506775472313166, - "learning_rate": 0.0001999992678072462, - "loss": 46.0, - "step": 7574 - }, - { - "epoch": 1.219896131084182, - "grad_norm": 0.0007354197441600263, - "learning_rate": 0.0001999992676136343, - "loss": 46.0, - "step": 7575 - }, - { - "epoch": 1.2200571681629695, - "grad_norm": 0.0015805386938154697, - "learning_rate": 0.00019999926741999679, - "loss": 46.0, - "step": 7576 - }, - { - "epoch": 1.220218205241757, - "grad_norm": 0.002647318411618471, - "learning_rate": 0.0001999992672263337, - "loss": 46.0, - "step": 7577 - }, - { - "epoch": 1.2203792423205444, - "grad_norm": 0.0003503689949866384, - "learning_rate": 0.00019999926703264499, - "loss": 46.0, - "step": 7578 - }, - { - "epoch": 1.2205402793993316, - "grad_norm": 0.0007300485740415752, - "learning_rate": 0.0001999992668389307, - "loss": 46.0, - "step": 7579 - }, - { - "epoch": 1.220701316478119, - "grad_norm": 0.009703694842755795, - "learning_rate": 0.00019999926664519078, - "loss": 46.0, - "step": 7580 - }, - { - "epoch": 1.2208623535569065, - "grad_norm": 0.0012521852040663362, - "learning_rate": 0.00019999926645142532, - "loss": 46.0, - "step": 7581 - }, - { - "epoch": 1.2210233906356938, - "grad_norm": 0.00207143509760499, - "learning_rate": 0.00019999926625763426, - "loss": 46.0, - "step": 7582 - }, - { - "epoch": 1.2211844277144812, - "grad_norm": 0.00037611162406392395, - "learning_rate": 0.00019999926606381756, - "loss": 46.0, - "step": 7583 - }, - { - "epoch": 1.2213454647932687, - "grad_norm": 0.0014601427828893065, - "learning_rate": 0.0001999992658699753, - "loss": 46.0, - "step": 7584 - }, - { - "epoch": 1.2215065018720561, - "grad_norm": 0.0020650255028158426, - "learning_rate": 0.00019999926567610744, - "loss": 46.0, - "step": 7585 - }, - { - "epoch": 1.2216675389508433, - "grad_norm": 0.0004501556686591357, - "learning_rate": 0.000199999265482214, - "loss": 46.0, - "step": 7586 - }, - { - "epoch": 1.2218285760296308, - "grad_norm": 0.0039031270425766706, - "learning_rate": 0.00019999926528829493, - "loss": 46.0, - "step": 7587 - }, - { - "epoch": 1.2219896131084182, - "grad_norm": 0.006299312692135572, - "learning_rate": 0.0001999992650943503, - "loss": 46.0, - "step": 7588 - }, - { - "epoch": 1.2221506501872057, - "grad_norm": 0.0011198463616892695, - "learning_rate": 0.00019999926490038003, - "loss": 46.0, - "step": 7589 - }, - { - "epoch": 1.222311687265993, - "grad_norm": 0.0013702830765396357, - "learning_rate": 0.00019999926470638422, - "loss": 46.0, - "step": 7590 - }, - { - "epoch": 1.2224727243447804, - "grad_norm": 0.0017298564780503511, - "learning_rate": 0.00019999926451236277, - "loss": 46.0, - "step": 7591 - }, - { - "epoch": 1.2226337614235678, - "grad_norm": 0.0037376952823251486, - "learning_rate": 0.00019999926431831576, - "loss": 46.0, - "step": 7592 - }, - { - "epoch": 1.2227947985023553, - "grad_norm": 0.001607323531061411, - "learning_rate": 0.00019999926412424314, - "loss": 46.0, - "step": 7593 - }, - { - "epoch": 1.2229558355811425, - "grad_norm": 0.0032464477699249983, - "learning_rate": 0.00019999926393014493, - "loss": 46.0, - "step": 7594 - }, - { - "epoch": 1.22311687265993, - "grad_norm": 0.007269986905157566, - "learning_rate": 0.0001999992637360211, - "loss": 46.0, - "step": 7595 - }, - { - "epoch": 1.2232779097387174, - "grad_norm": 0.0024941288866102695, - "learning_rate": 0.0001999992635418717, - "loss": 46.0, - "step": 7596 - }, - { - "epoch": 1.2234389468175046, - "grad_norm": 0.0009921793825924397, - "learning_rate": 0.00019999926334769672, - "loss": 46.0, - "step": 7597 - }, - { - "epoch": 1.223599983896292, - "grad_norm": 0.0012252703309059143, - "learning_rate": 0.00019999926315349614, - "loss": 46.0, - "step": 7598 - }, - { - "epoch": 1.2237610209750795, - "grad_norm": 0.0026946994476020336, - "learning_rate": 0.00019999926295926994, - "loss": 46.0, - "step": 7599 - }, - { - "epoch": 1.223922058053867, - "grad_norm": 0.001200696686282754, - "learning_rate": 0.00019999926276501815, - "loss": 46.0, - "step": 7600 - }, - { - "epoch": 1.2240830951326542, - "grad_norm": 0.0002305166271980852, - "learning_rate": 0.0001999992625707408, - "loss": 46.0, - "step": 7601 - }, - { - "epoch": 1.2242441322114417, - "grad_norm": 0.0006880192668177187, - "learning_rate": 0.00019999926237643782, - "loss": 46.0, - "step": 7602 - }, - { - "epoch": 1.2244051692902291, - "grad_norm": 0.0009748409502208233, - "learning_rate": 0.00019999926218210924, - "loss": 46.0, - "step": 7603 - }, - { - "epoch": 1.2245662063690164, - "grad_norm": 0.005277007352560759, - "learning_rate": 0.0001999992619877551, - "loss": 46.0, - "step": 7604 - }, - { - "epoch": 1.2247272434478038, - "grad_norm": 0.0005417760694399476, - "learning_rate": 0.00019999926179337533, - "loss": 46.0, - "step": 7605 - }, - { - "epoch": 1.2248882805265913, - "grad_norm": 0.0014984805602580309, - "learning_rate": 0.00019999926159897, - "loss": 46.0, - "step": 7606 - }, - { - "epoch": 1.2250493176053787, - "grad_norm": 0.000900235609151423, - "learning_rate": 0.00019999926140453904, - "loss": 46.0, - "step": 7607 - }, - { - "epoch": 1.225210354684166, - "grad_norm": 0.0006910812226124108, - "learning_rate": 0.0001999992612100825, - "loss": 46.0, - "step": 7608 - }, - { - "epoch": 1.2253713917629534, - "grad_norm": 0.0021148917730897665, - "learning_rate": 0.00019999926101560038, - "loss": 46.0, - "step": 7609 - }, - { - "epoch": 1.2255324288417409, - "grad_norm": 0.000786135729867965, - "learning_rate": 0.00019999926082109264, - "loss": 46.0, - "step": 7610 - }, - { - "epoch": 1.225693465920528, - "grad_norm": 0.0008698116871528327, - "learning_rate": 0.0001999992606265593, - "loss": 46.0, - "step": 7611 - }, - { - "epoch": 1.2258545029993155, - "grad_norm": 0.0007346338243223727, - "learning_rate": 0.00019999926043200042, - "loss": 46.0, - "step": 7612 - }, - { - "epoch": 1.226015540078103, - "grad_norm": 0.001855868031270802, - "learning_rate": 0.0001999992602374159, - "loss": 46.0, - "step": 7613 - }, - { - "epoch": 1.2261765771568904, - "grad_norm": 0.0010466381208971143, - "learning_rate": 0.00019999926004280578, - "loss": 46.0, - "step": 7614 - }, - { - "epoch": 1.2263376142356779, - "grad_norm": 0.0024692926090210676, - "learning_rate": 0.0001999992598481701, - "loss": 46.0, - "step": 7615 - }, - { - "epoch": 1.2264986513144651, - "grad_norm": 0.002758033573627472, - "learning_rate": 0.00019999925965350878, - "loss": 46.0, - "step": 7616 - }, - { - "epoch": 1.2266596883932526, - "grad_norm": 0.004525685682892799, - "learning_rate": 0.0001999992594588219, - "loss": 46.0, - "step": 7617 - }, - { - "epoch": 1.22682072547204, - "grad_norm": 0.003646411467343569, - "learning_rate": 0.00019999925926410942, - "loss": 46.0, - "step": 7618 - }, - { - "epoch": 1.2269817625508272, - "grad_norm": 0.0031161773949861526, - "learning_rate": 0.00019999925906937134, - "loss": 46.0, - "step": 7619 - }, - { - "epoch": 1.2271427996296147, - "grad_norm": 0.006743249017745256, - "learning_rate": 0.00019999925887460767, - "loss": 46.0, - "step": 7620 - }, - { - "epoch": 1.2273038367084022, - "grad_norm": 0.0008316087769344449, - "learning_rate": 0.00019999925867981836, - "loss": 46.0, - "step": 7621 - }, - { - "epoch": 1.2274648737871896, - "grad_norm": 0.001978185959160328, - "learning_rate": 0.00019999925848500352, - "loss": 46.0, - "step": 7622 - }, - { - "epoch": 1.2276259108659768, - "grad_norm": 0.006658573169261217, - "learning_rate": 0.00019999925829016307, - "loss": 46.0, - "step": 7623 - }, - { - "epoch": 1.2277869479447643, - "grad_norm": 0.0013616629876196384, - "learning_rate": 0.000199999258095297, - "loss": 46.0, - "step": 7624 - }, - { - "epoch": 1.2279479850235517, - "grad_norm": 0.0017589150229468942, - "learning_rate": 0.00019999925790040534, - "loss": 46.0, - "step": 7625 - }, - { - "epoch": 1.228109022102339, - "grad_norm": 0.0010458810720592737, - "learning_rate": 0.0001999992577054881, - "loss": 46.0, - "step": 7626 - }, - { - "epoch": 1.2282700591811264, - "grad_norm": 0.0017738519236445427, - "learning_rate": 0.0001999992575105453, - "loss": 46.0, - "step": 7627 - }, - { - "epoch": 1.2284310962599139, - "grad_norm": 0.002235678257420659, - "learning_rate": 0.00019999925731557685, - "loss": 46.0, - "step": 7628 - }, - { - "epoch": 1.2285921333387013, - "grad_norm": 0.005524209700524807, - "learning_rate": 0.00019999925712058282, - "loss": 46.0, - "step": 7629 - }, - { - "epoch": 1.2287531704174885, - "grad_norm": 0.0012723833788186312, - "learning_rate": 0.00019999925692556318, - "loss": 46.0, - "step": 7630 - }, - { - "epoch": 1.228914207496276, - "grad_norm": 0.0008022763067856431, - "learning_rate": 0.00019999925673051797, - "loss": 46.0, - "step": 7631 - }, - { - "epoch": 1.2290752445750635, - "grad_norm": 0.001587162259966135, - "learning_rate": 0.00019999925653544715, - "loss": 46.0, - "step": 7632 - }, - { - "epoch": 1.2292362816538507, - "grad_norm": 0.0008148058550432324, - "learning_rate": 0.00019999925634035077, - "loss": 46.0, - "step": 7633 - }, - { - "epoch": 1.2293973187326381, - "grad_norm": 0.0032398866023868322, - "learning_rate": 0.00019999925614522877, - "loss": 46.0, - "step": 7634 - }, - { - "epoch": 1.2295583558114256, - "grad_norm": 0.0021040227729827166, - "learning_rate": 0.00019999925595008116, - "loss": 46.0, - "step": 7635 - }, - { - "epoch": 1.229719392890213, - "grad_norm": 0.0009233879973180592, - "learning_rate": 0.00019999925575490797, - "loss": 46.0, - "step": 7636 - }, - { - "epoch": 1.2298804299690003, - "grad_norm": 0.0011741952039301395, - "learning_rate": 0.00019999925555970918, - "loss": 46.0, - "step": 7637 - }, - { - "epoch": 1.2300414670477877, - "grad_norm": 0.0007919566705822945, - "learning_rate": 0.00019999925536448481, - "loss": 46.0, - "step": 7638 - }, - { - "epoch": 1.2302025041265752, - "grad_norm": 0.0016446413937956095, - "learning_rate": 0.00019999925516923486, - "loss": 46.0, - "step": 7639 - }, - { - "epoch": 1.2303635412053626, - "grad_norm": 0.004901389125734568, - "learning_rate": 0.00019999925497395928, - "loss": 46.0, - "step": 7640 - }, - { - "epoch": 1.2305245782841499, - "grad_norm": 0.0046926806680858135, - "learning_rate": 0.0001999992547786581, - "loss": 46.0, - "step": 7641 - }, - { - "epoch": 1.2306856153629373, - "grad_norm": 0.010192851535975933, - "learning_rate": 0.00019999925458333135, - "loss": 46.0, - "step": 7642 - }, - { - "epoch": 1.2308466524417248, - "grad_norm": 0.0009865125175565481, - "learning_rate": 0.000199999254387979, - "loss": 46.0, - "step": 7643 - }, - { - "epoch": 1.2310076895205122, - "grad_norm": 0.0004271470825187862, - "learning_rate": 0.00019999925419260107, - "loss": 46.0, - "step": 7644 - }, - { - "epoch": 1.2311687265992994, - "grad_norm": 0.0004858318134211004, - "learning_rate": 0.0001999992539971975, - "loss": 46.0, - "step": 7645 - }, - { - "epoch": 1.2313297636780869, - "grad_norm": 0.00045591805246658623, - "learning_rate": 0.00019999925380176836, - "loss": 46.0, - "step": 7646 - }, - { - "epoch": 1.2314908007568743, - "grad_norm": 0.001879748422652483, - "learning_rate": 0.00019999925360631365, - "loss": 46.0, - "step": 7647 - }, - { - "epoch": 1.2316518378356616, - "grad_norm": 0.002072389703243971, - "learning_rate": 0.0001999992534108333, - "loss": 46.0, - "step": 7648 - }, - { - "epoch": 1.231812874914449, - "grad_norm": 0.001408843556419015, - "learning_rate": 0.0001999992532153274, - "loss": 46.0, - "step": 7649 - }, - { - "epoch": 1.2319739119932365, - "grad_norm": 0.0006739210220985115, - "learning_rate": 0.00019999925301979586, - "loss": 46.0, - "step": 7650 - }, - { - "epoch": 1.232134949072024, - "grad_norm": 0.001930776285007596, - "learning_rate": 0.00019999925282423878, - "loss": 46.0, - "step": 7651 - }, - { - "epoch": 1.2322959861508112, - "grad_norm": 0.005610702559351921, - "learning_rate": 0.00019999925262865605, - "loss": 46.0, - "step": 7652 - }, - { - "epoch": 1.2324570232295986, - "grad_norm": 0.001402527210302651, - "learning_rate": 0.00019999925243304777, - "loss": 46.0, - "step": 7653 - }, - { - "epoch": 1.232618060308386, - "grad_norm": 0.0014332960126921535, - "learning_rate": 0.00019999925223741387, - "loss": 46.0, - "step": 7654 - }, - { - "epoch": 1.2327790973871733, - "grad_norm": 0.0015186292584985495, - "learning_rate": 0.00019999925204175438, - "loss": 46.0, - "step": 7655 - }, - { - "epoch": 1.2329401344659607, - "grad_norm": 0.0013117969501763582, - "learning_rate": 0.0001999992518460693, - "loss": 46.0, - "step": 7656 - }, - { - "epoch": 1.2331011715447482, - "grad_norm": 0.001633496955037117, - "learning_rate": 0.0001999992516503586, - "loss": 46.0, - "step": 7657 - }, - { - "epoch": 1.2332622086235356, - "grad_norm": 0.005964313633739948, - "learning_rate": 0.00019999925145462234, - "loss": 46.0, - "step": 7658 - }, - { - "epoch": 1.2334232457023229, - "grad_norm": 0.0020560745615512133, - "learning_rate": 0.00019999925125886047, - "loss": 46.0, - "step": 7659 - }, - { - "epoch": 1.2335842827811103, - "grad_norm": 0.0005800784565508366, - "learning_rate": 0.00019999925106307302, - "loss": 46.0, - "step": 7660 - }, - { - "epoch": 1.2337453198598978, - "grad_norm": 0.008018608205020428, - "learning_rate": 0.00019999925086725996, - "loss": 46.0, - "step": 7661 - }, - { - "epoch": 1.2339063569386852, - "grad_norm": 0.011703826487064362, - "learning_rate": 0.0001999992506714213, - "loss": 46.0, - "step": 7662 - }, - { - "epoch": 1.2340673940174725, - "grad_norm": 0.0022539463825523853, - "learning_rate": 0.00019999925047555706, - "loss": 46.0, - "step": 7663 - }, - { - "epoch": 1.23422843109626, - "grad_norm": 0.0010529665742069483, - "learning_rate": 0.0001999992502796672, - "loss": 46.0, - "step": 7664 - }, - { - "epoch": 1.2343894681750474, - "grad_norm": 0.0015553730772808194, - "learning_rate": 0.0001999992500837518, - "loss": 46.0, - "step": 7665 - }, - { - "epoch": 1.2345505052538348, - "grad_norm": 0.0028824489563703537, - "learning_rate": 0.00019999924988781077, - "loss": 46.0, - "step": 7666 - }, - { - "epoch": 1.234711542332622, - "grad_norm": 0.00441374396905303, - "learning_rate": 0.00019999924969184413, - "loss": 46.0, - "step": 7667 - }, - { - "epoch": 1.2348725794114095, - "grad_norm": 0.0005688901874236763, - "learning_rate": 0.00019999924949585192, - "loss": 46.0, - "step": 7668 - }, - { - "epoch": 1.235033616490197, - "grad_norm": 0.0012712272582575679, - "learning_rate": 0.0001999992492998341, - "loss": 46.0, - "step": 7669 - }, - { - "epoch": 1.2351946535689842, - "grad_norm": 0.003068245016038418, - "learning_rate": 0.00019999924910379068, - "loss": 46.0, - "step": 7670 - }, - { - "epoch": 1.2353556906477716, - "grad_norm": 0.0014743818901479244, - "learning_rate": 0.00019999924890772168, - "loss": 46.0, - "step": 7671 - }, - { - "epoch": 1.235516727726559, - "grad_norm": 0.002503872150555253, - "learning_rate": 0.00019999924871162708, - "loss": 46.0, - "step": 7672 - }, - { - "epoch": 1.2356777648053465, - "grad_norm": 0.0005188011564314365, - "learning_rate": 0.0001999992485155069, - "loss": 46.0, - "step": 7673 - }, - { - "epoch": 1.2358388018841338, - "grad_norm": 0.0010696490062400699, - "learning_rate": 0.00019999924831936113, - "loss": 46.0, - "step": 7674 - }, - { - "epoch": 1.2359998389629212, - "grad_norm": 0.004267524462193251, - "learning_rate": 0.00019999924812318974, - "loss": 46.0, - "step": 7675 - }, - { - "epoch": 1.2361608760417087, - "grad_norm": 0.003083720337599516, - "learning_rate": 0.00019999924792699276, - "loss": 46.0, - "step": 7676 - }, - { - "epoch": 1.2363219131204959, - "grad_norm": 0.0018491089576855302, - "learning_rate": 0.00019999924773077016, - "loss": 46.0, - "step": 7677 - }, - { - "epoch": 1.2364829501992833, - "grad_norm": 0.0014480621321126819, - "learning_rate": 0.000199999247534522, - "loss": 46.0, - "step": 7678 - }, - { - "epoch": 1.2366439872780708, - "grad_norm": 0.000995188718661666, - "learning_rate": 0.00019999924733824824, - "loss": 46.0, - "step": 7679 - }, - { - "epoch": 1.2368050243568582, - "grad_norm": 0.0004588558222167194, - "learning_rate": 0.0001999992471419489, - "loss": 46.0, - "step": 7680 - }, - { - "epoch": 1.2369660614356455, - "grad_norm": 0.0009994530119001865, - "learning_rate": 0.00019999924694562396, - "loss": 46.0, - "step": 7681 - }, - { - "epoch": 1.237127098514433, - "grad_norm": 0.004038609564304352, - "learning_rate": 0.0001999992467492734, - "loss": 46.0, - "step": 7682 - }, - { - "epoch": 1.2372881355932204, - "grad_norm": 0.0009063747129403055, - "learning_rate": 0.00019999924655289726, - "loss": 46.0, - "step": 7683 - }, - { - "epoch": 1.2374491726720078, - "grad_norm": 0.0032376996241509914, - "learning_rate": 0.00019999924635649553, - "loss": 46.0, - "step": 7684 - }, - { - "epoch": 1.237610209750795, - "grad_norm": 0.004285745322704315, - "learning_rate": 0.0001999992461600682, - "loss": 46.0, - "step": 7685 - }, - { - "epoch": 1.2377712468295825, - "grad_norm": 0.0007059372728690505, - "learning_rate": 0.00019999924596361527, - "loss": 46.0, - "step": 7686 - }, - { - "epoch": 1.23793228390837, - "grad_norm": 0.0046587600372731686, - "learning_rate": 0.00019999924576713675, - "loss": 46.0, - "step": 7687 - }, - { - "epoch": 1.2380933209871574, - "grad_norm": 0.0014678449369966984, - "learning_rate": 0.00019999924557063264, - "loss": 46.0, - "step": 7688 - }, - { - "epoch": 1.2382543580659446, - "grad_norm": 0.002469314495101571, - "learning_rate": 0.00019999924537410295, - "loss": 46.0, - "step": 7689 - }, - { - "epoch": 1.238415395144732, - "grad_norm": 0.006369494367390871, - "learning_rate": 0.00019999924517754764, - "loss": 46.0, - "step": 7690 - }, - { - "epoch": 1.2385764322235195, - "grad_norm": 0.0013271247735247016, - "learning_rate": 0.00019999924498096674, - "loss": 46.0, - "step": 7691 - }, - { - "epoch": 1.2387374693023068, - "grad_norm": 0.0009874668903648853, - "learning_rate": 0.00019999924478436026, - "loss": 46.0, - "step": 7692 - }, - { - "epoch": 1.2388985063810942, - "grad_norm": 0.0019541261717677116, - "learning_rate": 0.00019999924458772816, - "loss": 46.0, - "step": 7693 - }, - { - "epoch": 1.2390595434598817, - "grad_norm": 0.0023790907580405474, - "learning_rate": 0.00019999924439107047, - "loss": 46.0, - "step": 7694 - }, - { - "epoch": 1.2392205805386691, - "grad_norm": 0.0010876428568735719, - "learning_rate": 0.0001999992441943872, - "loss": 46.0, - "step": 7695 - }, - { - "epoch": 1.2393816176174564, - "grad_norm": 0.001680009183473885, - "learning_rate": 0.00019999924399767834, - "loss": 46.0, - "step": 7696 - }, - { - "epoch": 1.2395426546962438, - "grad_norm": 0.0030936039984226227, - "learning_rate": 0.0001999992438009439, - "loss": 46.0, - "step": 7697 - }, - { - "epoch": 1.2397036917750313, - "grad_norm": 0.00127962336409837, - "learning_rate": 0.0001999992436041838, - "loss": 46.0, - "step": 7698 - }, - { - "epoch": 1.2398647288538185, - "grad_norm": 0.007616166956722736, - "learning_rate": 0.00019999924340739815, - "loss": 46.0, - "step": 7699 - }, - { - "epoch": 1.240025765932606, - "grad_norm": 0.0006716623902320862, - "learning_rate": 0.00019999924321058692, - "loss": 46.0, - "step": 7700 - }, - { - "epoch": 1.2401868030113934, - "grad_norm": 0.0009732271428219974, - "learning_rate": 0.00019999924301375007, - "loss": 46.0, - "step": 7701 - }, - { - "epoch": 1.2403478400901808, - "grad_norm": 0.007000810466706753, - "learning_rate": 0.00019999924281688763, - "loss": 46.0, - "step": 7702 - }, - { - "epoch": 1.240508877168968, - "grad_norm": 0.0018774871714413166, - "learning_rate": 0.00019999924261999958, - "loss": 46.0, - "step": 7703 - }, - { - "epoch": 1.2406699142477555, - "grad_norm": 0.0026521796826273203, - "learning_rate": 0.00019999924242308597, - "loss": 46.0, - "step": 7704 - }, - { - "epoch": 1.240830951326543, - "grad_norm": 0.0009369513718411326, - "learning_rate": 0.00019999924222614674, - "loss": 46.0, - "step": 7705 - }, - { - "epoch": 1.2409919884053302, - "grad_norm": 0.0013953230809420347, - "learning_rate": 0.00019999924202918193, - "loss": 46.0, - "step": 7706 - }, - { - "epoch": 1.2411530254841177, - "grad_norm": 0.0031619705259799957, - "learning_rate": 0.00019999924183219153, - "loss": 46.0, - "step": 7707 - }, - { - "epoch": 1.241314062562905, - "grad_norm": 0.0009811873314902186, - "learning_rate": 0.0001999992416351755, - "loss": 46.0, - "step": 7708 - }, - { - "epoch": 1.2414750996416926, - "grad_norm": 0.0016565483529120684, - "learning_rate": 0.00019999924143813388, - "loss": 46.0, - "step": 7709 - }, - { - "epoch": 1.24163613672048, - "grad_norm": 0.0018206652021035552, - "learning_rate": 0.0001999992412410667, - "loss": 46.0, - "step": 7710 - }, - { - "epoch": 1.2417971737992672, - "grad_norm": 0.001449157833121717, - "learning_rate": 0.00019999924104397392, - "loss": 46.0, - "step": 7711 - }, - { - "epoch": 1.2419582108780547, - "grad_norm": 0.005289965309202671, - "learning_rate": 0.00019999924084685553, - "loss": 46.0, - "step": 7712 - }, - { - "epoch": 1.2421192479568421, - "grad_norm": 0.0035281176678836346, - "learning_rate": 0.00019999924064971155, - "loss": 46.0, - "step": 7713 - }, - { - "epoch": 1.2422802850356294, - "grad_norm": 0.0022060268092900515, - "learning_rate": 0.00019999924045254196, - "loss": 46.0, - "step": 7714 - }, - { - "epoch": 1.2424413221144168, - "grad_norm": 0.0017881705425679684, - "learning_rate": 0.0001999992402553468, - "loss": 46.0, - "step": 7715 - }, - { - "epoch": 1.2426023591932043, - "grad_norm": 0.0008391349110752344, - "learning_rate": 0.00019999924005812604, - "loss": 46.0, - "step": 7716 - }, - { - "epoch": 1.2427633962719917, - "grad_norm": 0.002548717427998781, - "learning_rate": 0.0001999992398608797, - "loss": 46.0, - "step": 7717 - }, - { - "epoch": 1.242924433350779, - "grad_norm": 0.0028777257539331913, - "learning_rate": 0.00019999923966360772, - "loss": 46.0, - "step": 7718 - }, - { - "epoch": 1.2430854704295664, - "grad_norm": 0.001732136937789619, - "learning_rate": 0.0001999992394663102, - "loss": 46.0, - "step": 7719 - }, - { - "epoch": 1.2432465075083539, - "grad_norm": 0.004271198995411396, - "learning_rate": 0.00019999923926898702, - "loss": 46.0, - "step": 7720 - }, - { - "epoch": 1.243407544587141, - "grad_norm": 0.0016677387757226825, - "learning_rate": 0.00019999923907163832, - "loss": 46.0, - "step": 7721 - }, - { - "epoch": 1.2435685816659285, - "grad_norm": 0.0005603987374342978, - "learning_rate": 0.00019999923887426398, - "loss": 46.0, - "step": 7722 - }, - { - "epoch": 1.243729618744716, - "grad_norm": 0.0015048218192532659, - "learning_rate": 0.00019999923867686405, - "loss": 46.0, - "step": 7723 - }, - { - "epoch": 1.2438906558235034, - "grad_norm": 0.0017956284573301673, - "learning_rate": 0.0001999992384794385, - "loss": 46.0, - "step": 7724 - }, - { - "epoch": 1.2440516929022907, - "grad_norm": 0.0007143549155443907, - "learning_rate": 0.0001999992382819874, - "loss": 46.0, - "step": 7725 - }, - { - "epoch": 1.2442127299810781, - "grad_norm": 0.00385107658803463, - "learning_rate": 0.00019999923808451068, - "loss": 46.0, - "step": 7726 - }, - { - "epoch": 1.2443737670598656, - "grad_norm": 0.0009276755154132843, - "learning_rate": 0.0001999992378870084, - "loss": 46.0, - "step": 7727 - }, - { - "epoch": 1.2445348041386528, - "grad_norm": 0.0008219244773499668, - "learning_rate": 0.0001999992376894805, - "loss": 46.0, - "step": 7728 - }, - { - "epoch": 1.2446958412174403, - "grad_norm": 0.002346830442547798, - "learning_rate": 0.000199999237491927, - "loss": 46.0, - "step": 7729 - }, - { - "epoch": 1.2448568782962277, - "grad_norm": 0.003625713288784027, - "learning_rate": 0.0001999992372943479, - "loss": 46.0, - "step": 7730 - }, - { - "epoch": 1.2450179153750152, - "grad_norm": 0.0018733638571575284, - "learning_rate": 0.00019999923709674322, - "loss": 46.0, - "step": 7731 - }, - { - "epoch": 1.2451789524538026, - "grad_norm": 0.0011365804821252823, - "learning_rate": 0.00019999923689911295, - "loss": 46.0, - "step": 7732 - }, - { - "epoch": 1.2453399895325898, - "grad_norm": 0.0005309239495545626, - "learning_rate": 0.00019999923670145707, - "loss": 46.0, - "step": 7733 - }, - { - "epoch": 1.2455010266113773, - "grad_norm": 0.0011068431194871664, - "learning_rate": 0.0001999992365037756, - "loss": 46.0, - "step": 7734 - }, - { - "epoch": 1.2456620636901647, - "grad_norm": 0.006984272971749306, - "learning_rate": 0.00019999923630606854, - "loss": 46.0, - "step": 7735 - }, - { - "epoch": 1.245823100768952, - "grad_norm": 0.007078150287270546, - "learning_rate": 0.0001999992361083359, - "loss": 46.0, - "step": 7736 - }, - { - "epoch": 1.2459841378477394, - "grad_norm": 0.0004573630867525935, - "learning_rate": 0.00019999923591057763, - "loss": 46.0, - "step": 7737 - }, - { - "epoch": 1.2461451749265269, - "grad_norm": 0.0017036707140505314, - "learning_rate": 0.0001999992357127938, - "loss": 46.0, - "step": 7738 - }, - { - "epoch": 1.2463062120053143, - "grad_norm": 0.0037747807800769806, - "learning_rate": 0.00019999923551498435, - "loss": 46.0, - "step": 7739 - }, - { - "epoch": 1.2464672490841016, - "grad_norm": 0.0032288769725710154, - "learning_rate": 0.0001999992353171493, - "loss": 46.0, - "step": 7740 - }, - { - "epoch": 1.246628286162889, - "grad_norm": 0.005686294753104448, - "learning_rate": 0.00019999923511928867, - "loss": 46.0, - "step": 7741 - }, - { - "epoch": 1.2467893232416765, - "grad_norm": 0.001286122016608715, - "learning_rate": 0.00019999923492140245, - "loss": 46.0, - "step": 7742 - }, - { - "epoch": 1.2469503603204637, - "grad_norm": 0.006018731743097305, - "learning_rate": 0.00019999923472349064, - "loss": 46.0, - "step": 7743 - }, - { - "epoch": 1.2471113973992511, - "grad_norm": 0.002859055530279875, - "learning_rate": 0.00019999923452555321, - "loss": 46.0, - "step": 7744 - }, - { - "epoch": 1.2472724344780386, - "grad_norm": 0.002178067108616233, - "learning_rate": 0.0001999992343275902, - "loss": 46.0, - "step": 7745 - }, - { - "epoch": 1.247433471556826, - "grad_norm": 0.003421215573325753, - "learning_rate": 0.0001999992341296016, - "loss": 46.0, - "step": 7746 - }, - { - "epoch": 1.2475945086356133, - "grad_norm": 0.0030373604968190193, - "learning_rate": 0.0001999992339315874, - "loss": 46.0, - "step": 7747 - }, - { - "epoch": 1.2477555457144007, - "grad_norm": 0.0007116520428098738, - "learning_rate": 0.00019999923373354762, - "loss": 46.0, - "step": 7748 - }, - { - "epoch": 1.2479165827931882, - "grad_norm": 0.0012223977828398347, - "learning_rate": 0.00019999923353548223, - "loss": 46.0, - "step": 7749 - }, - { - "epoch": 1.2480776198719754, - "grad_norm": 0.0027469934429973364, - "learning_rate": 0.00019999923333739123, - "loss": 46.0, - "step": 7750 - }, - { - "epoch": 1.2482386569507629, - "grad_norm": 0.0073843407444655895, - "learning_rate": 0.00019999923313927464, - "loss": 46.0, - "step": 7751 - }, - { - "epoch": 1.2483996940295503, - "grad_norm": 0.0005689335521310568, - "learning_rate": 0.00019999923294113247, - "loss": 46.0, - "step": 7752 - }, - { - "epoch": 1.2485607311083378, - "grad_norm": 0.0016763632884249091, - "learning_rate": 0.00019999923274296473, - "loss": 46.0, - "step": 7753 - }, - { - "epoch": 1.248721768187125, - "grad_norm": 0.0015108821680769324, - "learning_rate": 0.00019999923254477136, - "loss": 46.0, - "step": 7754 - }, - { - "epoch": 1.2488828052659124, - "grad_norm": 0.004024966154247522, - "learning_rate": 0.0001999992323465524, - "loss": 46.0, - "step": 7755 - }, - { - "epoch": 1.2490438423447, - "grad_norm": 0.001047232304699719, - "learning_rate": 0.00019999923214830784, - "loss": 46.0, - "step": 7756 - }, - { - "epoch": 1.2492048794234873, - "grad_norm": 0.002008634153753519, - "learning_rate": 0.0001999992319500377, - "loss": 46.0, - "step": 7757 - }, - { - "epoch": 1.2493659165022746, - "grad_norm": 0.003829686203971505, - "learning_rate": 0.00019999923175174198, - "loss": 46.0, - "step": 7758 - }, - { - "epoch": 1.249526953581062, - "grad_norm": 0.001497588586062193, - "learning_rate": 0.0001999992315534206, - "loss": 46.0, - "step": 7759 - }, - { - "epoch": 1.2496879906598495, - "grad_norm": 0.0035700860898941755, - "learning_rate": 0.0001999992313550737, - "loss": 46.0, - "step": 7760 - }, - { - "epoch": 1.249849027738637, - "grad_norm": 0.005076493602246046, - "learning_rate": 0.0001999992311567012, - "loss": 46.0, - "step": 7761 - }, - { - "epoch": 1.2500100648174242, - "grad_norm": 0.0015731723979115486, - "learning_rate": 0.00019999923095830307, - "loss": 46.0, - "step": 7762 - }, - { - "epoch": 1.2501711018962116, - "grad_norm": 0.005798127502202988, - "learning_rate": 0.00019999923075987935, - "loss": 46.0, - "step": 7763 - }, - { - "epoch": 1.250332138974999, - "grad_norm": 0.0013734509702771902, - "learning_rate": 0.00019999923056143005, - "loss": 46.0, - "step": 7764 - }, - { - "epoch": 1.2504931760537863, - "grad_norm": 0.004089319612830877, - "learning_rate": 0.00019999923036295513, - "loss": 46.0, - "step": 7765 - }, - { - "epoch": 1.2506542131325737, - "grad_norm": 0.0013517532497644424, - "learning_rate": 0.00019999923016445463, - "loss": 46.0, - "step": 7766 - }, - { - "epoch": 1.2508152502113612, - "grad_norm": 0.004541502799838781, - "learning_rate": 0.00019999922996592854, - "loss": 46.0, - "step": 7767 - }, - { - "epoch": 1.2509762872901486, - "grad_norm": 0.0010161533718928695, - "learning_rate": 0.00019999922976737686, - "loss": 46.0, - "step": 7768 - }, - { - "epoch": 1.2511373243689359, - "grad_norm": 0.0017163571901619434, - "learning_rate": 0.00019999922956879957, - "loss": 46.0, - "step": 7769 - }, - { - "epoch": 1.2512983614477233, - "grad_norm": 0.00045029327156953514, - "learning_rate": 0.0001999992293701967, - "loss": 46.0, - "step": 7770 - }, - { - "epoch": 1.2514593985265108, - "grad_norm": 0.0012799405958503485, - "learning_rate": 0.00019999922917156822, - "loss": 46.0, - "step": 7771 - }, - { - "epoch": 1.251620435605298, - "grad_norm": 0.0010593110928311944, - "learning_rate": 0.00019999922897291416, - "loss": 46.0, - "step": 7772 - }, - { - "epoch": 1.2517814726840855, - "grad_norm": 0.002537592314183712, - "learning_rate": 0.0001999992287742345, - "loss": 46.0, - "step": 7773 - }, - { - "epoch": 1.251942509762873, - "grad_norm": 0.0019495715387165546, - "learning_rate": 0.00019999922857552927, - "loss": 46.0, - "step": 7774 - }, - { - "epoch": 1.2521035468416604, - "grad_norm": 0.0004583829140756279, - "learning_rate": 0.0001999992283767984, - "loss": 46.0, - "step": 7775 - }, - { - "epoch": 1.2522645839204478, - "grad_norm": 0.001894280663691461, - "learning_rate": 0.00019999922817804197, - "loss": 46.0, - "step": 7776 - }, - { - "epoch": 1.252425620999235, - "grad_norm": 0.0008542780415154994, - "learning_rate": 0.00019999922797925992, - "loss": 46.0, - "step": 7777 - }, - { - "epoch": 1.2525866580780225, - "grad_norm": 0.006037456914782524, - "learning_rate": 0.0001999992277804523, - "loss": 46.0, - "step": 7778 - }, - { - "epoch": 1.2527476951568097, - "grad_norm": 0.002517266198992729, - "learning_rate": 0.00019999922758161907, - "loss": 46.0, - "step": 7779 - }, - { - "epoch": 1.2529087322355972, - "grad_norm": 0.0006100016180425882, - "learning_rate": 0.0001999992273827602, - "loss": 46.0, - "step": 7780 - }, - { - "epoch": 1.2530697693143846, - "grad_norm": 0.0007259164121933281, - "learning_rate": 0.0001999992271838758, - "loss": 46.0, - "step": 7781 - }, - { - "epoch": 1.253230806393172, - "grad_norm": 0.0017611620714887977, - "learning_rate": 0.0001999992269849658, - "loss": 46.0, - "step": 7782 - }, - { - "epoch": 1.2533918434719595, - "grad_norm": 0.0013355906121432781, - "learning_rate": 0.0001999992267860302, - "loss": 46.0, - "step": 7783 - }, - { - "epoch": 1.2535528805507468, - "grad_norm": 0.0031003172043710947, - "learning_rate": 0.00019999922658706898, - "loss": 46.0, - "step": 7784 - }, - { - "epoch": 1.2537139176295342, - "grad_norm": 0.0006265430711209774, - "learning_rate": 0.00019999922638808218, - "loss": 46.0, - "step": 7785 - }, - { - "epoch": 1.2538749547083217, - "grad_norm": 0.0015952771063894033, - "learning_rate": 0.0001999992261890698, - "loss": 46.0, - "step": 7786 - }, - { - "epoch": 1.254035991787109, - "grad_norm": 0.0006227141129784286, - "learning_rate": 0.00019999922599003183, - "loss": 46.0, - "step": 7787 - }, - { - "epoch": 1.2541970288658963, - "grad_norm": 0.0023414224851876497, - "learning_rate": 0.00019999922579096825, - "loss": 46.0, - "step": 7788 - }, - { - "epoch": 1.2543580659446838, - "grad_norm": 0.0013503084192052484, - "learning_rate": 0.00019999922559187905, - "loss": 46.0, - "step": 7789 - }, - { - "epoch": 1.2545191030234712, - "grad_norm": 0.0003863623715005815, - "learning_rate": 0.0001999992253927643, - "loss": 46.0, - "step": 7790 - }, - { - "epoch": 1.2546801401022585, - "grad_norm": 0.0008104327134788036, - "learning_rate": 0.00019999922519362394, - "loss": 46.0, - "step": 7791 - }, - { - "epoch": 1.254841177181046, - "grad_norm": 0.0016009743558242917, - "learning_rate": 0.00019999922499445795, - "loss": 46.0, - "step": 7792 - }, - { - "epoch": 1.2550022142598334, - "grad_norm": 0.0007865499937906861, - "learning_rate": 0.0001999992247952664, - "loss": 46.0, - "step": 7793 - }, - { - "epoch": 1.2551632513386206, - "grad_norm": 0.000501057889778167, - "learning_rate": 0.00019999922459604927, - "loss": 46.0, - "step": 7794 - }, - { - "epoch": 1.255324288417408, - "grad_norm": 0.0023141943383961916, - "learning_rate": 0.00019999922439680652, - "loss": 46.0, - "step": 7795 - }, - { - "epoch": 1.2554853254961955, - "grad_norm": 0.002178687136620283, - "learning_rate": 0.0001999992241975382, - "loss": 46.0, - "step": 7796 - }, - { - "epoch": 1.255646362574983, - "grad_norm": 0.0018516256241127849, - "learning_rate": 0.00019999922399824426, - "loss": 46.0, - "step": 7797 - }, - { - "epoch": 1.2558073996537704, - "grad_norm": 0.0013372964458540082, - "learning_rate": 0.00019999922379892473, - "loss": 46.0, - "step": 7798 - }, - { - "epoch": 1.2559684367325576, - "grad_norm": 0.0020784952212125063, - "learning_rate": 0.0001999992235995796, - "loss": 46.0, - "step": 7799 - }, - { - "epoch": 1.256129473811345, - "grad_norm": 0.0026113821659237146, - "learning_rate": 0.0001999992234002089, - "loss": 46.0, - "step": 7800 - }, - { - "epoch": 1.2562905108901323, - "grad_norm": 0.0039160423912107944, - "learning_rate": 0.0001999992232008126, - "loss": 46.0, - "step": 7801 - }, - { - "epoch": 1.2564515479689198, - "grad_norm": 0.0022273152135312557, - "learning_rate": 0.00019999922300139068, - "loss": 46.0, - "step": 7802 - }, - { - "epoch": 1.2566125850477072, - "grad_norm": 0.001879971125163138, - "learning_rate": 0.00019999922280194318, - "loss": 46.0, - "step": 7803 - }, - { - "epoch": 1.2567736221264947, - "grad_norm": 0.00146833760663867, - "learning_rate": 0.0001999992226024701, - "loss": 46.0, - "step": 7804 - }, - { - "epoch": 1.2569346592052821, - "grad_norm": 0.0015800370601937175, - "learning_rate": 0.00019999922240297139, - "loss": 46.0, - "step": 7805 - }, - { - "epoch": 1.2570956962840694, - "grad_norm": 0.0011895514326170087, - "learning_rate": 0.00019999922220344712, - "loss": 46.0, - "step": 7806 - }, - { - "epoch": 1.2572567333628568, - "grad_norm": 0.006521036382764578, - "learning_rate": 0.00019999922200389722, - "loss": 46.0, - "step": 7807 - }, - { - "epoch": 1.2574177704416443, - "grad_norm": 0.0016586346318945289, - "learning_rate": 0.00019999922180432176, - "loss": 46.0, - "step": 7808 - }, - { - "epoch": 1.2575788075204315, - "grad_norm": 0.00036867480957880616, - "learning_rate": 0.00019999922160472068, - "loss": 46.0, - "step": 7809 - }, - { - "epoch": 1.257739844599219, - "grad_norm": 0.004525313153862953, - "learning_rate": 0.00019999922140509401, - "loss": 46.0, - "step": 7810 - }, - { - "epoch": 1.2579008816780064, - "grad_norm": 0.0016664706636220217, - "learning_rate": 0.00019999922120544176, - "loss": 46.0, - "step": 7811 - }, - { - "epoch": 1.2580619187567939, - "grad_norm": 0.0030280391220003366, - "learning_rate": 0.00019999922100576392, - "loss": 46.0, - "step": 7812 - }, - { - "epoch": 1.258222955835581, - "grad_norm": 0.001684386981651187, - "learning_rate": 0.00019999922080606047, - "loss": 46.0, - "step": 7813 - }, - { - "epoch": 1.2583839929143685, - "grad_norm": 0.0018927217461168766, - "learning_rate": 0.00019999922060633143, - "loss": 46.0, - "step": 7814 - }, - { - "epoch": 1.258545029993156, - "grad_norm": 0.0013537174090743065, - "learning_rate": 0.00019999922040657677, - "loss": 46.0, - "step": 7815 - }, - { - "epoch": 1.2587060670719432, - "grad_norm": 0.0008411871967837214, - "learning_rate": 0.00019999922020679656, - "loss": 46.0, - "step": 7816 - }, - { - "epoch": 1.2588671041507307, - "grad_norm": 0.005712592042982578, - "learning_rate": 0.00019999922000699073, - "loss": 46.0, - "step": 7817 - }, - { - "epoch": 1.2590281412295181, - "grad_norm": 0.00286250957287848, - "learning_rate": 0.0001999992198071593, - "loss": 46.0, - "step": 7818 - }, - { - "epoch": 1.2591891783083056, - "grad_norm": 0.0030506982002407312, - "learning_rate": 0.0001999992196073023, - "loss": 46.0, - "step": 7819 - }, - { - "epoch": 1.2593502153870928, - "grad_norm": 0.0009191773133352399, - "learning_rate": 0.0001999992194074197, - "loss": 46.0, - "step": 7820 - }, - { - "epoch": 1.2595112524658802, - "grad_norm": 0.0027808849699795246, - "learning_rate": 0.00019999921920751148, - "loss": 46.0, - "step": 7821 - }, - { - "epoch": 1.2596722895446677, - "grad_norm": 0.0005924653960391879, - "learning_rate": 0.00019999921900757766, - "loss": 46.0, - "step": 7822 - }, - { - "epoch": 1.259833326623455, - "grad_norm": 0.001545565901324153, - "learning_rate": 0.00019999921880761829, - "loss": 46.0, - "step": 7823 - }, - { - "epoch": 1.2599943637022424, - "grad_norm": 0.0006674687028862536, - "learning_rate": 0.0001999992186076333, - "loss": 46.0, - "step": 7824 - }, - { - "epoch": 1.2601554007810298, - "grad_norm": 0.0017125386511906981, - "learning_rate": 0.0001999992184076227, - "loss": 46.0, - "step": 7825 - }, - { - "epoch": 1.2603164378598173, - "grad_norm": 0.009428374469280243, - "learning_rate": 0.00019999921820758654, - "loss": 46.0, - "step": 7826 - }, - { - "epoch": 1.2604774749386047, - "grad_norm": 0.0012851067585870624, - "learning_rate": 0.00019999921800752476, - "loss": 46.0, - "step": 7827 - }, - { - "epoch": 1.260638512017392, - "grad_norm": 0.0033618267625570297, - "learning_rate": 0.0001999992178074374, - "loss": 46.0, - "step": 7828 - }, - { - "epoch": 1.2607995490961794, - "grad_norm": 0.0018210412235930562, - "learning_rate": 0.00019999921760732443, - "loss": 46.0, - "step": 7829 - }, - { - "epoch": 1.2609605861749669, - "grad_norm": 0.0026901294477283955, - "learning_rate": 0.0001999992174071859, - "loss": 46.0, - "step": 7830 - }, - { - "epoch": 1.261121623253754, - "grad_norm": 0.004382093902677298, - "learning_rate": 0.00019999921720702173, - "loss": 46.0, - "step": 7831 - }, - { - "epoch": 1.2612826603325415, - "grad_norm": 0.0008639134466648102, - "learning_rate": 0.00019999921700683199, - "loss": 46.0, - "step": 7832 - }, - { - "epoch": 1.261443697411329, - "grad_norm": 0.0022201361134648323, - "learning_rate": 0.00019999921680661665, - "loss": 46.0, - "step": 7833 - }, - { - "epoch": 1.2616047344901165, - "grad_norm": 0.0019140188815072179, - "learning_rate": 0.0001999992166063757, - "loss": 46.0, - "step": 7834 - }, - { - "epoch": 1.2617657715689037, - "grad_norm": 0.0018385075964033604, - "learning_rate": 0.00019999921640610917, - "loss": 46.0, - "step": 7835 - }, - { - "epoch": 1.2619268086476911, - "grad_norm": 0.0075474693439900875, - "learning_rate": 0.00019999921620581705, - "loss": 46.0, - "step": 7836 - }, - { - "epoch": 1.2620878457264786, - "grad_norm": 0.0004926960682496428, - "learning_rate": 0.00019999921600549934, - "loss": 46.0, - "step": 7837 - }, - { - "epoch": 1.2622488828052658, - "grad_norm": 0.00046431299415417016, - "learning_rate": 0.00019999921580515602, - "loss": 46.0, - "step": 7838 - }, - { - "epoch": 1.2624099198840533, - "grad_norm": 0.0012626488460227847, - "learning_rate": 0.00019999921560478708, - "loss": 46.0, - "step": 7839 - }, - { - "epoch": 1.2625709569628407, - "grad_norm": 0.001826937892474234, - "learning_rate": 0.00019999921540439259, - "loss": 46.0, - "step": 7840 - }, - { - "epoch": 1.2627319940416282, - "grad_norm": 0.004643454682081938, - "learning_rate": 0.0001999992152039725, - "loss": 46.0, - "step": 7841 - }, - { - "epoch": 1.2628930311204154, - "grad_norm": 0.0019279230618849397, - "learning_rate": 0.0001999992150035268, - "loss": 46.0, - "step": 7842 - }, - { - "epoch": 1.2630540681992029, - "grad_norm": 0.0012744168052449822, - "learning_rate": 0.00019999921480305552, - "loss": 46.0, - "step": 7843 - }, - { - "epoch": 1.2632151052779903, - "grad_norm": 0.005128585267812014, - "learning_rate": 0.00019999921460255867, - "loss": 46.0, - "step": 7844 - }, - { - "epoch": 1.2633761423567775, - "grad_norm": 0.006046860944479704, - "learning_rate": 0.00019999921440203618, - "loss": 46.0, - "step": 7845 - }, - { - "epoch": 1.263537179435565, - "grad_norm": 0.003778246697038412, - "learning_rate": 0.0001999992142014881, - "loss": 46.0, - "step": 7846 - }, - { - "epoch": 1.2636982165143524, - "grad_norm": 0.0005131994839757681, - "learning_rate": 0.00019999921400091445, - "loss": 46.0, - "step": 7847 - }, - { - "epoch": 1.2638592535931399, - "grad_norm": 0.003927554935216904, - "learning_rate": 0.0001999992138003152, - "loss": 46.0, - "step": 7848 - }, - { - "epoch": 1.2640202906719273, - "grad_norm": 0.0013735465472564101, - "learning_rate": 0.0001999992135996903, - "loss": 46.0, - "step": 7849 - }, - { - "epoch": 1.2641813277507146, - "grad_norm": 0.0036202704068273306, - "learning_rate": 0.00019999921339903986, - "loss": 46.0, - "step": 7850 - }, - { - "epoch": 1.264342364829502, - "grad_norm": 0.001460652332752943, - "learning_rate": 0.00019999921319836382, - "loss": 46.0, - "step": 7851 - }, - { - "epoch": 1.2645034019082892, - "grad_norm": 0.0021991070825606585, - "learning_rate": 0.00019999921299766217, - "loss": 46.0, - "step": 7852 - }, - { - "epoch": 1.2646644389870767, - "grad_norm": 0.0044478788040578365, - "learning_rate": 0.00019999921279693496, - "loss": 46.0, - "step": 7853 - }, - { - "epoch": 1.2648254760658642, - "grad_norm": 0.0053705861791968346, - "learning_rate": 0.00019999921259618213, - "loss": 46.0, - "step": 7854 - }, - { - "epoch": 1.2649865131446516, - "grad_norm": 0.001818041317164898, - "learning_rate": 0.00019999921239540372, - "loss": 46.0, - "step": 7855 - }, - { - "epoch": 1.265147550223439, - "grad_norm": 0.0007710724021308124, - "learning_rate": 0.00019999921219459967, - "loss": 46.0, - "step": 7856 - }, - { - "epoch": 1.2653085873022263, - "grad_norm": 0.0025311822537332773, - "learning_rate": 0.00019999921199377005, - "loss": 46.0, - "step": 7857 - }, - { - "epoch": 1.2654696243810137, - "grad_norm": 0.004060943610966206, - "learning_rate": 0.00019999921179291485, - "loss": 46.0, - "step": 7858 - }, - { - "epoch": 1.2656306614598012, - "grad_norm": 0.0022596365306526423, - "learning_rate": 0.00019999921159203406, - "loss": 46.0, - "step": 7859 - }, - { - "epoch": 1.2657916985385884, - "grad_norm": 0.0014797047479078174, - "learning_rate": 0.00019999921139112766, - "loss": 46.0, - "step": 7860 - }, - { - "epoch": 1.2659527356173759, - "grad_norm": 0.005822623148560524, - "learning_rate": 0.00019999921119019567, - "loss": 46.0, - "step": 7861 - }, - { - "epoch": 1.2661137726961633, - "grad_norm": 0.006442841142416, - "learning_rate": 0.0001999992109892381, - "loss": 46.0, - "step": 7862 - }, - { - "epoch": 1.2662748097749508, - "grad_norm": 0.0025016714353114367, - "learning_rate": 0.0001999992107882549, - "loss": 46.0, - "step": 7863 - }, - { - "epoch": 1.266435846853738, - "grad_norm": 0.004256992135196924, - "learning_rate": 0.00019999921058724612, - "loss": 46.0, - "step": 7864 - }, - { - "epoch": 1.2665968839325255, - "grad_norm": 0.0008148694760166109, - "learning_rate": 0.00019999921038621176, - "loss": 46.0, - "step": 7865 - }, - { - "epoch": 1.266757921011313, - "grad_norm": 0.002305702306330204, - "learning_rate": 0.0001999992101851518, - "loss": 46.0, - "step": 7866 - }, - { - "epoch": 1.2669189580901001, - "grad_norm": 0.002403831807896495, - "learning_rate": 0.00019999920998406623, - "loss": 46.0, - "step": 7867 - }, - { - "epoch": 1.2670799951688876, - "grad_norm": 0.004615579731762409, - "learning_rate": 0.00019999920978295508, - "loss": 46.0, - "step": 7868 - }, - { - "epoch": 1.267241032247675, - "grad_norm": 0.002948813373222947, - "learning_rate": 0.00019999920958181834, - "loss": 46.0, - "step": 7869 - }, - { - "epoch": 1.2674020693264625, - "grad_norm": 0.000665841274894774, - "learning_rate": 0.00019999920938065598, - "loss": 46.0, - "step": 7870 - }, - { - "epoch": 1.26756310640525, - "grad_norm": 0.0013801068998873234, - "learning_rate": 0.00019999920917946807, - "loss": 46.0, - "step": 7871 - }, - { - "epoch": 1.2677241434840372, - "grad_norm": 0.00414777547121048, - "learning_rate": 0.00019999920897825454, - "loss": 46.0, - "step": 7872 - }, - { - "epoch": 1.2678851805628246, - "grad_norm": 0.0009371436899527907, - "learning_rate": 0.0001999992087770154, - "loss": 46.0, - "step": 7873 - }, - { - "epoch": 1.2680462176416119, - "grad_norm": 0.0004641699488274753, - "learning_rate": 0.00019999920857575069, - "loss": 46.0, - "step": 7874 - }, - { - "epoch": 1.2682072547203993, - "grad_norm": 0.0019806360360234976, - "learning_rate": 0.00019999920837446037, - "loss": 46.0, - "step": 7875 - }, - { - "epoch": 1.2683682917991868, - "grad_norm": 0.0008508330211043358, - "learning_rate": 0.00019999920817314446, - "loss": 46.0, - "step": 7876 - }, - { - "epoch": 1.2685293288779742, - "grad_norm": 0.0032252187374979258, - "learning_rate": 0.00019999920797180294, - "loss": 46.0, - "step": 7877 - }, - { - "epoch": 1.2686903659567617, - "grad_norm": 0.0025532187428325415, - "learning_rate": 0.00019999920777043583, - "loss": 46.0, - "step": 7878 - }, - { - "epoch": 1.2688514030355489, - "grad_norm": 0.0009707274148240685, - "learning_rate": 0.00019999920756904316, - "loss": 46.0, - "step": 7879 - }, - { - "epoch": 1.2690124401143363, - "grad_norm": 0.00444596353918314, - "learning_rate": 0.00019999920736762486, - "loss": 46.0, - "step": 7880 - }, - { - "epoch": 1.2691734771931238, - "grad_norm": 0.0009638170013204217, - "learning_rate": 0.00019999920716618096, - "loss": 46.0, - "step": 7881 - }, - { - "epoch": 1.269334514271911, - "grad_norm": 0.00812914501875639, - "learning_rate": 0.00019999920696471148, - "loss": 46.0, - "step": 7882 - }, - { - "epoch": 1.2694955513506985, - "grad_norm": 0.0049952557310462, - "learning_rate": 0.0001999992067632164, - "loss": 46.0, - "step": 7883 - }, - { - "epoch": 1.269656588429486, - "grad_norm": 0.006431289482861757, - "learning_rate": 0.00019999920656169575, - "loss": 46.0, - "step": 7884 - }, - { - "epoch": 1.2698176255082734, - "grad_norm": 0.0091160973533988, - "learning_rate": 0.00019999920636014948, - "loss": 46.0, - "step": 7885 - }, - { - "epoch": 1.2699786625870606, - "grad_norm": 0.0008121522841975093, - "learning_rate": 0.00019999920615857764, - "loss": 46.0, - "step": 7886 - }, - { - "epoch": 1.270139699665848, - "grad_norm": 0.001287944265641272, - "learning_rate": 0.0001999992059569802, - "loss": 46.0, - "step": 7887 - }, - { - "epoch": 1.2703007367446355, - "grad_norm": 0.0017001189989969134, - "learning_rate": 0.0001999992057553571, - "loss": 46.0, - "step": 7888 - }, - { - "epoch": 1.2704617738234227, - "grad_norm": 0.0034251012839376926, - "learning_rate": 0.0001999992055537085, - "loss": 46.0, - "step": 7889 - }, - { - "epoch": 1.2706228109022102, - "grad_norm": 0.0019064397783949971, - "learning_rate": 0.00019999920535203425, - "loss": 46.0, - "step": 7890 - }, - { - "epoch": 1.2707838479809976, - "grad_norm": 0.0015276261838153005, - "learning_rate": 0.0001999992051503344, - "loss": 46.0, - "step": 7891 - }, - { - "epoch": 1.270944885059785, - "grad_norm": 0.0007089645368978381, - "learning_rate": 0.00019999920494860897, - "loss": 46.0, - "step": 7892 - }, - { - "epoch": 1.2711059221385725, - "grad_norm": 0.0020035470370203257, - "learning_rate": 0.00019999920474685797, - "loss": 46.0, - "step": 7893 - }, - { - "epoch": 1.2712669592173598, - "grad_norm": 0.0019431532127782702, - "learning_rate": 0.00019999920454508136, - "loss": 46.0, - "step": 7894 - }, - { - "epoch": 1.2714279962961472, - "grad_norm": 0.0028469907119870186, - "learning_rate": 0.00019999920434327914, - "loss": 46.0, - "step": 7895 - }, - { - "epoch": 1.2715890333749345, - "grad_norm": 0.00600505480542779, - "learning_rate": 0.00019999920414145135, - "loss": 46.0, - "step": 7896 - }, - { - "epoch": 1.271750070453722, - "grad_norm": 0.0009883149759843946, - "learning_rate": 0.00019999920393959795, - "loss": 46.0, - "step": 7897 - }, - { - "epoch": 1.2719111075325094, - "grad_norm": 0.004567850846797228, - "learning_rate": 0.00019999920373771894, - "loss": 46.0, - "step": 7898 - }, - { - "epoch": 1.2720721446112968, - "grad_norm": 0.0021470014471560717, - "learning_rate": 0.00019999920353581436, - "loss": 46.0, - "step": 7899 - }, - { - "epoch": 1.2722331816900843, - "grad_norm": 0.006687674205750227, - "learning_rate": 0.00019999920333388417, - "loss": 46.0, - "step": 7900 - }, - { - "epoch": 1.2723942187688715, - "grad_norm": 0.004898585379123688, - "learning_rate": 0.0001999992031319284, - "loss": 46.0, - "step": 7901 - }, - { - "epoch": 1.272555255847659, - "grad_norm": 0.0005736006423830986, - "learning_rate": 0.00019999920292994704, - "loss": 46.0, - "step": 7902 - }, - { - "epoch": 1.2727162929264464, - "grad_norm": 0.0014147100737318397, - "learning_rate": 0.00019999920272794006, - "loss": 46.0, - "step": 7903 - }, - { - "epoch": 1.2728773300052336, - "grad_norm": 0.0024097587447613478, - "learning_rate": 0.0001999992025259075, - "loss": 46.0, - "step": 7904 - }, - { - "epoch": 1.273038367084021, - "grad_norm": 0.0018156928708776832, - "learning_rate": 0.00019999920232384935, - "loss": 46.0, - "step": 7905 - }, - { - "epoch": 1.2731994041628085, - "grad_norm": 0.0015606241067871451, - "learning_rate": 0.0001999992021217656, - "loss": 46.0, - "step": 7906 - }, - { - "epoch": 1.273360441241596, - "grad_norm": 0.006038035731762648, - "learning_rate": 0.00019999920191965625, - "loss": 46.0, - "step": 7907 - }, - { - "epoch": 1.2735214783203832, - "grad_norm": 0.0036547789350152016, - "learning_rate": 0.00019999920171752132, - "loss": 46.0, - "step": 7908 - }, - { - "epoch": 1.2736825153991707, - "grad_norm": 0.0023034082259982824, - "learning_rate": 0.00019999920151536076, - "loss": 46.0, - "step": 7909 - }, - { - "epoch": 1.273843552477958, - "grad_norm": 0.0011867902940139174, - "learning_rate": 0.00019999920131317462, - "loss": 46.0, - "step": 7910 - }, - { - "epoch": 1.2740045895567453, - "grad_norm": 0.0023318377789109945, - "learning_rate": 0.00019999920111096292, - "loss": 46.0, - "step": 7911 - }, - { - "epoch": 1.2741656266355328, - "grad_norm": 0.0019217395456507802, - "learning_rate": 0.0001999992009087256, - "loss": 46.0, - "step": 7912 - }, - { - "epoch": 1.2743266637143202, - "grad_norm": 0.0010135539341717958, - "learning_rate": 0.00019999920070646267, - "loss": 46.0, - "step": 7913 - }, - { - "epoch": 1.2744877007931077, - "grad_norm": 0.0016933500301092863, - "learning_rate": 0.00019999920050417416, - "loss": 46.0, - "step": 7914 - }, - { - "epoch": 1.274648737871895, - "grad_norm": 0.0009722772520035505, - "learning_rate": 0.00019999920030186008, - "loss": 46.0, - "step": 7915 - }, - { - "epoch": 1.2748097749506824, - "grad_norm": 0.0015697190538048744, - "learning_rate": 0.0001999992000995204, - "loss": 46.0, - "step": 7916 - }, - { - "epoch": 1.2749708120294698, - "grad_norm": 0.0006854765233583748, - "learning_rate": 0.00019999919989715508, - "loss": 46.0, - "step": 7917 - }, - { - "epoch": 1.275131849108257, - "grad_norm": 0.0028528235852718353, - "learning_rate": 0.0001999991996947642, - "loss": 46.0, - "step": 7918 - }, - { - "epoch": 1.2752928861870445, - "grad_norm": 0.003435129765421152, - "learning_rate": 0.0001999991994923477, - "loss": 46.0, - "step": 7919 - }, - { - "epoch": 1.275453923265832, - "grad_norm": 0.0017934262286871672, - "learning_rate": 0.00019999919928990564, - "loss": 46.0, - "step": 7920 - }, - { - "epoch": 1.2756149603446194, - "grad_norm": 0.0005804290412925184, - "learning_rate": 0.00019999919908743796, - "loss": 46.0, - "step": 7921 - }, - { - "epoch": 1.2757759974234069, - "grad_norm": 0.004956151824444532, - "learning_rate": 0.0001999991988849447, - "loss": 46.0, - "step": 7922 - }, - { - "epoch": 1.275937034502194, - "grad_norm": 0.0014738744357600808, - "learning_rate": 0.00019999919868242584, - "loss": 46.0, - "step": 7923 - }, - { - "epoch": 1.2760980715809815, - "grad_norm": 0.002577925566583872, - "learning_rate": 0.00019999919847988137, - "loss": 46.0, - "step": 7924 - }, - { - "epoch": 1.276259108659769, - "grad_norm": 0.0007445004303008318, - "learning_rate": 0.00019999919827731134, - "loss": 46.0, - "step": 7925 - }, - { - "epoch": 1.2764201457385562, - "grad_norm": 0.0010513279121369123, - "learning_rate": 0.0001999991980747157, - "loss": 46.0, - "step": 7926 - }, - { - "epoch": 1.2765811828173437, - "grad_norm": 0.0005647443467751145, - "learning_rate": 0.00019999919787209444, - "loss": 46.0, - "step": 7927 - }, - { - "epoch": 1.2767422198961311, - "grad_norm": 0.00068237679079175, - "learning_rate": 0.00019999919766944762, - "loss": 46.0, - "step": 7928 - }, - { - "epoch": 1.2769032569749186, - "grad_norm": 0.011096027679741383, - "learning_rate": 0.0001999991974667752, - "loss": 46.0, - "step": 7929 - }, - { - "epoch": 1.2770642940537058, - "grad_norm": 0.0011830878211185336, - "learning_rate": 0.00019999919726407716, - "loss": 46.0, - "step": 7930 - }, - { - "epoch": 1.2772253311324933, - "grad_norm": 0.0021713164169341326, - "learning_rate": 0.00019999919706135356, - "loss": 46.0, - "step": 7931 - }, - { - "epoch": 1.2773863682112807, - "grad_norm": 0.0003846340987365693, - "learning_rate": 0.00019999919685860434, - "loss": 46.0, - "step": 7932 - }, - { - "epoch": 1.277547405290068, - "grad_norm": 0.0022516839671880007, - "learning_rate": 0.00019999919665582953, - "loss": 46.0, - "step": 7933 - }, - { - "epoch": 1.2777084423688554, - "grad_norm": 0.0006441550212912261, - "learning_rate": 0.00019999919645302913, - "loss": 46.0, - "step": 7934 - }, - { - "epoch": 1.2778694794476428, - "grad_norm": 0.00512974988669157, - "learning_rate": 0.00019999919625020312, - "loss": 46.0, - "step": 7935 - }, - { - "epoch": 1.2780305165264303, - "grad_norm": 0.001224461360834539, - "learning_rate": 0.00019999919604735152, - "loss": 46.0, - "step": 7936 - }, - { - "epoch": 1.2781915536052175, - "grad_norm": 0.0009842407889664173, - "learning_rate": 0.00019999919584447434, - "loss": 46.0, - "step": 7937 - }, - { - "epoch": 1.278352590684005, - "grad_norm": 0.0005291182897053659, - "learning_rate": 0.00019999919564157157, - "loss": 46.0, - "step": 7938 - }, - { - "epoch": 1.2785136277627924, - "grad_norm": 0.007361816707998514, - "learning_rate": 0.00019999919543864318, - "loss": 46.0, - "step": 7939 - }, - { - "epoch": 1.2786746648415797, - "grad_norm": 0.004945671651512384, - "learning_rate": 0.0001999991952356892, - "loss": 46.0, - "step": 7940 - }, - { - "epoch": 1.278835701920367, - "grad_norm": 0.004143549595028162, - "learning_rate": 0.00019999919503270968, - "loss": 46.0, - "step": 7941 - }, - { - "epoch": 1.2789967389991546, - "grad_norm": 0.002231571124866605, - "learning_rate": 0.0001999991948297045, - "loss": 46.0, - "step": 7942 - }, - { - "epoch": 1.279157776077942, - "grad_norm": 0.0015816737432032824, - "learning_rate": 0.00019999919462667377, - "loss": 46.0, - "step": 7943 - }, - { - "epoch": 1.2793188131567295, - "grad_norm": 0.0026737286243587732, - "learning_rate": 0.0001999991944236174, - "loss": 46.0, - "step": 7944 - }, - { - "epoch": 1.2794798502355167, - "grad_norm": 0.003992780111730099, - "learning_rate": 0.00019999919422053545, - "loss": 46.0, - "step": 7945 - }, - { - "epoch": 1.2796408873143041, - "grad_norm": 0.002354080555960536, - "learning_rate": 0.0001999991940174279, - "loss": 46.0, - "step": 7946 - }, - { - "epoch": 1.2798019243930914, - "grad_norm": 0.0004585007845889777, - "learning_rate": 0.0001999991938142948, - "loss": 46.0, - "step": 7947 - }, - { - "epoch": 1.2799629614718788, - "grad_norm": 0.006793987471610308, - "learning_rate": 0.00019999919361113607, - "loss": 46.0, - "step": 7948 - }, - { - "epoch": 1.2801239985506663, - "grad_norm": 0.003035103902220726, - "learning_rate": 0.00019999919340795176, - "loss": 46.0, - "step": 7949 - }, - { - "epoch": 1.2802850356294537, - "grad_norm": 0.003445405513048172, - "learning_rate": 0.0001999991932047418, - "loss": 46.0, - "step": 7950 - }, - { - "epoch": 1.2804460727082412, - "grad_norm": 0.0009236861951649189, - "learning_rate": 0.00019999919300150632, - "loss": 46.0, - "step": 7951 - }, - { - "epoch": 1.2806071097870284, - "grad_norm": 0.004041364882141352, - "learning_rate": 0.0001999991927982452, - "loss": 46.0, - "step": 7952 - }, - { - "epoch": 1.2807681468658159, - "grad_norm": 0.0007511607836931944, - "learning_rate": 0.00019999919259495853, - "loss": 46.0, - "step": 7953 - }, - { - "epoch": 1.2809291839446033, - "grad_norm": 0.0024063477758318186, - "learning_rate": 0.00019999919239164623, - "loss": 46.0, - "step": 7954 - }, - { - "epoch": 1.2810902210233905, - "grad_norm": 0.000695219321642071, - "learning_rate": 0.0001999991921883083, - "loss": 46.0, - "step": 7955 - }, - { - "epoch": 1.281251258102178, - "grad_norm": 0.002616358920931816, - "learning_rate": 0.00019999919198494484, - "loss": 46.0, - "step": 7956 - }, - { - "epoch": 1.2814122951809654, - "grad_norm": 0.0017433814937248826, - "learning_rate": 0.00019999919178155575, - "loss": 46.0, - "step": 7957 - }, - { - "epoch": 1.281573332259753, - "grad_norm": 0.0014205455081537366, - "learning_rate": 0.0001999991915781411, - "loss": 46.0, - "step": 7958 - }, - { - "epoch": 1.2817343693385401, - "grad_norm": 0.000655974552500993, - "learning_rate": 0.0001999991913747008, - "loss": 46.0, - "step": 7959 - }, - { - "epoch": 1.2818954064173276, - "grad_norm": 0.0025297480169683695, - "learning_rate": 0.00019999919117123492, - "loss": 46.0, - "step": 7960 - }, - { - "epoch": 1.282056443496115, - "grad_norm": 0.0021449653431773186, - "learning_rate": 0.00019999919096774348, - "loss": 46.0, - "step": 7961 - }, - { - "epoch": 1.2822174805749023, - "grad_norm": 0.002024073153734207, - "learning_rate": 0.00019999919076422643, - "loss": 46.0, - "step": 7962 - }, - { - "epoch": 1.2823785176536897, - "grad_norm": 0.0012765323044732213, - "learning_rate": 0.0001999991905606838, - "loss": 46.0, - "step": 7963 - }, - { - "epoch": 1.2825395547324772, - "grad_norm": 0.0009103547781705856, - "learning_rate": 0.00019999919035711554, - "loss": 46.0, - "step": 7964 - }, - { - "epoch": 1.2827005918112646, - "grad_norm": 0.0007491717697121203, - "learning_rate": 0.00019999919015352172, - "loss": 46.0, - "step": 7965 - }, - { - "epoch": 1.282861628890052, - "grad_norm": 0.0026717265136539936, - "learning_rate": 0.0001999991899499023, - "loss": 46.0, - "step": 7966 - }, - { - "epoch": 1.2830226659688393, - "grad_norm": 0.0019305183086544275, - "learning_rate": 0.00019999918974625725, - "loss": 46.0, - "step": 7967 - }, - { - "epoch": 1.2831837030476267, - "grad_norm": 0.0021675850730389357, - "learning_rate": 0.00019999918954258664, - "loss": 46.0, - "step": 7968 - }, - { - "epoch": 1.283344740126414, - "grad_norm": 0.0006487573264166713, - "learning_rate": 0.00019999918933889043, - "loss": 46.0, - "step": 7969 - }, - { - "epoch": 1.2835057772052014, - "grad_norm": 0.001013029832392931, - "learning_rate": 0.00019999918913516862, - "loss": 46.0, - "step": 7970 - }, - { - "epoch": 1.2836668142839889, - "grad_norm": 0.001681437948718667, - "learning_rate": 0.00019999918893142123, - "loss": 46.0, - "step": 7971 - }, - { - "epoch": 1.2838278513627763, - "grad_norm": 0.0013573182513937354, - "learning_rate": 0.00019999918872764822, - "loss": 46.0, - "step": 7972 - }, - { - "epoch": 1.2839888884415638, - "grad_norm": 0.00044439014163799584, - "learning_rate": 0.0001999991885238496, - "loss": 46.0, - "step": 7973 - }, - { - "epoch": 1.284149925520351, - "grad_norm": 0.001315410016104579, - "learning_rate": 0.00019999918832002545, - "loss": 46.0, - "step": 7974 - }, - { - "epoch": 1.2843109625991385, - "grad_norm": 0.0022111060097813606, - "learning_rate": 0.00019999918811617565, - "loss": 46.0, - "step": 7975 - }, - { - "epoch": 1.284471999677926, - "grad_norm": 0.0005336496396921575, - "learning_rate": 0.00019999918791230024, - "loss": 46.0, - "step": 7976 - }, - { - "epoch": 1.2846330367567131, - "grad_norm": 0.0023534928914159536, - "learning_rate": 0.0001999991877083993, - "loss": 46.0, - "step": 7977 - }, - { - "epoch": 1.2847940738355006, - "grad_norm": 0.001318909227848053, - "learning_rate": 0.00019999918750447272, - "loss": 46.0, - "step": 7978 - }, - { - "epoch": 1.284955110914288, - "grad_norm": 0.001514313742518425, - "learning_rate": 0.00019999918730052057, - "loss": 46.0, - "step": 7979 - }, - { - "epoch": 1.2851161479930755, - "grad_norm": 0.0018094531260430813, - "learning_rate": 0.00019999918709654282, - "loss": 46.0, - "step": 7980 - }, - { - "epoch": 1.2852771850718627, - "grad_norm": 0.004572336096316576, - "learning_rate": 0.00019999918689253944, - "loss": 46.0, - "step": 7981 - }, - { - "epoch": 1.2854382221506502, - "grad_norm": 0.004093192517757416, - "learning_rate": 0.0001999991866885105, - "loss": 46.0, - "step": 7982 - }, - { - "epoch": 1.2855992592294376, - "grad_norm": 0.0007442169589921832, - "learning_rate": 0.00019999918648445596, - "loss": 46.0, - "step": 7983 - }, - { - "epoch": 1.2857602963082249, - "grad_norm": 0.0020866876002401114, - "learning_rate": 0.00019999918628037583, - "loss": 46.0, - "step": 7984 - }, - { - "epoch": 1.2859213333870123, - "grad_norm": 0.0008816205663606524, - "learning_rate": 0.0001999991860762701, - "loss": 46.0, - "step": 7985 - }, - { - "epoch": 1.2860823704657998, - "grad_norm": 0.001046821940690279, - "learning_rate": 0.0001999991858721388, - "loss": 46.0, - "step": 7986 - }, - { - "epoch": 1.2862434075445872, - "grad_norm": 0.004963348153978586, - "learning_rate": 0.00019999918566798188, - "loss": 46.0, - "step": 7987 - }, - { - "epoch": 1.2864044446233747, - "grad_norm": 0.0036255496088415384, - "learning_rate": 0.00019999918546379934, - "loss": 46.0, - "step": 7988 - }, - { - "epoch": 1.286565481702162, - "grad_norm": 0.0006340309628285468, - "learning_rate": 0.00019999918525959124, - "loss": 46.0, - "step": 7989 - }, - { - "epoch": 1.2867265187809493, - "grad_norm": 0.0018060792936012149, - "learning_rate": 0.00019999918505535753, - "loss": 46.0, - "step": 7990 - }, - { - "epoch": 1.2868875558597366, - "grad_norm": 0.0011644319165498018, - "learning_rate": 0.00019999918485109826, - "loss": 46.0, - "step": 7991 - }, - { - "epoch": 1.287048592938524, - "grad_norm": 0.004299150314182043, - "learning_rate": 0.00019999918464681335, - "loss": 46.0, - "step": 7992 - }, - { - "epoch": 1.2872096300173115, - "grad_norm": 0.0017348442925140262, - "learning_rate": 0.00019999918444250288, - "loss": 46.0, - "step": 7993 - }, - { - "epoch": 1.287370667096099, - "grad_norm": 0.001312283449806273, - "learning_rate": 0.00019999918423816676, - "loss": 46.0, - "step": 7994 - }, - { - "epoch": 1.2875317041748864, - "grad_norm": 0.004243955947458744, - "learning_rate": 0.00019999918403380512, - "loss": 46.0, - "step": 7995 - }, - { - "epoch": 1.2876927412536736, - "grad_norm": 0.0038759559392929077, - "learning_rate": 0.00019999918382941783, - "loss": 46.0, - "step": 7996 - }, - { - "epoch": 1.287853778332461, - "grad_norm": 0.000978336320258677, - "learning_rate": 0.00019999918362500498, - "loss": 46.0, - "step": 7997 - }, - { - "epoch": 1.2880148154112485, - "grad_norm": 0.0017428981373086572, - "learning_rate": 0.00019999918342056652, - "loss": 46.0, - "step": 7998 - }, - { - "epoch": 1.2881758524900357, - "grad_norm": 0.0021736298222094774, - "learning_rate": 0.00019999918321610247, - "loss": 46.0, - "step": 7999 - }, - { - "epoch": 1.2883368895688232, - "grad_norm": 0.0005048715975135565, - "learning_rate": 0.0001999991830116128, - "loss": 46.0, - "step": 8000 - }, - { - "epoch": 1.2884979266476106, - "grad_norm": 0.0011691147228702903, - "learning_rate": 0.00019999918280709755, - "loss": 46.0, - "step": 8001 - }, - { - "epoch": 1.288658963726398, - "grad_norm": 0.003715418977662921, - "learning_rate": 0.0001999991826025567, - "loss": 46.0, - "step": 8002 - }, - { - "epoch": 1.2888200008051853, - "grad_norm": 0.0007326299091801047, - "learning_rate": 0.0001999991823979903, - "loss": 46.0, - "step": 8003 - }, - { - "epoch": 1.2889810378839728, - "grad_norm": 0.0018270571017637849, - "learning_rate": 0.00019999918219339825, - "loss": 46.0, - "step": 8004 - }, - { - "epoch": 1.2891420749627602, - "grad_norm": 0.004157285206019878, - "learning_rate": 0.00019999918198878062, - "loss": 46.0, - "step": 8005 - }, - { - "epoch": 1.2893031120415475, - "grad_norm": 0.0014705565990880132, - "learning_rate": 0.00019999918178413743, - "loss": 46.0, - "step": 8006 - }, - { - "epoch": 1.289464149120335, - "grad_norm": 0.0027261364739388227, - "learning_rate": 0.00019999918157946863, - "loss": 46.0, - "step": 8007 - }, - { - "epoch": 1.2896251861991224, - "grad_norm": 0.002807356882840395, - "learning_rate": 0.00019999918137477422, - "loss": 46.0, - "step": 8008 - }, - { - "epoch": 1.2897862232779098, - "grad_norm": 0.0049120294861495495, - "learning_rate": 0.0001999991811700542, - "loss": 46.0, - "step": 8009 - }, - { - "epoch": 1.2899472603566973, - "grad_norm": 0.00293182791210711, - "learning_rate": 0.0001999991809653086, - "loss": 46.0, - "step": 8010 - }, - { - "epoch": 1.2901082974354845, - "grad_norm": 0.0010758258868008852, - "learning_rate": 0.00019999918076053742, - "loss": 46.0, - "step": 8011 - }, - { - "epoch": 1.290269334514272, - "grad_norm": 0.0014885530108585954, - "learning_rate": 0.00019999918055574063, - "loss": 46.0, - "step": 8012 - }, - { - "epoch": 1.2904303715930592, - "grad_norm": 0.0022727432660758495, - "learning_rate": 0.00019999918035091825, - "loss": 46.0, - "step": 8013 - }, - { - "epoch": 1.2905914086718466, - "grad_norm": 0.000856993836350739, - "learning_rate": 0.00019999918014607028, - "loss": 46.0, - "step": 8014 - }, - { - "epoch": 1.290752445750634, - "grad_norm": 0.0037907164078205824, - "learning_rate": 0.0001999991799411967, - "loss": 46.0, - "step": 8015 - }, - { - "epoch": 1.2909134828294215, - "grad_norm": 0.0027179638855159283, - "learning_rate": 0.00019999917973629754, - "loss": 46.0, - "step": 8016 - }, - { - "epoch": 1.291074519908209, - "grad_norm": 0.0005476229125633836, - "learning_rate": 0.00019999917953137278, - "loss": 46.0, - "step": 8017 - }, - { - "epoch": 1.2912355569869962, - "grad_norm": 0.013994633220136166, - "learning_rate": 0.00019999917932642244, - "loss": 46.0, - "step": 8018 - }, - { - "epoch": 1.2913965940657837, - "grad_norm": 0.0033847116865217686, - "learning_rate": 0.00019999917912144648, - "loss": 46.0, - "step": 8019 - }, - { - "epoch": 1.2915576311445711, - "grad_norm": 0.0005505895824171603, - "learning_rate": 0.00019999917891644494, - "loss": 46.0, - "step": 8020 - }, - { - "epoch": 1.2917186682233583, - "grad_norm": 0.0038216772954910994, - "learning_rate": 0.0001999991787114178, - "loss": 46.0, - "step": 8021 - }, - { - "epoch": 1.2918797053021458, - "grad_norm": 0.002420647768303752, - "learning_rate": 0.00019999917850636507, - "loss": 46.0, - "step": 8022 - }, - { - "epoch": 1.2920407423809332, - "grad_norm": 0.001554326037876308, - "learning_rate": 0.00019999917830128676, - "loss": 46.0, - "step": 8023 - }, - { - "epoch": 1.2922017794597207, - "grad_norm": 0.0006883572787046432, - "learning_rate": 0.00019999917809618281, - "loss": 46.0, - "step": 8024 - }, - { - "epoch": 1.292362816538508, - "grad_norm": 0.008520892821252346, - "learning_rate": 0.0001999991778910533, - "loss": 46.0, - "step": 8025 - }, - { - "epoch": 1.2925238536172954, - "grad_norm": 0.0022111323196440935, - "learning_rate": 0.0001999991776858982, - "loss": 46.0, - "step": 8026 - }, - { - "epoch": 1.2926848906960828, - "grad_norm": 0.0007049944251775742, - "learning_rate": 0.0001999991774807175, - "loss": 46.0, - "step": 8027 - }, - { - "epoch": 1.29284592777487, - "grad_norm": 0.001421093475073576, - "learning_rate": 0.0001999991772755112, - "loss": 46.0, - "step": 8028 - }, - { - "epoch": 1.2930069648536575, - "grad_norm": 0.004672470968216658, - "learning_rate": 0.00019999917707027928, - "loss": 46.0, - "step": 8029 - }, - { - "epoch": 1.293168001932445, - "grad_norm": 0.0018201512284576893, - "learning_rate": 0.0001999991768650218, - "loss": 46.0, - "step": 8030 - }, - { - "epoch": 1.2933290390112324, - "grad_norm": 0.0022638379596173763, - "learning_rate": 0.00019999917665973872, - "loss": 46.0, - "step": 8031 - }, - { - "epoch": 1.2934900760900196, - "grad_norm": 0.00047650974011048675, - "learning_rate": 0.00019999917645443005, - "loss": 46.0, - "step": 8032 - }, - { - "epoch": 1.293651113168807, - "grad_norm": 0.0015986618818715215, - "learning_rate": 0.00019999917624909577, - "loss": 46.0, - "step": 8033 - }, - { - "epoch": 1.2938121502475946, - "grad_norm": 0.0008542933501303196, - "learning_rate": 0.0001999991760437359, - "loss": 46.0, - "step": 8034 - }, - { - "epoch": 1.2939731873263818, - "grad_norm": 0.0021304574329406023, - "learning_rate": 0.00019999917583835044, - "loss": 46.0, - "step": 8035 - }, - { - "epoch": 1.2941342244051692, - "grad_norm": 0.0023620950523763895, - "learning_rate": 0.0001999991756329394, - "loss": 46.0, - "step": 8036 - }, - { - "epoch": 1.2942952614839567, - "grad_norm": 0.004709760192781687, - "learning_rate": 0.00019999917542750276, - "loss": 46.0, - "step": 8037 - }, - { - "epoch": 1.2944562985627441, - "grad_norm": 0.0010166389402002096, - "learning_rate": 0.00019999917522204048, - "loss": 46.0, - "step": 8038 - }, - { - "epoch": 1.2946173356415316, - "grad_norm": 0.0028152933809906244, - "learning_rate": 0.00019999917501655262, - "loss": 46.0, - "step": 8039 - }, - { - "epoch": 1.2947783727203188, - "grad_norm": 0.0018348614685237408, - "learning_rate": 0.0001999991748110392, - "loss": 46.0, - "step": 8040 - }, - { - "epoch": 1.2949394097991063, - "grad_norm": 0.0011196538107469678, - "learning_rate": 0.0001999991746055002, - "loss": 46.0, - "step": 8041 - }, - { - "epoch": 1.2951004468778935, - "grad_norm": 0.0016232288908213377, - "learning_rate": 0.00019999917439993557, - "loss": 46.0, - "step": 8042 - }, - { - "epoch": 1.295261483956681, - "grad_norm": 0.004233572166413069, - "learning_rate": 0.00019999917419434536, - "loss": 46.0, - "step": 8043 - }, - { - "epoch": 1.2954225210354684, - "grad_norm": 0.0019040943589061499, - "learning_rate": 0.0001999991739887295, - "loss": 46.0, - "step": 8044 - }, - { - "epoch": 1.2955835581142559, - "grad_norm": 0.003112334990873933, - "learning_rate": 0.0001999991737830881, - "loss": 46.0, - "step": 8045 - }, - { - "epoch": 1.2957445951930433, - "grad_norm": 0.0008003904949873686, - "learning_rate": 0.0001999991735774211, - "loss": 46.0, - "step": 8046 - }, - { - "epoch": 1.2959056322718305, - "grad_norm": 0.0007273473311215639, - "learning_rate": 0.0001999991733717285, - "loss": 46.0, - "step": 8047 - }, - { - "epoch": 1.296066669350618, - "grad_norm": 0.0004309994983486831, - "learning_rate": 0.00019999917316601033, - "loss": 46.0, - "step": 8048 - }, - { - "epoch": 1.2962277064294054, - "grad_norm": 0.003821717808023095, - "learning_rate": 0.00019999917296026655, - "loss": 46.0, - "step": 8049 - }, - { - "epoch": 1.2963887435081927, - "grad_norm": 0.0004929801216349006, - "learning_rate": 0.00019999917275449714, - "loss": 46.0, - "step": 8050 - }, - { - "epoch": 1.2965497805869801, - "grad_norm": 0.0012608777033165097, - "learning_rate": 0.00019999917254870218, - "loss": 46.0, - "step": 8051 - }, - { - "epoch": 1.2967108176657676, - "grad_norm": 0.0006955444696359336, - "learning_rate": 0.0001999991723428816, - "loss": 46.0, - "step": 8052 - }, - { - "epoch": 1.296871854744555, - "grad_norm": 0.0009961365722119808, - "learning_rate": 0.00019999917213703544, - "loss": 46.0, - "step": 8053 - }, - { - "epoch": 1.2970328918233422, - "grad_norm": 0.0014007980935275555, - "learning_rate": 0.00019999917193116366, - "loss": 46.0, - "step": 8054 - }, - { - "epoch": 1.2971939289021297, - "grad_norm": 0.0021253363229334354, - "learning_rate": 0.00019999917172526632, - "loss": 46.0, - "step": 8055 - }, - { - "epoch": 1.2973549659809172, - "grad_norm": 0.001655158819630742, - "learning_rate": 0.00019999917151934335, - "loss": 46.0, - "step": 8056 - }, - { - "epoch": 1.2975160030597044, - "grad_norm": 0.0010083351517096162, - "learning_rate": 0.0001999991713133948, - "loss": 46.0, - "step": 8057 - }, - { - "epoch": 1.2976770401384918, - "grad_norm": 0.000745421159081161, - "learning_rate": 0.00019999917110742068, - "loss": 46.0, - "step": 8058 - }, - { - "epoch": 1.2978380772172793, - "grad_norm": 0.008971952833235264, - "learning_rate": 0.00019999917090142094, - "loss": 46.0, - "step": 8059 - }, - { - "epoch": 1.2979991142960667, - "grad_norm": 0.006118796765804291, - "learning_rate": 0.0001999991706953956, - "loss": 46.0, - "step": 8060 - }, - { - "epoch": 1.2981601513748542, - "grad_norm": 0.004308641888201237, - "learning_rate": 0.0001999991704893447, - "loss": 46.0, - "step": 8061 - }, - { - "epoch": 1.2983211884536414, - "grad_norm": 0.005008167587220669, - "learning_rate": 0.00019999917028326817, - "loss": 46.0, - "step": 8062 - }, - { - "epoch": 1.2984822255324289, - "grad_norm": 0.0022040249314159155, - "learning_rate": 0.00019999917007716608, - "loss": 46.0, - "step": 8063 - }, - { - "epoch": 1.298643262611216, - "grad_norm": 0.0010592591715976596, - "learning_rate": 0.00019999916987103835, - "loss": 46.0, - "step": 8064 - }, - { - "epoch": 1.2988042996900035, - "grad_norm": 0.00405563460662961, - "learning_rate": 0.00019999916966488505, - "loss": 46.0, - "step": 8065 - }, - { - "epoch": 1.298965336768791, - "grad_norm": 0.0020846552215516567, - "learning_rate": 0.00019999916945870615, - "loss": 46.0, - "step": 8066 - }, - { - "epoch": 1.2991263738475785, - "grad_norm": 0.00427275849506259, - "learning_rate": 0.00019999916925250166, - "loss": 46.0, - "step": 8067 - }, - { - "epoch": 1.299287410926366, - "grad_norm": 0.0016410695388913155, - "learning_rate": 0.00019999916904627158, - "loss": 46.0, - "step": 8068 - }, - { - "epoch": 1.2994484480051531, - "grad_norm": 0.004746242426335812, - "learning_rate": 0.00019999916884001588, - "loss": 46.0, - "step": 8069 - }, - { - "epoch": 1.2996094850839406, - "grad_norm": 0.002526788739487529, - "learning_rate": 0.00019999916863373463, - "loss": 46.0, - "step": 8070 - }, - { - "epoch": 1.299770522162728, - "grad_norm": 0.002641509985551238, - "learning_rate": 0.00019999916842742773, - "loss": 46.0, - "step": 8071 - }, - { - "epoch": 1.2999315592415153, - "grad_norm": 0.0013726758770644665, - "learning_rate": 0.00019999916822109528, - "loss": 46.0, - "step": 8072 - }, - { - "epoch": 1.3000925963203027, - "grad_norm": 0.0021210613194853067, - "learning_rate": 0.00019999916801473724, - "loss": 46.0, - "step": 8073 - }, - { - "epoch": 1.3002536333990902, - "grad_norm": 0.0007135680061765015, - "learning_rate": 0.00019999916780835358, - "loss": 46.0, - "step": 8074 - }, - { - "epoch": 1.3004146704778776, - "grad_norm": 0.0015486666234210134, - "learning_rate": 0.00019999916760194433, - "loss": 46.0, - "step": 8075 - }, - { - "epoch": 1.3005757075566649, - "grad_norm": 0.0015272445743903518, - "learning_rate": 0.00019999916739550948, - "loss": 46.0, - "step": 8076 - }, - { - "epoch": 1.3007367446354523, - "grad_norm": 0.0007526118424721062, - "learning_rate": 0.00019999916718904906, - "loss": 46.0, - "step": 8077 - }, - { - "epoch": 1.3008977817142398, - "grad_norm": 0.006024092901498079, - "learning_rate": 0.00019999916698256302, - "loss": 46.0, - "step": 8078 - }, - { - "epoch": 1.301058818793027, - "grad_norm": 0.0013033177237957716, - "learning_rate": 0.00019999916677605138, - "loss": 46.0, - "step": 8079 - }, - { - "epoch": 1.3012198558718144, - "grad_norm": 0.000533826241735369, - "learning_rate": 0.00019999916656951417, - "loss": 46.0, - "step": 8080 - }, - { - "epoch": 1.3013808929506019, - "grad_norm": 0.0009822201682254672, - "learning_rate": 0.00019999916636295132, - "loss": 46.0, - "step": 8081 - }, - { - "epoch": 1.3015419300293893, - "grad_norm": 0.0017239375738427043, - "learning_rate": 0.00019999916615636294, - "loss": 46.0, - "step": 8082 - }, - { - "epoch": 1.3017029671081768, - "grad_norm": 0.0006431580986827612, - "learning_rate": 0.00019999916594974892, - "loss": 46.0, - "step": 8083 - }, - { - "epoch": 1.301864004186964, - "grad_norm": 0.0009649729472585022, - "learning_rate": 0.00019999916574310934, - "loss": 46.0, - "step": 8084 - }, - { - "epoch": 1.3020250412657515, - "grad_norm": 0.0007157270447351038, - "learning_rate": 0.00019999916553644414, - "loss": 46.0, - "step": 8085 - }, - { - "epoch": 1.3021860783445387, - "grad_norm": 0.002580914180725813, - "learning_rate": 0.00019999916532975335, - "loss": 46.0, - "step": 8086 - }, - { - "epoch": 1.3023471154233262, - "grad_norm": 0.002775158267468214, - "learning_rate": 0.00019999916512303696, - "loss": 46.0, - "step": 8087 - }, - { - "epoch": 1.3025081525021136, - "grad_norm": 0.002985249971970916, - "learning_rate": 0.00019999916491629497, - "loss": 46.0, - "step": 8088 - }, - { - "epoch": 1.302669189580901, - "grad_norm": 0.0054305437952280045, - "learning_rate": 0.0001999991647095274, - "loss": 46.0, - "step": 8089 - }, - { - "epoch": 1.3028302266596885, - "grad_norm": 0.0016031991690397263, - "learning_rate": 0.00019999916450273424, - "loss": 46.0, - "step": 8090 - }, - { - "epoch": 1.3029912637384757, - "grad_norm": 0.00307249603793025, - "learning_rate": 0.00019999916429591546, - "loss": 46.0, - "step": 8091 - }, - { - "epoch": 1.3031523008172632, - "grad_norm": 0.0022639601957052946, - "learning_rate": 0.0001999991640890711, - "loss": 46.0, - "step": 8092 - }, - { - "epoch": 1.3033133378960506, - "grad_norm": 0.0016816904535517097, - "learning_rate": 0.00019999916388220115, - "loss": 46.0, - "step": 8093 - }, - { - "epoch": 1.3034743749748379, - "grad_norm": 0.0011698901653289795, - "learning_rate": 0.00019999916367530562, - "loss": 46.0, - "step": 8094 - }, - { - "epoch": 1.3036354120536253, - "grad_norm": 0.0005653820117004216, - "learning_rate": 0.00019999916346838447, - "loss": 46.0, - "step": 8095 - }, - { - "epoch": 1.3037964491324128, - "grad_norm": 0.0018024810124188662, - "learning_rate": 0.00019999916326143776, - "loss": 46.0, - "step": 8096 - }, - { - "epoch": 1.3039574862112002, - "grad_norm": 0.0014141090214252472, - "learning_rate": 0.0001999991630544654, - "loss": 46.0, - "step": 8097 - }, - { - "epoch": 1.3041185232899875, - "grad_norm": 0.0006488273502327502, - "learning_rate": 0.00019999916284746747, - "loss": 46.0, - "step": 8098 - }, - { - "epoch": 1.304279560368775, - "grad_norm": 0.0029810857959091663, - "learning_rate": 0.00019999916264044397, - "loss": 46.0, - "step": 8099 - }, - { - "epoch": 1.3044405974475624, - "grad_norm": 0.002607325091958046, - "learning_rate": 0.00019999916243339485, - "loss": 46.0, - "step": 8100 - }, - { - "epoch": 1.3046016345263496, - "grad_norm": 0.0005921588744968176, - "learning_rate": 0.00019999916222632013, - "loss": 46.0, - "step": 8101 - }, - { - "epoch": 1.304762671605137, - "grad_norm": 0.0008126826141960919, - "learning_rate": 0.0001999991620192198, - "loss": 46.0, - "step": 8102 - }, - { - "epoch": 1.3049237086839245, - "grad_norm": 0.001323246629908681, - "learning_rate": 0.00019999916181209394, - "loss": 46.0, - "step": 8103 - }, - { - "epoch": 1.305084745762712, - "grad_norm": 0.006293023470789194, - "learning_rate": 0.00019999916160494242, - "loss": 46.0, - "step": 8104 - }, - { - "epoch": 1.3052457828414994, - "grad_norm": 0.0030664147343486547, - "learning_rate": 0.00019999916139776534, - "loss": 46.0, - "step": 8105 - }, - { - "epoch": 1.3054068199202866, - "grad_norm": 0.0019583632238209248, - "learning_rate": 0.00019999916119056268, - "loss": 46.0, - "step": 8106 - }, - { - "epoch": 1.305567856999074, - "grad_norm": 0.0006180562195368111, - "learning_rate": 0.0001999991609833344, - "loss": 46.0, - "step": 8107 - }, - { - "epoch": 1.3057288940778613, - "grad_norm": 0.001685330760665238, - "learning_rate": 0.0001999991607760805, - "loss": 46.0, - "step": 8108 - }, - { - "epoch": 1.3058899311566488, - "grad_norm": 0.0012558940798044205, - "learning_rate": 0.00019999916056880106, - "loss": 46.0, - "step": 8109 - }, - { - "epoch": 1.3060509682354362, - "grad_norm": 0.0027943886816501617, - "learning_rate": 0.000199999160361496, - "loss": 46.0, - "step": 8110 - }, - { - "epoch": 1.3062120053142237, - "grad_norm": 0.005581276956945658, - "learning_rate": 0.00019999916015416534, - "loss": 46.0, - "step": 8111 - }, - { - "epoch": 1.306373042393011, - "grad_norm": 0.0030407344456762075, - "learning_rate": 0.0001999991599468091, - "loss": 46.0, - "step": 8112 - }, - { - "epoch": 1.3065340794717983, - "grad_norm": 0.0007889422122389078, - "learning_rate": 0.00019999915973942725, - "loss": 46.0, - "step": 8113 - }, - { - "epoch": 1.3066951165505858, - "grad_norm": 0.007997584529221058, - "learning_rate": 0.0001999991595320198, - "loss": 46.0, - "step": 8114 - }, - { - "epoch": 1.3068561536293732, - "grad_norm": 0.0024954320397228003, - "learning_rate": 0.00019999915932458677, - "loss": 46.0, - "step": 8115 - }, - { - "epoch": 1.3070171907081605, - "grad_norm": 0.0025826154742389917, - "learning_rate": 0.00019999915911712813, - "loss": 46.0, - "step": 8116 - }, - { - "epoch": 1.307178227786948, - "grad_norm": 0.000843958870973438, - "learning_rate": 0.0001999991589096439, - "loss": 46.0, - "step": 8117 - }, - { - "epoch": 1.3073392648657354, - "grad_norm": 0.002617934951558709, - "learning_rate": 0.00019999915870213408, - "loss": 46.0, - "step": 8118 - }, - { - "epoch": 1.3075003019445228, - "grad_norm": 0.0013686921447515488, - "learning_rate": 0.00019999915849459868, - "loss": 46.0, - "step": 8119 - }, - { - "epoch": 1.30766133902331, - "grad_norm": 0.0015199461486190557, - "learning_rate": 0.00019999915828703766, - "loss": 46.0, - "step": 8120 - }, - { - "epoch": 1.3078223761020975, - "grad_norm": 0.00356725649908185, - "learning_rate": 0.00019999915807945105, - "loss": 46.0, - "step": 8121 - }, - { - "epoch": 1.307983413180885, - "grad_norm": 0.002082472899928689, - "learning_rate": 0.00019999915787183886, - "loss": 46.0, - "step": 8122 - }, - { - "epoch": 1.3081444502596722, - "grad_norm": 0.0005661632167175412, - "learning_rate": 0.00019999915766420108, - "loss": 46.0, - "step": 8123 - }, - { - "epoch": 1.3083054873384596, - "grad_norm": 0.0008750281413085759, - "learning_rate": 0.00019999915745653768, - "loss": 46.0, - "step": 8124 - }, - { - "epoch": 1.308466524417247, - "grad_norm": 0.002260432578623295, - "learning_rate": 0.0001999991572488487, - "loss": 46.0, - "step": 8125 - }, - { - "epoch": 1.3086275614960345, - "grad_norm": 0.0023060941603034735, - "learning_rate": 0.00019999915704113413, - "loss": 46.0, - "step": 8126 - }, - { - "epoch": 1.3087885985748218, - "grad_norm": 0.0011326707899570465, - "learning_rate": 0.00019999915683339398, - "loss": 46.0, - "step": 8127 - }, - { - "epoch": 1.3089496356536092, - "grad_norm": 0.006058217026293278, - "learning_rate": 0.0001999991566256282, - "loss": 46.0, - "step": 8128 - }, - { - "epoch": 1.3091106727323967, - "grad_norm": 0.002363781677559018, - "learning_rate": 0.00019999915641783682, - "loss": 46.0, - "step": 8129 - }, - { - "epoch": 1.309271709811184, - "grad_norm": 0.004301850683987141, - "learning_rate": 0.00019999915621001988, - "loss": 46.0, - "step": 8130 - }, - { - "epoch": 1.3094327468899714, - "grad_norm": 0.001516456832177937, - "learning_rate": 0.00019999915600217732, - "loss": 46.0, - "step": 8131 - }, - { - "epoch": 1.3095937839687588, - "grad_norm": 0.005582747980952263, - "learning_rate": 0.0001999991557943092, - "loss": 46.0, - "step": 8132 - }, - { - "epoch": 1.3097548210475463, - "grad_norm": 0.008259088732302189, - "learning_rate": 0.00019999915558641544, - "loss": 46.0, - "step": 8133 - }, - { - "epoch": 1.3099158581263337, - "grad_norm": 0.0004960817750543356, - "learning_rate": 0.00019999915537849611, - "loss": 46.0, - "step": 8134 - }, - { - "epoch": 1.310076895205121, - "grad_norm": 0.0035664236638695, - "learning_rate": 0.00019999915517055118, - "loss": 46.0, - "step": 8135 - }, - { - "epoch": 1.3102379322839084, - "grad_norm": 0.002415742026641965, - "learning_rate": 0.00019999915496258066, - "loss": 46.0, - "step": 8136 - }, - { - "epoch": 1.3103989693626958, - "grad_norm": 0.0005769507260993123, - "learning_rate": 0.00019999915475458452, - "loss": 46.0, - "step": 8137 - }, - { - "epoch": 1.310560006441483, - "grad_norm": 0.00028646091232076287, - "learning_rate": 0.00019999915454656285, - "loss": 46.0, - "step": 8138 - }, - { - "epoch": 1.3107210435202705, - "grad_norm": 0.0006671464652754366, - "learning_rate": 0.0001999991543385155, - "loss": 46.0, - "step": 8139 - }, - { - "epoch": 1.310882080599058, - "grad_norm": 0.0017367142718285322, - "learning_rate": 0.00019999915413044264, - "loss": 46.0, - "step": 8140 - }, - { - "epoch": 1.3110431176778454, - "grad_norm": 0.01110994815826416, - "learning_rate": 0.00019999915392234413, - "loss": 46.0, - "step": 8141 - }, - { - "epoch": 1.3112041547566327, - "grad_norm": 0.0017920555546879768, - "learning_rate": 0.00019999915371422003, - "loss": 46.0, - "step": 8142 - }, - { - "epoch": 1.31136519183542, - "grad_norm": 0.0013599625090137124, - "learning_rate": 0.00019999915350607034, - "loss": 46.0, - "step": 8143 - }, - { - "epoch": 1.3115262289142076, - "grad_norm": 0.003930533304810524, - "learning_rate": 0.00019999915329789507, - "loss": 46.0, - "step": 8144 - }, - { - "epoch": 1.3116872659929948, - "grad_norm": 0.002713278168812394, - "learning_rate": 0.00019999915308969418, - "loss": 46.0, - "step": 8145 - }, - { - "epoch": 1.3118483030717822, - "grad_norm": 0.0009364794241264462, - "learning_rate": 0.0001999991528814677, - "loss": 46.0, - "step": 8146 - }, - { - "epoch": 1.3120093401505697, - "grad_norm": 0.002139805117622018, - "learning_rate": 0.00019999915267321567, - "loss": 46.0, - "step": 8147 - }, - { - "epoch": 1.3121703772293571, - "grad_norm": 0.004375578835606575, - "learning_rate": 0.00019999915246493802, - "loss": 46.0, - "step": 8148 - }, - { - "epoch": 1.3123314143081444, - "grad_norm": 0.0029165996238589287, - "learning_rate": 0.00019999915225663476, - "loss": 46.0, - "step": 8149 - }, - { - "epoch": 1.3124924513869318, - "grad_norm": 0.0011333934962749481, - "learning_rate": 0.0001999991520483059, - "loss": 46.0, - "step": 8150 - }, - { - "epoch": 1.3126534884657193, - "grad_norm": 0.0010142400860786438, - "learning_rate": 0.00019999915183995147, - "loss": 46.0, - "step": 8151 - }, - { - "epoch": 1.3128145255445065, - "grad_norm": 0.0018940945155918598, - "learning_rate": 0.00019999915163157144, - "loss": 46.0, - "step": 8152 - }, - { - "epoch": 1.312975562623294, - "grad_norm": 0.0011889304732903838, - "learning_rate": 0.00019999915142316578, - "loss": 46.0, - "step": 8153 - }, - { - "epoch": 1.3131365997020814, - "grad_norm": 0.001042266609147191, - "learning_rate": 0.00019999915121473455, - "loss": 46.0, - "step": 8154 - }, - { - "epoch": 1.3132976367808689, - "grad_norm": 0.002718703355640173, - "learning_rate": 0.00019999915100627776, - "loss": 46.0, - "step": 8155 - }, - { - "epoch": 1.3134586738596563, - "grad_norm": 0.0007215013029053807, - "learning_rate": 0.00019999915079779533, - "loss": 46.0, - "step": 8156 - }, - { - "epoch": 1.3136197109384435, - "grad_norm": 0.0013306067558005452, - "learning_rate": 0.00019999915058928732, - "loss": 46.0, - "step": 8157 - }, - { - "epoch": 1.313780748017231, - "grad_norm": 0.0013448548270389438, - "learning_rate": 0.00019999915038075371, - "loss": 46.0, - "step": 8158 - }, - { - "epoch": 1.3139417850960182, - "grad_norm": 0.002382321050390601, - "learning_rate": 0.0001999991501721945, - "loss": 46.0, - "step": 8159 - }, - { - "epoch": 1.3141028221748057, - "grad_norm": 0.000424124562414363, - "learning_rate": 0.00019999914996360972, - "loss": 46.0, - "step": 8160 - }, - { - "epoch": 1.3142638592535931, - "grad_norm": 0.0051336102187633514, - "learning_rate": 0.00019999914975499933, - "loss": 46.0, - "step": 8161 - }, - { - "epoch": 1.3144248963323806, - "grad_norm": 0.0005932336207479239, - "learning_rate": 0.00019999914954636335, - "loss": 46.0, - "step": 8162 - }, - { - "epoch": 1.314585933411168, - "grad_norm": 0.0005597893614321947, - "learning_rate": 0.00019999914933770176, - "loss": 46.0, - "step": 8163 - }, - { - "epoch": 1.3147469704899553, - "grad_norm": 0.0035444789100438356, - "learning_rate": 0.0001999991491290146, - "loss": 46.0, - "step": 8164 - }, - { - "epoch": 1.3149080075687427, - "grad_norm": 0.00033637703745625913, - "learning_rate": 0.00019999914892030184, - "loss": 46.0, - "step": 8165 - }, - { - "epoch": 1.3150690446475302, - "grad_norm": 0.0006662870291620493, - "learning_rate": 0.00019999914871156346, - "loss": 46.0, - "step": 8166 - }, - { - "epoch": 1.3152300817263174, - "grad_norm": 0.0007983579998835921, - "learning_rate": 0.00019999914850279952, - "loss": 46.0, - "step": 8167 - }, - { - "epoch": 1.3153911188051048, - "grad_norm": 0.004298283252865076, - "learning_rate": 0.00019999914829400996, - "loss": 46.0, - "step": 8168 - }, - { - "epoch": 1.3155521558838923, - "grad_norm": 0.002112639369443059, - "learning_rate": 0.00019999914808519482, - "loss": 46.0, - "step": 8169 - }, - { - "epoch": 1.3157131929626797, - "grad_norm": 0.005377735011279583, - "learning_rate": 0.00019999914787635406, - "loss": 46.0, - "step": 8170 - }, - { - "epoch": 1.315874230041467, - "grad_norm": 0.0009230033028870821, - "learning_rate": 0.00019999914766748774, - "loss": 46.0, - "step": 8171 - }, - { - "epoch": 1.3160352671202544, - "grad_norm": 0.0005012241890653968, - "learning_rate": 0.0001999991474585958, - "loss": 46.0, - "step": 8172 - }, - { - "epoch": 1.3161963041990419, - "grad_norm": 0.004119296092540026, - "learning_rate": 0.00019999914724967826, - "loss": 46.0, - "step": 8173 - }, - { - "epoch": 1.316357341277829, - "grad_norm": 0.0025279750116169453, - "learning_rate": 0.00019999914704073516, - "loss": 46.0, - "step": 8174 - }, - { - "epoch": 1.3165183783566166, - "grad_norm": 0.0009732013568282127, - "learning_rate": 0.00019999914683176644, - "loss": 46.0, - "step": 8175 - }, - { - "epoch": 1.316679415435404, - "grad_norm": 0.0019259214168414474, - "learning_rate": 0.00019999914662277216, - "loss": 46.0, - "step": 8176 - }, - { - "epoch": 1.3168404525141915, - "grad_norm": 0.0021715043112635612, - "learning_rate": 0.00019999914641375224, - "loss": 46.0, - "step": 8177 - }, - { - "epoch": 1.317001489592979, - "grad_norm": 0.003505889791995287, - "learning_rate": 0.00019999914620470676, - "loss": 46.0, - "step": 8178 - }, - { - "epoch": 1.3171625266717661, - "grad_norm": 0.0014351534191519022, - "learning_rate": 0.00019999914599563563, - "loss": 46.0, - "step": 8179 - }, - { - "epoch": 1.3173235637505536, - "grad_norm": 0.0009196783066727221, - "learning_rate": 0.00019999914578653895, - "loss": 46.0, - "step": 8180 - }, - { - "epoch": 1.3174846008293408, - "grad_norm": 0.0009344897698611021, - "learning_rate": 0.00019999914557741665, - "loss": 46.0, - "step": 8181 - }, - { - "epoch": 1.3176456379081283, - "grad_norm": 0.0017691469984129071, - "learning_rate": 0.0001999991453682688, - "loss": 46.0, - "step": 8182 - }, - { - "epoch": 1.3178066749869157, - "grad_norm": 0.001179224462248385, - "learning_rate": 0.00019999914515909532, - "loss": 46.0, - "step": 8183 - }, - { - "epoch": 1.3179677120657032, - "grad_norm": 0.0020149152260273695, - "learning_rate": 0.00019999914494989626, - "loss": 46.0, - "step": 8184 - }, - { - "epoch": 1.3181287491444906, - "grad_norm": 0.002091505564749241, - "learning_rate": 0.0001999991447406716, - "loss": 46.0, - "step": 8185 - }, - { - "epoch": 1.3182897862232779, - "grad_norm": 0.0004325432819314301, - "learning_rate": 0.00019999914453142136, - "loss": 46.0, - "step": 8186 - }, - { - "epoch": 1.3184508233020653, - "grad_norm": 0.0008748378604650497, - "learning_rate": 0.00019999914432214548, - "loss": 46.0, - "step": 8187 - }, - { - "epoch": 1.3186118603808528, - "grad_norm": 0.0022790180519223213, - "learning_rate": 0.00019999914411284405, - "loss": 46.0, - "step": 8188 - }, - { - "epoch": 1.31877289745964, - "grad_norm": 0.0016114182071760297, - "learning_rate": 0.00019999914390351703, - "loss": 46.0, - "step": 8189 - }, - { - "epoch": 1.3189339345384274, - "grad_norm": 0.0009810769697651267, - "learning_rate": 0.00019999914369416436, - "loss": 46.0, - "step": 8190 - }, - { - "epoch": 1.319094971617215, - "grad_norm": 0.0019941620994359255, - "learning_rate": 0.00019999914348478614, - "loss": 46.0, - "step": 8191 - }, - { - "epoch": 1.3192560086960023, - "grad_norm": 0.0012314112391322851, - "learning_rate": 0.00019999914327538233, - "loss": 46.0, - "step": 8192 - }, - { - "epoch": 1.3194170457747896, - "grad_norm": 0.0009301109821535647, - "learning_rate": 0.0001999991430659529, - "loss": 46.0, - "step": 8193 - }, - { - "epoch": 1.319578082853577, - "grad_norm": 0.005204427987337112, - "learning_rate": 0.00019999914285649787, - "loss": 46.0, - "step": 8194 - }, - { - "epoch": 1.3197391199323645, - "grad_norm": 0.002175047993659973, - "learning_rate": 0.0001999991426470173, - "loss": 46.0, - "step": 8195 - }, - { - "epoch": 1.3199001570111517, - "grad_norm": 0.004512629471719265, - "learning_rate": 0.00019999914243751108, - "loss": 46.0, - "step": 8196 - }, - { - "epoch": 1.3200611940899392, - "grad_norm": 0.00025794017710722983, - "learning_rate": 0.00019999914222797928, - "loss": 46.0, - "step": 8197 - }, - { - "epoch": 1.3202222311687266, - "grad_norm": 0.00371658056974411, - "learning_rate": 0.0001999991420184219, - "loss": 46.0, - "step": 8198 - }, - { - "epoch": 1.320383268247514, - "grad_norm": 0.0019222635310143232, - "learning_rate": 0.0001999991418088389, - "loss": 46.0, - "step": 8199 - }, - { - "epoch": 1.3205443053263015, - "grad_norm": 0.0017025705892592669, - "learning_rate": 0.00019999914159923035, - "loss": 46.0, - "step": 8200 - }, - { - "epoch": 1.3207053424050887, - "grad_norm": 0.002271057805046439, - "learning_rate": 0.00019999914138959618, - "loss": 46.0, - "step": 8201 - }, - { - "epoch": 1.3208663794838762, - "grad_norm": 0.001219138503074646, - "learning_rate": 0.0001999991411799364, - "loss": 46.0, - "step": 8202 - }, - { - "epoch": 1.3210274165626634, - "grad_norm": 0.004323808941990137, - "learning_rate": 0.00019999914097025104, - "loss": 46.0, - "step": 8203 - }, - { - "epoch": 1.3211884536414509, - "grad_norm": 0.0010273369261994958, - "learning_rate": 0.00019999914076054007, - "loss": 46.0, - "step": 8204 - }, - { - "epoch": 1.3213494907202383, - "grad_norm": 0.002005660906434059, - "learning_rate": 0.00019999914055080352, - "loss": 46.0, - "step": 8205 - }, - { - "epoch": 1.3215105277990258, - "grad_norm": 0.001105654053390026, - "learning_rate": 0.00019999914034104138, - "loss": 46.0, - "step": 8206 - }, - { - "epoch": 1.3216715648778132, - "grad_norm": 0.001803745049983263, - "learning_rate": 0.00019999914013125363, - "loss": 46.0, - "step": 8207 - }, - { - "epoch": 1.3218326019566005, - "grad_norm": 0.0011665953788906336, - "learning_rate": 0.00019999913992144029, - "loss": 46.0, - "step": 8208 - }, - { - "epoch": 1.321993639035388, - "grad_norm": 0.0028275202494114637, - "learning_rate": 0.00019999913971160136, - "loss": 46.0, - "step": 8209 - }, - { - "epoch": 1.3221546761141754, - "grad_norm": 0.0011850474402308464, - "learning_rate": 0.00019999913950173684, - "loss": 46.0, - "step": 8210 - }, - { - "epoch": 1.3223157131929626, - "grad_norm": 0.0030700850766152143, - "learning_rate": 0.00019999913929184671, - "loss": 46.0, - "step": 8211 - }, - { - "epoch": 1.32247675027175, - "grad_norm": 0.00034545682137832046, - "learning_rate": 0.000199999139081931, - "loss": 46.0, - "step": 8212 - }, - { - "epoch": 1.3226377873505375, - "grad_norm": 0.0015467837220057845, - "learning_rate": 0.0001999991388719897, - "loss": 46.0, - "step": 8213 - }, - { - "epoch": 1.322798824429325, - "grad_norm": 0.001453143428079784, - "learning_rate": 0.00019999913866202275, - "loss": 46.0, - "step": 8214 - }, - { - "epoch": 1.3229598615081122, - "grad_norm": 0.004985189996659756, - "learning_rate": 0.00019999913845203027, - "loss": 46.0, - "step": 8215 - }, - { - "epoch": 1.3231208985868996, - "grad_norm": 0.001013303641229868, - "learning_rate": 0.00019999913824201218, - "loss": 46.0, - "step": 8216 - }, - { - "epoch": 1.323281935665687, - "grad_norm": 0.0010608448646962643, - "learning_rate": 0.00019999913803196847, - "loss": 46.0, - "step": 8217 - }, - { - "epoch": 1.3234429727444743, - "grad_norm": 0.0008629285730421543, - "learning_rate": 0.00019999913782189918, - "loss": 46.0, - "step": 8218 - }, - { - "epoch": 1.3236040098232618, - "grad_norm": 0.0007541061495430768, - "learning_rate": 0.0001999991376118043, - "loss": 46.0, - "step": 8219 - }, - { - "epoch": 1.3237650469020492, - "grad_norm": 0.0009547242661938071, - "learning_rate": 0.00019999913740168386, - "loss": 46.0, - "step": 8220 - }, - { - "epoch": 1.3239260839808367, - "grad_norm": 0.001845221733674407, - "learning_rate": 0.00019999913719153777, - "loss": 46.0, - "step": 8221 - }, - { - "epoch": 1.3240871210596241, - "grad_norm": 0.0059294309467077255, - "learning_rate": 0.00019999913698136613, - "loss": 46.0, - "step": 8222 - }, - { - "epoch": 1.3242481581384113, - "grad_norm": 0.004400711972266436, - "learning_rate": 0.00019999913677116885, - "loss": 46.0, - "step": 8223 - }, - { - "epoch": 1.3244091952171988, - "grad_norm": 0.01056316401809454, - "learning_rate": 0.00019999913656094603, - "loss": 46.0, - "step": 8224 - }, - { - "epoch": 1.324570232295986, - "grad_norm": 0.0015021535800769925, - "learning_rate": 0.00019999913635069755, - "loss": 46.0, - "step": 8225 - }, - { - "epoch": 1.3247312693747735, - "grad_norm": 0.0036306639667600393, - "learning_rate": 0.00019999913614042353, - "loss": 46.0, - "step": 8226 - }, - { - "epoch": 1.324892306453561, - "grad_norm": 0.001111185410991311, - "learning_rate": 0.00019999913593012387, - "loss": 46.0, - "step": 8227 - }, - { - "epoch": 1.3250533435323484, - "grad_norm": 0.0039011219050735235, - "learning_rate": 0.00019999913571979865, - "loss": 46.0, - "step": 8228 - }, - { - "epoch": 1.3252143806111358, - "grad_norm": 0.0017056837677955627, - "learning_rate": 0.00019999913550944784, - "loss": 46.0, - "step": 8229 - }, - { - "epoch": 1.325375417689923, - "grad_norm": 0.0009715649066492915, - "learning_rate": 0.00019999913529907142, - "loss": 46.0, - "step": 8230 - }, - { - "epoch": 1.3255364547687105, - "grad_norm": 0.005172718316316605, - "learning_rate": 0.0001999991350886694, - "loss": 46.0, - "step": 8231 - }, - { - "epoch": 1.325697491847498, - "grad_norm": 0.0013129191938787699, - "learning_rate": 0.0001999991348782418, - "loss": 46.0, - "step": 8232 - }, - { - "epoch": 1.3258585289262852, - "grad_norm": 0.0009031934314407408, - "learning_rate": 0.00019999913466778858, - "loss": 46.0, - "step": 8233 - }, - { - "epoch": 1.3260195660050726, - "grad_norm": 0.001726498012430966, - "learning_rate": 0.00019999913445730978, - "loss": 46.0, - "step": 8234 - }, - { - "epoch": 1.32618060308386, - "grad_norm": 0.00969169195741415, - "learning_rate": 0.0001999991342468054, - "loss": 46.0, - "step": 8235 - }, - { - "epoch": 1.3263416401626476, - "grad_norm": 0.002460531424731016, - "learning_rate": 0.00019999913403627543, - "loss": 46.0, - "step": 8236 - }, - { - "epoch": 1.3265026772414348, - "grad_norm": 0.0015143678756430745, - "learning_rate": 0.00019999913382571981, - "loss": 46.0, - "step": 8237 - }, - { - "epoch": 1.3266637143202222, - "grad_norm": 0.00221410789526999, - "learning_rate": 0.00019999913361513864, - "loss": 46.0, - "step": 8238 - }, - { - "epoch": 1.3268247513990097, - "grad_norm": 0.0012020812137052417, - "learning_rate": 0.00019999913340453188, - "loss": 46.0, - "step": 8239 - }, - { - "epoch": 1.326985788477797, - "grad_norm": 0.000679194345138967, - "learning_rate": 0.0001999991331938995, - "loss": 46.0, - "step": 8240 - }, - { - "epoch": 1.3271468255565844, - "grad_norm": 0.0010646703885868192, - "learning_rate": 0.00019999913298324155, - "loss": 46.0, - "step": 8241 - }, - { - "epoch": 1.3273078626353718, - "grad_norm": 0.00038416351890191436, - "learning_rate": 0.00019999913277255797, - "loss": 46.0, - "step": 8242 - }, - { - "epoch": 1.3274688997141593, - "grad_norm": 0.0044890837743878365, - "learning_rate": 0.00019999913256184883, - "loss": 46.0, - "step": 8243 - }, - { - "epoch": 1.3276299367929465, - "grad_norm": 0.0014554171357303858, - "learning_rate": 0.00019999913235111408, - "loss": 46.0, - "step": 8244 - }, - { - "epoch": 1.327790973871734, - "grad_norm": 0.012298003770411015, - "learning_rate": 0.00019999913214035375, - "loss": 46.0, - "step": 8245 - }, - { - "epoch": 1.3279520109505214, - "grad_norm": 0.0005114679806865752, - "learning_rate": 0.0001999991319295678, - "loss": 46.0, - "step": 8246 - }, - { - "epoch": 1.3281130480293086, - "grad_norm": 0.003929303027689457, - "learning_rate": 0.0001999991317187563, - "loss": 46.0, - "step": 8247 - }, - { - "epoch": 1.328274085108096, - "grad_norm": 0.00125032605137676, - "learning_rate": 0.00019999913150791919, - "loss": 46.0, - "step": 8248 - }, - { - "epoch": 1.3284351221868835, - "grad_norm": 0.00529604172334075, - "learning_rate": 0.00019999913129705645, - "loss": 46.0, - "step": 8249 - }, - { - "epoch": 1.328596159265671, - "grad_norm": 0.0008090316550806165, - "learning_rate": 0.00019999913108616815, - "loss": 46.0, - "step": 8250 - }, - { - "epoch": 1.3287571963444584, - "grad_norm": 0.0021193642169237137, - "learning_rate": 0.00019999913087525423, - "loss": 46.0, - "step": 8251 - }, - { - "epoch": 1.3289182334232457, - "grad_norm": 0.0016279650153592229, - "learning_rate": 0.00019999913066431473, - "loss": 46.0, - "step": 8252 - }, - { - "epoch": 1.3290792705020331, - "grad_norm": 0.0006489729275926948, - "learning_rate": 0.00019999913045334964, - "loss": 46.0, - "step": 8253 - }, - { - "epoch": 1.3292403075808203, - "grad_norm": 0.005904868245124817, - "learning_rate": 0.00019999913024235894, - "loss": 46.0, - "step": 8254 - }, - { - "epoch": 1.3294013446596078, - "grad_norm": 0.0029582222923636436, - "learning_rate": 0.00019999913003134265, - "loss": 46.0, - "step": 8255 - }, - { - "epoch": 1.3295623817383952, - "grad_norm": 0.0005188260111026466, - "learning_rate": 0.00019999912982030077, - "loss": 46.0, - "step": 8256 - }, - { - "epoch": 1.3297234188171827, - "grad_norm": 0.002158254152163863, - "learning_rate": 0.0001999991296092333, - "loss": 46.0, - "step": 8257 - }, - { - "epoch": 1.3298844558959702, - "grad_norm": 0.0006617026519961655, - "learning_rate": 0.00019999912939814023, - "loss": 46.0, - "step": 8258 - }, - { - "epoch": 1.3300454929747574, - "grad_norm": 0.0007311085937544703, - "learning_rate": 0.00019999912918702156, - "loss": 46.0, - "step": 8259 - }, - { - "epoch": 1.3302065300535448, - "grad_norm": 0.00880738440901041, - "learning_rate": 0.0001999991289758773, - "loss": 46.0, - "step": 8260 - }, - { - "epoch": 1.3303675671323323, - "grad_norm": 0.0009422603179700673, - "learning_rate": 0.00019999912876470747, - "loss": 46.0, - "step": 8261 - }, - { - "epoch": 1.3305286042111195, - "grad_norm": 0.0026198592968285084, - "learning_rate": 0.000199999128553512, - "loss": 46.0, - "step": 8262 - }, - { - "epoch": 1.330689641289907, - "grad_norm": 0.0023824789095669985, - "learning_rate": 0.00019999912834229094, - "loss": 46.0, - "step": 8263 - }, - { - "epoch": 1.3308506783686944, - "grad_norm": 0.0024635002482682467, - "learning_rate": 0.00019999912813104432, - "loss": 46.0, - "step": 8264 - }, - { - "epoch": 1.3310117154474819, - "grad_norm": 0.0005961424903944135, - "learning_rate": 0.0001999991279197721, - "loss": 46.0, - "step": 8265 - }, - { - "epoch": 1.331172752526269, - "grad_norm": 0.0017014414770528674, - "learning_rate": 0.00019999912770847427, - "loss": 46.0, - "step": 8266 - }, - { - "epoch": 1.3313337896050566, - "grad_norm": 0.003466135123744607, - "learning_rate": 0.00019999912749715085, - "loss": 46.0, - "step": 8267 - }, - { - "epoch": 1.331494826683844, - "grad_norm": 0.0069140056148171425, - "learning_rate": 0.00019999912728580182, - "loss": 46.0, - "step": 8268 - }, - { - "epoch": 1.3316558637626312, - "grad_norm": 0.0010820548050105572, - "learning_rate": 0.0001999991270744272, - "loss": 46.0, - "step": 8269 - }, - { - "epoch": 1.3318169008414187, - "grad_norm": 0.0007110327132977545, - "learning_rate": 0.00019999912686302702, - "loss": 46.0, - "step": 8270 - }, - { - "epoch": 1.3319779379202061, - "grad_norm": 0.002185997786000371, - "learning_rate": 0.0001999991266516012, - "loss": 46.0, - "step": 8271 - }, - { - "epoch": 1.3321389749989936, - "grad_norm": 0.00239480915479362, - "learning_rate": 0.00019999912644014982, - "loss": 46.0, - "step": 8272 - }, - { - "epoch": 1.332300012077781, - "grad_norm": 0.001562080578878522, - "learning_rate": 0.0001999991262286728, - "loss": 46.0, - "step": 8273 - }, - { - "epoch": 1.3324610491565683, - "grad_norm": 0.001237689284607768, - "learning_rate": 0.00019999912601717024, - "loss": 46.0, - "step": 8274 - }, - { - "epoch": 1.3326220862353557, - "grad_norm": 0.0019330834038555622, - "learning_rate": 0.00019999912580564208, - "loss": 46.0, - "step": 8275 - }, - { - "epoch": 1.332783123314143, - "grad_norm": 0.004047016613185406, - "learning_rate": 0.0001999991255940883, - "loss": 46.0, - "step": 8276 - }, - { - "epoch": 1.3329441603929304, - "grad_norm": 0.0012751294998452067, - "learning_rate": 0.00019999912538250892, - "loss": 46.0, - "step": 8277 - }, - { - "epoch": 1.3331051974717179, - "grad_norm": 0.0012406246969476342, - "learning_rate": 0.00019999912517090396, - "loss": 46.0, - "step": 8278 - }, - { - "epoch": 1.3332662345505053, - "grad_norm": 0.0010301086585968733, - "learning_rate": 0.0001999991249592734, - "loss": 46.0, - "step": 8279 - }, - { - "epoch": 1.3334272716292928, - "grad_norm": 0.0007084830431267619, - "learning_rate": 0.00019999912474761723, - "loss": 46.0, - "step": 8280 - }, - { - "epoch": 1.33358830870808, - "grad_norm": 0.0009050874505192041, - "learning_rate": 0.0001999991245359355, - "loss": 46.0, - "step": 8281 - }, - { - "epoch": 1.3337493457868674, - "grad_norm": 0.005876012146472931, - "learning_rate": 0.00019999912432422815, - "loss": 46.0, - "step": 8282 - }, - { - "epoch": 1.3339103828656549, - "grad_norm": 0.0011896018404513597, - "learning_rate": 0.0001999991241124952, - "loss": 46.0, - "step": 8283 - }, - { - "epoch": 1.3340714199444421, - "grad_norm": 0.0024625216610729694, - "learning_rate": 0.0001999991239007367, - "loss": 46.0, - "step": 8284 - }, - { - "epoch": 1.3342324570232296, - "grad_norm": 0.0014154164819046855, - "learning_rate": 0.00019999912368895255, - "loss": 46.0, - "step": 8285 - }, - { - "epoch": 1.334393494102017, - "grad_norm": 0.002725822851061821, - "learning_rate": 0.00019999912347714284, - "loss": 46.0, - "step": 8286 - }, - { - "epoch": 1.3345545311808045, - "grad_norm": 0.0017862452659755945, - "learning_rate": 0.00019999912326530751, - "loss": 46.0, - "step": 8287 - }, - { - "epoch": 1.3347155682595917, - "grad_norm": 0.0027259038761258125, - "learning_rate": 0.00019999912305344663, - "loss": 46.0, - "step": 8288 - }, - { - "epoch": 1.3348766053383792, - "grad_norm": 0.0010943528031930327, - "learning_rate": 0.00019999912284156013, - "loss": 46.0, - "step": 8289 - }, - { - "epoch": 1.3350376424171666, - "grad_norm": 0.001049652462825179, - "learning_rate": 0.00019999912262964802, - "loss": 46.0, - "step": 8290 - }, - { - "epoch": 1.3351986794959538, - "grad_norm": 0.0014066135045140982, - "learning_rate": 0.00019999912241771032, - "loss": 46.0, - "step": 8291 - }, - { - "epoch": 1.3353597165747413, - "grad_norm": 0.002039481420069933, - "learning_rate": 0.00019999912220574703, - "loss": 46.0, - "step": 8292 - }, - { - "epoch": 1.3355207536535287, - "grad_norm": 0.002038148930296302, - "learning_rate": 0.00019999912199375816, - "loss": 46.0, - "step": 8293 - }, - { - "epoch": 1.3356817907323162, - "grad_norm": 0.00338871986605227, - "learning_rate": 0.00019999912178174367, - "loss": 46.0, - "step": 8294 - }, - { - "epoch": 1.3358428278111036, - "grad_norm": 0.001932501676492393, - "learning_rate": 0.0001999991215697036, - "loss": 46.0, - "step": 8295 - }, - { - "epoch": 1.3360038648898909, - "grad_norm": 0.0007987224380485713, - "learning_rate": 0.00019999912135763794, - "loss": 46.0, - "step": 8296 - }, - { - "epoch": 1.3361649019686783, - "grad_norm": 0.0014676302671432495, - "learning_rate": 0.00019999912114554666, - "loss": 46.0, - "step": 8297 - }, - { - "epoch": 1.3363259390474655, - "grad_norm": 0.0007820677710697055, - "learning_rate": 0.0001999991209334298, - "loss": 46.0, - "step": 8298 - }, - { - "epoch": 1.336486976126253, - "grad_norm": 0.0009669442079029977, - "learning_rate": 0.00019999912072128737, - "loss": 46.0, - "step": 8299 - }, - { - "epoch": 1.3366480132050405, - "grad_norm": 0.0017099491087719798, - "learning_rate": 0.00019999912050911933, - "loss": 46.0, - "step": 8300 - }, - { - "epoch": 1.336809050283828, - "grad_norm": 0.003365869401022792, - "learning_rate": 0.00019999912029692568, - "loss": 46.0, - "step": 8301 - }, - { - "epoch": 1.3369700873626154, - "grad_norm": 0.0011575553799048066, - "learning_rate": 0.00019999912008470644, - "loss": 46.0, - "step": 8302 - }, - { - "epoch": 1.3371311244414026, - "grad_norm": 0.0024347794242203236, - "learning_rate": 0.00019999911987246161, - "loss": 46.0, - "step": 8303 - }, - { - "epoch": 1.33729216152019, - "grad_norm": 0.002584053436294198, - "learning_rate": 0.0001999991196601912, - "loss": 46.0, - "step": 8304 - }, - { - "epoch": 1.3374531985989775, - "grad_norm": 0.001957808155566454, - "learning_rate": 0.00019999911944789517, - "loss": 46.0, - "step": 8305 - }, - { - "epoch": 1.3376142356777647, - "grad_norm": 0.0006251740269362926, - "learning_rate": 0.00019999911923557358, - "loss": 46.0, - "step": 8306 - }, - { - "epoch": 1.3377752727565522, - "grad_norm": 0.0016109747812151909, - "learning_rate": 0.00019999911902322635, - "loss": 46.0, - "step": 8307 - }, - { - "epoch": 1.3379363098353396, - "grad_norm": 0.001177363214083016, - "learning_rate": 0.00019999911881085356, - "loss": 46.0, - "step": 8308 - }, - { - "epoch": 1.338097346914127, - "grad_norm": 0.001695385668426752, - "learning_rate": 0.00019999911859845516, - "loss": 46.0, - "step": 8309 - }, - { - "epoch": 1.3382583839929143, - "grad_norm": 0.0008704941719770432, - "learning_rate": 0.00019999911838603117, - "loss": 46.0, - "step": 8310 - }, - { - "epoch": 1.3384194210717018, - "grad_norm": 0.0013970899162814021, - "learning_rate": 0.0001999991181735816, - "loss": 46.0, - "step": 8311 - }, - { - "epoch": 1.3385804581504892, - "grad_norm": 0.005044432822614908, - "learning_rate": 0.0001999991179611064, - "loss": 46.0, - "step": 8312 - }, - { - "epoch": 1.3387414952292764, - "grad_norm": 0.001802628394216299, - "learning_rate": 0.00019999911774860562, - "loss": 46.0, - "step": 8313 - }, - { - "epoch": 1.3389025323080639, - "grad_norm": 0.005088148172944784, - "learning_rate": 0.00019999911753607928, - "loss": 46.0, - "step": 8314 - }, - { - "epoch": 1.3390635693868513, - "grad_norm": 0.0018335465574637055, - "learning_rate": 0.0001999991173235273, - "loss": 46.0, - "step": 8315 - }, - { - "epoch": 1.3392246064656388, - "grad_norm": 0.0007183181587606668, - "learning_rate": 0.00019999911711094973, - "loss": 46.0, - "step": 8316 - }, - { - "epoch": 1.3393856435444262, - "grad_norm": 0.0007244597654789686, - "learning_rate": 0.00019999911689834657, - "loss": 46.0, - "step": 8317 - }, - { - "epoch": 1.3395466806232135, - "grad_norm": 0.009339780546724796, - "learning_rate": 0.00019999911668571786, - "loss": 46.0, - "step": 8318 - }, - { - "epoch": 1.339707717702001, - "grad_norm": 0.0034740956034511328, - "learning_rate": 0.00019999911647306347, - "loss": 46.0, - "step": 8319 - }, - { - "epoch": 1.3398687547807882, - "grad_norm": 0.0004981555393896997, - "learning_rate": 0.00019999911626038356, - "loss": 46.0, - "step": 8320 - }, - { - "epoch": 1.3400297918595756, - "grad_norm": 0.001770513248629868, - "learning_rate": 0.000199999116047678, - "loss": 46.0, - "step": 8321 - }, - { - "epoch": 1.340190828938363, - "grad_norm": 0.0013742547016590834, - "learning_rate": 0.00019999911583494688, - "loss": 46.0, - "step": 8322 - }, - { - "epoch": 1.3403518660171505, - "grad_norm": 0.0006776098161935806, - "learning_rate": 0.00019999911562219017, - "loss": 46.0, - "step": 8323 - }, - { - "epoch": 1.340512903095938, - "grad_norm": 0.0008843560353852808, - "learning_rate": 0.00019999911540940785, - "loss": 46.0, - "step": 8324 - }, - { - "epoch": 1.3406739401747252, - "grad_norm": 0.003426025388762355, - "learning_rate": 0.00019999911519659995, - "loss": 46.0, - "step": 8325 - }, - { - "epoch": 1.3408349772535126, - "grad_norm": 0.004045134410262108, - "learning_rate": 0.00019999911498376643, - "loss": 46.0, - "step": 8326 - }, - { - "epoch": 1.3409960143323, - "grad_norm": 0.005195715464651585, - "learning_rate": 0.00019999911477090734, - "loss": 46.0, - "step": 8327 - }, - { - "epoch": 1.3411570514110873, - "grad_norm": 0.00196321913972497, - "learning_rate": 0.00019999911455802262, - "loss": 46.0, - "step": 8328 - }, - { - "epoch": 1.3413180884898748, - "grad_norm": 0.001250079134479165, - "learning_rate": 0.00019999911434511237, - "loss": 46.0, - "step": 8329 - }, - { - "epoch": 1.3414791255686622, - "grad_norm": 0.0014327957760542631, - "learning_rate": 0.00019999911413217647, - "loss": 46.0, - "step": 8330 - }, - { - "epoch": 1.3416401626474497, - "grad_norm": 0.0031670050229877234, - "learning_rate": 0.000199999113919215, - "loss": 46.0, - "step": 8331 - }, - { - "epoch": 1.341801199726237, - "grad_norm": 0.0003307102888356894, - "learning_rate": 0.0001999991137062279, - "loss": 46.0, - "step": 8332 - }, - { - "epoch": 1.3419622368050244, - "grad_norm": 0.0006395092932507396, - "learning_rate": 0.00019999911349321523, - "loss": 46.0, - "step": 8333 - }, - { - "epoch": 1.3421232738838118, - "grad_norm": 0.0009109979146160185, - "learning_rate": 0.00019999911328017698, - "loss": 46.0, - "step": 8334 - }, - { - "epoch": 1.342284310962599, - "grad_norm": 0.0006914462428539991, - "learning_rate": 0.00019999911306711312, - "loss": 46.0, - "step": 8335 - }, - { - "epoch": 1.3424453480413865, - "grad_norm": 0.0029380943160504103, - "learning_rate": 0.00019999911285402367, - "loss": 46.0, - "step": 8336 - }, - { - "epoch": 1.342606385120174, - "grad_norm": 0.003370831022039056, - "learning_rate": 0.00019999911264090864, - "loss": 46.0, - "step": 8337 - }, - { - "epoch": 1.3427674221989614, - "grad_norm": 0.004577951040118933, - "learning_rate": 0.000199999112427768, - "loss": 46.0, - "step": 8338 - }, - { - "epoch": 1.3429284592777486, - "grad_norm": 0.0004588786978274584, - "learning_rate": 0.00019999911221460175, - "loss": 46.0, - "step": 8339 - }, - { - "epoch": 1.343089496356536, - "grad_norm": 0.0007934690802358091, - "learning_rate": 0.00019999911200140993, - "loss": 46.0, - "step": 8340 - }, - { - "epoch": 1.3432505334353235, - "grad_norm": 0.010320227593183517, - "learning_rate": 0.0001999991117881925, - "loss": 46.0, - "step": 8341 - }, - { - "epoch": 1.3434115705141108, - "grad_norm": 0.005865951534360647, - "learning_rate": 0.00019999911157494947, - "loss": 46.0, - "step": 8342 - }, - { - "epoch": 1.3435726075928982, - "grad_norm": 0.0008823686512187123, - "learning_rate": 0.00019999911136168086, - "loss": 46.0, - "step": 8343 - }, - { - "epoch": 1.3437336446716857, - "grad_norm": 0.0012530016247183084, - "learning_rate": 0.00019999911114838666, - "loss": 46.0, - "step": 8344 - }, - { - "epoch": 1.343894681750473, - "grad_norm": 0.0032118402887135744, - "learning_rate": 0.00019999911093506684, - "loss": 46.0, - "step": 8345 - }, - { - "epoch": 1.3440557188292606, - "grad_norm": 0.0004974527982994914, - "learning_rate": 0.00019999911072172144, - "loss": 46.0, - "step": 8346 - }, - { - "epoch": 1.3442167559080478, - "grad_norm": 0.0007516128825955093, - "learning_rate": 0.00019999911050835046, - "loss": 46.0, - "step": 8347 - }, - { - "epoch": 1.3443777929868352, - "grad_norm": 0.002259405329823494, - "learning_rate": 0.00019999911029495386, - "loss": 46.0, - "step": 8348 - }, - { - "epoch": 1.3445388300656227, - "grad_norm": 0.0014529795153066516, - "learning_rate": 0.00019999911008153167, - "loss": 46.0, - "step": 8349 - }, - { - "epoch": 1.34469986714441, - "grad_norm": 0.0017328370595350862, - "learning_rate": 0.0001999991098680839, - "loss": 46.0, - "step": 8350 - }, - { - "epoch": 1.3448609042231974, - "grad_norm": 0.0014722829218953848, - "learning_rate": 0.00019999910965461053, - "loss": 46.0, - "step": 8351 - }, - { - "epoch": 1.3450219413019848, - "grad_norm": 0.0019518237095326185, - "learning_rate": 0.00019999910944111155, - "loss": 46.0, - "step": 8352 - }, - { - "epoch": 1.3451829783807723, - "grad_norm": 0.0008531832718290389, - "learning_rate": 0.000199999109227587, - "loss": 46.0, - "step": 8353 - }, - { - "epoch": 1.3453440154595595, - "grad_norm": 0.0020764674991369247, - "learning_rate": 0.00019999910901403683, - "loss": 46.0, - "step": 8354 - }, - { - "epoch": 1.345505052538347, - "grad_norm": 0.0012880354188382626, - "learning_rate": 0.0001999991088004611, - "loss": 46.0, - "step": 8355 - }, - { - "epoch": 1.3456660896171344, - "grad_norm": 0.0020424723625183105, - "learning_rate": 0.00019999910858685974, - "loss": 46.0, - "step": 8356 - }, - { - "epoch": 1.3458271266959216, - "grad_norm": 0.004828402306884527, - "learning_rate": 0.00019999910837323283, - "loss": 46.0, - "step": 8357 - }, - { - "epoch": 1.345988163774709, - "grad_norm": 0.0019889031536877155, - "learning_rate": 0.00019999910815958027, - "loss": 46.0, - "step": 8358 - }, - { - "epoch": 1.3461492008534965, - "grad_norm": 0.0031129762064665556, - "learning_rate": 0.00019999910794590216, - "loss": 46.0, - "step": 8359 - }, - { - "epoch": 1.346310237932284, - "grad_norm": 0.00445446465164423, - "learning_rate": 0.00019999910773219843, - "loss": 46.0, - "step": 8360 - }, - { - "epoch": 1.3464712750110712, - "grad_norm": 0.001002848264761269, - "learning_rate": 0.0001999991075184691, - "loss": 46.0, - "step": 8361 - }, - { - "epoch": 1.3466323120898587, - "grad_norm": 0.003077493514865637, - "learning_rate": 0.00019999910730471418, - "loss": 46.0, - "step": 8362 - }, - { - "epoch": 1.3467933491686461, - "grad_norm": 0.000879055995028466, - "learning_rate": 0.0001999991070909337, - "loss": 46.0, - "step": 8363 - }, - { - "epoch": 1.3469543862474334, - "grad_norm": 0.00426897220313549, - "learning_rate": 0.00019999910687712758, - "loss": 46.0, - "step": 8364 - }, - { - "epoch": 1.3471154233262208, - "grad_norm": 0.002269066171720624, - "learning_rate": 0.0001999991066632959, - "loss": 46.0, - "step": 8365 - }, - { - "epoch": 1.3472764604050083, - "grad_norm": 0.0005826735286973417, - "learning_rate": 0.0001999991064494386, - "loss": 46.0, - "step": 8366 - }, - { - "epoch": 1.3474374974837957, - "grad_norm": 0.0007702274597249925, - "learning_rate": 0.00019999910623555572, - "loss": 46.0, - "step": 8367 - }, - { - "epoch": 1.3475985345625832, - "grad_norm": 0.009286720305681229, - "learning_rate": 0.00019999910602164724, - "loss": 46.0, - "step": 8368 - }, - { - "epoch": 1.3477595716413704, - "grad_norm": 0.005011996254324913, - "learning_rate": 0.00019999910580771317, - "loss": 46.0, - "step": 8369 - }, - { - "epoch": 1.3479206087201578, - "grad_norm": 0.0004813605628442019, - "learning_rate": 0.0001999991055937535, - "loss": 46.0, - "step": 8370 - }, - { - "epoch": 1.348081645798945, - "grad_norm": 0.001700040651485324, - "learning_rate": 0.00019999910537976822, - "loss": 46.0, - "step": 8371 - }, - { - "epoch": 1.3482426828777325, - "grad_norm": 0.001677106600254774, - "learning_rate": 0.00019999910516575736, - "loss": 46.0, - "step": 8372 - }, - { - "epoch": 1.34840371995652, - "grad_norm": 0.000986463506706059, - "learning_rate": 0.00019999910495172092, - "loss": 46.0, - "step": 8373 - }, - { - "epoch": 1.3485647570353074, - "grad_norm": 0.0046012806706130505, - "learning_rate": 0.00019999910473765886, - "loss": 46.0, - "step": 8374 - }, - { - "epoch": 1.3487257941140949, - "grad_norm": 0.0017061902908608317, - "learning_rate": 0.00019999910452357122, - "loss": 46.0, - "step": 8375 - }, - { - "epoch": 1.348886831192882, - "grad_norm": 0.0032872790470719337, - "learning_rate": 0.00019999910430945798, - "loss": 46.0, - "step": 8376 - }, - { - "epoch": 1.3490478682716696, - "grad_norm": 0.0020111144986003637, - "learning_rate": 0.00019999910409531914, - "loss": 46.0, - "step": 8377 - }, - { - "epoch": 1.349208905350457, - "grad_norm": 0.0009973772102966905, - "learning_rate": 0.00019999910388115473, - "loss": 46.0, - "step": 8378 - }, - { - "epoch": 1.3493699424292442, - "grad_norm": 0.0016136944759637117, - "learning_rate": 0.0001999991036669647, - "loss": 46.0, - "step": 8379 - }, - { - "epoch": 1.3495309795080317, - "grad_norm": 0.00300107360817492, - "learning_rate": 0.00019999910345274907, - "loss": 46.0, - "step": 8380 - }, - { - "epoch": 1.3496920165868191, - "grad_norm": 0.0011417981004342437, - "learning_rate": 0.00019999910323850788, - "loss": 46.0, - "step": 8381 - }, - { - "epoch": 1.3498530536656066, - "grad_norm": 0.0031487373635172844, - "learning_rate": 0.00019999910302424104, - "loss": 46.0, - "step": 8382 - }, - { - "epoch": 1.3500140907443938, - "grad_norm": 0.001366683398373425, - "learning_rate": 0.00019999910280994867, - "loss": 46.0, - "step": 8383 - }, - { - "epoch": 1.3501751278231813, - "grad_norm": 0.0005385983968153596, - "learning_rate": 0.00019999910259563066, - "loss": 46.0, - "step": 8384 - }, - { - "epoch": 1.3503361649019687, - "grad_norm": 0.002977861789986491, - "learning_rate": 0.0001999991023812871, - "loss": 46.0, - "step": 8385 - }, - { - "epoch": 1.350497201980756, - "grad_norm": 0.004040272906422615, - "learning_rate": 0.0001999991021669179, - "loss": 46.0, - "step": 8386 - }, - { - "epoch": 1.3506582390595434, - "grad_norm": 0.0012413655640557408, - "learning_rate": 0.0001999991019525231, - "loss": 46.0, - "step": 8387 - }, - { - "epoch": 1.3508192761383309, - "grad_norm": 0.0014118441613391042, - "learning_rate": 0.00019999910173810275, - "loss": 46.0, - "step": 8388 - }, - { - "epoch": 1.3509803132171183, - "grad_norm": 0.0009189636330120265, - "learning_rate": 0.00019999910152365677, - "loss": 46.0, - "step": 8389 - }, - { - "epoch": 1.3511413502959058, - "grad_norm": 0.0013718331465497613, - "learning_rate": 0.0001999991013091852, - "loss": 46.0, - "step": 8390 - }, - { - "epoch": 1.351302387374693, - "grad_norm": 0.004593018442392349, - "learning_rate": 0.00019999910109468803, - "loss": 46.0, - "step": 8391 - }, - { - "epoch": 1.3514634244534804, - "grad_norm": 0.008490891195833683, - "learning_rate": 0.0001999991008801653, - "loss": 46.0, - "step": 8392 - }, - { - "epoch": 1.3516244615322677, - "grad_norm": 0.0019652703776955605, - "learning_rate": 0.00019999910066561695, - "loss": 46.0, - "step": 8393 - }, - { - "epoch": 1.3517854986110551, - "grad_norm": 0.0015580151230096817, - "learning_rate": 0.000199999100451043, - "loss": 46.0, - "step": 8394 - }, - { - "epoch": 1.3519465356898426, - "grad_norm": 0.0029250483494251966, - "learning_rate": 0.0001999991002364435, - "loss": 46.0, - "step": 8395 - }, - { - "epoch": 1.35210757276863, - "grad_norm": 0.002641297411173582, - "learning_rate": 0.00019999910002181835, - "loss": 46.0, - "step": 8396 - }, - { - "epoch": 1.3522686098474175, - "grad_norm": 0.0012804210418835282, - "learning_rate": 0.00019999909980716762, - "loss": 46.0, - "step": 8397 - }, - { - "epoch": 1.3524296469262047, - "grad_norm": 0.012449518777430058, - "learning_rate": 0.0001999990995924913, - "loss": 46.0, - "step": 8398 - }, - { - "epoch": 1.3525906840049922, - "grad_norm": 0.00150947249494493, - "learning_rate": 0.00019999909937778938, - "loss": 46.0, - "step": 8399 - }, - { - "epoch": 1.3527517210837796, - "grad_norm": 0.001049962593242526, - "learning_rate": 0.00019999909916306187, - "loss": 46.0, - "step": 8400 - }, - { - "epoch": 1.3529127581625668, - "grad_norm": 0.0005767635302618146, - "learning_rate": 0.0001999990989483088, - "loss": 46.0, - "step": 8401 - }, - { - "epoch": 1.3530737952413543, - "grad_norm": 0.0065422276966273785, - "learning_rate": 0.00019999909873353005, - "loss": 46.0, - "step": 8402 - }, - { - "epoch": 1.3532348323201417, - "grad_norm": 0.0030258605256676674, - "learning_rate": 0.0001999990985187258, - "loss": 46.0, - "step": 8403 - }, - { - "epoch": 1.3533958693989292, - "grad_norm": 0.01033786404877901, - "learning_rate": 0.0001999990983038959, - "loss": 46.0, - "step": 8404 - }, - { - "epoch": 1.3535569064777164, - "grad_norm": 0.0029609203338623047, - "learning_rate": 0.0001999990980890404, - "loss": 46.0, - "step": 8405 - }, - { - "epoch": 1.3537179435565039, - "grad_norm": 0.007678866386413574, - "learning_rate": 0.00019999909787415935, - "loss": 46.0, - "step": 8406 - }, - { - "epoch": 1.3538789806352913, - "grad_norm": 0.004396035801619291, - "learning_rate": 0.00019999909765925267, - "loss": 46.0, - "step": 8407 - }, - { - "epoch": 1.3540400177140786, - "grad_norm": 0.0047828382812440395, - "learning_rate": 0.0001999990974443204, - "loss": 46.0, - "step": 8408 - }, - { - "epoch": 1.354201054792866, - "grad_norm": 0.001196342404000461, - "learning_rate": 0.00019999909722936255, - "loss": 46.0, - "step": 8409 - }, - { - "epoch": 1.3543620918716535, - "grad_norm": 0.0012605487136170268, - "learning_rate": 0.00019999909701437908, - "loss": 46.0, - "step": 8410 - }, - { - "epoch": 1.354523128950441, - "grad_norm": 0.0005816929624415934, - "learning_rate": 0.00019999909679937003, - "loss": 46.0, - "step": 8411 - }, - { - "epoch": 1.3546841660292284, - "grad_norm": 0.0007248731562867761, - "learning_rate": 0.00019999909658433539, - "loss": 46.0, - "step": 8412 - }, - { - "epoch": 1.3548452031080156, - "grad_norm": 0.000497568107675761, - "learning_rate": 0.00019999909636927513, - "loss": 46.0, - "step": 8413 - }, - { - "epoch": 1.355006240186803, - "grad_norm": 0.0009381204145029187, - "learning_rate": 0.00019999909615418931, - "loss": 46.0, - "step": 8414 - }, - { - "epoch": 1.3551672772655903, - "grad_norm": 0.0036037079989910126, - "learning_rate": 0.00019999909593907788, - "loss": 46.0, - "step": 8415 - }, - { - "epoch": 1.3553283143443777, - "grad_norm": 0.0032943258993327618, - "learning_rate": 0.00019999909572394087, - "loss": 46.0, - "step": 8416 - }, - { - "epoch": 1.3554893514231652, - "grad_norm": 0.0037067849189043045, - "learning_rate": 0.00019999909550877823, - "loss": 46.0, - "step": 8417 - }, - { - "epoch": 1.3556503885019526, - "grad_norm": 0.007158919703215361, - "learning_rate": 0.00019999909529359004, - "loss": 46.0, - "step": 8418 - }, - { - "epoch": 1.35581142558074, - "grad_norm": 0.0005020997487008572, - "learning_rate": 0.00019999909507837624, - "loss": 46.0, - "step": 8419 - }, - { - "epoch": 1.3559724626595273, - "grad_norm": 0.001645746175199747, - "learning_rate": 0.00019999909486313682, - "loss": 46.0, - "step": 8420 - }, - { - "epoch": 1.3561334997383148, - "grad_norm": 0.0009111354011110961, - "learning_rate": 0.00019999909464787184, - "loss": 46.0, - "step": 8421 - }, - { - "epoch": 1.3562945368171022, - "grad_norm": 0.004183598328381777, - "learning_rate": 0.00019999909443258121, - "loss": 46.0, - "step": 8422 - }, - { - "epoch": 1.3564555738958894, - "grad_norm": 0.002702893689274788, - "learning_rate": 0.00019999909421726506, - "loss": 46.0, - "step": 8423 - }, - { - "epoch": 1.356616610974677, - "grad_norm": 0.0006687360582873225, - "learning_rate": 0.00019999909400192326, - "loss": 46.0, - "step": 8424 - }, - { - "epoch": 1.3567776480534643, - "grad_norm": 0.011225173249840736, - "learning_rate": 0.0001999990937865559, - "loss": 46.0, - "step": 8425 - }, - { - "epoch": 1.3569386851322518, - "grad_norm": 0.004288196563720703, - "learning_rate": 0.0001999990935711629, - "loss": 46.0, - "step": 8426 - }, - { - "epoch": 1.357099722211039, - "grad_norm": 0.002006173599511385, - "learning_rate": 0.00019999909335574435, - "loss": 46.0, - "step": 8427 - }, - { - "epoch": 1.3572607592898265, - "grad_norm": 0.0019895550794899464, - "learning_rate": 0.00019999909314030018, - "loss": 46.0, - "step": 8428 - }, - { - "epoch": 1.357421796368614, - "grad_norm": 0.0011818077182397246, - "learning_rate": 0.00019999909292483045, - "loss": 46.0, - "step": 8429 - }, - { - "epoch": 1.3575828334474012, - "grad_norm": 0.0022285941522568464, - "learning_rate": 0.0001999990927093351, - "loss": 46.0, - "step": 8430 - }, - { - "epoch": 1.3577438705261886, - "grad_norm": 0.0030746273696422577, - "learning_rate": 0.00019999909249381417, - "loss": 46.0, - "step": 8431 - }, - { - "epoch": 1.357904907604976, - "grad_norm": 0.0015490445075556636, - "learning_rate": 0.0001999990922782676, - "loss": 46.0, - "step": 8432 - }, - { - "epoch": 1.3580659446837635, - "grad_norm": 0.0040275962091982365, - "learning_rate": 0.0001999990920626955, - "loss": 46.0, - "step": 8433 - }, - { - "epoch": 1.3582269817625507, - "grad_norm": 0.0028057959862053394, - "learning_rate": 0.00019999909184709777, - "loss": 46.0, - "step": 8434 - }, - { - "epoch": 1.3583880188413382, - "grad_norm": 0.00052833795780316, - "learning_rate": 0.00019999909163147446, - "loss": 46.0, - "step": 8435 - }, - { - "epoch": 1.3585490559201256, - "grad_norm": 0.0020677833817899227, - "learning_rate": 0.00019999909141582553, - "loss": 46.0, - "step": 8436 - }, - { - "epoch": 1.3587100929989129, - "grad_norm": 0.008361995220184326, - "learning_rate": 0.00019999909120015102, - "loss": 46.0, - "step": 8437 - }, - { - "epoch": 1.3588711300777003, - "grad_norm": 0.0047791688703000546, - "learning_rate": 0.0001999990909844509, - "loss": 46.0, - "step": 8438 - }, - { - "epoch": 1.3590321671564878, - "grad_norm": 0.0005320021882653236, - "learning_rate": 0.0001999990907687252, - "loss": 46.0, - "step": 8439 - }, - { - "epoch": 1.3591932042352752, - "grad_norm": 0.005229340400546789, - "learning_rate": 0.0001999990905529739, - "loss": 46.0, - "step": 8440 - }, - { - "epoch": 1.3593542413140627, - "grad_norm": 0.0016253853682428598, - "learning_rate": 0.00019999909033719703, - "loss": 46.0, - "step": 8441 - }, - { - "epoch": 1.35951527839285, - "grad_norm": 0.0013752507511526346, - "learning_rate": 0.00019999909012139453, - "loss": 46.0, - "step": 8442 - }, - { - "epoch": 1.3596763154716374, - "grad_norm": 0.004130436107516289, - "learning_rate": 0.00019999908990556647, - "loss": 46.0, - "step": 8443 - }, - { - "epoch": 1.3598373525504248, - "grad_norm": 0.0005620232550427318, - "learning_rate": 0.0001999990896897128, - "loss": 46.0, - "step": 8444 - }, - { - "epoch": 1.359998389629212, - "grad_norm": 0.003313005669042468, - "learning_rate": 0.00019999908947383353, - "loss": 46.0, - "step": 8445 - }, - { - "epoch": 1.3601594267079995, - "grad_norm": 0.004215719643980265, - "learning_rate": 0.00019999908925792865, - "loss": 46.0, - "step": 8446 - }, - { - "epoch": 1.360320463786787, - "grad_norm": 0.005193840712308884, - "learning_rate": 0.0001999990890419982, - "loss": 46.0, - "step": 8447 - }, - { - "epoch": 1.3604815008655744, - "grad_norm": 0.009163262322545052, - "learning_rate": 0.00019999908882604214, - "loss": 46.0, - "step": 8448 - }, - { - "epoch": 1.3606425379443616, - "grad_norm": 0.006165005266666412, - "learning_rate": 0.0001999990886100605, - "loss": 46.0, - "step": 8449 - }, - { - "epoch": 1.360803575023149, - "grad_norm": 0.003316365648061037, - "learning_rate": 0.00019999908839405325, - "loss": 46.0, - "step": 8450 - }, - { - "epoch": 1.3609646121019365, - "grad_norm": 0.001776957418769598, - "learning_rate": 0.00019999908817802043, - "loss": 46.0, - "step": 8451 - }, - { - "epoch": 1.3611256491807238, - "grad_norm": 0.003165928181260824, - "learning_rate": 0.00019999908796196198, - "loss": 46.0, - "step": 8452 - }, - { - "epoch": 1.3612866862595112, - "grad_norm": 0.0013354551047086716, - "learning_rate": 0.00019999908774587797, - "loss": 46.0, - "step": 8453 - }, - { - "epoch": 1.3614477233382987, - "grad_norm": 0.00647937273606658, - "learning_rate": 0.00019999908752976834, - "loss": 46.0, - "step": 8454 - }, - { - "epoch": 1.3616087604170861, - "grad_norm": 0.00040617745253257453, - "learning_rate": 0.00019999908731363312, - "loss": 46.0, - "step": 8455 - }, - { - "epoch": 1.3617697974958733, - "grad_norm": 0.0016904743388295174, - "learning_rate": 0.00019999908709747232, - "loss": 46.0, - "step": 8456 - }, - { - "epoch": 1.3619308345746608, - "grad_norm": 0.0007965763215906918, - "learning_rate": 0.00019999908688128593, - "loss": 46.0, - "step": 8457 - }, - { - "epoch": 1.3620918716534482, - "grad_norm": 0.0035071561578661203, - "learning_rate": 0.00019999908666507393, - "loss": 46.0, - "step": 8458 - }, - { - "epoch": 1.3622529087322355, - "grad_norm": 0.0011240806197747588, - "learning_rate": 0.00019999908644883634, - "loss": 46.0, - "step": 8459 - }, - { - "epoch": 1.362413945811023, - "grad_norm": 0.0010247306199744344, - "learning_rate": 0.00019999908623257313, - "loss": 46.0, - "step": 8460 - }, - { - "epoch": 1.3625749828898104, - "grad_norm": 0.002692255424335599, - "learning_rate": 0.00019999908601628437, - "loss": 46.0, - "step": 8461 - }, - { - "epoch": 1.3627360199685978, - "grad_norm": 0.005207649897783995, - "learning_rate": 0.00019999908579997, - "loss": 46.0, - "step": 8462 - }, - { - "epoch": 1.3628970570473853, - "grad_norm": 0.0031039121095091105, - "learning_rate": 0.00019999908558363002, - "loss": 46.0, - "step": 8463 - }, - { - "epoch": 1.3630580941261725, - "grad_norm": 0.0008966705063357949, - "learning_rate": 0.00019999908536726447, - "loss": 46.0, - "step": 8464 - }, - { - "epoch": 1.36321913120496, - "grad_norm": 0.002622065832838416, - "learning_rate": 0.0001999990851508733, - "loss": 46.0, - "step": 8465 - }, - { - "epoch": 1.3633801682837472, - "grad_norm": 0.005339410621672869, - "learning_rate": 0.00019999908493445654, - "loss": 46.0, - "step": 8466 - }, - { - "epoch": 1.3635412053625346, - "grad_norm": 0.006051977165043354, - "learning_rate": 0.00019999908471801417, - "loss": 46.0, - "step": 8467 - }, - { - "epoch": 1.363702242441322, - "grad_norm": 0.001430383650586009, - "learning_rate": 0.00019999908450154624, - "loss": 46.0, - "step": 8468 - }, - { - "epoch": 1.3638632795201096, - "grad_norm": 0.0021215640008449554, - "learning_rate": 0.00019999908428505273, - "loss": 46.0, - "step": 8469 - }, - { - "epoch": 1.364024316598897, - "grad_norm": 0.002879285253584385, - "learning_rate": 0.00019999908406853357, - "loss": 46.0, - "step": 8470 - }, - { - "epoch": 1.3641853536776842, - "grad_norm": 0.0015226696850731969, - "learning_rate": 0.00019999908385198885, - "loss": 46.0, - "step": 8471 - }, - { - "epoch": 1.3643463907564717, - "grad_norm": 0.003810349851846695, - "learning_rate": 0.00019999908363541852, - "loss": 46.0, - "step": 8472 - }, - { - "epoch": 1.3645074278352591, - "grad_norm": 0.0025322316214442253, - "learning_rate": 0.0001999990834188226, - "loss": 46.0, - "step": 8473 - }, - { - "epoch": 1.3646684649140464, - "grad_norm": 0.0008757595205679536, - "learning_rate": 0.0001999990832022011, - "loss": 46.0, - "step": 8474 - }, - { - "epoch": 1.3648295019928338, - "grad_norm": 0.0014992282958701253, - "learning_rate": 0.000199999082985554, - "loss": 46.0, - "step": 8475 - }, - { - "epoch": 1.3649905390716213, - "grad_norm": 0.0012428911868482828, - "learning_rate": 0.00019999908276888126, - "loss": 46.0, - "step": 8476 - }, - { - "epoch": 1.3651515761504087, - "grad_norm": 0.001803399994969368, - "learning_rate": 0.000199999082552183, - "loss": 46.0, - "step": 8477 - }, - { - "epoch": 1.365312613229196, - "grad_norm": 0.00048699433682486415, - "learning_rate": 0.00019999908233545908, - "loss": 46.0, - "step": 8478 - }, - { - "epoch": 1.3654736503079834, - "grad_norm": 0.0044006346724927425, - "learning_rate": 0.0001999990821187096, - "loss": 46.0, - "step": 8479 - }, - { - "epoch": 1.3656346873867709, - "grad_norm": 0.0015177465975284576, - "learning_rate": 0.00019999908190193453, - "loss": 46.0, - "step": 8480 - }, - { - "epoch": 1.365795724465558, - "grad_norm": 0.0033245442900806665, - "learning_rate": 0.00019999908168513386, - "loss": 46.0, - "step": 8481 - }, - { - "epoch": 1.3659567615443455, - "grad_norm": 0.0016388514777645469, - "learning_rate": 0.00019999908146830757, - "loss": 46.0, - "step": 8482 - }, - { - "epoch": 1.366117798623133, - "grad_norm": 0.0009756018407642841, - "learning_rate": 0.00019999908125145573, - "loss": 46.0, - "step": 8483 - }, - { - "epoch": 1.3662788357019204, - "grad_norm": 0.0023046028800308704, - "learning_rate": 0.00019999908103457824, - "loss": 46.0, - "step": 8484 - }, - { - "epoch": 1.3664398727807079, - "grad_norm": 0.0067170062102377415, - "learning_rate": 0.00019999908081767522, - "loss": 46.0, - "step": 8485 - }, - { - "epoch": 1.3666009098594951, - "grad_norm": 0.002000573556870222, - "learning_rate": 0.00019999908060074656, - "loss": 46.0, - "step": 8486 - }, - { - "epoch": 1.3667619469382826, - "grad_norm": 0.0027586210053414106, - "learning_rate": 0.0001999990803837923, - "loss": 46.0, - "step": 8487 - }, - { - "epoch": 1.3669229840170698, - "grad_norm": 0.0005638182628899813, - "learning_rate": 0.00019999908016681247, - "loss": 46.0, - "step": 8488 - }, - { - "epoch": 1.3670840210958572, - "grad_norm": 0.00064820860279724, - "learning_rate": 0.00019999907994980705, - "loss": 46.0, - "step": 8489 - }, - { - "epoch": 1.3672450581746447, - "grad_norm": 0.0018096237909048796, - "learning_rate": 0.00019999907973277604, - "loss": 46.0, - "step": 8490 - }, - { - "epoch": 1.3674060952534322, - "grad_norm": 0.0019132404122501612, - "learning_rate": 0.0001999990795157194, - "loss": 46.0, - "step": 8491 - }, - { - "epoch": 1.3675671323322196, - "grad_norm": 0.003457923186942935, - "learning_rate": 0.00019999907929863718, - "loss": 46.0, - "step": 8492 - }, - { - "epoch": 1.3677281694110068, - "grad_norm": 0.0004081083170603961, - "learning_rate": 0.00019999907908152938, - "loss": 46.0, - "step": 8493 - }, - { - "epoch": 1.3678892064897943, - "grad_norm": 0.0013288760092109442, - "learning_rate": 0.00019999907886439596, - "loss": 46.0, - "step": 8494 - }, - { - "epoch": 1.3680502435685817, - "grad_norm": 0.0012940262677147985, - "learning_rate": 0.00019999907864723696, - "loss": 46.0, - "step": 8495 - }, - { - "epoch": 1.368211280647369, - "grad_norm": 0.003123154165223241, - "learning_rate": 0.00019999907843005238, - "loss": 46.0, - "step": 8496 - }, - { - "epoch": 1.3683723177261564, - "grad_norm": 0.0014393990859389305, - "learning_rate": 0.00019999907821284218, - "loss": 46.0, - "step": 8497 - }, - { - "epoch": 1.3685333548049439, - "grad_norm": 0.0007467317045666277, - "learning_rate": 0.0001999990779956064, - "loss": 46.0, - "step": 8498 - }, - { - "epoch": 1.3686943918837313, - "grad_norm": 0.003697833977639675, - "learning_rate": 0.000199999077778345, - "loss": 46.0, - "step": 8499 - }, - { - "epoch": 1.3688554289625186, - "grad_norm": 0.0012142921332269907, - "learning_rate": 0.00019999907756105807, - "loss": 46.0, - "step": 8500 - }, - { - "epoch": 1.369016466041306, - "grad_norm": 0.0026814532466232777, - "learning_rate": 0.0001999990773437455, - "loss": 46.0, - "step": 8501 - }, - { - "epoch": 1.3691775031200935, - "grad_norm": 0.0012107130605727434, - "learning_rate": 0.0001999990771264073, - "loss": 46.0, - "step": 8502 - }, - { - "epoch": 1.3693385401988807, - "grad_norm": 0.0011837449856102467, - "learning_rate": 0.00019999907690904358, - "loss": 46.0, - "step": 8503 - }, - { - "epoch": 1.3694995772776681, - "grad_norm": 0.011731158941984177, - "learning_rate": 0.00019999907669165424, - "loss": 46.0, - "step": 8504 - }, - { - "epoch": 1.3696606143564556, - "grad_norm": 0.0006542187184095383, - "learning_rate": 0.00019999907647423926, - "loss": 46.0, - "step": 8505 - }, - { - "epoch": 1.369821651435243, - "grad_norm": 0.004485903307795525, - "learning_rate": 0.00019999907625679872, - "loss": 46.0, - "step": 8506 - }, - { - "epoch": 1.3699826885140305, - "grad_norm": 0.004687386564910412, - "learning_rate": 0.0001999990760393326, - "loss": 46.0, - "step": 8507 - }, - { - "epoch": 1.3701437255928177, - "grad_norm": 0.003480333834886551, - "learning_rate": 0.00019999907582184085, - "loss": 46.0, - "step": 8508 - }, - { - "epoch": 1.3703047626716052, - "grad_norm": 0.0032353985588997602, - "learning_rate": 0.00019999907560432355, - "loss": 46.0, - "step": 8509 - }, - { - "epoch": 1.3704657997503924, - "grad_norm": 0.0030441845301538706, - "learning_rate": 0.00019999907538678063, - "loss": 46.0, - "step": 8510 - }, - { - "epoch": 1.3706268368291799, - "grad_norm": 0.009550451301038265, - "learning_rate": 0.0001999990751692121, - "loss": 46.0, - "step": 8511 - }, - { - "epoch": 1.3707878739079673, - "grad_norm": 0.002966528292745352, - "learning_rate": 0.00019999907495161799, - "loss": 46.0, - "step": 8512 - }, - { - "epoch": 1.3709489109867548, - "grad_norm": 0.007207896094769239, - "learning_rate": 0.00019999907473399828, - "loss": 46.0, - "step": 8513 - }, - { - "epoch": 1.3711099480655422, - "grad_norm": 0.001371773425489664, - "learning_rate": 0.00019999907451635302, - "loss": 46.0, - "step": 8514 - }, - { - "epoch": 1.3712709851443294, - "grad_norm": 0.004324706271290779, - "learning_rate": 0.0001999990742986821, - "loss": 46.0, - "step": 8515 - }, - { - "epoch": 1.3714320222231169, - "grad_norm": 0.004359810147434473, - "learning_rate": 0.00019999907408098562, - "loss": 46.0, - "step": 8516 - }, - { - "epoch": 1.3715930593019043, - "grad_norm": 0.008214239962399006, - "learning_rate": 0.00019999907386326356, - "loss": 46.0, - "step": 8517 - }, - { - "epoch": 1.3717540963806916, - "grad_norm": 0.0034329467453062534, - "learning_rate": 0.00019999907364551587, - "loss": 46.0, - "step": 8518 - }, - { - "epoch": 1.371915133459479, - "grad_norm": 0.003290883032605052, - "learning_rate": 0.0001999990734277426, - "loss": 46.0, - "step": 8519 - }, - { - "epoch": 1.3720761705382665, - "grad_norm": 0.002334638498723507, - "learning_rate": 0.00019999907320994372, - "loss": 46.0, - "step": 8520 - }, - { - "epoch": 1.372237207617054, - "grad_norm": 0.00335302553139627, - "learning_rate": 0.00019999907299211926, - "loss": 46.0, - "step": 8521 - }, - { - "epoch": 1.3723982446958412, - "grad_norm": 0.002353750402107835, - "learning_rate": 0.00019999907277426922, - "loss": 46.0, - "step": 8522 - }, - { - "epoch": 1.3725592817746286, - "grad_norm": 0.0003986102237831801, - "learning_rate": 0.00019999907255639356, - "loss": 46.0, - "step": 8523 - }, - { - "epoch": 1.372720318853416, - "grad_norm": 0.0026938191149383783, - "learning_rate": 0.00019999907233849231, - "loss": 46.0, - "step": 8524 - }, - { - "epoch": 1.3728813559322033, - "grad_norm": 0.006727281026542187, - "learning_rate": 0.00019999907212056548, - "loss": 46.0, - "step": 8525 - }, - { - "epoch": 1.3730423930109907, - "grad_norm": 0.0052147917449474335, - "learning_rate": 0.00019999907190261303, - "loss": 46.0, - "step": 8526 - }, - { - "epoch": 1.3732034300897782, - "grad_norm": 0.0011838451027870178, - "learning_rate": 0.00019999907168463503, - "loss": 46.0, - "step": 8527 - }, - { - "epoch": 1.3733644671685656, - "grad_norm": 0.0006170262349769473, - "learning_rate": 0.00019999907146663138, - "loss": 46.0, - "step": 8528 - }, - { - "epoch": 1.373525504247353, - "grad_norm": 0.0008709498215466738, - "learning_rate": 0.00019999907124860217, - "loss": 46.0, - "step": 8529 - }, - { - "epoch": 1.3736865413261403, - "grad_norm": 0.0012861124705523252, - "learning_rate": 0.00019999907103054735, - "loss": 46.0, - "step": 8530 - }, - { - "epoch": 1.3738475784049278, - "grad_norm": 0.002076564123854041, - "learning_rate": 0.00019999907081246697, - "loss": 46.0, - "step": 8531 - }, - { - "epoch": 1.374008615483715, - "grad_norm": 0.0015019255224615335, - "learning_rate": 0.00019999907059436097, - "loss": 46.0, - "step": 8532 - }, - { - "epoch": 1.3741696525625025, - "grad_norm": 0.0017158534610643983, - "learning_rate": 0.00019999907037622936, - "loss": 46.0, - "step": 8533 - }, - { - "epoch": 1.37433068964129, - "grad_norm": 0.002254312625154853, - "learning_rate": 0.0001999990701580722, - "loss": 46.0, - "step": 8534 - }, - { - "epoch": 1.3744917267200774, - "grad_norm": 0.0016968031413853168, - "learning_rate": 0.0001999990699398894, - "loss": 46.0, - "step": 8535 - }, - { - "epoch": 1.3746527637988648, - "grad_norm": 0.0005931690684519708, - "learning_rate": 0.000199999069721681, - "loss": 46.0, - "step": 8536 - }, - { - "epoch": 1.374813800877652, - "grad_norm": 0.002256153617054224, - "learning_rate": 0.00019999906950344705, - "loss": 46.0, - "step": 8537 - }, - { - "epoch": 1.3749748379564395, - "grad_norm": 0.0028346905019134283, - "learning_rate": 0.00019999906928518744, - "loss": 46.0, - "step": 8538 - }, - { - "epoch": 1.375135875035227, - "grad_norm": 0.0025116789620369673, - "learning_rate": 0.00019999906906690228, - "loss": 46.0, - "step": 8539 - }, - { - "epoch": 1.3752969121140142, - "grad_norm": 0.0016416899161413312, - "learning_rate": 0.00019999906884859154, - "loss": 46.0, - "step": 8540 - }, - { - "epoch": 1.3754579491928016, - "grad_norm": 0.0009125908836722374, - "learning_rate": 0.0001999990686302552, - "loss": 46.0, - "step": 8541 - }, - { - "epoch": 1.375618986271589, - "grad_norm": 0.002481288742274046, - "learning_rate": 0.00019999906841189325, - "loss": 46.0, - "step": 8542 - }, - { - "epoch": 1.3757800233503765, - "grad_norm": 0.0016046366654336452, - "learning_rate": 0.00019999906819350569, - "loss": 46.0, - "step": 8543 - }, - { - "epoch": 1.3759410604291638, - "grad_norm": 0.0023816947359591722, - "learning_rate": 0.0001999990679750926, - "loss": 46.0, - "step": 8544 - }, - { - "epoch": 1.3761020975079512, - "grad_norm": 0.0014378972118720412, - "learning_rate": 0.00019999906775665385, - "loss": 46.0, - "step": 8545 - }, - { - "epoch": 1.3762631345867387, - "grad_norm": 0.0006608270341530442, - "learning_rate": 0.0001999990675381895, - "loss": 46.0, - "step": 8546 - }, - { - "epoch": 1.3764241716655259, - "grad_norm": 0.0014959038235247135, - "learning_rate": 0.00019999906731969959, - "loss": 46.0, - "step": 8547 - }, - { - "epoch": 1.3765852087443133, - "grad_norm": 0.0011902515543624759, - "learning_rate": 0.0001999990671011841, - "loss": 46.0, - "step": 8548 - }, - { - "epoch": 1.3767462458231008, - "grad_norm": 0.009871224872767925, - "learning_rate": 0.00019999906688264297, - "loss": 46.0, - "step": 8549 - }, - { - "epoch": 1.3769072829018882, - "grad_norm": 0.0010699726408347487, - "learning_rate": 0.00019999906666407627, - "loss": 46.0, - "step": 8550 - }, - { - "epoch": 1.3770683199806755, - "grad_norm": 0.008970269002020359, - "learning_rate": 0.00019999906644548396, - "loss": 46.0, - "step": 8551 - }, - { - "epoch": 1.377229357059463, - "grad_norm": 0.0017508044838905334, - "learning_rate": 0.00019999906622686608, - "loss": 46.0, - "step": 8552 - }, - { - "epoch": 1.3773903941382504, - "grad_norm": 0.001487939734943211, - "learning_rate": 0.0001999990660082226, - "loss": 46.0, - "step": 8553 - }, - { - "epoch": 1.3775514312170376, - "grad_norm": 0.0007740810397081077, - "learning_rate": 0.00019999906578955349, - "loss": 46.0, - "step": 8554 - }, - { - "epoch": 1.377712468295825, - "grad_norm": 0.004181104712188244, - "learning_rate": 0.00019999906557085882, - "loss": 46.0, - "step": 8555 - }, - { - "epoch": 1.3778735053746125, - "grad_norm": 0.0027874098159372807, - "learning_rate": 0.00019999906535213854, - "loss": 46.0, - "step": 8556 - }, - { - "epoch": 1.3780345424534, - "grad_norm": 0.008951113559305668, - "learning_rate": 0.00019999906513339268, - "loss": 46.0, - "step": 8557 - }, - { - "epoch": 1.3781955795321874, - "grad_norm": 0.0015791063196957111, - "learning_rate": 0.00019999906491462123, - "loss": 46.0, - "step": 8558 - }, - { - "epoch": 1.3783566166109746, - "grad_norm": 0.001212267903611064, - "learning_rate": 0.00019999906469582416, - "loss": 46.0, - "step": 8559 - }, - { - "epoch": 1.378517653689762, - "grad_norm": 0.0009738767403177917, - "learning_rate": 0.00019999906447700153, - "loss": 46.0, - "step": 8560 - }, - { - "epoch": 1.3786786907685493, - "grad_norm": 0.0019429456442594528, - "learning_rate": 0.00019999906425815326, - "loss": 46.0, - "step": 8561 - }, - { - "epoch": 1.3788397278473368, - "grad_norm": 0.005254549905657768, - "learning_rate": 0.00019999906403927943, - "loss": 46.0, - "step": 8562 - }, - { - "epoch": 1.3790007649261242, - "grad_norm": 0.00786639004945755, - "learning_rate": 0.00019999906382038002, - "loss": 46.0, - "step": 8563 - }, - { - "epoch": 1.3791618020049117, - "grad_norm": 0.0016958412015810609, - "learning_rate": 0.00019999906360145496, - "loss": 46.0, - "step": 8564 - }, - { - "epoch": 1.3793228390836991, - "grad_norm": 0.004124409053474665, - "learning_rate": 0.00019999906338250434, - "loss": 46.0, - "step": 8565 - }, - { - "epoch": 1.3794838761624864, - "grad_norm": 0.0012453460367396474, - "learning_rate": 0.0001999990631635281, - "loss": 46.0, - "step": 8566 - }, - { - "epoch": 1.3796449132412738, - "grad_norm": 0.004079313017427921, - "learning_rate": 0.0001999990629445263, - "loss": 46.0, - "step": 8567 - }, - { - "epoch": 1.3798059503200613, - "grad_norm": 0.0018956586718559265, - "learning_rate": 0.0001999990627254989, - "loss": 46.0, - "step": 8568 - }, - { - "epoch": 1.3799669873988485, - "grad_norm": 0.005786104127764702, - "learning_rate": 0.0001999990625064459, - "loss": 46.0, - "step": 8569 - }, - { - "epoch": 1.380128024477636, - "grad_norm": 0.0016839215531945229, - "learning_rate": 0.00019999906228736732, - "loss": 46.0, - "step": 8570 - }, - { - "epoch": 1.3802890615564234, - "grad_norm": 0.0018276222981512547, - "learning_rate": 0.00019999906206826312, - "loss": 46.0, - "step": 8571 - }, - { - "epoch": 1.3804500986352108, - "grad_norm": 0.0021915456745773554, - "learning_rate": 0.00019999906184913334, - "loss": 46.0, - "step": 8572 - }, - { - "epoch": 1.380611135713998, - "grad_norm": 0.001955562736839056, - "learning_rate": 0.00019999906162997794, - "loss": 46.0, - "step": 8573 - }, - { - "epoch": 1.3807721727927855, - "grad_norm": 0.003083518473431468, - "learning_rate": 0.000199999061410797, - "loss": 46.0, - "step": 8574 - }, - { - "epoch": 1.380933209871573, - "grad_norm": 0.0024714069440960884, - "learning_rate": 0.0001999990611915904, - "loss": 46.0, - "step": 8575 - }, - { - "epoch": 1.3810942469503602, - "grad_norm": 0.0015121960313990712, - "learning_rate": 0.00019999906097235823, - "loss": 46.0, - "step": 8576 - }, - { - "epoch": 1.3812552840291477, - "grad_norm": 0.001893801148980856, - "learning_rate": 0.00019999906075310046, - "loss": 46.0, - "step": 8577 - }, - { - "epoch": 1.381416321107935, - "grad_norm": 0.0029527561273425817, - "learning_rate": 0.00019999906053381713, - "loss": 46.0, - "step": 8578 - }, - { - "epoch": 1.3815773581867226, - "grad_norm": 0.002330329967662692, - "learning_rate": 0.00019999906031450816, - "loss": 46.0, - "step": 8579 - }, - { - "epoch": 1.38173839526551, - "grad_norm": 0.0015530623495578766, - "learning_rate": 0.00019999906009517365, - "loss": 46.0, - "step": 8580 - }, - { - "epoch": 1.3818994323442972, - "grad_norm": 0.00039809252484701574, - "learning_rate": 0.00019999905987581348, - "loss": 46.0, - "step": 8581 - }, - { - "epoch": 1.3820604694230847, - "grad_norm": 0.0032321594189852476, - "learning_rate": 0.00019999905965642777, - "loss": 46.0, - "step": 8582 - }, - { - "epoch": 1.382221506501872, - "grad_norm": 0.0016787732020020485, - "learning_rate": 0.00019999905943701642, - "loss": 46.0, - "step": 8583 - }, - { - "epoch": 1.3823825435806594, - "grad_norm": 0.0013373702531680465, - "learning_rate": 0.0001999990592175795, - "loss": 46.0, - "step": 8584 - }, - { - "epoch": 1.3825435806594468, - "grad_norm": 0.0046799397096037865, - "learning_rate": 0.000199999058998117, - "loss": 46.0, - "step": 8585 - }, - { - "epoch": 1.3827046177382343, - "grad_norm": 0.007765064015984535, - "learning_rate": 0.00019999905877862888, - "loss": 46.0, - "step": 8586 - }, - { - "epoch": 1.3828656548170217, - "grad_norm": 0.005820256657898426, - "learning_rate": 0.00019999905855911515, - "loss": 46.0, - "step": 8587 - }, - { - "epoch": 1.383026691895809, - "grad_norm": 0.002248318400233984, - "learning_rate": 0.0001999990583395759, - "loss": 46.0, - "step": 8588 - }, - { - "epoch": 1.3831877289745964, - "grad_norm": 0.0011756689054891467, - "learning_rate": 0.000199999058120011, - "loss": 46.0, - "step": 8589 - }, - { - "epoch": 1.3833487660533839, - "grad_norm": 0.0028867514338344336, - "learning_rate": 0.0001999990579004205, - "loss": 46.0, - "step": 8590 - }, - { - "epoch": 1.383509803132171, - "grad_norm": 0.0005549927009269595, - "learning_rate": 0.0001999990576808044, - "loss": 46.0, - "step": 8591 - }, - { - "epoch": 1.3836708402109585, - "grad_norm": 0.0016280576819553971, - "learning_rate": 0.00019999905746116272, - "loss": 46.0, - "step": 8592 - }, - { - "epoch": 1.383831877289746, - "grad_norm": 0.003444229019805789, - "learning_rate": 0.00019999905724149545, - "loss": 46.0, - "step": 8593 - }, - { - "epoch": 1.3839929143685334, - "grad_norm": 0.0005017066723667085, - "learning_rate": 0.00019999905702180258, - "loss": 46.0, - "step": 8594 - }, - { - "epoch": 1.3841539514473207, - "grad_norm": 0.003034519497305155, - "learning_rate": 0.00019999905680208413, - "loss": 46.0, - "step": 8595 - }, - { - "epoch": 1.3843149885261081, - "grad_norm": 0.0020030911546200514, - "learning_rate": 0.00019999905658234007, - "loss": 46.0, - "step": 8596 - }, - { - "epoch": 1.3844760256048956, - "grad_norm": 0.003164656227454543, - "learning_rate": 0.00019999905636257042, - "loss": 46.0, - "step": 8597 - }, - { - "epoch": 1.3846370626836828, - "grad_norm": 0.0007547682034783065, - "learning_rate": 0.00019999905614277516, - "loss": 46.0, - "step": 8598 - }, - { - "epoch": 1.3847980997624703, - "grad_norm": 0.004079625476151705, - "learning_rate": 0.0001999990559229543, - "loss": 46.0, - "step": 8599 - }, - { - "epoch": 1.3849591368412577, - "grad_norm": 0.00556177319958806, - "learning_rate": 0.00019999905570310792, - "loss": 46.0, - "step": 8600 - }, - { - "epoch": 1.3851201739200452, - "grad_norm": 0.002690773457288742, - "learning_rate": 0.00019999905548323587, - "loss": 46.0, - "step": 8601 - }, - { - "epoch": 1.3852812109988326, - "grad_norm": 0.0009012288646772504, - "learning_rate": 0.00019999905526333825, - "loss": 46.0, - "step": 8602 - }, - { - "epoch": 1.3854422480776198, - "grad_norm": 0.0029388947878032923, - "learning_rate": 0.00019999905504341502, - "loss": 46.0, - "step": 8603 - }, - { - "epoch": 1.3856032851564073, - "grad_norm": 0.0008120383718051016, - "learning_rate": 0.0001999990548234662, - "loss": 46.0, - "step": 8604 - }, - { - "epoch": 1.3857643222351945, - "grad_norm": 0.0033238877076655626, - "learning_rate": 0.0001999990546034918, - "loss": 46.0, - "step": 8605 - }, - { - "epoch": 1.385925359313982, - "grad_norm": 0.00048069297918118536, - "learning_rate": 0.0001999990543834918, - "loss": 46.0, - "step": 8606 - }, - { - "epoch": 1.3860863963927694, - "grad_norm": 0.0004616714140865952, - "learning_rate": 0.0001999990541634662, - "loss": 46.0, - "step": 8607 - }, - { - "epoch": 1.3862474334715569, - "grad_norm": 0.0020958795212209225, - "learning_rate": 0.000199999053943415, - "loss": 46.0, - "step": 8608 - }, - { - "epoch": 1.3864084705503443, - "grad_norm": 0.005875249858945608, - "learning_rate": 0.00019999905372333822, - "loss": 46.0, - "step": 8609 - }, - { - "epoch": 1.3865695076291316, - "grad_norm": 0.00039585522608831525, - "learning_rate": 0.00019999905350323585, - "loss": 46.0, - "step": 8610 - }, - { - "epoch": 1.386730544707919, - "grad_norm": 0.002378098201006651, - "learning_rate": 0.00019999905328310785, - "loss": 46.0, - "step": 8611 - }, - { - "epoch": 1.3868915817867065, - "grad_norm": 0.009137958288192749, - "learning_rate": 0.00019999905306295428, - "loss": 46.0, - "step": 8612 - }, - { - "epoch": 1.3870526188654937, - "grad_norm": 0.0042574042454361916, - "learning_rate": 0.00019999905284277513, - "loss": 46.0, - "step": 8613 - }, - { - "epoch": 1.3872136559442811, - "grad_norm": 0.0010369190713390708, - "learning_rate": 0.00019999905262257036, - "loss": 46.0, - "step": 8614 - }, - { - "epoch": 1.3873746930230686, - "grad_norm": 0.005388530436903238, - "learning_rate": 0.00019999905240234, - "loss": 46.0, - "step": 8615 - }, - { - "epoch": 1.387535730101856, - "grad_norm": 0.005703634582459927, - "learning_rate": 0.00019999905218208406, - "loss": 46.0, - "step": 8616 - }, - { - "epoch": 1.3876967671806433, - "grad_norm": 0.004314677324146032, - "learning_rate": 0.0001999990519618025, - "loss": 46.0, - "step": 8617 - }, - { - "epoch": 1.3878578042594307, - "grad_norm": 0.0030692103318870068, - "learning_rate": 0.00019999905174149536, - "loss": 46.0, - "step": 8618 - }, - { - "epoch": 1.3880188413382182, - "grad_norm": 0.0033350626472383738, - "learning_rate": 0.00019999905152116263, - "loss": 46.0, - "step": 8619 - }, - { - "epoch": 1.3881798784170054, - "grad_norm": 0.001150695257820189, - "learning_rate": 0.0001999990513008043, - "loss": 46.0, - "step": 8620 - }, - { - "epoch": 1.3883409154957929, - "grad_norm": 0.0031372515950351954, - "learning_rate": 0.00019999905108042038, - "loss": 46.0, - "step": 8621 - }, - { - "epoch": 1.3885019525745803, - "grad_norm": 0.0019240840338170528, - "learning_rate": 0.00019999905086001089, - "loss": 46.0, - "step": 8622 - }, - { - "epoch": 1.3886629896533678, - "grad_norm": 0.0004467913240659982, - "learning_rate": 0.00019999905063957575, - "loss": 46.0, - "step": 8623 - }, - { - "epoch": 1.3888240267321552, - "grad_norm": 0.0076491315849125385, - "learning_rate": 0.00019999905041911503, - "loss": 46.0, - "step": 8624 - }, - { - "epoch": 1.3889850638109424, - "grad_norm": 0.0015910729998722672, - "learning_rate": 0.00019999905019862875, - "loss": 46.0, - "step": 8625 - }, - { - "epoch": 1.38914610088973, - "grad_norm": 0.00404138071462512, - "learning_rate": 0.00019999904997811685, - "loss": 46.0, - "step": 8626 - }, - { - "epoch": 1.3893071379685171, - "grad_norm": 0.0019203597912564874, - "learning_rate": 0.00019999904975757934, - "loss": 46.0, - "step": 8627 - }, - { - "epoch": 1.3894681750473046, - "grad_norm": 0.0036678712349385023, - "learning_rate": 0.00019999904953701627, - "loss": 46.0, - "step": 8628 - }, - { - "epoch": 1.389629212126092, - "grad_norm": 0.004156417679041624, - "learning_rate": 0.0001999990493164276, - "loss": 46.0, - "step": 8629 - }, - { - "epoch": 1.3897902492048795, - "grad_norm": 0.000452744570793584, - "learning_rate": 0.00019999904909581332, - "loss": 46.0, - "step": 8630 - }, - { - "epoch": 1.389951286283667, - "grad_norm": 0.0016183399129658937, - "learning_rate": 0.00019999904887517343, - "loss": 46.0, - "step": 8631 - }, - { - "epoch": 1.3901123233624542, - "grad_norm": 0.0013192167971283197, - "learning_rate": 0.000199999048654508, - "loss": 46.0, - "step": 8632 - }, - { - "epoch": 1.3902733604412416, - "grad_norm": 0.004599005915224552, - "learning_rate": 0.00019999904843381693, - "loss": 46.0, - "step": 8633 - }, - { - "epoch": 1.390434397520029, - "grad_norm": 0.0008950078627094626, - "learning_rate": 0.00019999904821310028, - "loss": 46.0, - "step": 8634 - }, - { - "epoch": 1.3905954345988163, - "grad_norm": 0.001665567047894001, - "learning_rate": 0.00019999904799235802, - "loss": 46.0, - "step": 8635 - }, - { - "epoch": 1.3907564716776037, - "grad_norm": 0.003530068090185523, - "learning_rate": 0.0001999990477715902, - "loss": 46.0, - "step": 8636 - }, - { - "epoch": 1.3909175087563912, - "grad_norm": 0.0019249017350375652, - "learning_rate": 0.00019999904755079674, - "loss": 46.0, - "step": 8637 - }, - { - "epoch": 1.3910785458351786, - "grad_norm": 0.0029693576507270336, - "learning_rate": 0.0001999990473299777, - "loss": 46.0, - "step": 8638 - }, - { - "epoch": 1.3912395829139659, - "grad_norm": 0.001407565432600677, - "learning_rate": 0.00019999904710913308, - "loss": 46.0, - "step": 8639 - }, - { - "epoch": 1.3914006199927533, - "grad_norm": 0.007015765644609928, - "learning_rate": 0.00019999904688826288, - "loss": 46.0, - "step": 8640 - }, - { - "epoch": 1.3915616570715408, - "grad_norm": 0.003454859834164381, - "learning_rate": 0.00019999904666736704, - "loss": 46.0, - "step": 8641 - }, - { - "epoch": 1.391722694150328, - "grad_norm": 0.0015796166844666004, - "learning_rate": 0.00019999904644644564, - "loss": 46.0, - "step": 8642 - }, - { - "epoch": 1.3918837312291155, - "grad_norm": 0.005556751508265734, - "learning_rate": 0.00019999904622549863, - "loss": 46.0, - "step": 8643 - }, - { - "epoch": 1.392044768307903, - "grad_norm": 0.0014212372479960322, - "learning_rate": 0.00019999904600452603, - "loss": 46.0, - "step": 8644 - }, - { - "epoch": 1.3922058053866904, - "grad_norm": 0.0007226198213174939, - "learning_rate": 0.00019999904578352787, - "loss": 46.0, - "step": 8645 - }, - { - "epoch": 1.3923668424654776, - "grad_norm": 0.007025664672255516, - "learning_rate": 0.00019999904556250407, - "loss": 46.0, - "step": 8646 - }, - { - "epoch": 1.392527879544265, - "grad_norm": 0.0009287431603297591, - "learning_rate": 0.00019999904534145468, - "loss": 46.0, - "step": 8647 - }, - { - "epoch": 1.3926889166230525, - "grad_norm": 0.004047059919685125, - "learning_rate": 0.0001999990451203797, - "loss": 46.0, - "step": 8648 - }, - { - "epoch": 1.3928499537018397, - "grad_norm": 0.003496703924611211, - "learning_rate": 0.0001999990448992791, - "loss": 46.0, - "step": 8649 - }, - { - "epoch": 1.3930109907806272, - "grad_norm": 0.0007651507621631026, - "learning_rate": 0.00019999904467815296, - "loss": 46.0, - "step": 8650 - }, - { - "epoch": 1.3931720278594146, - "grad_norm": 0.003980850335210562, - "learning_rate": 0.0001999990444570012, - "loss": 46.0, - "step": 8651 - }, - { - "epoch": 1.393333064938202, - "grad_norm": 0.007391641847789288, - "learning_rate": 0.00019999904423582385, - "loss": 46.0, - "step": 8652 - }, - { - "epoch": 1.3934941020169895, - "grad_norm": 0.008265133015811443, - "learning_rate": 0.00019999904401462088, - "loss": 46.0, - "step": 8653 - }, - { - "epoch": 1.3936551390957768, - "grad_norm": 0.00351297901943326, - "learning_rate": 0.00019999904379339233, - "loss": 46.0, - "step": 8654 - }, - { - "epoch": 1.3938161761745642, - "grad_norm": 0.0011872978648170829, - "learning_rate": 0.00019999904357213821, - "loss": 46.0, - "step": 8655 - }, - { - "epoch": 1.3939772132533517, - "grad_norm": 0.0021320045925676823, - "learning_rate": 0.00019999904335085846, - "loss": 46.0, - "step": 8656 - }, - { - "epoch": 1.394138250332139, - "grad_norm": 0.0029541810508817434, - "learning_rate": 0.00019999904312955315, - "loss": 46.0, - "step": 8657 - }, - { - "epoch": 1.3942992874109263, - "grad_norm": 0.004228669218719006, - "learning_rate": 0.00019999904290822222, - "loss": 46.0, - "step": 8658 - }, - { - "epoch": 1.3944603244897138, - "grad_norm": 0.0008973954827524722, - "learning_rate": 0.0001999990426868657, - "loss": 46.0, - "step": 8659 - }, - { - "epoch": 1.3946213615685012, - "grad_norm": 0.00026754557620733976, - "learning_rate": 0.00019999904246548357, - "loss": 46.0, - "step": 8660 - }, - { - "epoch": 1.3947823986472885, - "grad_norm": 0.0018369912868365645, - "learning_rate": 0.00019999904224407588, - "loss": 46.0, - "step": 8661 - }, - { - "epoch": 1.394943435726076, - "grad_norm": 0.0009069963707588613, - "learning_rate": 0.00019999904202264258, - "loss": 46.0, - "step": 8662 - }, - { - "epoch": 1.3951044728048634, - "grad_norm": 0.0014931592158973217, - "learning_rate": 0.00019999904180118368, - "loss": 46.0, - "step": 8663 - }, - { - "epoch": 1.3952655098836506, - "grad_norm": 0.0027464760933071375, - "learning_rate": 0.0001999990415796992, - "loss": 46.0, - "step": 8664 - }, - { - "epoch": 1.395426546962438, - "grad_norm": 0.0018627849640324712, - "learning_rate": 0.0001999990413581891, - "loss": 46.0, - "step": 8665 - }, - { - "epoch": 1.3955875840412255, - "grad_norm": 0.0030594351701438427, - "learning_rate": 0.00019999904113665343, - "loss": 46.0, - "step": 8666 - }, - { - "epoch": 1.395748621120013, - "grad_norm": 0.0012963516637682915, - "learning_rate": 0.00019999904091509214, - "loss": 46.0, - "step": 8667 - }, - { - "epoch": 1.3959096581988002, - "grad_norm": 0.004459632560610771, - "learning_rate": 0.00019999904069350528, - "loss": 46.0, - "step": 8668 - }, - { - "epoch": 1.3960706952775876, - "grad_norm": 0.00065183563856408, - "learning_rate": 0.0001999990404718928, - "loss": 46.0, - "step": 8669 - }, - { - "epoch": 1.396231732356375, - "grad_norm": 0.001076934626325965, - "learning_rate": 0.00019999904025025473, - "loss": 46.0, - "step": 8670 - }, - { - "epoch": 1.3963927694351623, - "grad_norm": 0.0009930869564414024, - "learning_rate": 0.00019999904002859109, - "loss": 46.0, - "step": 8671 - }, - { - "epoch": 1.3965538065139498, - "grad_norm": 0.002097767312079668, - "learning_rate": 0.00019999903980690185, - "loss": 46.0, - "step": 8672 - }, - { - "epoch": 1.3967148435927372, - "grad_norm": 0.004299834370613098, - "learning_rate": 0.000199999039585187, - "loss": 46.0, - "step": 8673 - }, - { - "epoch": 1.3968758806715247, - "grad_norm": 0.001463218592107296, - "learning_rate": 0.00019999903936344655, - "loss": 46.0, - "step": 8674 - }, - { - "epoch": 1.3970369177503121, - "grad_norm": 0.002759312279522419, - "learning_rate": 0.00019999903914168053, - "loss": 46.0, - "step": 8675 - }, - { - "epoch": 1.3971979548290994, - "grad_norm": 0.00230770418420434, - "learning_rate": 0.00019999903891988893, - "loss": 46.0, - "step": 8676 - }, - { - "epoch": 1.3973589919078868, - "grad_norm": 0.0014615310356020927, - "learning_rate": 0.00019999903869807168, - "loss": 46.0, - "step": 8677 - }, - { - "epoch": 1.397520028986674, - "grad_norm": 0.014054329134523869, - "learning_rate": 0.00019999903847622887, - "loss": 46.0, - "step": 8678 - }, - { - "epoch": 1.3976810660654615, - "grad_norm": 0.0041089532896876335, - "learning_rate": 0.00019999903825436045, - "loss": 46.0, - "step": 8679 - }, - { - "epoch": 1.397842103144249, - "grad_norm": 0.001594877801835537, - "learning_rate": 0.00019999903803246644, - "loss": 46.0, - "step": 8680 - }, - { - "epoch": 1.3980031402230364, - "grad_norm": 0.0009906088234856725, - "learning_rate": 0.00019999903781054684, - "loss": 46.0, - "step": 8681 - }, - { - "epoch": 1.3981641773018239, - "grad_norm": 0.004922871477901936, - "learning_rate": 0.00019999903758860166, - "loss": 46.0, - "step": 8682 - }, - { - "epoch": 1.398325214380611, - "grad_norm": 0.0011025634594261646, - "learning_rate": 0.00019999903736663086, - "loss": 46.0, - "step": 8683 - }, - { - "epoch": 1.3984862514593985, - "grad_norm": 0.0009672152809798717, - "learning_rate": 0.00019999903714463448, - "loss": 46.0, - "step": 8684 - }, - { - "epoch": 1.398647288538186, - "grad_norm": 0.00533898314461112, - "learning_rate": 0.00019999903692261248, - "loss": 46.0, - "step": 8685 - }, - { - "epoch": 1.3988083256169732, - "grad_norm": 0.0032699699513614178, - "learning_rate": 0.00019999903670056492, - "loss": 46.0, - "step": 8686 - }, - { - "epoch": 1.3989693626957607, - "grad_norm": 0.0008260086760856211, - "learning_rate": 0.00019999903647849174, - "loss": 46.0, - "step": 8687 - }, - { - "epoch": 1.3991303997745481, - "grad_norm": 0.0016012934502214193, - "learning_rate": 0.000199999036256393, - "loss": 46.0, - "step": 8688 - }, - { - "epoch": 1.3992914368533356, - "grad_norm": 0.0007186186849139631, - "learning_rate": 0.00019999903603426863, - "loss": 46.0, - "step": 8689 - }, - { - "epoch": 1.3994524739321228, - "grad_norm": 0.0034034280106425285, - "learning_rate": 0.0001999990358121187, - "loss": 46.0, - "step": 8690 - }, - { - "epoch": 1.3996135110109102, - "grad_norm": 0.002227758290246129, - "learning_rate": 0.00019999903558994312, - "loss": 46.0, - "step": 8691 - }, - { - "epoch": 1.3997745480896977, - "grad_norm": 0.004060195758938789, - "learning_rate": 0.00019999903536774199, - "loss": 46.0, - "step": 8692 - }, - { - "epoch": 1.399935585168485, - "grad_norm": 0.0006188918487168849, - "learning_rate": 0.00019999903514551526, - "loss": 46.0, - "step": 8693 - }, - { - "epoch": 1.4000966222472724, - "grad_norm": 0.0016157592181116343, - "learning_rate": 0.00019999903492326292, - "loss": 46.0, - "step": 8694 - }, - { - "epoch": 1.4002576593260598, - "grad_norm": 0.003066223580390215, - "learning_rate": 0.000199999034700985, - "loss": 46.0, - "step": 8695 - }, - { - "epoch": 1.4004186964048473, - "grad_norm": 0.0017708541126921773, - "learning_rate": 0.0001999990344786815, - "loss": 46.0, - "step": 8696 - }, - { - "epoch": 1.4005797334836347, - "grad_norm": 0.0019677416421473026, - "learning_rate": 0.0001999990342563524, - "loss": 46.0, - "step": 8697 - }, - { - "epoch": 1.400740770562422, - "grad_norm": 0.0005356779438443482, - "learning_rate": 0.00019999903403399767, - "loss": 46.0, - "step": 8698 - }, - { - "epoch": 1.4009018076412094, - "grad_norm": 0.0047472030855715275, - "learning_rate": 0.00019999903381161735, - "loss": 46.0, - "step": 8699 - }, - { - "epoch": 1.4010628447199966, - "grad_norm": 0.0035157930105924606, - "learning_rate": 0.00019999903358921146, - "loss": 46.0, - "step": 8700 - }, - { - "epoch": 1.401223881798784, - "grad_norm": 0.0008859345107339323, - "learning_rate": 0.00019999903336677995, - "loss": 46.0, - "step": 8701 - }, - { - "epoch": 1.4013849188775716, - "grad_norm": 0.001499322708696127, - "learning_rate": 0.0001999990331443229, - "loss": 46.0, - "step": 8702 - }, - { - "epoch": 1.401545955956359, - "grad_norm": 0.012457621283829212, - "learning_rate": 0.0001999990329218402, - "loss": 46.0, - "step": 8703 - }, - { - "epoch": 1.4017069930351465, - "grad_norm": 0.0012489799410104752, - "learning_rate": 0.00019999903269933192, - "loss": 46.0, - "step": 8704 - }, - { - "epoch": 1.4018680301139337, - "grad_norm": 0.0008658130536787212, - "learning_rate": 0.00019999903247679805, - "loss": 46.0, - "step": 8705 - }, - { - "epoch": 1.4020290671927211, - "grad_norm": 0.00033852504566311836, - "learning_rate": 0.00019999903225423858, - "loss": 46.0, - "step": 8706 - }, - { - "epoch": 1.4021901042715086, - "grad_norm": 0.0016483375802636147, - "learning_rate": 0.0001999990320316535, - "loss": 46.0, - "step": 8707 - }, - { - "epoch": 1.4023511413502958, - "grad_norm": 0.0017330390401184559, - "learning_rate": 0.00019999903180904286, - "loss": 46.0, - "step": 8708 - }, - { - "epoch": 1.4025121784290833, - "grad_norm": 0.0009625107049942017, - "learning_rate": 0.0001999990315864066, - "loss": 46.0, - "step": 8709 - }, - { - "epoch": 1.4026732155078707, - "grad_norm": 0.0009603157523088157, - "learning_rate": 0.00019999903136374477, - "loss": 46.0, - "step": 8710 - }, - { - "epoch": 1.4028342525866582, - "grad_norm": 0.0016231354093179107, - "learning_rate": 0.00019999903114105734, - "loss": 46.0, - "step": 8711 - }, - { - "epoch": 1.4029952896654454, - "grad_norm": 0.002856927691027522, - "learning_rate": 0.0001999990309183443, - "loss": 46.0, - "step": 8712 - }, - { - "epoch": 1.4031563267442329, - "grad_norm": 0.003310267347842455, - "learning_rate": 0.00019999903069560566, - "loss": 46.0, - "step": 8713 - }, - { - "epoch": 1.4033173638230203, - "grad_norm": 0.0003559023898560554, - "learning_rate": 0.00019999903047284145, - "loss": 46.0, - "step": 8714 - }, - { - "epoch": 1.4034784009018075, - "grad_norm": 0.0037952205166220665, - "learning_rate": 0.00019999903025005162, - "loss": 46.0, - "step": 8715 - }, - { - "epoch": 1.403639437980595, - "grad_norm": 0.004152096342295408, - "learning_rate": 0.00019999903002723623, - "loss": 46.0, - "step": 8716 - }, - { - "epoch": 1.4038004750593824, - "grad_norm": 0.00553463539108634, - "learning_rate": 0.00019999902980439522, - "loss": 46.0, - "step": 8717 - }, - { - "epoch": 1.4039615121381699, - "grad_norm": 0.001983468420803547, - "learning_rate": 0.0001999990295815286, - "loss": 46.0, - "step": 8718 - }, - { - "epoch": 1.4041225492169573, - "grad_norm": 0.0009469576179981232, - "learning_rate": 0.00019999902935863642, - "loss": 46.0, - "step": 8719 - }, - { - "epoch": 1.4042835862957446, - "grad_norm": 0.0015824816655367613, - "learning_rate": 0.00019999902913571865, - "loss": 46.0, - "step": 8720 - }, - { - "epoch": 1.404444623374532, - "grad_norm": 0.004357549361884594, - "learning_rate": 0.00019999902891277524, - "loss": 46.0, - "step": 8721 - }, - { - "epoch": 1.4046056604533192, - "grad_norm": 0.002079347614198923, - "learning_rate": 0.00019999902868980628, - "loss": 46.0, - "step": 8722 - }, - { - "epoch": 1.4047666975321067, - "grad_norm": 0.0003655439068097621, - "learning_rate": 0.0001999990284668117, - "loss": 46.0, - "step": 8723 - }, - { - "epoch": 1.4049277346108942, - "grad_norm": 0.000651561887934804, - "learning_rate": 0.00019999902824379152, - "loss": 46.0, - "step": 8724 - }, - { - "epoch": 1.4050887716896816, - "grad_norm": 0.0011075891088694334, - "learning_rate": 0.00019999902802074574, - "loss": 46.0, - "step": 8725 - }, - { - "epoch": 1.405249808768469, - "grad_norm": 0.0027016333770006895, - "learning_rate": 0.0001999990277976744, - "loss": 46.0, - "step": 8726 - }, - { - "epoch": 1.4054108458472563, - "grad_norm": 0.0016776265110820532, - "learning_rate": 0.00019999902757457744, - "loss": 46.0, - "step": 8727 - }, - { - "epoch": 1.4055718829260437, - "grad_norm": 0.0007533309981226921, - "learning_rate": 0.0001999990273514549, - "loss": 46.0, - "step": 8728 - }, - { - "epoch": 1.4057329200048312, - "grad_norm": 0.0012430923525243998, - "learning_rate": 0.00019999902712830676, - "loss": 46.0, - "step": 8729 - }, - { - "epoch": 1.4058939570836184, - "grad_norm": 0.0019619695376604795, - "learning_rate": 0.000199999026905133, - "loss": 46.0, - "step": 8730 - }, - { - "epoch": 1.4060549941624059, - "grad_norm": 0.0006122136837802827, - "learning_rate": 0.00019999902668193367, - "loss": 46.0, - "step": 8731 - }, - { - "epoch": 1.4062160312411933, - "grad_norm": 0.0037623592652380466, - "learning_rate": 0.00019999902645870875, - "loss": 46.0, - "step": 8732 - }, - { - "epoch": 1.4063770683199808, - "grad_norm": 0.0016203204868361354, - "learning_rate": 0.00019999902623545824, - "loss": 46.0, - "step": 8733 - }, - { - "epoch": 1.406538105398768, - "grad_norm": 0.004895881284028292, - "learning_rate": 0.00019999902601218212, - "loss": 46.0, - "step": 8734 - }, - { - "epoch": 1.4066991424775555, - "grad_norm": 0.0008878657827153802, - "learning_rate": 0.0001999990257888804, - "loss": 46.0, - "step": 8735 - }, - { - "epoch": 1.406860179556343, - "grad_norm": 0.0007570901652798057, - "learning_rate": 0.00019999902556555309, - "loss": 46.0, - "step": 8736 - }, - { - "epoch": 1.4070212166351301, - "grad_norm": 0.006074225064367056, - "learning_rate": 0.0001999990253422002, - "loss": 46.0, - "step": 8737 - }, - { - "epoch": 1.4071822537139176, - "grad_norm": 0.009877574630081654, - "learning_rate": 0.0001999990251188217, - "loss": 46.0, - "step": 8738 - }, - { - "epoch": 1.407343290792705, - "grad_norm": 0.0013487032847478986, - "learning_rate": 0.00019999902489541764, - "loss": 46.0, - "step": 8739 - }, - { - "epoch": 1.4075043278714925, - "grad_norm": 0.0007247634348459542, - "learning_rate": 0.00019999902467198794, - "loss": 46.0, - "step": 8740 - }, - { - "epoch": 1.40766536495028, - "grad_norm": 0.002311364747583866, - "learning_rate": 0.00019999902444853266, - "loss": 46.0, - "step": 8741 - }, - { - "epoch": 1.4078264020290672, - "grad_norm": 0.001140328124165535, - "learning_rate": 0.00019999902422505178, - "loss": 46.0, - "step": 8742 - }, - { - "epoch": 1.4079874391078546, - "grad_norm": 0.0017533078789710999, - "learning_rate": 0.00019999902400154532, - "loss": 46.0, - "step": 8743 - }, - { - "epoch": 1.4081484761866419, - "grad_norm": 0.0011203738395124674, - "learning_rate": 0.00019999902377801324, - "loss": 46.0, - "step": 8744 - }, - { - "epoch": 1.4083095132654293, - "grad_norm": 0.0050362516194581985, - "learning_rate": 0.0001999990235544556, - "loss": 46.0, - "step": 8745 - }, - { - "epoch": 1.4084705503442168, - "grad_norm": 0.008753200061619282, - "learning_rate": 0.00019999902333087236, - "loss": 46.0, - "step": 8746 - }, - { - "epoch": 1.4086315874230042, - "grad_norm": 0.0038523238617926836, - "learning_rate": 0.00019999902310726352, - "loss": 46.0, - "step": 8747 - }, - { - "epoch": 1.4087926245017917, - "grad_norm": 0.010478362441062927, - "learning_rate": 0.0001999990228836291, - "loss": 46.0, - "step": 8748 - }, - { - "epoch": 1.4089536615805789, - "grad_norm": 0.002176894573494792, - "learning_rate": 0.00019999902265996903, - "loss": 46.0, - "step": 8749 - }, - { - "epoch": 1.4091146986593663, - "grad_norm": 0.002317194826900959, - "learning_rate": 0.0001999990224362834, - "loss": 46.0, - "step": 8750 - }, - { - "epoch": 1.4092757357381538, - "grad_norm": 0.0024861914571374655, - "learning_rate": 0.0001999990222125722, - "loss": 46.0, - "step": 8751 - }, - { - "epoch": 1.409436772816941, - "grad_norm": 0.0015292122261598706, - "learning_rate": 0.0001999990219888354, - "loss": 46.0, - "step": 8752 - }, - { - "epoch": 1.4095978098957285, - "grad_norm": 0.0011055519571527839, - "learning_rate": 0.00019999902176507295, - "loss": 46.0, - "step": 8753 - }, - { - "epoch": 1.409758846974516, - "grad_norm": 0.0009253533207811415, - "learning_rate": 0.00019999902154128494, - "loss": 46.0, - "step": 8754 - }, - { - "epoch": 1.4099198840533034, - "grad_norm": 0.0015908654313534498, - "learning_rate": 0.00019999902131747136, - "loss": 46.0, - "step": 8755 - }, - { - "epoch": 1.4100809211320906, - "grad_norm": 0.004116092808544636, - "learning_rate": 0.00019999902109363212, - "loss": 46.0, - "step": 8756 - }, - { - "epoch": 1.410241958210878, - "grad_norm": 0.001024664961732924, - "learning_rate": 0.00019999902086976733, - "loss": 46.0, - "step": 8757 - }, - { - "epoch": 1.4104029952896655, - "grad_norm": 0.0037672175094485283, - "learning_rate": 0.00019999902064587696, - "loss": 46.0, - "step": 8758 - }, - { - "epoch": 1.4105640323684527, - "grad_norm": 0.0013191432226449251, - "learning_rate": 0.000199999020421961, - "loss": 46.0, - "step": 8759 - }, - { - "epoch": 1.4107250694472402, - "grad_norm": 0.0021485602483153343, - "learning_rate": 0.0001999990201980194, - "loss": 46.0, - "step": 8760 - }, - { - "epoch": 1.4108861065260276, - "grad_norm": 0.0012577869929373264, - "learning_rate": 0.00019999901997405225, - "loss": 46.0, - "step": 8761 - }, - { - "epoch": 1.411047143604815, - "grad_norm": 0.0018599767936393619, - "learning_rate": 0.0001999990197500595, - "loss": 46.0, - "step": 8762 - }, - { - "epoch": 1.4112081806836023, - "grad_norm": 0.0014334272127598524, - "learning_rate": 0.00019999901952604112, - "loss": 46.0, - "step": 8763 - }, - { - "epoch": 1.4113692177623898, - "grad_norm": 0.001310729538090527, - "learning_rate": 0.00019999901930199717, - "loss": 46.0, - "step": 8764 - }, - { - "epoch": 1.4115302548411772, - "grad_norm": 0.0015597993042320013, - "learning_rate": 0.00019999901907792763, - "loss": 46.0, - "step": 8765 - }, - { - "epoch": 1.4116912919199645, - "grad_norm": 0.000865362526383251, - "learning_rate": 0.00019999901885383247, - "loss": 46.0, - "step": 8766 - }, - { - "epoch": 1.411852328998752, - "grad_norm": 0.0014334465377032757, - "learning_rate": 0.00019999901862971173, - "loss": 46.0, - "step": 8767 - }, - { - "epoch": 1.4120133660775394, - "grad_norm": 0.0009138078312389553, - "learning_rate": 0.0001999990184055654, - "loss": 46.0, - "step": 8768 - }, - { - "epoch": 1.4121744031563268, - "grad_norm": 0.0018936212873086333, - "learning_rate": 0.00019999901818139345, - "loss": 46.0, - "step": 8769 - }, - { - "epoch": 1.4123354402351143, - "grad_norm": 0.0014860385563224554, - "learning_rate": 0.00019999901795719595, - "loss": 46.0, - "step": 8770 - }, - { - "epoch": 1.4124964773139015, - "grad_norm": 0.0005086000310257077, - "learning_rate": 0.00019999901773297283, - "loss": 46.0, - "step": 8771 - }, - { - "epoch": 1.412657514392689, - "grad_norm": 0.002495589666068554, - "learning_rate": 0.00019999901750872412, - "loss": 46.0, - "step": 8772 - }, - { - "epoch": 1.4128185514714762, - "grad_norm": 0.005120695102959871, - "learning_rate": 0.00019999901728444983, - "loss": 46.0, - "step": 8773 - }, - { - "epoch": 1.4129795885502636, - "grad_norm": 0.0004170707252342254, - "learning_rate": 0.00019999901706014992, - "loss": 46.0, - "step": 8774 - }, - { - "epoch": 1.413140625629051, - "grad_norm": 0.0021690381690859795, - "learning_rate": 0.00019999901683582442, - "loss": 46.0, - "step": 8775 - }, - { - "epoch": 1.4133016627078385, - "grad_norm": 0.003178894752636552, - "learning_rate": 0.00019999901661147334, - "loss": 46.0, - "step": 8776 - }, - { - "epoch": 1.413462699786626, - "grad_norm": 0.005027687177062035, - "learning_rate": 0.00019999901638709667, - "loss": 46.0, - "step": 8777 - }, - { - "epoch": 1.4136237368654132, - "grad_norm": 0.0009337655501440167, - "learning_rate": 0.00019999901616269439, - "loss": 46.0, - "step": 8778 - }, - { - "epoch": 1.4137847739442007, - "grad_norm": 0.0008403321262449026, - "learning_rate": 0.0001999990159382665, - "loss": 46.0, - "step": 8779 - }, - { - "epoch": 1.413945811022988, - "grad_norm": 0.0018781916005536914, - "learning_rate": 0.00019999901571381303, - "loss": 46.0, - "step": 8780 - }, - { - "epoch": 1.4141068481017753, - "grad_norm": 0.004008853808045387, - "learning_rate": 0.00019999901548933398, - "loss": 46.0, - "step": 8781 - }, - { - "epoch": 1.4142678851805628, - "grad_norm": 0.0013632572954520583, - "learning_rate": 0.00019999901526482932, - "loss": 46.0, - "step": 8782 - }, - { - "epoch": 1.4144289222593502, - "grad_norm": 0.0004339565057307482, - "learning_rate": 0.00019999901504029905, - "loss": 46.0, - "step": 8783 - }, - { - "epoch": 1.4145899593381377, - "grad_norm": 0.0027160339523106813, - "learning_rate": 0.00019999901481574322, - "loss": 46.0, - "step": 8784 - }, - { - "epoch": 1.414750996416925, - "grad_norm": 0.003067605197429657, - "learning_rate": 0.0001999990145911618, - "loss": 46.0, - "step": 8785 - }, - { - "epoch": 1.4149120334957124, - "grad_norm": 0.0035614839289337397, - "learning_rate": 0.00019999901436655476, - "loss": 46.0, - "step": 8786 - }, - { - "epoch": 1.4150730705744998, - "grad_norm": 0.0018429290503263474, - "learning_rate": 0.00019999901414192214, - "loss": 46.0, - "step": 8787 - }, - { - "epoch": 1.415234107653287, - "grad_norm": 0.000458260445157066, - "learning_rate": 0.00019999901391726387, - "loss": 46.0, - "step": 8788 - }, - { - "epoch": 1.4153951447320745, - "grad_norm": 0.006405094172805548, - "learning_rate": 0.00019999901369258008, - "loss": 46.0, - "step": 8789 - }, - { - "epoch": 1.415556181810862, - "grad_norm": 0.0011929180473089218, - "learning_rate": 0.00019999901346787064, - "loss": 46.0, - "step": 8790 - }, - { - "epoch": 1.4157172188896494, - "grad_norm": 0.0028130868449807167, - "learning_rate": 0.00019999901324313567, - "loss": 46.0, - "step": 8791 - }, - { - "epoch": 1.4158782559684369, - "grad_norm": 0.0011518935207277536, - "learning_rate": 0.00019999901301837505, - "loss": 46.0, - "step": 8792 - }, - { - "epoch": 1.416039293047224, - "grad_norm": 0.0036750249564647675, - "learning_rate": 0.00019999901279358885, - "loss": 46.0, - "step": 8793 - }, - { - "epoch": 1.4162003301260115, - "grad_norm": 0.0013662953861057758, - "learning_rate": 0.00019999901256877704, - "loss": 46.0, - "step": 8794 - }, - { - "epoch": 1.4163613672047988, - "grad_norm": 0.0008359162020497024, - "learning_rate": 0.00019999901234393964, - "loss": 46.0, - "step": 8795 - }, - { - "epoch": 1.4165224042835862, - "grad_norm": 0.0008566214819438756, - "learning_rate": 0.00019999901211907668, - "loss": 46.0, - "step": 8796 - }, - { - "epoch": 1.4166834413623737, - "grad_norm": 0.0021488158963620663, - "learning_rate": 0.0001999990118941881, - "loss": 46.0, - "step": 8797 - }, - { - "epoch": 1.4168444784411611, - "grad_norm": 0.002395689720287919, - "learning_rate": 0.00019999901166927394, - "loss": 46.0, - "step": 8798 - }, - { - "epoch": 1.4170055155199486, - "grad_norm": 0.006017365492880344, - "learning_rate": 0.00019999901144433416, - "loss": 46.0, - "step": 8799 - }, - { - "epoch": 1.4171665525987358, - "grad_norm": 0.005037990864366293, - "learning_rate": 0.00019999901121936883, - "loss": 46.0, - "step": 8800 - }, - { - "epoch": 1.4173275896775233, - "grad_norm": 0.0011998401023447514, - "learning_rate": 0.00019999901099437785, - "loss": 46.0, - "step": 8801 - }, - { - "epoch": 1.4174886267563107, - "grad_norm": 0.0010431613773107529, - "learning_rate": 0.0001999990107693613, - "loss": 46.0, - "step": 8802 - }, - { - "epoch": 1.417649663835098, - "grad_norm": 0.0011488142190501094, - "learning_rate": 0.00019999901054431916, - "loss": 46.0, - "step": 8803 - }, - { - "epoch": 1.4178107009138854, - "grad_norm": 0.0015358793316408992, - "learning_rate": 0.00019999901031925142, - "loss": 46.0, - "step": 8804 - }, - { - "epoch": 1.4179717379926728, - "grad_norm": 0.00228817667812109, - "learning_rate": 0.0001999990100941581, - "loss": 46.0, - "step": 8805 - }, - { - "epoch": 1.4181327750714603, - "grad_norm": 0.002047525718808174, - "learning_rate": 0.00019999900986903917, - "loss": 46.0, - "step": 8806 - }, - { - "epoch": 1.4182938121502475, - "grad_norm": 0.0016591690946370363, - "learning_rate": 0.00019999900964389464, - "loss": 46.0, - "step": 8807 - }, - { - "epoch": 1.418454849229035, - "grad_norm": 0.0011893196497112513, - "learning_rate": 0.0001999990094187245, - "loss": 46.0, - "step": 8808 - }, - { - "epoch": 1.4186158863078224, - "grad_norm": 0.0021540282759815454, - "learning_rate": 0.0001999990091935288, - "loss": 46.0, - "step": 8809 - }, - { - "epoch": 1.4187769233866097, - "grad_norm": 0.0015994729474186897, - "learning_rate": 0.0001999990089683075, - "loss": 46.0, - "step": 8810 - }, - { - "epoch": 1.418937960465397, - "grad_norm": 0.0015133037231862545, - "learning_rate": 0.00019999900874306058, - "loss": 46.0, - "step": 8811 - }, - { - "epoch": 1.4190989975441846, - "grad_norm": 0.000605659675784409, - "learning_rate": 0.0001999990085177881, - "loss": 46.0, - "step": 8812 - }, - { - "epoch": 1.419260034622972, - "grad_norm": 0.005168214440345764, - "learning_rate": 0.00019999900829249003, - "loss": 46.0, - "step": 8813 - }, - { - "epoch": 1.4194210717017595, - "grad_norm": 0.0012356200022622943, - "learning_rate": 0.00019999900806716634, - "loss": 46.0, - "step": 8814 - }, - { - "epoch": 1.4195821087805467, - "grad_norm": 0.002502727322280407, - "learning_rate": 0.00019999900784181706, - "loss": 46.0, - "step": 8815 - }, - { - "epoch": 1.4197431458593341, - "grad_norm": 0.005924689583480358, - "learning_rate": 0.0001999990076164422, - "loss": 46.0, - "step": 8816 - }, - { - "epoch": 1.4199041829381214, - "grad_norm": 0.0018171198898926377, - "learning_rate": 0.0001999990073910417, - "loss": 46.0, - "step": 8817 - }, - { - "epoch": 1.4200652200169088, - "grad_norm": 0.003842373611405492, - "learning_rate": 0.00019999900716561567, - "loss": 46.0, - "step": 8818 - }, - { - "epoch": 1.4202262570956963, - "grad_norm": 0.0007104586693458259, - "learning_rate": 0.000199999006940164, - "loss": 46.0, - "step": 8819 - }, - { - "epoch": 1.4203872941744837, - "grad_norm": 0.004088105633854866, - "learning_rate": 0.00019999900671468674, - "loss": 46.0, - "step": 8820 - }, - { - "epoch": 1.4205483312532712, - "grad_norm": 0.0031499583274126053, - "learning_rate": 0.0001999990064891839, - "loss": 46.0, - "step": 8821 - }, - { - "epoch": 1.4207093683320584, - "grad_norm": 0.001896240166388452, - "learning_rate": 0.00019999900626365547, - "loss": 46.0, - "step": 8822 - }, - { - "epoch": 1.4208704054108459, - "grad_norm": 0.0025624840054661036, - "learning_rate": 0.0001999990060381014, - "loss": 46.0, - "step": 8823 - }, - { - "epoch": 1.4210314424896333, - "grad_norm": 0.002257821848616004, - "learning_rate": 0.0001999990058125218, - "loss": 46.0, - "step": 8824 - }, - { - "epoch": 1.4211924795684205, - "grad_norm": 0.0005156651604920626, - "learning_rate": 0.00019999900558691656, - "loss": 46.0, - "step": 8825 - }, - { - "epoch": 1.421353516647208, - "grad_norm": 0.0013510119169950485, - "learning_rate": 0.00019999900536128574, - "loss": 46.0, - "step": 8826 - }, - { - "epoch": 1.4215145537259954, - "grad_norm": 0.004422096535563469, - "learning_rate": 0.00019999900513562933, - "loss": 46.0, - "step": 8827 - }, - { - "epoch": 1.421675590804783, - "grad_norm": 0.0021187118254601955, - "learning_rate": 0.0001999990049099473, - "loss": 46.0, - "step": 8828 - }, - { - "epoch": 1.4218366278835701, - "grad_norm": 0.0037883142940700054, - "learning_rate": 0.0001999990046842397, - "loss": 46.0, - "step": 8829 - }, - { - "epoch": 1.4219976649623576, - "grad_norm": 0.0020474051125347614, - "learning_rate": 0.0001999990044585065, - "loss": 46.0, - "step": 8830 - }, - { - "epoch": 1.422158702041145, - "grad_norm": 0.0012470899382606149, - "learning_rate": 0.00019999900423274772, - "loss": 46.0, - "step": 8831 - }, - { - "epoch": 1.4223197391199323, - "grad_norm": 0.0031712038908153772, - "learning_rate": 0.00019999900400696335, - "loss": 46.0, - "step": 8832 - }, - { - "epoch": 1.4224807761987197, - "grad_norm": 0.007882462814450264, - "learning_rate": 0.00019999900378115334, - "loss": 46.0, - "step": 8833 - }, - { - "epoch": 1.4226418132775072, - "grad_norm": 0.0020494707860052586, - "learning_rate": 0.00019999900355531776, - "loss": 46.0, - "step": 8834 - }, - { - "epoch": 1.4228028503562946, - "grad_norm": 0.0005583789898082614, - "learning_rate": 0.0001999990033294566, - "loss": 46.0, - "step": 8835 - }, - { - "epoch": 1.422963887435082, - "grad_norm": 0.0033763465471565723, - "learning_rate": 0.00019999900310356986, - "loss": 46.0, - "step": 8836 - }, - { - "epoch": 1.4231249245138693, - "grad_norm": 0.0016085447277873755, - "learning_rate": 0.00019999900287765747, - "loss": 46.0, - "step": 8837 - }, - { - "epoch": 1.4232859615926567, - "grad_norm": 0.0004522383096627891, - "learning_rate": 0.00019999900265171952, - "loss": 46.0, - "step": 8838 - }, - { - "epoch": 1.423446998671444, - "grad_norm": 0.0052393963560462, - "learning_rate": 0.00019999900242575596, - "loss": 46.0, - "step": 8839 - }, - { - "epoch": 1.4236080357502314, - "grad_norm": 0.001791846239939332, - "learning_rate": 0.0001999990021997668, - "loss": 46.0, - "step": 8840 - }, - { - "epoch": 1.4237690728290189, - "grad_norm": 0.0007898471667431295, - "learning_rate": 0.0001999990019737521, - "loss": 46.0, - "step": 8841 - }, - { - "epoch": 1.4239301099078063, - "grad_norm": 0.0008404402178712189, - "learning_rate": 0.00019999900174771175, - "loss": 46.0, - "step": 8842 - }, - { - "epoch": 1.4240911469865938, - "grad_norm": 0.000669444736558944, - "learning_rate": 0.00019999900152164584, - "loss": 46.0, - "step": 8843 - }, - { - "epoch": 1.424252184065381, - "grad_norm": 0.0038019234780222178, - "learning_rate": 0.0001999990012955543, - "loss": 46.0, - "step": 8844 - }, - { - "epoch": 1.4244132211441685, - "grad_norm": 0.001707833376713097, - "learning_rate": 0.00019999900106943718, - "loss": 46.0, - "step": 8845 - }, - { - "epoch": 1.424574258222956, - "grad_norm": 0.0011256089201197028, - "learning_rate": 0.00019999900084329445, - "loss": 46.0, - "step": 8846 - }, - { - "epoch": 1.4247352953017431, - "grad_norm": 0.0015628539258614182, - "learning_rate": 0.00019999900061712616, - "loss": 46.0, - "step": 8847 - }, - { - "epoch": 1.4248963323805306, - "grad_norm": 0.0013905201340094209, - "learning_rate": 0.00019999900039093226, - "loss": 46.0, - "step": 8848 - }, - { - "epoch": 1.425057369459318, - "grad_norm": 0.0003331666230224073, - "learning_rate": 0.00019999900016471277, - "loss": 46.0, - "step": 8849 - }, - { - "epoch": 1.4252184065381055, - "grad_norm": 0.0056498427875339985, - "learning_rate": 0.00019999899993846767, - "loss": 46.0, - "step": 8850 - }, - { - "epoch": 1.4253794436168927, - "grad_norm": 0.0026023618411272764, - "learning_rate": 0.00019999899971219698, - "loss": 46.0, - "step": 8851 - }, - { - "epoch": 1.4255404806956802, - "grad_norm": 0.0017910264432430267, - "learning_rate": 0.0001999989994859007, - "loss": 46.0, - "step": 8852 - }, - { - "epoch": 1.4257015177744676, - "grad_norm": 0.004014239646494389, - "learning_rate": 0.00019999899925957884, - "loss": 46.0, - "step": 8853 - }, - { - "epoch": 1.4258625548532549, - "grad_norm": 0.0017814973834902048, - "learning_rate": 0.00019999899903323136, - "loss": 46.0, - "step": 8854 - }, - { - "epoch": 1.4260235919320423, - "grad_norm": 0.0007097472553141415, - "learning_rate": 0.00019999899880685827, - "loss": 46.0, - "step": 8855 - }, - { - "epoch": 1.4261846290108298, - "grad_norm": 0.006036133039742708, - "learning_rate": 0.00019999899858045965, - "loss": 46.0, - "step": 8856 - }, - { - "epoch": 1.4263456660896172, - "grad_norm": 0.0005275346338748932, - "learning_rate": 0.0001999989983540354, - "loss": 46.0, - "step": 8857 - }, - { - "epoch": 1.4265067031684044, - "grad_norm": 0.0011749006807804108, - "learning_rate": 0.00019999899812758553, - "loss": 46.0, - "step": 8858 - }, - { - "epoch": 1.426667740247192, - "grad_norm": 0.0012769057648256421, - "learning_rate": 0.00019999899790111008, - "loss": 46.0, - "step": 8859 - }, - { - "epoch": 1.4268287773259793, - "grad_norm": 0.001751145115122199, - "learning_rate": 0.00019999899767460906, - "loss": 46.0, - "step": 8860 - }, - { - "epoch": 1.4269898144047666, - "grad_norm": 0.0005043031414970756, - "learning_rate": 0.00019999899744808244, - "loss": 46.0, - "step": 8861 - }, - { - "epoch": 1.427150851483554, - "grad_norm": 0.007926051504909992, - "learning_rate": 0.00019999899722153018, - "loss": 46.0, - "step": 8862 - }, - { - "epoch": 1.4273118885623415, - "grad_norm": 0.0040412782691419125, - "learning_rate": 0.00019999899699495237, - "loss": 46.0, - "step": 8863 - }, - { - "epoch": 1.427472925641129, - "grad_norm": 0.0006264648400247097, - "learning_rate": 0.00019999899676834896, - "loss": 46.0, - "step": 8864 - }, - { - "epoch": 1.4276339627199164, - "grad_norm": 0.0008083584834821522, - "learning_rate": 0.00019999899654171994, - "loss": 46.0, - "step": 8865 - }, - { - "epoch": 1.4277949997987036, - "grad_norm": 0.006049438379704952, - "learning_rate": 0.00019999899631506537, - "loss": 46.0, - "step": 8866 - }, - { - "epoch": 1.427956036877491, - "grad_norm": 0.0026091355830430984, - "learning_rate": 0.00019999899608838515, - "loss": 46.0, - "step": 8867 - }, - { - "epoch": 1.4281170739562785, - "grad_norm": 0.0013568110298365355, - "learning_rate": 0.00019999899586167937, - "loss": 46.0, - "step": 8868 - }, - { - "epoch": 1.4282781110350657, - "grad_norm": 0.001467092544771731, - "learning_rate": 0.00019999899563494797, - "loss": 46.0, - "step": 8869 - }, - { - "epoch": 1.4284391481138532, - "grad_norm": 0.00386932329274714, - "learning_rate": 0.000199998995408191, - "loss": 46.0, - "step": 8870 - }, - { - "epoch": 1.4286001851926406, - "grad_norm": 0.008279883302748203, - "learning_rate": 0.00019999899518140842, - "loss": 46.0, - "step": 8871 - }, - { - "epoch": 1.428761222271428, - "grad_norm": 0.0006841786671429873, - "learning_rate": 0.00019999899495460024, - "loss": 46.0, - "step": 8872 - }, - { - "epoch": 1.4289222593502153, - "grad_norm": 0.0033430345356464386, - "learning_rate": 0.00019999899472776647, - "loss": 46.0, - "step": 8873 - }, - { - "epoch": 1.4290832964290028, - "grad_norm": 0.0027362238615751266, - "learning_rate": 0.00019999899450090711, - "loss": 46.0, - "step": 8874 - }, - { - "epoch": 1.4292443335077902, - "grad_norm": 0.001019414165057242, - "learning_rate": 0.00019999899427402214, - "loss": 46.0, - "step": 8875 - }, - { - "epoch": 1.4294053705865775, - "grad_norm": 0.0004085192340426147, - "learning_rate": 0.0001999989940471116, - "loss": 46.0, - "step": 8876 - }, - { - "epoch": 1.429566407665365, - "grad_norm": 0.002970003057271242, - "learning_rate": 0.00019999899382017547, - "loss": 46.0, - "step": 8877 - }, - { - "epoch": 1.4297274447441524, - "grad_norm": 0.0005938848480582237, - "learning_rate": 0.0001999989935932137, - "loss": 46.0, - "step": 8878 - }, - { - "epoch": 1.4298884818229398, - "grad_norm": 0.0013657067902386189, - "learning_rate": 0.00019999899336622639, - "loss": 46.0, - "step": 8879 - }, - { - "epoch": 1.430049518901727, - "grad_norm": 0.001114029437303543, - "learning_rate": 0.00019999899313921345, - "loss": 46.0, - "step": 8880 - }, - { - "epoch": 1.4302105559805145, - "grad_norm": 0.002482883632183075, - "learning_rate": 0.00019999899291217493, - "loss": 46.0, - "step": 8881 - }, - { - "epoch": 1.430371593059302, - "grad_norm": 0.0020997305400669575, - "learning_rate": 0.00019999899268511082, - "loss": 46.0, - "step": 8882 - }, - { - "epoch": 1.4305326301380892, - "grad_norm": 0.00156143878120929, - "learning_rate": 0.0001999989924580211, - "loss": 46.0, - "step": 8883 - }, - { - "epoch": 1.4306936672168766, - "grad_norm": 0.005047987215220928, - "learning_rate": 0.0001999989922309058, - "loss": 46.0, - "step": 8884 - }, - { - "epoch": 1.430854704295664, - "grad_norm": 0.0011467499425634742, - "learning_rate": 0.0001999989920037649, - "loss": 46.0, - "step": 8885 - }, - { - "epoch": 1.4310157413744515, - "grad_norm": 0.004054028075188398, - "learning_rate": 0.0001999989917765984, - "loss": 46.0, - "step": 8886 - }, - { - "epoch": 1.431176778453239, - "grad_norm": 0.00480400025844574, - "learning_rate": 0.0001999989915494063, - "loss": 46.0, - "step": 8887 - }, - { - "epoch": 1.4313378155320262, - "grad_norm": 0.0008795669418759644, - "learning_rate": 0.0001999989913221886, - "loss": 46.0, - "step": 8888 - }, - { - "epoch": 1.4314988526108137, - "grad_norm": 0.0016269002808257937, - "learning_rate": 0.00019999899109494532, - "loss": 46.0, - "step": 8889 - }, - { - "epoch": 1.431659889689601, - "grad_norm": 0.0030724999960511923, - "learning_rate": 0.00019999899086767646, - "loss": 46.0, - "step": 8890 - }, - { - "epoch": 1.4318209267683883, - "grad_norm": 0.0027794321067631245, - "learning_rate": 0.000199998990640382, - "loss": 46.0, - "step": 8891 - }, - { - "epoch": 1.4319819638471758, - "grad_norm": 0.002924382919445634, - "learning_rate": 0.00019999899041306193, - "loss": 46.0, - "step": 8892 - }, - { - "epoch": 1.4321430009259632, - "grad_norm": 0.006947075482457876, - "learning_rate": 0.00019999899018571625, - "loss": 46.0, - "step": 8893 - }, - { - "epoch": 1.4323040380047507, - "grad_norm": 0.001057026325725019, - "learning_rate": 0.000199998989958345, - "loss": 46.0, - "step": 8894 - }, - { - "epoch": 1.432465075083538, - "grad_norm": 0.004213789477944374, - "learning_rate": 0.00019999898973094816, - "loss": 46.0, - "step": 8895 - }, - { - "epoch": 1.4326261121623254, - "grad_norm": 0.0013789647491648793, - "learning_rate": 0.00019999898950352573, - "loss": 46.0, - "step": 8896 - }, - { - "epoch": 1.4327871492411128, - "grad_norm": 0.007213205099105835, - "learning_rate": 0.00019999898927607767, - "loss": 46.0, - "step": 8897 - }, - { - "epoch": 1.4329481863199, - "grad_norm": 0.001731844968162477, - "learning_rate": 0.00019999898904860404, - "loss": 46.0, - "step": 8898 - }, - { - "epoch": 1.4331092233986875, - "grad_norm": 0.0005468594608828425, - "learning_rate": 0.0001999989888211048, - "loss": 46.0, - "step": 8899 - }, - { - "epoch": 1.433270260477475, - "grad_norm": 0.001397643587552011, - "learning_rate": 0.00019999898859358002, - "loss": 46.0, - "step": 8900 - }, - { - "epoch": 1.4334312975562624, - "grad_norm": 0.001007004757411778, - "learning_rate": 0.0001999989883660296, - "loss": 46.0, - "step": 8901 - }, - { - "epoch": 1.4335923346350496, - "grad_norm": 0.0024804940912872553, - "learning_rate": 0.00019999898813845358, - "loss": 46.0, - "step": 8902 - }, - { - "epoch": 1.433753371713837, - "grad_norm": 0.002691722009330988, - "learning_rate": 0.00019999898791085195, - "loss": 46.0, - "step": 8903 - }, - { - "epoch": 1.4339144087926246, - "grad_norm": 0.003183201886713505, - "learning_rate": 0.00019999898768322476, - "loss": 46.0, - "step": 8904 - }, - { - "epoch": 1.4340754458714118, - "grad_norm": 0.0037826804909855127, - "learning_rate": 0.000199998987455572, - "loss": 46.0, - "step": 8905 - }, - { - "epoch": 1.4342364829501992, - "grad_norm": 0.0005913475761190057, - "learning_rate": 0.0001999989872278936, - "loss": 46.0, - "step": 8906 - }, - { - "epoch": 1.4343975200289867, - "grad_norm": 0.000808125187177211, - "learning_rate": 0.00019999898700018962, - "loss": 46.0, - "step": 8907 - }, - { - "epoch": 1.4345585571077741, - "grad_norm": 0.0026960677932947874, - "learning_rate": 0.00019999898677246003, - "loss": 46.0, - "step": 8908 - }, - { - "epoch": 1.4347195941865616, - "grad_norm": 0.0020128260366618633, - "learning_rate": 0.00019999898654470485, - "loss": 46.0, - "step": 8909 - }, - { - "epoch": 1.4348806312653488, - "grad_norm": 0.0008650345844216645, - "learning_rate": 0.00019999898631692408, - "loss": 46.0, - "step": 8910 - }, - { - "epoch": 1.4350416683441363, - "grad_norm": 0.0032119606621563435, - "learning_rate": 0.00019999898608911773, - "loss": 46.0, - "step": 8911 - }, - { - "epoch": 1.4352027054229235, - "grad_norm": 0.004212197847664356, - "learning_rate": 0.00019999898586128576, - "loss": 46.0, - "step": 8912 - }, - { - "epoch": 1.435363742501711, - "grad_norm": 0.008080984465777874, - "learning_rate": 0.0001999989856334282, - "loss": 46.0, - "step": 8913 - }, - { - "epoch": 1.4355247795804984, - "grad_norm": 0.0026922726538032293, - "learning_rate": 0.00019999898540554506, - "loss": 46.0, - "step": 8914 - }, - { - "epoch": 1.4356858166592859, - "grad_norm": 0.00260039116255939, - "learning_rate": 0.00019999898517763633, - "loss": 46.0, - "step": 8915 - }, - { - "epoch": 1.4358468537380733, - "grad_norm": 0.004634491633623838, - "learning_rate": 0.000199998984949702, - "loss": 46.0, - "step": 8916 - }, - { - "epoch": 1.4360078908168605, - "grad_norm": 0.00044389566755853593, - "learning_rate": 0.00019999898472174206, - "loss": 46.0, - "step": 8917 - }, - { - "epoch": 1.436168927895648, - "grad_norm": 0.002899541286751628, - "learning_rate": 0.00019999898449375654, - "loss": 46.0, - "step": 8918 - }, - { - "epoch": 1.4363299649744354, - "grad_norm": 0.004273874685168266, - "learning_rate": 0.0001999989842657454, - "loss": 46.0, - "step": 8919 - }, - { - "epoch": 1.4364910020532227, - "grad_norm": 0.0012014367384836078, - "learning_rate": 0.0001999989840377087, - "loss": 46.0, - "step": 8920 - }, - { - "epoch": 1.4366520391320101, - "grad_norm": 0.002292466117069125, - "learning_rate": 0.0001999989838096464, - "loss": 46.0, - "step": 8921 - }, - { - "epoch": 1.4368130762107976, - "grad_norm": 0.0026189687196165323, - "learning_rate": 0.00019999898358155852, - "loss": 46.0, - "step": 8922 - }, - { - "epoch": 1.436974113289585, - "grad_norm": 0.0011711049592122436, - "learning_rate": 0.000199998983353445, - "loss": 46.0, - "step": 8923 - }, - { - "epoch": 1.4371351503683722, - "grad_norm": 0.0024189141113311052, - "learning_rate": 0.00019999898312530591, - "loss": 46.0, - "step": 8924 - }, - { - "epoch": 1.4372961874471597, - "grad_norm": 0.0033526746556162834, - "learning_rate": 0.00019999898289714123, - "loss": 46.0, - "step": 8925 - }, - { - "epoch": 1.4374572245259472, - "grad_norm": 0.0005484105204232037, - "learning_rate": 0.00019999898266895094, - "loss": 46.0, - "step": 8926 - }, - { - "epoch": 1.4376182616047344, - "grad_norm": 0.004912239499390125, - "learning_rate": 0.00019999898244073508, - "loss": 46.0, - "step": 8927 - }, - { - "epoch": 1.4377792986835218, - "grad_norm": 0.0005740529741160572, - "learning_rate": 0.0001999989822124936, - "loss": 46.0, - "step": 8928 - }, - { - "epoch": 1.4379403357623093, - "grad_norm": 0.002250917488709092, - "learning_rate": 0.00019999898198422652, - "loss": 46.0, - "step": 8929 - }, - { - "epoch": 1.4381013728410967, - "grad_norm": 0.007793302647769451, - "learning_rate": 0.00019999898175593388, - "loss": 46.0, - "step": 8930 - }, - { - "epoch": 1.4382624099198842, - "grad_norm": 0.0038926429115235806, - "learning_rate": 0.00019999898152761562, - "loss": 46.0, - "step": 8931 - }, - { - "epoch": 1.4384234469986714, - "grad_norm": 0.00298431608825922, - "learning_rate": 0.00019999898129927174, - "loss": 46.0, - "step": 8932 - }, - { - "epoch": 1.4385844840774589, - "grad_norm": 0.000561332970391959, - "learning_rate": 0.00019999898107090234, - "loss": 46.0, - "step": 8933 - }, - { - "epoch": 1.438745521156246, - "grad_norm": 0.004708392079919577, - "learning_rate": 0.0001999989808425073, - "loss": 46.0, - "step": 8934 - }, - { - "epoch": 1.4389065582350336, - "grad_norm": 0.003812340786680579, - "learning_rate": 0.00019999898061408663, - "loss": 46.0, - "step": 8935 - }, - { - "epoch": 1.439067595313821, - "grad_norm": 0.0015855850651860237, - "learning_rate": 0.00019999898038564043, - "loss": 46.0, - "step": 8936 - }, - { - "epoch": 1.4392286323926085, - "grad_norm": 0.005309516564011574, - "learning_rate": 0.0001999989801571686, - "loss": 46.0, - "step": 8937 - }, - { - "epoch": 1.439389669471396, - "grad_norm": 0.004938654135912657, - "learning_rate": 0.00019999897992867117, - "loss": 46.0, - "step": 8938 - }, - { - "epoch": 1.4395507065501831, - "grad_norm": 0.0016658473759889603, - "learning_rate": 0.00019999897970014816, - "loss": 46.0, - "step": 8939 - }, - { - "epoch": 1.4397117436289706, - "grad_norm": 0.0013597691431641579, - "learning_rate": 0.00019999897947159956, - "loss": 46.0, - "step": 8940 - }, - { - "epoch": 1.439872780707758, - "grad_norm": 0.0016742486041039228, - "learning_rate": 0.00019999897924302538, - "loss": 46.0, - "step": 8941 - }, - { - "epoch": 1.4400338177865453, - "grad_norm": 0.003886726452037692, - "learning_rate": 0.00019999897901442558, - "loss": 46.0, - "step": 8942 - }, - { - "epoch": 1.4401948548653327, - "grad_norm": 0.0016749916831031442, - "learning_rate": 0.0001999989787858002, - "loss": 46.0, - "step": 8943 - }, - { - "epoch": 1.4403558919441202, - "grad_norm": 0.0005074695800431073, - "learning_rate": 0.0001999989785571492, - "loss": 46.0, - "step": 8944 - }, - { - "epoch": 1.4405169290229076, - "grad_norm": 0.0006953999982215464, - "learning_rate": 0.00019999897832847263, - "loss": 46.0, - "step": 8945 - }, - { - "epoch": 1.4406779661016949, - "grad_norm": 0.0015014850068837404, - "learning_rate": 0.00019999897809977045, - "loss": 46.0, - "step": 8946 - }, - { - "epoch": 1.4408390031804823, - "grad_norm": 0.00080797856207937, - "learning_rate": 0.00019999897787104266, - "loss": 46.0, - "step": 8947 - }, - { - "epoch": 1.4410000402592698, - "grad_norm": 0.003238941775634885, - "learning_rate": 0.0001999989776422893, - "loss": 46.0, - "step": 8948 - }, - { - "epoch": 1.441161077338057, - "grad_norm": 0.0038205208256840706, - "learning_rate": 0.00019999897741351035, - "loss": 46.0, - "step": 8949 - }, - { - "epoch": 1.4413221144168444, - "grad_norm": 0.001656422158703208, - "learning_rate": 0.00019999897718470582, - "loss": 46.0, - "step": 8950 - }, - { - "epoch": 1.4414831514956319, - "grad_norm": 0.0015996687579900026, - "learning_rate": 0.00019999897695587566, - "loss": 46.0, - "step": 8951 - }, - { - "epoch": 1.4416441885744193, - "grad_norm": 0.002445490099489689, - "learning_rate": 0.00019999897672701993, - "loss": 46.0, - "step": 8952 - }, - { - "epoch": 1.4418052256532066, - "grad_norm": 0.0065863230265676975, - "learning_rate": 0.00019999897649813857, - "loss": 46.0, - "step": 8953 - }, - { - "epoch": 1.441966262731994, - "grad_norm": 0.002228823024779558, - "learning_rate": 0.00019999897626923164, - "loss": 46.0, - "step": 8954 - }, - { - "epoch": 1.4421272998107815, - "grad_norm": 0.0026095169596374035, - "learning_rate": 0.00019999897604029913, - "loss": 46.0, - "step": 8955 - }, - { - "epoch": 1.4422883368895687, - "grad_norm": 0.0015870523639023304, - "learning_rate": 0.00019999897581134102, - "loss": 46.0, - "step": 8956 - }, - { - "epoch": 1.4424493739683562, - "grad_norm": 0.002546041738241911, - "learning_rate": 0.0001999989755823573, - "loss": 46.0, - "step": 8957 - }, - { - "epoch": 1.4426104110471436, - "grad_norm": 0.0006798851536586881, - "learning_rate": 0.000199998975353348, - "loss": 46.0, - "step": 8958 - }, - { - "epoch": 1.442771448125931, - "grad_norm": 0.0013138747308403254, - "learning_rate": 0.00019999897512431306, - "loss": 46.0, - "step": 8959 - }, - { - "epoch": 1.4429324852047185, - "grad_norm": 0.0011445775162428617, - "learning_rate": 0.00019999897489525258, - "loss": 46.0, - "step": 8960 - }, - { - "epoch": 1.4430935222835057, - "grad_norm": 0.0018516422715038061, - "learning_rate": 0.0001999989746661665, - "loss": 46.0, - "step": 8961 - }, - { - "epoch": 1.4432545593622932, - "grad_norm": 0.00044858240289613605, - "learning_rate": 0.00019999897443705481, - "loss": 46.0, - "step": 8962 - }, - { - "epoch": 1.4434155964410806, - "grad_norm": 0.002639931160956621, - "learning_rate": 0.00019999897420791752, - "loss": 46.0, - "step": 8963 - }, - { - "epoch": 1.4435766335198679, - "grad_norm": 0.009227258153259754, - "learning_rate": 0.00019999897397875464, - "loss": 46.0, - "step": 8964 - }, - { - "epoch": 1.4437376705986553, - "grad_norm": 0.0008425979758612812, - "learning_rate": 0.00019999897374956617, - "loss": 46.0, - "step": 8965 - }, - { - "epoch": 1.4438987076774428, - "grad_norm": 0.0006759099778719246, - "learning_rate": 0.00019999897352035212, - "loss": 46.0, - "step": 8966 - }, - { - "epoch": 1.4440597447562302, - "grad_norm": 0.00044971969327889383, - "learning_rate": 0.00019999897329111245, - "loss": 46.0, - "step": 8967 - }, - { - "epoch": 1.4442207818350175, - "grad_norm": 0.0006275823689065874, - "learning_rate": 0.0001999989730618472, - "loss": 46.0, - "step": 8968 - }, - { - "epoch": 1.444381818913805, - "grad_norm": 0.0018190392293035984, - "learning_rate": 0.00019999897283255632, - "loss": 46.0, - "step": 8969 - }, - { - "epoch": 1.4445428559925924, - "grad_norm": 0.0032785404473543167, - "learning_rate": 0.0001999989726032399, - "loss": 46.0, - "step": 8970 - }, - { - "epoch": 1.4447038930713796, - "grad_norm": 0.0007270055939443409, - "learning_rate": 0.00019999897237389788, - "loss": 46.0, - "step": 8971 - }, - { - "epoch": 1.444864930150167, - "grad_norm": 0.0024905952159315348, - "learning_rate": 0.00019999897214453022, - "loss": 46.0, - "step": 8972 - }, - { - "epoch": 1.4450259672289545, - "grad_norm": 0.00047327953507192433, - "learning_rate": 0.000199998971915137, - "loss": 46.0, - "step": 8973 - }, - { - "epoch": 1.445187004307742, - "grad_norm": 0.0021170086693018675, - "learning_rate": 0.0001999989716857182, - "loss": 46.0, - "step": 8974 - }, - { - "epoch": 1.4453480413865292, - "grad_norm": 0.0029951727483421564, - "learning_rate": 0.00019999897145627377, - "loss": 46.0, - "step": 8975 - }, - { - "epoch": 1.4455090784653166, - "grad_norm": 0.0024707107804715633, - "learning_rate": 0.00019999897122680377, - "loss": 46.0, - "step": 8976 - }, - { - "epoch": 1.445670115544104, - "grad_norm": 0.0008560841088183224, - "learning_rate": 0.00019999897099730814, - "loss": 46.0, - "step": 8977 - }, - { - "epoch": 1.4458311526228913, - "grad_norm": 0.001000311691313982, - "learning_rate": 0.00019999897076778696, - "loss": 46.0, - "step": 8978 - }, - { - "epoch": 1.4459921897016788, - "grad_norm": 0.0013898588949814439, - "learning_rate": 0.00019999897053824014, - "loss": 46.0, - "step": 8979 - }, - { - "epoch": 1.4461532267804662, - "grad_norm": 0.002270257333293557, - "learning_rate": 0.00019999897030866776, - "loss": 46.0, - "step": 8980 - }, - { - "epoch": 1.4463142638592537, - "grad_norm": 0.0007034658337943256, - "learning_rate": 0.00019999897007906979, - "loss": 46.0, - "step": 8981 - }, - { - "epoch": 1.446475300938041, - "grad_norm": 0.003080200869590044, - "learning_rate": 0.0001999989698494462, - "loss": 46.0, - "step": 8982 - }, - { - "epoch": 1.4466363380168283, - "grad_norm": 0.005392127670347691, - "learning_rate": 0.00019999896961979703, - "loss": 46.0, - "step": 8983 - }, - { - "epoch": 1.4467973750956158, - "grad_norm": 0.0011138945119455457, - "learning_rate": 0.00019999896939012227, - "loss": 46.0, - "step": 8984 - }, - { - "epoch": 1.446958412174403, - "grad_norm": 0.002085252432152629, - "learning_rate": 0.0001999989691604219, - "loss": 46.0, - "step": 8985 - }, - { - "epoch": 1.4471194492531905, - "grad_norm": 0.0038350208196789026, - "learning_rate": 0.00019999896893069594, - "loss": 46.0, - "step": 8986 - }, - { - "epoch": 1.447280486331978, - "grad_norm": 0.0015229257987812161, - "learning_rate": 0.0001999989687009444, - "loss": 46.0, - "step": 8987 - }, - { - "epoch": 1.4474415234107654, - "grad_norm": 0.002044251887127757, - "learning_rate": 0.00019999896847116723, - "loss": 46.0, - "step": 8988 - }, - { - "epoch": 1.4476025604895528, - "grad_norm": 0.0011432894971221685, - "learning_rate": 0.0001999989682413645, - "loss": 46.0, - "step": 8989 - }, - { - "epoch": 1.44776359756834, - "grad_norm": 0.0019160597585141659, - "learning_rate": 0.00019999896801153617, - "loss": 46.0, - "step": 8990 - }, - { - "epoch": 1.4479246346471275, - "grad_norm": 0.0007443545036949217, - "learning_rate": 0.00019999896778168222, - "loss": 46.0, - "step": 8991 - }, - { - "epoch": 1.448085671725915, - "grad_norm": 0.0023979567922651768, - "learning_rate": 0.0001999989675518027, - "loss": 46.0, - "step": 8992 - }, - { - "epoch": 1.4482467088047022, - "grad_norm": 0.0017052312614396214, - "learning_rate": 0.00019999896732189758, - "loss": 46.0, - "step": 8993 - }, - { - "epoch": 1.4484077458834896, - "grad_norm": 0.002735251560807228, - "learning_rate": 0.00019999896709196687, - "loss": 46.0, - "step": 8994 - }, - { - "epoch": 1.448568782962277, - "grad_norm": 0.0045250714756548405, - "learning_rate": 0.00019999896686201054, - "loss": 46.0, - "step": 8995 - }, - { - "epoch": 1.4487298200410645, - "grad_norm": 0.0044335597194731236, - "learning_rate": 0.00019999896663202863, - "loss": 46.0, - "step": 8996 - }, - { - "epoch": 1.4488908571198518, - "grad_norm": 0.0023370222188532352, - "learning_rate": 0.00019999896640202116, - "loss": 46.0, - "step": 8997 - }, - { - "epoch": 1.4490518941986392, - "grad_norm": 0.003713067388162017, - "learning_rate": 0.00019999896617198807, - "loss": 46.0, - "step": 8998 - }, - { - "epoch": 1.4492129312774267, - "grad_norm": 0.0035601495765149593, - "learning_rate": 0.00019999896594192937, - "loss": 46.0, - "step": 8999 - }, - { - "epoch": 1.449373968356214, - "grad_norm": 0.0014270787360146642, - "learning_rate": 0.00019999896571184508, - "loss": 46.0, - "step": 9000 - }, - { - "epoch": 1.4495350054350014, - "grad_norm": 0.007551154121756554, - "learning_rate": 0.00019999896548173523, - "loss": 46.0, - "step": 9001 - }, - { - "epoch": 1.4496960425137888, - "grad_norm": 0.005864028353244066, - "learning_rate": 0.00019999896525159976, - "loss": 46.0, - "step": 9002 - }, - { - "epoch": 1.4498570795925763, - "grad_norm": 0.0005610091029666364, - "learning_rate": 0.00019999896502143866, - "loss": 46.0, - "step": 9003 - }, - { - "epoch": 1.4500181166713637, - "grad_norm": 0.0017337421886622906, - "learning_rate": 0.00019999896479125202, - "loss": 46.0, - "step": 9004 - }, - { - "epoch": 1.450179153750151, - "grad_norm": 0.0008135696989484131, - "learning_rate": 0.00019999896456103974, - "loss": 46.0, - "step": 9005 - }, - { - "epoch": 1.4503401908289384, - "grad_norm": 0.006821257993578911, - "learning_rate": 0.00019999896433080193, - "loss": 46.0, - "step": 9006 - }, - { - "epoch": 1.4505012279077256, - "grad_norm": 0.005144826602190733, - "learning_rate": 0.00019999896410053848, - "loss": 46.0, - "step": 9007 - }, - { - "epoch": 1.450662264986513, - "grad_norm": 0.0009511394891887903, - "learning_rate": 0.00019999896387024944, - "loss": 46.0, - "step": 9008 - }, - { - "epoch": 1.4508233020653005, - "grad_norm": 0.0011892416514456272, - "learning_rate": 0.0001999989636399348, - "loss": 46.0, - "step": 9009 - }, - { - "epoch": 1.450984339144088, - "grad_norm": 0.009082089178264141, - "learning_rate": 0.00019999896340959456, - "loss": 46.0, - "step": 9010 - }, - { - "epoch": 1.4511453762228754, - "grad_norm": 0.0015414797235280275, - "learning_rate": 0.00019999896317922874, - "loss": 46.0, - "step": 9011 - }, - { - "epoch": 1.4513064133016627, - "grad_norm": 0.0003873731184285134, - "learning_rate": 0.00019999896294883732, - "loss": 46.0, - "step": 9012 - }, - { - "epoch": 1.45146745038045, - "grad_norm": 0.0009792075725272298, - "learning_rate": 0.00019999896271842031, - "loss": 46.0, - "step": 9013 - }, - { - "epoch": 1.4516284874592376, - "grad_norm": 0.0015056653646752238, - "learning_rate": 0.00019999896248797772, - "loss": 46.0, - "step": 9014 - }, - { - "epoch": 1.4517895245380248, - "grad_norm": 0.0022380282171070576, - "learning_rate": 0.0001999989622575095, - "loss": 46.0, - "step": 9015 - }, - { - "epoch": 1.4519505616168122, - "grad_norm": 0.0013196611544117332, - "learning_rate": 0.00019999896202701573, - "loss": 46.0, - "step": 9016 - }, - { - "epoch": 1.4521115986955997, - "grad_norm": 0.0054290881380438805, - "learning_rate": 0.00019999896179649632, - "loss": 46.0, - "step": 9017 - }, - { - "epoch": 1.4522726357743871, - "grad_norm": 0.0009963031625375152, - "learning_rate": 0.00019999896156595135, - "loss": 46.0, - "step": 9018 - }, - { - "epoch": 1.4524336728531744, - "grad_norm": 0.00265924958512187, - "learning_rate": 0.00019999896133538077, - "loss": 46.0, - "step": 9019 - }, - { - "epoch": 1.4525947099319618, - "grad_norm": 0.0035344508942216635, - "learning_rate": 0.00019999896110478458, - "loss": 46.0, - "step": 9020 - }, - { - "epoch": 1.4527557470107493, - "grad_norm": 0.002570083364844322, - "learning_rate": 0.00019999896087416282, - "loss": 46.0, - "step": 9021 - }, - { - "epoch": 1.4529167840895365, - "grad_norm": 0.0011035854695364833, - "learning_rate": 0.00019999896064351545, - "loss": 46.0, - "step": 9022 - }, - { - "epoch": 1.453077821168324, - "grad_norm": 0.002907379297539592, - "learning_rate": 0.0001999989604128425, - "loss": 46.0, - "step": 9023 - }, - { - "epoch": 1.4532388582471114, - "grad_norm": 0.0038665556348860264, - "learning_rate": 0.00019999896018214392, - "loss": 46.0, - "step": 9024 - }, - { - "epoch": 1.4533998953258989, - "grad_norm": 0.002441034186631441, - "learning_rate": 0.0001999989599514198, - "loss": 46.0, - "step": 9025 - }, - { - "epoch": 1.4535609324046863, - "grad_norm": 0.005121355410665274, - "learning_rate": 0.00019999895972067005, - "loss": 46.0, - "step": 9026 - }, - { - "epoch": 1.4537219694834735, - "grad_norm": 0.0029127185698598623, - "learning_rate": 0.00019999895948989471, - "loss": 46.0, - "step": 9027 - }, - { - "epoch": 1.453883006562261, - "grad_norm": 0.006232391111552715, - "learning_rate": 0.0001999989592590938, - "loss": 46.0, - "step": 9028 - }, - { - "epoch": 1.4540440436410482, - "grad_norm": 0.001079633249901235, - "learning_rate": 0.00019999895902826723, - "loss": 46.0, - "step": 9029 - }, - { - "epoch": 1.4542050807198357, - "grad_norm": 0.0013632382033392787, - "learning_rate": 0.00019999895879741514, - "loss": 46.0, - "step": 9030 - }, - { - "epoch": 1.4543661177986231, - "grad_norm": 0.0015014581149443984, - "learning_rate": 0.00019999895856653743, - "loss": 46.0, - "step": 9031 - }, - { - "epoch": 1.4545271548774106, - "grad_norm": 0.0028064846992492676, - "learning_rate": 0.0001999989583356341, - "loss": 46.0, - "step": 9032 - }, - { - "epoch": 1.454688191956198, - "grad_norm": 0.0010828478261828423, - "learning_rate": 0.00019999895810470522, - "loss": 46.0, - "step": 9033 - }, - { - "epoch": 1.4548492290349853, - "grad_norm": 0.0010927043622359633, - "learning_rate": 0.00019999895787375073, - "loss": 46.0, - "step": 9034 - }, - { - "epoch": 1.4550102661137727, - "grad_norm": 0.0018936865963041782, - "learning_rate": 0.00019999895764277062, - "loss": 46.0, - "step": 9035 - }, - { - "epoch": 1.4551713031925602, - "grad_norm": 0.003322634380310774, - "learning_rate": 0.00019999895741176492, - "loss": 46.0, - "step": 9036 - }, - { - "epoch": 1.4553323402713474, - "grad_norm": 0.0016357441199943423, - "learning_rate": 0.00019999895718073366, - "loss": 46.0, - "step": 9037 - }, - { - "epoch": 1.4554933773501348, - "grad_norm": 0.005125960800796747, - "learning_rate": 0.00019999895694967679, - "loss": 46.0, - "step": 9038 - }, - { - "epoch": 1.4556544144289223, - "grad_norm": 0.0005522160790860653, - "learning_rate": 0.0001999989567185943, - "loss": 46.0, - "step": 9039 - }, - { - "epoch": 1.4558154515077097, - "grad_norm": 0.0020277206785976887, - "learning_rate": 0.00019999895648748622, - "loss": 46.0, - "step": 9040 - }, - { - "epoch": 1.455976488586497, - "grad_norm": 0.0015582212945446372, - "learning_rate": 0.00019999895625635256, - "loss": 46.0, - "step": 9041 - }, - { - "epoch": 1.4561375256652844, - "grad_norm": 0.010356155224144459, - "learning_rate": 0.00019999895602519331, - "loss": 46.0, - "step": 9042 - }, - { - "epoch": 1.4562985627440719, - "grad_norm": 0.0014032598119229078, - "learning_rate": 0.00019999895579400848, - "loss": 46.0, - "step": 9043 - }, - { - "epoch": 1.456459599822859, - "grad_norm": 0.00033606356009840965, - "learning_rate": 0.00019999895556279803, - "loss": 46.0, - "step": 9044 - }, - { - "epoch": 1.4566206369016466, - "grad_norm": 0.0014868822181597352, - "learning_rate": 0.000199998955331562, - "loss": 46.0, - "step": 9045 - }, - { - "epoch": 1.456781673980434, - "grad_norm": 0.0010604661656543612, - "learning_rate": 0.00019999895510030037, - "loss": 46.0, - "step": 9046 - }, - { - "epoch": 1.4569427110592215, - "grad_norm": 0.0005255446303635836, - "learning_rate": 0.00019999895486901313, - "loss": 46.0, - "step": 9047 - }, - { - "epoch": 1.457103748138009, - "grad_norm": 0.0013461486669257283, - "learning_rate": 0.0001999989546377003, - "loss": 46.0, - "step": 9048 - }, - { - "epoch": 1.4572647852167961, - "grad_norm": 0.0026272833347320557, - "learning_rate": 0.0001999989544063619, - "loss": 46.0, - "step": 9049 - }, - { - "epoch": 1.4574258222955836, - "grad_norm": 0.0005306096281856298, - "learning_rate": 0.00019999895417499786, - "loss": 46.0, - "step": 9050 - }, - { - "epoch": 1.4575868593743708, - "grad_norm": 0.0021950858645141125, - "learning_rate": 0.00019999895394360825, - "loss": 46.0, - "step": 9051 - }, - { - "epoch": 1.4577478964531583, - "grad_norm": 0.005761492531746626, - "learning_rate": 0.00019999895371219307, - "loss": 46.0, - "step": 9052 - }, - { - "epoch": 1.4579089335319457, - "grad_norm": 0.0008243282209150493, - "learning_rate": 0.00019999895348075226, - "loss": 46.0, - "step": 9053 - }, - { - "epoch": 1.4580699706107332, - "grad_norm": 0.0006277612410485744, - "learning_rate": 0.00019999895324928585, - "loss": 46.0, - "step": 9054 - }, - { - "epoch": 1.4582310076895206, - "grad_norm": 0.000492796883918345, - "learning_rate": 0.0001999989530177939, - "loss": 46.0, - "step": 9055 - }, - { - "epoch": 1.4583920447683079, - "grad_norm": 0.003343243384733796, - "learning_rate": 0.0001999989527862763, - "loss": 46.0, - "step": 9056 - }, - { - "epoch": 1.4585530818470953, - "grad_norm": 0.0018881288124248385, - "learning_rate": 0.00019999895255473312, - "loss": 46.0, - "step": 9057 - }, - { - "epoch": 1.4587141189258828, - "grad_norm": 0.002505961339920759, - "learning_rate": 0.00019999895232316437, - "loss": 46.0, - "step": 9058 - }, - { - "epoch": 1.45887515600467, - "grad_norm": 0.007228973787277937, - "learning_rate": 0.00019999895209157, - "loss": 46.0, - "step": 9059 - }, - { - "epoch": 1.4590361930834574, - "grad_norm": 0.0009150710538960993, - "learning_rate": 0.00019999895185995005, - "loss": 46.0, - "step": 9060 - }, - { - "epoch": 1.459197230162245, - "grad_norm": 0.0005061214906163514, - "learning_rate": 0.00019999895162830448, - "loss": 46.0, - "step": 9061 - }, - { - "epoch": 1.4593582672410323, - "grad_norm": 0.0010553308529779315, - "learning_rate": 0.00019999895139663333, - "loss": 46.0, - "step": 9062 - }, - { - "epoch": 1.4595193043198196, - "grad_norm": 0.0007964483229443431, - "learning_rate": 0.00019999895116493659, - "loss": 46.0, - "step": 9063 - }, - { - "epoch": 1.459680341398607, - "grad_norm": 0.0025870187673717737, - "learning_rate": 0.00019999895093321426, - "loss": 46.0, - "step": 9064 - }, - { - "epoch": 1.4598413784773945, - "grad_norm": 0.0016578648937866092, - "learning_rate": 0.00019999895070146631, - "loss": 46.0, - "step": 9065 - }, - { - "epoch": 1.4600024155561817, - "grad_norm": 0.007649368140846491, - "learning_rate": 0.0001999989504696928, - "loss": 46.0, - "step": 9066 - }, - { - "epoch": 1.4601634526349692, - "grad_norm": 0.0017081158002838492, - "learning_rate": 0.00019999895023789367, - "loss": 46.0, - "step": 9067 - }, - { - "epoch": 1.4603244897137566, - "grad_norm": 0.0014930607285350561, - "learning_rate": 0.00019999895000606896, - "loss": 46.0, - "step": 9068 - }, - { - "epoch": 1.460485526792544, - "grad_norm": 0.001350192236714065, - "learning_rate": 0.00019999894977421864, - "loss": 46.0, - "step": 9069 - }, - { - "epoch": 1.4606465638713313, - "grad_norm": 0.0026856905315071344, - "learning_rate": 0.00019999894954234276, - "loss": 46.0, - "step": 9070 - }, - { - "epoch": 1.4608076009501187, - "grad_norm": 0.0008866675198078156, - "learning_rate": 0.00019999894931044124, - "loss": 46.0, - "step": 9071 - }, - { - "epoch": 1.4609686380289062, - "grad_norm": 0.0015195850282907486, - "learning_rate": 0.00019999894907851416, - "loss": 46.0, - "step": 9072 - }, - { - "epoch": 1.4611296751076934, - "grad_norm": 0.006534232757985592, - "learning_rate": 0.00019999894884656147, - "loss": 46.0, - "step": 9073 - }, - { - "epoch": 1.4612907121864809, - "grad_norm": 0.0015818667598068714, - "learning_rate": 0.00019999894861458318, - "loss": 46.0, - "step": 9074 - }, - { - "epoch": 1.4614517492652683, - "grad_norm": 0.00365554541349411, - "learning_rate": 0.00019999894838257932, - "loss": 46.0, - "step": 9075 - }, - { - "epoch": 1.4616127863440558, - "grad_norm": 0.0020009437575936317, - "learning_rate": 0.00019999894815054986, - "loss": 46.0, - "step": 9076 - }, - { - "epoch": 1.4617738234228432, - "grad_norm": 0.004016316030174494, - "learning_rate": 0.00019999894791849476, - "loss": 46.0, - "step": 9077 - }, - { - "epoch": 1.4619348605016305, - "grad_norm": 0.003942732699215412, - "learning_rate": 0.00019999894768641413, - "loss": 46.0, - "step": 9078 - }, - { - "epoch": 1.462095897580418, - "grad_norm": 0.003082046052441001, - "learning_rate": 0.00019999894745430786, - "loss": 46.0, - "step": 9079 - }, - { - "epoch": 1.4622569346592054, - "grad_norm": 0.0013199597597122192, - "learning_rate": 0.00019999894722217603, - "loss": 46.0, - "step": 9080 - }, - { - "epoch": 1.4624179717379926, - "grad_norm": 0.0016363190952688456, - "learning_rate": 0.00019999894699001858, - "loss": 46.0, - "step": 9081 - }, - { - "epoch": 1.46257900881678, - "grad_norm": 0.00294755888171494, - "learning_rate": 0.00019999894675783552, - "loss": 46.0, - "step": 9082 - }, - { - "epoch": 1.4627400458955675, - "grad_norm": 0.002998506650328636, - "learning_rate": 0.0001999989465256269, - "loss": 46.0, - "step": 9083 - }, - { - "epoch": 1.462901082974355, - "grad_norm": 0.0039437864907085896, - "learning_rate": 0.00019999894629339266, - "loss": 46.0, - "step": 9084 - }, - { - "epoch": 1.4630621200531422, - "grad_norm": 0.001787786721251905, - "learning_rate": 0.00019999894606113284, - "loss": 46.0, - "step": 9085 - }, - { - "epoch": 1.4632231571319296, - "grad_norm": 0.005266864784061909, - "learning_rate": 0.0001999989458288474, - "loss": 46.0, - "step": 9086 - }, - { - "epoch": 1.463384194210717, - "grad_norm": 0.004001102410256863, - "learning_rate": 0.0001999989455965364, - "loss": 46.0, - "step": 9087 - }, - { - "epoch": 1.4635452312895043, - "grad_norm": 0.004251067992299795, - "learning_rate": 0.0001999989453641998, - "loss": 46.0, - "step": 9088 - }, - { - "epoch": 1.4637062683682918, - "grad_norm": 0.00332429981790483, - "learning_rate": 0.0001999989451318376, - "loss": 46.0, - "step": 9089 - }, - { - "epoch": 1.4638673054470792, - "grad_norm": 0.0018108172807842493, - "learning_rate": 0.00019999894489944979, - "loss": 46.0, - "step": 9090 - }, - { - "epoch": 1.4640283425258667, - "grad_norm": 0.005174580030143261, - "learning_rate": 0.00019999894466703641, - "loss": 46.0, - "step": 9091 - }, - { - "epoch": 1.464189379604654, - "grad_norm": 0.001297090551815927, - "learning_rate": 0.0001999989444345974, - "loss": 46.0, - "step": 9092 - }, - { - "epoch": 1.4643504166834413, - "grad_norm": 0.0008506930316798389, - "learning_rate": 0.00019999894420213283, - "loss": 46.0, - "step": 9093 - }, - { - "epoch": 1.4645114537622288, - "grad_norm": 0.00564588513225317, - "learning_rate": 0.00019999894396964267, - "loss": 46.0, - "step": 9094 - }, - { - "epoch": 1.464672490841016, - "grad_norm": 0.002092865528538823, - "learning_rate": 0.0001999989437371269, - "loss": 46.0, - "step": 9095 - }, - { - "epoch": 1.4648335279198035, - "grad_norm": 0.0006037953426130116, - "learning_rate": 0.00019999894350458553, - "loss": 46.0, - "step": 9096 - }, - { - "epoch": 1.464994564998591, - "grad_norm": 0.0027539010625332594, - "learning_rate": 0.00019999894327201858, - "loss": 46.0, - "step": 9097 - }, - { - "epoch": 1.4651556020773784, - "grad_norm": 0.0013463215436786413, - "learning_rate": 0.00019999894303942601, - "loss": 46.0, - "step": 9098 - }, - { - "epoch": 1.4653166391561658, - "grad_norm": 0.0013604918494820595, - "learning_rate": 0.00019999894280680786, - "loss": 46.0, - "step": 9099 - }, - { - "epoch": 1.465477676234953, - "grad_norm": 0.0030413393396884203, - "learning_rate": 0.00019999894257416415, - "loss": 46.0, - "step": 9100 - }, - { - "epoch": 1.4656387133137405, - "grad_norm": 0.0007645843434147537, - "learning_rate": 0.0001999989423414948, - "loss": 46.0, - "step": 9101 - }, - { - "epoch": 1.4657997503925277, - "grad_norm": 0.0010827609803527594, - "learning_rate": 0.00019999894210879986, - "loss": 46.0, - "step": 9102 - }, - { - "epoch": 1.4659607874713152, - "grad_norm": 0.0022910968400537968, - "learning_rate": 0.00019999894187607936, - "loss": 46.0, - "step": 9103 - }, - { - "epoch": 1.4661218245501026, - "grad_norm": 0.0005529465852305293, - "learning_rate": 0.00019999894164333322, - "loss": 46.0, - "step": 9104 - }, - { - "epoch": 1.46628286162889, - "grad_norm": 0.004184665624052286, - "learning_rate": 0.0001999989414105615, - "loss": 46.0, - "step": 9105 - }, - { - "epoch": 1.4664438987076776, - "grad_norm": 0.0013064953964203596, - "learning_rate": 0.0001999989411777642, - "loss": 46.0, - "step": 9106 - }, - { - "epoch": 1.4666049357864648, - "grad_norm": 0.0020443282555788755, - "learning_rate": 0.0001999989409449413, - "loss": 46.0, - "step": 9107 - }, - { - "epoch": 1.4667659728652522, - "grad_norm": 0.0005454795900732279, - "learning_rate": 0.0001999989407120928, - "loss": 46.0, - "step": 9108 - }, - { - "epoch": 1.4669270099440397, - "grad_norm": 0.0059854923747479916, - "learning_rate": 0.0001999989404792187, - "loss": 46.0, - "step": 9109 - }, - { - "epoch": 1.467088047022827, - "grad_norm": 0.0008014341583475471, - "learning_rate": 0.00019999894024631901, - "loss": 46.0, - "step": 9110 - }, - { - "epoch": 1.4672490841016144, - "grad_norm": 0.001022852840833366, - "learning_rate": 0.00019999894001339374, - "loss": 46.0, - "step": 9111 - }, - { - "epoch": 1.4674101211804018, - "grad_norm": 0.00381990778259933, - "learning_rate": 0.00019999893978044284, - "loss": 46.0, - "step": 9112 - }, - { - "epoch": 1.4675711582591893, - "grad_norm": 0.0009041810408234596, - "learning_rate": 0.0001999989395474664, - "loss": 46.0, - "step": 9113 - }, - { - "epoch": 1.4677321953379765, - "grad_norm": 0.0007273399969562888, - "learning_rate": 0.00019999893931446432, - "loss": 46.0, - "step": 9114 - }, - { - "epoch": 1.467893232416764, - "grad_norm": 0.0011411912273615599, - "learning_rate": 0.00019999893908143664, - "loss": 46.0, - "step": 9115 - }, - { - "epoch": 1.4680542694955514, - "grad_norm": 0.0024391021579504013, - "learning_rate": 0.0001999989388483834, - "loss": 46.0, - "step": 9116 - }, - { - "epoch": 1.4682153065743386, - "grad_norm": 0.0018117907457053661, - "learning_rate": 0.00019999893861530454, - "loss": 46.0, - "step": 9117 - }, - { - "epoch": 1.468376343653126, - "grad_norm": 0.003756811609491706, - "learning_rate": 0.0001999989383822001, - "loss": 46.0, - "step": 9118 - }, - { - "epoch": 1.4685373807319135, - "grad_norm": 0.0021754552144557238, - "learning_rate": 0.00019999893814907007, - "loss": 46.0, - "step": 9119 - }, - { - "epoch": 1.468698417810701, - "grad_norm": 0.0025537365581840277, - "learning_rate": 0.00019999893791591445, - "loss": 46.0, - "step": 9120 - }, - { - "epoch": 1.4688594548894884, - "grad_norm": 0.0020423149690032005, - "learning_rate": 0.0001999989376827332, - "loss": 46.0, - "step": 9121 - }, - { - "epoch": 1.4690204919682757, - "grad_norm": 0.0010749796638265252, - "learning_rate": 0.00019999893744952638, - "loss": 46.0, - "step": 9122 - }, - { - "epoch": 1.4691815290470631, - "grad_norm": 0.0017775411251932383, - "learning_rate": 0.00019999893721629394, - "loss": 46.0, - "step": 9123 - }, - { - "epoch": 1.4693425661258503, - "grad_norm": 0.0027228447142988443, - "learning_rate": 0.00019999893698303595, - "loss": 46.0, - "step": 9124 - }, - { - "epoch": 1.4695036032046378, - "grad_norm": 0.003028589766472578, - "learning_rate": 0.00019999893674975234, - "loss": 46.0, - "step": 9125 - }, - { - "epoch": 1.4696646402834252, - "grad_norm": 0.0011869377922266722, - "learning_rate": 0.00019999893651644315, - "loss": 46.0, - "step": 9126 - }, - { - "epoch": 1.4698256773622127, - "grad_norm": 0.0008890478638932109, - "learning_rate": 0.00019999893628310834, - "loss": 46.0, - "step": 9127 - }, - { - "epoch": 1.4699867144410002, - "grad_norm": 0.005459483712911606, - "learning_rate": 0.00019999893604974794, - "loss": 46.0, - "step": 9128 - }, - { - "epoch": 1.4701477515197874, - "grad_norm": 0.0008363460656255484, - "learning_rate": 0.00019999893581636196, - "loss": 46.0, - "step": 9129 - }, - { - "epoch": 1.4703087885985748, - "grad_norm": 0.0013150371378287673, - "learning_rate": 0.0001999989355829504, - "loss": 46.0, - "step": 9130 - }, - { - "epoch": 1.4704698256773623, - "grad_norm": 0.0022796099074184895, - "learning_rate": 0.0001999989353495132, - "loss": 46.0, - "step": 9131 - }, - { - "epoch": 1.4706308627561495, - "grad_norm": 0.00303072901442647, - "learning_rate": 0.00019999893511605044, - "loss": 46.0, - "step": 9132 - }, - { - "epoch": 1.470791899834937, - "grad_norm": 0.00559916952624917, - "learning_rate": 0.00019999893488256208, - "loss": 46.0, - "step": 9133 - }, - { - "epoch": 1.4709529369137244, - "grad_norm": 0.011432289145886898, - "learning_rate": 0.0001999989346490481, - "loss": 46.0, - "step": 9134 - }, - { - "epoch": 1.4711139739925119, - "grad_norm": 0.003210758324712515, - "learning_rate": 0.00019999893441550857, - "loss": 46.0, - "step": 9135 - }, - { - "epoch": 1.471275011071299, - "grad_norm": 0.012489925138652325, - "learning_rate": 0.0001999989341819434, - "loss": 46.0, - "step": 9136 - }, - { - "epoch": 1.4714360481500866, - "grad_norm": 0.0010342586319893599, - "learning_rate": 0.00019999893394835266, - "loss": 46.0, - "step": 9137 - }, - { - "epoch": 1.471597085228874, - "grad_norm": 0.0007231213967315853, - "learning_rate": 0.0001999989337147363, - "loss": 46.0, - "step": 9138 - }, - { - "epoch": 1.4717581223076612, - "grad_norm": 0.0007497454644180834, - "learning_rate": 0.00019999893348109443, - "loss": 46.0, - "step": 9139 - }, - { - "epoch": 1.4719191593864487, - "grad_norm": 0.0008579182904213667, - "learning_rate": 0.00019999893324742688, - "loss": 46.0, - "step": 9140 - }, - { - "epoch": 1.4720801964652361, - "grad_norm": 0.0013228532625362277, - "learning_rate": 0.00019999893301373377, - "loss": 46.0, - "step": 9141 - }, - { - "epoch": 1.4722412335440236, - "grad_norm": 0.000514134531840682, - "learning_rate": 0.00019999893278001504, - "loss": 46.0, - "step": 9142 - }, - { - "epoch": 1.472402270622811, - "grad_norm": 0.0011756445746868849, - "learning_rate": 0.00019999893254627073, - "loss": 46.0, - "step": 9143 - }, - { - "epoch": 1.4725633077015983, - "grad_norm": 0.0028722293209284544, - "learning_rate": 0.00019999893231250083, - "loss": 46.0, - "step": 9144 - }, - { - "epoch": 1.4727243447803857, - "grad_norm": 0.00224276608787477, - "learning_rate": 0.00019999893207870532, - "loss": 46.0, - "step": 9145 - }, - { - "epoch": 1.472885381859173, - "grad_norm": 0.0013697216054424644, - "learning_rate": 0.00019999893184488422, - "loss": 46.0, - "step": 9146 - }, - { - "epoch": 1.4730464189379604, - "grad_norm": 0.000897496473044157, - "learning_rate": 0.00019999893161103756, - "loss": 46.0, - "step": 9147 - }, - { - "epoch": 1.4732074560167479, - "grad_norm": 0.002812425373122096, - "learning_rate": 0.00019999893137716526, - "loss": 46.0, - "step": 9148 - }, - { - "epoch": 1.4733684930955353, - "grad_norm": 0.0022975506726652384, - "learning_rate": 0.00019999893114326737, - "loss": 46.0, - "step": 9149 - }, - { - "epoch": 1.4735295301743228, - "grad_norm": 0.0005632441025227308, - "learning_rate": 0.00019999893090934392, - "loss": 46.0, - "step": 9150 - }, - { - "epoch": 1.47369056725311, - "grad_norm": 0.0009142677881754935, - "learning_rate": 0.00019999893067539483, - "loss": 46.0, - "step": 9151 - }, - { - "epoch": 1.4738516043318974, - "grad_norm": 0.008548218756914139, - "learning_rate": 0.00019999893044142018, - "loss": 46.0, - "step": 9152 - }, - { - "epoch": 1.4740126414106849, - "grad_norm": 0.0013078803895041347, - "learning_rate": 0.00019999893020741994, - "loss": 46.0, - "step": 9153 - }, - { - "epoch": 1.4741736784894721, - "grad_norm": 0.0006266293930821121, - "learning_rate": 0.00019999892997339406, - "loss": 46.0, - "step": 9154 - }, - { - "epoch": 1.4743347155682596, - "grad_norm": 0.0020333644933998585, - "learning_rate": 0.00019999892973934262, - "loss": 46.0, - "step": 9155 - }, - { - "epoch": 1.474495752647047, - "grad_norm": 0.0019565399270504713, - "learning_rate": 0.0001999989295052656, - "loss": 46.0, - "step": 9156 - }, - { - "epoch": 1.4746567897258345, - "grad_norm": 0.0007945430115796626, - "learning_rate": 0.00019999892927116295, - "loss": 46.0, - "step": 9157 - }, - { - "epoch": 1.4748178268046217, - "grad_norm": 0.0004254174418747425, - "learning_rate": 0.00019999892903703473, - "loss": 46.0, - "step": 9158 - }, - { - "epoch": 1.4749788638834092, - "grad_norm": 0.000837479077745229, - "learning_rate": 0.0001999989288028809, - "loss": 46.0, - "step": 9159 - }, - { - "epoch": 1.4751399009621966, - "grad_norm": 0.0035503138788044453, - "learning_rate": 0.00019999892856870148, - "loss": 46.0, - "step": 9160 - }, - { - "epoch": 1.4753009380409838, - "grad_norm": 0.0038676303811371326, - "learning_rate": 0.00019999892833449647, - "loss": 46.0, - "step": 9161 - }, - { - "epoch": 1.4754619751197713, - "grad_norm": 0.0017247156938537955, - "learning_rate": 0.0001999989281002659, - "loss": 46.0, - "step": 9162 - }, - { - "epoch": 1.4756230121985587, - "grad_norm": 0.008360270410776138, - "learning_rate": 0.00019999892786600967, - "loss": 46.0, - "step": 9163 - }, - { - "epoch": 1.4757840492773462, - "grad_norm": 0.0021021724678575993, - "learning_rate": 0.00019999892763172787, - "loss": 46.0, - "step": 9164 - }, - { - "epoch": 1.4759450863561334, - "grad_norm": 0.00189265760127455, - "learning_rate": 0.00019999892739742047, - "loss": 46.0, - "step": 9165 - }, - { - "epoch": 1.4761061234349209, - "grad_norm": 0.0018198195612058043, - "learning_rate": 0.0001999989271630875, - "loss": 46.0, - "step": 9166 - }, - { - "epoch": 1.4762671605137083, - "grad_norm": 0.0013389589730650187, - "learning_rate": 0.00019999892692872893, - "loss": 46.0, - "step": 9167 - }, - { - "epoch": 1.4764281975924956, - "grad_norm": 0.0029416116885840893, - "learning_rate": 0.00019999892669434472, - "loss": 46.0, - "step": 9168 - }, - { - "epoch": 1.476589234671283, - "grad_norm": 0.002891369629651308, - "learning_rate": 0.00019999892645993498, - "loss": 46.0, - "step": 9169 - }, - { - "epoch": 1.4767502717500705, - "grad_norm": 0.000511622114572674, - "learning_rate": 0.0001999989262254996, - "loss": 46.0, - "step": 9170 - }, - { - "epoch": 1.476911308828858, - "grad_norm": 0.0022698314860463142, - "learning_rate": 0.00019999892599103865, - "loss": 46.0, - "step": 9171 - }, - { - "epoch": 1.4770723459076454, - "grad_norm": 0.0025655021890997887, - "learning_rate": 0.00019999892575655207, - "loss": 46.0, - "step": 9172 - }, - { - "epoch": 1.4772333829864326, - "grad_norm": 0.0019411066314205527, - "learning_rate": 0.00019999892552203993, - "loss": 46.0, - "step": 9173 - }, - { - "epoch": 1.47739442006522, - "grad_norm": 0.001997189363464713, - "learning_rate": 0.00019999892528750222, - "loss": 46.0, - "step": 9174 - }, - { - "epoch": 1.4775554571440075, - "grad_norm": 0.006019538268446922, - "learning_rate": 0.00019999892505293888, - "loss": 46.0, - "step": 9175 - }, - { - "epoch": 1.4777164942227947, - "grad_norm": 0.0023416599724441767, - "learning_rate": 0.00019999892481834994, - "loss": 46.0, - "step": 9176 - }, - { - "epoch": 1.4778775313015822, - "grad_norm": 0.0007697054534219205, - "learning_rate": 0.00019999892458373542, - "loss": 46.0, - "step": 9177 - }, - { - "epoch": 1.4780385683803696, - "grad_norm": 0.0018224541563540697, - "learning_rate": 0.0001999989243490953, - "loss": 46.0, - "step": 9178 - }, - { - "epoch": 1.478199605459157, - "grad_norm": 0.006273406092077494, - "learning_rate": 0.00019999892411442957, - "loss": 46.0, - "step": 9179 - }, - { - "epoch": 1.4783606425379443, - "grad_norm": 0.0025578478816896677, - "learning_rate": 0.00019999892387973826, - "loss": 46.0, - "step": 9180 - }, - { - "epoch": 1.4785216796167318, - "grad_norm": 0.0023115873336791992, - "learning_rate": 0.00019999892364502134, - "loss": 46.0, - "step": 9181 - }, - { - "epoch": 1.4786827166955192, - "grad_norm": 0.012280327267944813, - "learning_rate": 0.00019999892341027886, - "loss": 46.0, - "step": 9182 - }, - { - "epoch": 1.4788437537743064, - "grad_norm": 0.0037991516292095184, - "learning_rate": 0.00019999892317551076, - "loss": 46.0, - "step": 9183 - }, - { - "epoch": 1.4790047908530939, - "grad_norm": 0.0010416858131065965, - "learning_rate": 0.0001999989229407171, - "loss": 46.0, - "step": 9184 - }, - { - "epoch": 1.4791658279318813, - "grad_norm": 0.0033041746355593204, - "learning_rate": 0.0001999989227058978, - "loss": 46.0, - "step": 9185 - }, - { - "epoch": 1.4793268650106688, - "grad_norm": 0.0004504225216805935, - "learning_rate": 0.00019999892247105292, - "loss": 46.0, - "step": 9186 - }, - { - "epoch": 1.479487902089456, - "grad_norm": 0.004607070237398148, - "learning_rate": 0.00019999892223618245, - "loss": 46.0, - "step": 9187 - }, - { - "epoch": 1.4796489391682435, - "grad_norm": 0.011801824904978275, - "learning_rate": 0.00019999892200128639, - "loss": 46.0, - "step": 9188 - }, - { - "epoch": 1.479809976247031, - "grad_norm": 0.0008670894894748926, - "learning_rate": 0.0001999989217663647, - "loss": 46.0, - "step": 9189 - }, - { - "epoch": 1.4799710133258182, - "grad_norm": 0.004203254822641611, - "learning_rate": 0.00019999892153141745, - "loss": 46.0, - "step": 9190 - }, - { - "epoch": 1.4801320504046056, - "grad_norm": 0.0028932723216712475, - "learning_rate": 0.0001999989212964446, - "loss": 46.0, - "step": 9191 - }, - { - "epoch": 1.480293087483393, - "grad_norm": 0.001833435264416039, - "learning_rate": 0.00019999892106144617, - "loss": 46.0, - "step": 9192 - }, - { - "epoch": 1.4804541245621805, - "grad_norm": 0.0006322352564893663, - "learning_rate": 0.00019999892082642214, - "loss": 46.0, - "step": 9193 - }, - { - "epoch": 1.480615161640968, - "grad_norm": 0.006942402571439743, - "learning_rate": 0.0001999989205913725, - "loss": 46.0, - "step": 9194 - }, - { - "epoch": 1.4807761987197552, - "grad_norm": 0.00653650239109993, - "learning_rate": 0.00019999892035629726, - "loss": 46.0, - "step": 9195 - }, - { - "epoch": 1.4809372357985426, - "grad_norm": 0.0007525699911639094, - "learning_rate": 0.00019999892012119644, - "loss": 46.0, - "step": 9196 - }, - { - "epoch": 1.4810982728773299, - "grad_norm": 0.003730009775608778, - "learning_rate": 0.00019999891988607002, - "loss": 46.0, - "step": 9197 - }, - { - "epoch": 1.4812593099561173, - "grad_norm": 0.0032291864044964314, - "learning_rate": 0.000199998919650918, - "loss": 46.0, - "step": 9198 - }, - { - "epoch": 1.4814203470349048, - "grad_norm": 0.001430146861821413, - "learning_rate": 0.0001999989194157404, - "loss": 46.0, - "step": 9199 - }, - { - "epoch": 1.4815813841136922, - "grad_norm": 0.0004250580968800932, - "learning_rate": 0.0001999989191805372, - "loss": 46.0, - "step": 9200 - }, - { - "epoch": 1.4817424211924797, - "grad_norm": 0.0018537724390625954, - "learning_rate": 0.0001999989189453084, - "loss": 46.0, - "step": 9201 - }, - { - "epoch": 1.481903458271267, - "grad_norm": 0.0027568424120545387, - "learning_rate": 0.00019999891871005403, - "loss": 46.0, - "step": 9202 - }, - { - "epoch": 1.4820644953500544, - "grad_norm": 0.00115341623313725, - "learning_rate": 0.000199998918474774, - "loss": 46.0, - "step": 9203 - }, - { - "epoch": 1.4822255324288418, - "grad_norm": 0.0020698762964457273, - "learning_rate": 0.00019999891823946846, - "loss": 46.0, - "step": 9204 - }, - { - "epoch": 1.482386569507629, - "grad_norm": 0.000808591372333467, - "learning_rate": 0.00019999891800413728, - "loss": 46.0, - "step": 9205 - }, - { - "epoch": 1.4825476065864165, - "grad_norm": 0.004555118270218372, - "learning_rate": 0.0001999989177687805, - "loss": 46.0, - "step": 9206 - }, - { - "epoch": 1.482708643665204, - "grad_norm": 0.0008105576853267848, - "learning_rate": 0.00019999891753339814, - "loss": 46.0, - "step": 9207 - }, - { - "epoch": 1.4828696807439914, - "grad_norm": 0.0014980818377807736, - "learning_rate": 0.00019999891729799018, - "loss": 46.0, - "step": 9208 - }, - { - "epoch": 1.4830307178227786, - "grad_norm": 0.005529528018087149, - "learning_rate": 0.00019999891706255662, - "loss": 46.0, - "step": 9209 - }, - { - "epoch": 1.483191754901566, - "grad_norm": 0.003451485652476549, - "learning_rate": 0.00019999891682709748, - "loss": 46.0, - "step": 9210 - }, - { - "epoch": 1.4833527919803535, - "grad_norm": 0.0024196526501327753, - "learning_rate": 0.00019999891659161273, - "loss": 46.0, - "step": 9211 - }, - { - "epoch": 1.4835138290591408, - "grad_norm": 0.001738803111948073, - "learning_rate": 0.00019999891635610239, - "loss": 46.0, - "step": 9212 - }, - { - "epoch": 1.4836748661379282, - "grad_norm": 0.00106618064455688, - "learning_rate": 0.00019999891612056648, - "loss": 46.0, - "step": 9213 - }, - { - "epoch": 1.4838359032167157, - "grad_norm": 0.002034460660070181, - "learning_rate": 0.00019999891588500494, - "loss": 46.0, - "step": 9214 - }, - { - "epoch": 1.483996940295503, - "grad_norm": 0.004390411544591188, - "learning_rate": 0.0001999989156494178, - "loss": 46.0, - "step": 9215 - }, - { - "epoch": 1.4841579773742906, - "grad_norm": 0.0005137246917001903, - "learning_rate": 0.00019999891541380512, - "loss": 46.0, - "step": 9216 - }, - { - "epoch": 1.4843190144530778, - "grad_norm": 0.0015641130739822984, - "learning_rate": 0.0001999989151781668, - "loss": 46.0, - "step": 9217 - }, - { - "epoch": 1.4844800515318652, - "grad_norm": 0.005606240127235651, - "learning_rate": 0.0001999989149425029, - "loss": 46.0, - "step": 9218 - }, - { - "epoch": 1.4846410886106525, - "grad_norm": 0.0023736802395433187, - "learning_rate": 0.0001999989147068134, - "loss": 46.0, - "step": 9219 - }, - { - "epoch": 1.48480212568944, - "grad_norm": 0.0007125459378585219, - "learning_rate": 0.0001999989144710983, - "loss": 46.0, - "step": 9220 - }, - { - "epoch": 1.4849631627682274, - "grad_norm": 0.0022514360025525093, - "learning_rate": 0.00019999891423535762, - "loss": 46.0, - "step": 9221 - }, - { - "epoch": 1.4851241998470148, - "grad_norm": 0.003857055213302374, - "learning_rate": 0.00019999891399959135, - "loss": 46.0, - "step": 9222 - }, - { - "epoch": 1.4852852369258023, - "grad_norm": 0.005299419164657593, - "learning_rate": 0.00019999891376379947, - "loss": 46.0, - "step": 9223 - }, - { - "epoch": 1.4854462740045895, - "grad_norm": 0.005835393443703651, - "learning_rate": 0.00019999891352798197, - "loss": 46.0, - "step": 9224 - }, - { - "epoch": 1.485607311083377, - "grad_norm": 0.0017973246285691857, - "learning_rate": 0.00019999891329213892, - "loss": 46.0, - "step": 9225 - }, - { - "epoch": 1.4857683481621644, - "grad_norm": 0.0010201316326856613, - "learning_rate": 0.00019999891305627024, - "loss": 46.0, - "step": 9226 - }, - { - "epoch": 1.4859293852409516, - "grad_norm": 0.003794406307861209, - "learning_rate": 0.000199998912820376, - "loss": 46.0, - "step": 9227 - }, - { - "epoch": 1.486090422319739, - "grad_norm": 0.002419550670310855, - "learning_rate": 0.00019999891258445614, - "loss": 46.0, - "step": 9228 - }, - { - "epoch": 1.4862514593985265, - "grad_norm": 0.004375604912638664, - "learning_rate": 0.00019999891234851068, - "loss": 46.0, - "step": 9229 - }, - { - "epoch": 1.486412496477314, - "grad_norm": 0.0053855255246162415, - "learning_rate": 0.00019999891211253966, - "loss": 46.0, - "step": 9230 - }, - { - "epoch": 1.4865735335561012, - "grad_norm": 0.0018320202361792326, - "learning_rate": 0.00019999891187654303, - "loss": 46.0, - "step": 9231 - }, - { - "epoch": 1.4867345706348887, - "grad_norm": 0.0006352829514071345, - "learning_rate": 0.0001999989116405208, - "loss": 46.0, - "step": 9232 - }, - { - "epoch": 1.4868956077136761, - "grad_norm": 0.0026084398850798607, - "learning_rate": 0.00019999891140447297, - "loss": 46.0, - "step": 9233 - }, - { - "epoch": 1.4870566447924634, - "grad_norm": 0.0022584833204746246, - "learning_rate": 0.00019999891116839958, - "loss": 46.0, - "step": 9234 - }, - { - "epoch": 1.4872176818712508, - "grad_norm": 0.0017240295419469476, - "learning_rate": 0.00019999891093230054, - "loss": 46.0, - "step": 9235 - }, - { - "epoch": 1.4873787189500383, - "grad_norm": 0.006388253066688776, - "learning_rate": 0.00019999891069617595, - "loss": 46.0, - "step": 9236 - }, - { - "epoch": 1.4875397560288257, - "grad_norm": 0.0011752115096896887, - "learning_rate": 0.00019999891046002574, - "loss": 46.0, - "step": 9237 - }, - { - "epoch": 1.4877007931076132, - "grad_norm": 0.0032273733522742987, - "learning_rate": 0.00019999891022384994, - "loss": 46.0, - "step": 9238 - }, - { - "epoch": 1.4878618301864004, - "grad_norm": 0.0007667818572372198, - "learning_rate": 0.00019999890998764855, - "loss": 46.0, - "step": 9239 - }, - { - "epoch": 1.4880228672651878, - "grad_norm": 0.006122061051428318, - "learning_rate": 0.00019999890975142158, - "loss": 46.0, - "step": 9240 - }, - { - "epoch": 1.488183904343975, - "grad_norm": 0.0007807639776729047, - "learning_rate": 0.000199998909515169, - "loss": 46.0, - "step": 9241 - }, - { - "epoch": 1.4883449414227625, - "grad_norm": 0.0017634417163208127, - "learning_rate": 0.0001999989092788908, - "loss": 46.0, - "step": 9242 - }, - { - "epoch": 1.48850597850155, - "grad_norm": 0.0021173693239688873, - "learning_rate": 0.00019999890904258703, - "loss": 46.0, - "step": 9243 - }, - { - "epoch": 1.4886670155803374, - "grad_norm": 0.0014326149830594659, - "learning_rate": 0.00019999890880625766, - "loss": 46.0, - "step": 9244 - }, - { - "epoch": 1.4888280526591249, - "grad_norm": 0.002000546781346202, - "learning_rate": 0.00019999890856990272, - "loss": 46.0, - "step": 9245 - }, - { - "epoch": 1.488989089737912, - "grad_norm": 0.004835196305066347, - "learning_rate": 0.00019999890833352217, - "loss": 46.0, - "step": 9246 - }, - { - "epoch": 1.4891501268166996, - "grad_norm": 0.0020897649228572845, - "learning_rate": 0.000199998908097116, - "loss": 46.0, - "step": 9247 - }, - { - "epoch": 1.489311163895487, - "grad_norm": 0.0033091893419623375, - "learning_rate": 0.00019999890786068426, - "loss": 46.0, - "step": 9248 - }, - { - "epoch": 1.4894722009742742, - "grad_norm": 0.0021526850759983063, - "learning_rate": 0.00019999890762422692, - "loss": 46.0, - "step": 9249 - }, - { - "epoch": 1.4896332380530617, - "grad_norm": 0.002141405828297138, - "learning_rate": 0.000199998907387744, - "loss": 46.0, - "step": 9250 - }, - { - "epoch": 1.4897942751318491, - "grad_norm": 0.0016003063647076488, - "learning_rate": 0.00019999890715123548, - "loss": 46.0, - "step": 9251 - }, - { - "epoch": 1.4899553122106366, - "grad_norm": 0.004432353191077709, - "learning_rate": 0.00019999890691470136, - "loss": 46.0, - "step": 9252 - }, - { - "epoch": 1.4901163492894238, - "grad_norm": 0.0035880128853023052, - "learning_rate": 0.00019999890667814164, - "loss": 46.0, - "step": 9253 - }, - { - "epoch": 1.4902773863682113, - "grad_norm": 0.004421964753419161, - "learning_rate": 0.00019999890644155634, - "loss": 46.0, - "step": 9254 - }, - { - "epoch": 1.4904384234469987, - "grad_norm": 0.0006389126647263765, - "learning_rate": 0.00019999890620494543, - "loss": 46.0, - "step": 9255 - }, - { - "epoch": 1.490599460525786, - "grad_norm": 0.009319878183305264, - "learning_rate": 0.00019999890596830895, - "loss": 46.0, - "step": 9256 - }, - { - "epoch": 1.4907604976045734, - "grad_norm": 0.002830260433256626, - "learning_rate": 0.00019999890573164683, - "loss": 46.0, - "step": 9257 - }, - { - "epoch": 1.4909215346833609, - "grad_norm": 0.006291674915701151, - "learning_rate": 0.00019999890549495916, - "loss": 46.0, - "step": 9258 - }, - { - "epoch": 1.4910825717621483, - "grad_norm": 0.003054615342989564, - "learning_rate": 0.00019999890525824587, - "loss": 46.0, - "step": 9259 - }, - { - "epoch": 1.4912436088409358, - "grad_norm": 0.0017148348269984126, - "learning_rate": 0.000199998905021507, - "loss": 46.0, - "step": 9260 - }, - { - "epoch": 1.491404645919723, - "grad_norm": 0.0015248499112203717, - "learning_rate": 0.0001999989047847425, - "loss": 46.0, - "step": 9261 - }, - { - "epoch": 1.4915656829985104, - "grad_norm": 0.0018423888832330704, - "learning_rate": 0.00019999890454795244, - "loss": 46.0, - "step": 9262 - }, - { - "epoch": 1.4917267200772977, - "grad_norm": 0.0065639144740998745, - "learning_rate": 0.0001999989043111368, - "loss": 46.0, - "step": 9263 - }, - { - "epoch": 1.4918877571560851, - "grad_norm": 0.0037312640342861414, - "learning_rate": 0.00019999890407429552, - "loss": 46.0, - "step": 9264 - }, - { - "epoch": 1.4920487942348726, - "grad_norm": 0.003073142608627677, - "learning_rate": 0.00019999890383742868, - "loss": 46.0, - "step": 9265 - }, - { - "epoch": 1.49220983131366, - "grad_norm": 0.001604669727385044, - "learning_rate": 0.00019999890360053622, - "loss": 46.0, - "step": 9266 - }, - { - "epoch": 1.4923708683924475, - "grad_norm": 0.003724227659404278, - "learning_rate": 0.0001999989033636182, - "loss": 46.0, - "step": 9267 - }, - { - "epoch": 1.4925319054712347, - "grad_norm": 0.00258837197907269, - "learning_rate": 0.00019999890312667455, - "loss": 46.0, - "step": 9268 - }, - { - "epoch": 1.4926929425500222, - "grad_norm": 0.0008089937036857009, - "learning_rate": 0.00019999890288970536, - "loss": 46.0, - "step": 9269 - }, - { - "epoch": 1.4928539796288096, - "grad_norm": 0.0025259137619286776, - "learning_rate": 0.00019999890265271053, - "loss": 46.0, - "step": 9270 - }, - { - "epoch": 1.4930150167075968, - "grad_norm": 0.0022156857885420322, - "learning_rate": 0.00019999890241569008, - "loss": 46.0, - "step": 9271 - }, - { - "epoch": 1.4931760537863843, - "grad_norm": 0.0041998522356152534, - "learning_rate": 0.0001999989021786441, - "loss": 46.0, - "step": 9272 - }, - { - "epoch": 1.4933370908651717, - "grad_norm": 0.001053009764291346, - "learning_rate": 0.00019999890194157249, - "loss": 46.0, - "step": 9273 - }, - { - "epoch": 1.4934981279439592, - "grad_norm": 0.0009771655313670635, - "learning_rate": 0.00019999890170447525, - "loss": 46.0, - "step": 9274 - }, - { - "epoch": 1.4936591650227464, - "grad_norm": 0.00158273137640208, - "learning_rate": 0.00019999890146735249, - "loss": 46.0, - "step": 9275 - }, - { - "epoch": 1.4938202021015339, - "grad_norm": 0.0026314619462937117, - "learning_rate": 0.0001999989012302041, - "loss": 46.0, - "step": 9276 - }, - { - "epoch": 1.4939812391803213, - "grad_norm": 0.001456606201827526, - "learning_rate": 0.00019999890099303008, - "loss": 46.0, - "step": 9277 - }, - { - "epoch": 1.4941422762591086, - "grad_norm": 0.0006253541796468198, - "learning_rate": 0.00019999890075583052, - "loss": 46.0, - "step": 9278 - }, - { - "epoch": 1.494303313337896, - "grad_norm": 0.0005209855735301971, - "learning_rate": 0.00019999890051860535, - "loss": 46.0, - "step": 9279 - }, - { - "epoch": 1.4944643504166835, - "grad_norm": 0.005019493866711855, - "learning_rate": 0.00019999890028135457, - "loss": 46.0, - "step": 9280 - }, - { - "epoch": 1.494625387495471, - "grad_norm": 0.0013266053283587098, - "learning_rate": 0.00019999890004407823, - "loss": 46.0, - "step": 9281 - }, - { - "epoch": 1.4947864245742581, - "grad_norm": 0.002484564669430256, - "learning_rate": 0.00019999889980677627, - "loss": 46.0, - "step": 9282 - }, - { - "epoch": 1.4949474616530456, - "grad_norm": 0.0008026795694604516, - "learning_rate": 0.0001999988995694487, - "loss": 46.0, - "step": 9283 - }, - { - "epoch": 1.495108498731833, - "grad_norm": 0.002022720407694578, - "learning_rate": 0.00019999889933209553, - "loss": 46.0, - "step": 9284 - }, - { - "epoch": 1.4952695358106203, - "grad_norm": 0.0013786606723442674, - "learning_rate": 0.00019999889909471681, - "loss": 46.0, - "step": 9285 - }, - { - "epoch": 1.4954305728894077, - "grad_norm": 0.001019939430989325, - "learning_rate": 0.00019999889885731248, - "loss": 46.0, - "step": 9286 - }, - { - "epoch": 1.4955916099681952, - "grad_norm": 0.007120917085558176, - "learning_rate": 0.00019999889861988256, - "loss": 46.0, - "step": 9287 - }, - { - "epoch": 1.4957526470469826, - "grad_norm": 0.0011648134095594287, - "learning_rate": 0.00019999889838242702, - "loss": 46.0, - "step": 9288 - }, - { - "epoch": 1.49591368412577, - "grad_norm": 0.0012876932742074132, - "learning_rate": 0.0001999988981449459, - "loss": 46.0, - "step": 9289 - }, - { - "epoch": 1.4960747212045573, - "grad_norm": 0.0029910027515143156, - "learning_rate": 0.0001999988979074392, - "loss": 46.0, - "step": 9290 - }, - { - "epoch": 1.4962357582833448, - "grad_norm": 0.004170332103967667, - "learning_rate": 0.0001999988976699069, - "loss": 46.0, - "step": 9291 - }, - { - "epoch": 1.496396795362132, - "grad_norm": 0.0058249616995453835, - "learning_rate": 0.00019999889743234895, - "loss": 46.0, - "step": 9292 - }, - { - "epoch": 1.4965578324409194, - "grad_norm": 0.002981281839311123, - "learning_rate": 0.00019999889719476548, - "loss": 46.0, - "step": 9293 - }, - { - "epoch": 1.496718869519707, - "grad_norm": 0.0033790236338973045, - "learning_rate": 0.00019999889695715637, - "loss": 46.0, - "step": 9294 - }, - { - "epoch": 1.4968799065984943, - "grad_norm": 0.0013915090821683407, - "learning_rate": 0.0001999988967195217, - "loss": 46.0, - "step": 9295 - }, - { - "epoch": 1.4970409436772818, - "grad_norm": 0.0018759649246931076, - "learning_rate": 0.0001999988964818614, - "loss": 46.0, - "step": 9296 - }, - { - "epoch": 1.497201980756069, - "grad_norm": 0.0022674081847071648, - "learning_rate": 0.00019999889624417553, - "loss": 46.0, - "step": 9297 - }, - { - "epoch": 1.4973630178348565, - "grad_norm": 0.002209196100011468, - "learning_rate": 0.00019999889600646407, - "loss": 46.0, - "step": 9298 - }, - { - "epoch": 1.497524054913644, - "grad_norm": 0.0026589909102767706, - "learning_rate": 0.00019999889576872702, - "loss": 46.0, - "step": 9299 - }, - { - "epoch": 1.4976850919924312, - "grad_norm": 0.006607708986848593, - "learning_rate": 0.00019999889553096433, - "loss": 46.0, - "step": 9300 - }, - { - "epoch": 1.4978461290712186, - "grad_norm": 0.0015110252425074577, - "learning_rate": 0.00019999889529317608, - "loss": 46.0, - "step": 9301 - }, - { - "epoch": 1.498007166150006, - "grad_norm": 0.00298205460421741, - "learning_rate": 0.00019999889505536222, - "loss": 46.0, - "step": 9302 - }, - { - "epoch": 1.4981682032287935, - "grad_norm": 0.003964473493397236, - "learning_rate": 0.0001999988948175228, - "loss": 46.0, - "step": 9303 - }, - { - "epoch": 1.4983292403075807, - "grad_norm": 0.002038866514340043, - "learning_rate": 0.00019999889457965775, - "loss": 46.0, - "step": 9304 - }, - { - "epoch": 1.4984902773863682, - "grad_norm": 0.0035502715036273003, - "learning_rate": 0.00019999889434176713, - "loss": 46.0, - "step": 9305 - }, - { - "epoch": 1.4986513144651556, - "grad_norm": 0.0007741302833892405, - "learning_rate": 0.00019999889410385089, - "loss": 46.0, - "step": 9306 - }, - { - "epoch": 1.4988123515439429, - "grad_norm": 0.0036379615776240826, - "learning_rate": 0.00019999889386590906, - "loss": 46.0, - "step": 9307 - }, - { - "epoch": 1.4989733886227303, - "grad_norm": 0.001970453653484583, - "learning_rate": 0.00019999889362794164, - "loss": 46.0, - "step": 9308 - }, - { - "epoch": 1.4991344257015178, - "grad_norm": 0.004272512160241604, - "learning_rate": 0.00019999889338994864, - "loss": 46.0, - "step": 9309 - }, - { - "epoch": 1.4992954627803052, - "grad_norm": 0.0005225231288932264, - "learning_rate": 0.00019999889315193002, - "loss": 46.0, - "step": 9310 - }, - { - "epoch": 1.4994564998590927, - "grad_norm": 0.003577888011932373, - "learning_rate": 0.00019999889291388582, - "loss": 46.0, - "step": 9311 - }, - { - "epoch": 1.49961753693788, - "grad_norm": 0.0007586209685541689, - "learning_rate": 0.00019999889267581603, - "loss": 46.0, - "step": 9312 - }, - { - "epoch": 1.4997785740166674, - "grad_norm": 0.0017913278425112367, - "learning_rate": 0.00019999889243772065, - "loss": 46.0, - "step": 9313 - }, - { - "epoch": 1.4999396110954546, - "grad_norm": 0.0008608452044427395, - "learning_rate": 0.00019999889219959966, - "loss": 46.0, - "step": 9314 - }, - { - "epoch": 1.500100648174242, - "grad_norm": 0.0011282166233286262, - "learning_rate": 0.00019999889196145308, - "loss": 46.0, - "step": 9315 - }, - { - "epoch": 1.5002616852530295, - "grad_norm": 0.003429776756092906, - "learning_rate": 0.00019999889172328091, - "loss": 46.0, - "step": 9316 - }, - { - "epoch": 1.500422722331817, - "grad_norm": 0.0012224511010572314, - "learning_rate": 0.00019999889148508313, - "loss": 46.0, - "step": 9317 - }, - { - "epoch": 1.5005837594106044, - "grad_norm": 0.0031226149294525385, - "learning_rate": 0.00019999889124685977, - "loss": 46.0, - "step": 9318 - }, - { - "epoch": 1.5007447964893916, - "grad_norm": 0.0020698532462120056, - "learning_rate": 0.0001999988910086108, - "loss": 46.0, - "step": 9319 - }, - { - "epoch": 1.500905833568179, - "grad_norm": 0.013390767388045788, - "learning_rate": 0.00019999889077033624, - "loss": 46.0, - "step": 9320 - }, - { - "epoch": 1.5010668706469663, - "grad_norm": 0.002201109193265438, - "learning_rate": 0.0001999988905320361, - "loss": 46.0, - "step": 9321 - }, - { - "epoch": 1.5012279077257538, - "grad_norm": 0.0023974617943167686, - "learning_rate": 0.00019999889029371037, - "loss": 46.0, - "step": 9322 - }, - { - "epoch": 1.5013889448045412, - "grad_norm": 0.004037914797663689, - "learning_rate": 0.00019999889005535904, - "loss": 46.0, - "step": 9323 - }, - { - "epoch": 1.5015499818833287, - "grad_norm": 0.00031237577786669135, - "learning_rate": 0.0001999988898169821, - "loss": 46.0, - "step": 9324 - }, - { - "epoch": 1.5017110189621161, - "grad_norm": 0.0017866770504042506, - "learning_rate": 0.0001999988895785796, - "loss": 46.0, - "step": 9325 - }, - { - "epoch": 1.5018720560409036, - "grad_norm": 0.0011356729082763195, - "learning_rate": 0.00019999888934015147, - "loss": 46.0, - "step": 9326 - }, - { - "epoch": 1.5020330931196908, - "grad_norm": 0.004935161676257849, - "learning_rate": 0.00019999888910169773, - "loss": 46.0, - "step": 9327 - }, - { - "epoch": 1.5021941301984783, - "grad_norm": 0.0003700161469168961, - "learning_rate": 0.00019999888886321844, - "loss": 46.0, - "step": 9328 - }, - { - "epoch": 1.5023551672772655, - "grad_norm": 0.000601876643486321, - "learning_rate": 0.00019999888862471353, - "loss": 46.0, - "step": 9329 - }, - { - "epoch": 1.502516204356053, - "grad_norm": 0.001284827827475965, - "learning_rate": 0.00019999888838618304, - "loss": 46.0, - "step": 9330 - }, - { - "epoch": 1.5026772414348404, - "grad_norm": 0.0016494819428771734, - "learning_rate": 0.00019999888814762695, - "loss": 46.0, - "step": 9331 - }, - { - "epoch": 1.5028382785136278, - "grad_norm": 0.004624811466783285, - "learning_rate": 0.00019999888790904526, - "loss": 46.0, - "step": 9332 - }, - { - "epoch": 1.5029993155924153, - "grad_norm": 0.0026612093206495047, - "learning_rate": 0.00019999888767043797, - "loss": 46.0, - "step": 9333 - }, - { - "epoch": 1.5031603526712025, - "grad_norm": 0.0004504411481320858, - "learning_rate": 0.0001999988874318051, - "loss": 46.0, - "step": 9334 - }, - { - "epoch": 1.50332138974999, - "grad_norm": 0.0011903643608093262, - "learning_rate": 0.00019999888719314664, - "loss": 46.0, - "step": 9335 - }, - { - "epoch": 1.5034824268287772, - "grad_norm": 0.0015241664368659258, - "learning_rate": 0.00019999888695446257, - "loss": 46.0, - "step": 9336 - }, - { - "epoch": 1.5036434639075646, - "grad_norm": 0.00607029115781188, - "learning_rate": 0.0001999988867157529, - "loss": 46.0, - "step": 9337 - }, - { - "epoch": 1.503804500986352, - "grad_norm": 0.0006795867811888456, - "learning_rate": 0.00019999888647701767, - "loss": 46.0, - "step": 9338 - }, - { - "epoch": 1.5039655380651396, - "grad_norm": 0.001204039203003049, - "learning_rate": 0.00019999888623825678, - "loss": 46.0, - "step": 9339 - }, - { - "epoch": 1.504126575143927, - "grad_norm": 0.005060479044914246, - "learning_rate": 0.00019999888599947036, - "loss": 46.0, - "step": 9340 - }, - { - "epoch": 1.5042876122227142, - "grad_norm": 0.002776928711682558, - "learning_rate": 0.0001999988857606583, - "loss": 46.0, - "step": 9341 - }, - { - "epoch": 1.5044486493015017, - "grad_norm": 0.0007243006257340312, - "learning_rate": 0.00019999888552182067, - "loss": 46.0, - "step": 9342 - }, - { - "epoch": 1.504609686380289, - "grad_norm": 0.0010960230138152838, - "learning_rate": 0.00019999888528295746, - "loss": 46.0, - "step": 9343 - }, - { - "epoch": 1.5047707234590764, - "grad_norm": 0.005405620206147432, - "learning_rate": 0.00019999888504406864, - "loss": 46.0, - "step": 9344 - }, - { - "epoch": 1.5049317605378638, - "grad_norm": 0.003555117640644312, - "learning_rate": 0.0001999988848051542, - "loss": 46.0, - "step": 9345 - }, - { - "epoch": 1.5050927976166513, - "grad_norm": 0.0032495323102921247, - "learning_rate": 0.00019999888456621417, - "loss": 46.0, - "step": 9346 - }, - { - "epoch": 1.5052538346954387, - "grad_norm": 0.002382584847509861, - "learning_rate": 0.0001999988843272486, - "loss": 46.0, - "step": 9347 - }, - { - "epoch": 1.5054148717742262, - "grad_norm": 0.000834115722682327, - "learning_rate": 0.0001999988840882574, - "loss": 46.0, - "step": 9348 - }, - { - "epoch": 1.5055759088530134, - "grad_norm": 0.002534291008487344, - "learning_rate": 0.0001999988838492406, - "loss": 46.0, - "step": 9349 - }, - { - "epoch": 1.5057369459318009, - "grad_norm": 0.003397679189220071, - "learning_rate": 0.0001999988836101982, - "loss": 46.0, - "step": 9350 - }, - { - "epoch": 1.505897983010588, - "grad_norm": 0.0018755896016955376, - "learning_rate": 0.0001999988833711302, - "loss": 46.0, - "step": 9351 - }, - { - "epoch": 1.5060590200893755, - "grad_norm": 0.0029869726859033108, - "learning_rate": 0.00019999888313203666, - "loss": 46.0, - "step": 9352 - }, - { - "epoch": 1.506220057168163, - "grad_norm": 0.002450810046866536, - "learning_rate": 0.00019999888289291747, - "loss": 46.0, - "step": 9353 - }, - { - "epoch": 1.5063810942469504, - "grad_norm": 0.0018947371281683445, - "learning_rate": 0.0001999988826537727, - "loss": 46.0, - "step": 9354 - }, - { - "epoch": 1.5065421313257379, - "grad_norm": 0.009498589672148228, - "learning_rate": 0.00019999888241460233, - "loss": 46.0, - "step": 9355 - }, - { - "epoch": 1.5067031684045251, - "grad_norm": 0.002005457179620862, - "learning_rate": 0.00019999888217540638, - "loss": 46.0, - "step": 9356 - }, - { - "epoch": 1.5068642054833126, - "grad_norm": 0.008147171698510647, - "learning_rate": 0.00019999888193618484, - "loss": 46.0, - "step": 9357 - }, - { - "epoch": 1.5070252425620998, - "grad_norm": 0.0025351904332637787, - "learning_rate": 0.0001999988816969377, - "loss": 46.0, - "step": 9358 - }, - { - "epoch": 1.5071862796408872, - "grad_norm": 0.003502566833049059, - "learning_rate": 0.00019999888145766495, - "loss": 46.0, - "step": 9359 - }, - { - "epoch": 1.5073473167196747, - "grad_norm": 0.0014478863449767232, - "learning_rate": 0.00019999888121836662, - "loss": 46.0, - "step": 9360 - }, - { - "epoch": 1.5075083537984622, - "grad_norm": 0.00041848496766760945, - "learning_rate": 0.00019999888097904268, - "loss": 46.0, - "step": 9361 - }, - { - "epoch": 1.5076693908772496, - "grad_norm": 0.004982709884643555, - "learning_rate": 0.00019999888073969315, - "loss": 46.0, - "step": 9362 - }, - { - "epoch": 1.5078304279560368, - "grad_norm": 0.0007134603220038116, - "learning_rate": 0.00019999888050031803, - "loss": 46.0, - "step": 9363 - }, - { - "epoch": 1.5079914650348243, - "grad_norm": 0.005868175532668829, - "learning_rate": 0.0001999988802609173, - "loss": 46.0, - "step": 9364 - }, - { - "epoch": 1.5081525021136115, - "grad_norm": 0.0010554182808846235, - "learning_rate": 0.00019999888002149101, - "loss": 46.0, - "step": 9365 - }, - { - "epoch": 1.508313539192399, - "grad_norm": 0.003978913649916649, - "learning_rate": 0.0001999988797820391, - "loss": 46.0, - "step": 9366 - }, - { - "epoch": 1.5084745762711864, - "grad_norm": 0.0032237533014267683, - "learning_rate": 0.0001999988795425616, - "loss": 46.0, - "step": 9367 - }, - { - "epoch": 1.5086356133499739, - "grad_norm": 0.004461077507585287, - "learning_rate": 0.00019999887930305848, - "loss": 46.0, - "step": 9368 - }, - { - "epoch": 1.5087966504287613, - "grad_norm": 0.0006379224942065775, - "learning_rate": 0.00019999887906352982, - "loss": 46.0, - "step": 9369 - }, - { - "epoch": 1.5089576875075488, - "grad_norm": 0.0012650849530473351, - "learning_rate": 0.00019999887882397554, - "loss": 46.0, - "step": 9370 - }, - { - "epoch": 1.509118724586336, - "grad_norm": 0.0005488916649483144, - "learning_rate": 0.00019999887858439567, - "loss": 46.0, - "step": 9371 - }, - { - "epoch": 1.5092797616651232, - "grad_norm": 0.001251340378075838, - "learning_rate": 0.0001999988783447902, - "loss": 46.0, - "step": 9372 - }, - { - "epoch": 1.5094407987439107, - "grad_norm": 0.0017519919201731682, - "learning_rate": 0.00019999887810515912, - "loss": 46.0, - "step": 9373 - }, - { - "epoch": 1.5096018358226981, - "grad_norm": 0.004199097398668528, - "learning_rate": 0.00019999887786550246, - "loss": 46.0, - "step": 9374 - }, - { - "epoch": 1.5097628729014856, - "grad_norm": 0.00195657298900187, - "learning_rate": 0.00019999887762582022, - "loss": 46.0, - "step": 9375 - }, - { - "epoch": 1.509923909980273, - "grad_norm": 0.0005312208668328822, - "learning_rate": 0.0001999988773861124, - "loss": 46.0, - "step": 9376 - }, - { - "epoch": 1.5100849470590605, - "grad_norm": 0.0004765833728015423, - "learning_rate": 0.00019999887714637892, - "loss": 46.0, - "step": 9377 - }, - { - "epoch": 1.5102459841378477, - "grad_norm": 0.0007699292618781328, - "learning_rate": 0.0001999988769066199, - "loss": 46.0, - "step": 9378 - }, - { - "epoch": 1.5104070212166352, - "grad_norm": 0.0006723885308019817, - "learning_rate": 0.00019999887666683527, - "loss": 46.0, - "step": 9379 - }, - { - "epoch": 1.5105680582954224, - "grad_norm": 0.0016152807511389256, - "learning_rate": 0.00019999887642702503, - "loss": 46.0, - "step": 9380 - }, - { - "epoch": 1.5107290953742099, - "grad_norm": 0.003390349680557847, - "learning_rate": 0.0001999988761871892, - "loss": 46.0, - "step": 9381 - }, - { - "epoch": 1.5108901324529973, - "grad_norm": 0.0036467250902205706, - "learning_rate": 0.00019999887594732778, - "loss": 46.0, - "step": 9382 - }, - { - "epoch": 1.5110511695317848, - "grad_norm": 0.0031857339199632406, - "learning_rate": 0.00019999887570744078, - "loss": 46.0, - "step": 9383 - }, - { - "epoch": 1.5112122066105722, - "grad_norm": 0.0025473847053945065, - "learning_rate": 0.00019999887546752815, - "loss": 46.0, - "step": 9384 - }, - { - "epoch": 1.5113732436893594, - "grad_norm": 0.0004909922135993838, - "learning_rate": 0.00019999887522758995, - "loss": 46.0, - "step": 9385 - }, - { - "epoch": 1.5115342807681469, - "grad_norm": 0.0008730872650630772, - "learning_rate": 0.00019999887498762617, - "loss": 46.0, - "step": 9386 - }, - { - "epoch": 1.5116953178469341, - "grad_norm": 0.0008864441188052297, - "learning_rate": 0.0001999988747476368, - "loss": 46.0, - "step": 9387 - }, - { - "epoch": 1.5118563549257216, - "grad_norm": 0.0012393741635605693, - "learning_rate": 0.0001999988745076218, - "loss": 46.0, - "step": 9388 - }, - { - "epoch": 1.512017392004509, - "grad_norm": 0.0033559727016836405, - "learning_rate": 0.00019999887426758121, - "loss": 46.0, - "step": 9389 - }, - { - "epoch": 1.5121784290832965, - "grad_norm": 0.004136817064136267, - "learning_rate": 0.00019999887402751505, - "loss": 46.0, - "step": 9390 - }, - { - "epoch": 1.512339466162084, - "grad_norm": 0.006455062422901392, - "learning_rate": 0.00019999887378742328, - "loss": 46.0, - "step": 9391 - }, - { - "epoch": 1.5125005032408714, - "grad_norm": 0.0031629258301109076, - "learning_rate": 0.00019999887354730592, - "loss": 46.0, - "step": 9392 - }, - { - "epoch": 1.5126615403196586, - "grad_norm": 0.0013771563535556197, - "learning_rate": 0.00019999887330716297, - "loss": 46.0, - "step": 9393 - }, - { - "epoch": 1.5128225773984458, - "grad_norm": 0.004468581173568964, - "learning_rate": 0.0001999988730669944, - "loss": 46.0, - "step": 9394 - }, - { - "epoch": 1.5129836144772333, - "grad_norm": 0.001272951834835112, - "learning_rate": 0.00019999887282680026, - "loss": 46.0, - "step": 9395 - }, - { - "epoch": 1.5131446515560207, - "grad_norm": 0.0011829917784780264, - "learning_rate": 0.00019999887258658052, - "loss": 46.0, - "step": 9396 - }, - { - "epoch": 1.5133056886348082, - "grad_norm": 0.0007782801403664052, - "learning_rate": 0.00019999887234633517, - "loss": 46.0, - "step": 9397 - }, - { - "epoch": 1.5134667257135956, - "grad_norm": 0.004144345875829458, - "learning_rate": 0.00019999887210606426, - "loss": 46.0, - "step": 9398 - }, - { - "epoch": 1.513627762792383, - "grad_norm": 0.0007989995647221804, - "learning_rate": 0.00019999887186576774, - "loss": 46.0, - "step": 9399 - }, - { - "epoch": 1.5137887998711703, - "grad_norm": 0.0019902114290744066, - "learning_rate": 0.00019999887162544563, - "loss": 46.0, - "step": 9400 - }, - { - "epoch": 1.5139498369499578, - "grad_norm": 0.006263981107622385, - "learning_rate": 0.0001999988713850979, - "loss": 46.0, - "step": 9401 - }, - { - "epoch": 1.514110874028745, - "grad_norm": 0.0006455009570345283, - "learning_rate": 0.00019999887114472459, - "loss": 46.0, - "step": 9402 - }, - { - "epoch": 1.5142719111075325, - "grad_norm": 0.002476086374372244, - "learning_rate": 0.00019999887090432568, - "loss": 46.0, - "step": 9403 - }, - { - "epoch": 1.51443294818632, - "grad_norm": 0.0046978420577943325, - "learning_rate": 0.0001999988706639012, - "loss": 46.0, - "step": 9404 - }, - { - "epoch": 1.5145939852651074, - "grad_norm": 0.005500440485775471, - "learning_rate": 0.0001999988704234511, - "loss": 46.0, - "step": 9405 - }, - { - "epoch": 1.5147550223438948, - "grad_norm": 0.00466714845970273, - "learning_rate": 0.0001999988701829754, - "loss": 46.0, - "step": 9406 - }, - { - "epoch": 1.514916059422682, - "grad_norm": 0.004333703778684139, - "learning_rate": 0.00019999886994247416, - "loss": 46.0, - "step": 9407 - }, - { - "epoch": 1.5150770965014695, - "grad_norm": 0.0029469789005815983, - "learning_rate": 0.00019999886970194726, - "loss": 46.0, - "step": 9408 - }, - { - "epoch": 1.5152381335802567, - "grad_norm": 0.000881087442394346, - "learning_rate": 0.00019999886946139479, - "loss": 46.0, - "step": 9409 - }, - { - "epoch": 1.5153991706590442, - "grad_norm": 0.0007487268303520977, - "learning_rate": 0.00019999886922081672, - "loss": 46.0, - "step": 9410 - }, - { - "epoch": 1.5155602077378316, - "grad_norm": 0.001845317892730236, - "learning_rate": 0.00019999886898021307, - "loss": 46.0, - "step": 9411 - }, - { - "epoch": 1.515721244816619, - "grad_norm": 0.0044486126862466335, - "learning_rate": 0.00019999886873958383, - "loss": 46.0, - "step": 9412 - }, - { - "epoch": 1.5158822818954065, - "grad_norm": 0.0032403534278273582, - "learning_rate": 0.00019999886849892895, - "loss": 46.0, - "step": 9413 - }, - { - "epoch": 1.5160433189741938, - "grad_norm": 0.001502752536907792, - "learning_rate": 0.0001999988682582485, - "loss": 46.0, - "step": 9414 - }, - { - "epoch": 1.5162043560529812, - "grad_norm": 0.0024749189615249634, - "learning_rate": 0.00019999886801754245, - "loss": 46.0, - "step": 9415 - }, - { - "epoch": 1.5163653931317684, - "grad_norm": 0.004080994985997677, - "learning_rate": 0.00019999886777681086, - "loss": 46.0, - "step": 9416 - }, - { - "epoch": 1.5165264302105559, - "grad_norm": 0.0015440452843904495, - "learning_rate": 0.0001999988675360536, - "loss": 46.0, - "step": 9417 - }, - { - "epoch": 1.5166874672893433, - "grad_norm": 0.003377762855961919, - "learning_rate": 0.0001999988672952708, - "loss": 46.0, - "step": 9418 - }, - { - "epoch": 1.5168485043681308, - "grad_norm": 0.00448443740606308, - "learning_rate": 0.00019999886705446239, - "loss": 46.0, - "step": 9419 - }, - { - "epoch": 1.5170095414469182, - "grad_norm": 0.0027451671194285154, - "learning_rate": 0.0001999988668136284, - "loss": 46.0, - "step": 9420 - }, - { - "epoch": 1.5171705785257057, - "grad_norm": 0.00677466532215476, - "learning_rate": 0.00019999886657276876, - "loss": 46.0, - "step": 9421 - }, - { - "epoch": 1.517331615604493, - "grad_norm": 0.0010044913506135345, - "learning_rate": 0.00019999886633188357, - "loss": 46.0, - "step": 9422 - }, - { - "epoch": 1.5174926526832804, - "grad_norm": 0.0032073038164526224, - "learning_rate": 0.00019999886609097276, - "loss": 46.0, - "step": 9423 - }, - { - "epoch": 1.5176536897620676, - "grad_norm": 0.003198109334334731, - "learning_rate": 0.00019999886585003637, - "loss": 46.0, - "step": 9424 - }, - { - "epoch": 1.517814726840855, - "grad_norm": 0.010036427527666092, - "learning_rate": 0.0001999988656090744, - "loss": 46.0, - "step": 9425 - }, - { - "epoch": 1.5179757639196425, - "grad_norm": 0.0008063007844612002, - "learning_rate": 0.00019999886536808682, - "loss": 46.0, - "step": 9426 - }, - { - "epoch": 1.51813680099843, - "grad_norm": 0.0009237265912815928, - "learning_rate": 0.00019999886512707364, - "loss": 46.0, - "step": 9427 - }, - { - "epoch": 1.5182978380772174, - "grad_norm": 0.0017372327856719494, - "learning_rate": 0.00019999886488603487, - "loss": 46.0, - "step": 9428 - }, - { - "epoch": 1.5184588751560046, - "grad_norm": 0.0023281259927898645, - "learning_rate": 0.0001999988646449705, - "loss": 46.0, - "step": 9429 - }, - { - "epoch": 1.518619912234792, - "grad_norm": 0.0008105894667096436, - "learning_rate": 0.00019999886440388056, - "loss": 46.0, - "step": 9430 - }, - { - "epoch": 1.5187809493135793, - "grad_norm": 0.002628910355269909, - "learning_rate": 0.000199998864162765, - "loss": 46.0, - "step": 9431 - }, - { - "epoch": 1.5189419863923668, - "grad_norm": 0.004149747081100941, - "learning_rate": 0.0001999988639216239, - "loss": 46.0, - "step": 9432 - }, - { - "epoch": 1.5191030234711542, - "grad_norm": 0.0019812181126326323, - "learning_rate": 0.00019999886368045713, - "loss": 46.0, - "step": 9433 - }, - { - "epoch": 1.5192640605499417, - "grad_norm": 0.001852197339758277, - "learning_rate": 0.0001999988634392648, - "loss": 46.0, - "step": 9434 - }, - { - "epoch": 1.5194250976287291, - "grad_norm": 0.001097636646591127, - "learning_rate": 0.00019999886319804687, - "loss": 46.0, - "step": 9435 - }, - { - "epoch": 1.5195861347075164, - "grad_norm": 0.002315138466656208, - "learning_rate": 0.00019999886295680335, - "loss": 46.0, - "step": 9436 - }, - { - "epoch": 1.5197471717863038, - "grad_norm": 0.0005515816737897694, - "learning_rate": 0.00019999886271553422, - "loss": 46.0, - "step": 9437 - }, - { - "epoch": 1.519908208865091, - "grad_norm": 0.013057180680334568, - "learning_rate": 0.0001999988624742395, - "loss": 46.0, - "step": 9438 - }, - { - "epoch": 1.5200692459438785, - "grad_norm": 0.005942794028669596, - "learning_rate": 0.00019999886223291918, - "loss": 46.0, - "step": 9439 - }, - { - "epoch": 1.520230283022666, - "grad_norm": 0.0067812916822731495, - "learning_rate": 0.00019999886199157329, - "loss": 46.0, - "step": 9440 - }, - { - "epoch": 1.5203913201014534, - "grad_norm": 0.0021425816230475903, - "learning_rate": 0.0001999988617502018, - "loss": 46.0, - "step": 9441 - }, - { - "epoch": 1.5205523571802408, - "grad_norm": 0.0017270178068429232, - "learning_rate": 0.0001999988615088047, - "loss": 46.0, - "step": 9442 - }, - { - "epoch": 1.5207133942590283, - "grad_norm": 0.0011038320371881127, - "learning_rate": 0.00019999886126738204, - "loss": 46.0, - "step": 9443 - }, - { - "epoch": 1.5208744313378155, - "grad_norm": 0.0018948365468531847, - "learning_rate": 0.00019999886102593374, - "loss": 46.0, - "step": 9444 - }, - { - "epoch": 1.521035468416603, - "grad_norm": 0.0007690870552323759, - "learning_rate": 0.00019999886078445986, - "loss": 46.0, - "step": 9445 - }, - { - "epoch": 1.5211965054953902, - "grad_norm": 0.0009242234518751502, - "learning_rate": 0.0001999988605429604, - "loss": 46.0, - "step": 9446 - }, - { - "epoch": 1.5213575425741777, - "grad_norm": 0.0010537394555285573, - "learning_rate": 0.00019999886030143532, - "loss": 46.0, - "step": 9447 - }, - { - "epoch": 1.521518579652965, - "grad_norm": 0.0008042344707064331, - "learning_rate": 0.0001999988600598847, - "loss": 46.0, - "step": 9448 - }, - { - "epoch": 1.5216796167317526, - "grad_norm": 0.003296369919553399, - "learning_rate": 0.0001999988598183084, - "loss": 46.0, - "step": 9449 - }, - { - "epoch": 1.52184065381054, - "grad_norm": 0.006282202899456024, - "learning_rate": 0.00019999885957670656, - "loss": 46.0, - "step": 9450 - }, - { - "epoch": 1.5220016908893272, - "grad_norm": 0.002256965497508645, - "learning_rate": 0.00019999885933507915, - "loss": 46.0, - "step": 9451 - }, - { - "epoch": 1.5221627279681147, - "grad_norm": 0.003114009741693735, - "learning_rate": 0.0001999988590934261, - "loss": 46.0, - "step": 9452 - }, - { - "epoch": 1.522323765046902, - "grad_norm": 0.0042876312509179115, - "learning_rate": 0.00019999885885174746, - "loss": 46.0, - "step": 9453 - }, - { - "epoch": 1.5224848021256894, - "grad_norm": 0.0022926165256649256, - "learning_rate": 0.00019999885861004323, - "loss": 46.0, - "step": 9454 - }, - { - "epoch": 1.5226458392044768, - "grad_norm": 0.0018849418265745044, - "learning_rate": 0.0001999988583683134, - "loss": 46.0, - "step": 9455 - }, - { - "epoch": 1.5228068762832643, - "grad_norm": 0.005244969390332699, - "learning_rate": 0.000199998858126558, - "loss": 46.0, - "step": 9456 - }, - { - "epoch": 1.5229679133620517, - "grad_norm": 0.004823376424610615, - "learning_rate": 0.000199998857884777, - "loss": 46.0, - "step": 9457 - }, - { - "epoch": 1.523128950440839, - "grad_norm": 0.001401046640239656, - "learning_rate": 0.00019999885764297037, - "loss": 46.0, - "step": 9458 - }, - { - "epoch": 1.5232899875196264, - "grad_norm": 0.004227713216096163, - "learning_rate": 0.00019999885740113818, - "loss": 46.0, - "step": 9459 - }, - { - "epoch": 1.5234510245984136, - "grad_norm": 0.0009940174641087651, - "learning_rate": 0.0001999988571592804, - "loss": 46.0, - "step": 9460 - }, - { - "epoch": 1.523612061677201, - "grad_norm": 0.0020566503517329693, - "learning_rate": 0.000199998856917397, - "loss": 46.0, - "step": 9461 - }, - { - "epoch": 1.5237730987559885, - "grad_norm": 0.0027434672228991985, - "learning_rate": 0.000199998856675488, - "loss": 46.0, - "step": 9462 - }, - { - "epoch": 1.523934135834776, - "grad_norm": 0.000408309162594378, - "learning_rate": 0.00019999885643355345, - "loss": 46.0, - "step": 9463 - }, - { - "epoch": 1.5240951729135634, - "grad_norm": 0.0031712001655250788, - "learning_rate": 0.00019999885619159327, - "loss": 46.0, - "step": 9464 - }, - { - "epoch": 1.524256209992351, - "grad_norm": 0.003166772425174713, - "learning_rate": 0.0001999988559496075, - "loss": 46.0, - "step": 9465 - }, - { - "epoch": 1.5244172470711381, - "grad_norm": 0.0014062365517020226, - "learning_rate": 0.00019999885570759615, - "loss": 46.0, - "step": 9466 - }, - { - "epoch": 1.5245782841499254, - "grad_norm": 0.002360724611207843, - "learning_rate": 0.00019999885546555918, - "loss": 46.0, - "step": 9467 - }, - { - "epoch": 1.5247393212287128, - "grad_norm": 0.0029434501193463802, - "learning_rate": 0.00019999885522349663, - "loss": 46.0, - "step": 9468 - }, - { - "epoch": 1.5249003583075003, - "grad_norm": 0.0009128785459324718, - "learning_rate": 0.00019999885498140848, - "loss": 46.0, - "step": 9469 - }, - { - "epoch": 1.5250613953862877, - "grad_norm": 0.0022576386108994484, - "learning_rate": 0.00019999885473929476, - "loss": 46.0, - "step": 9470 - }, - { - "epoch": 1.5252224324650752, - "grad_norm": 0.005614512134343386, - "learning_rate": 0.00019999885449715544, - "loss": 46.0, - "step": 9471 - }, - { - "epoch": 1.5253834695438626, - "grad_norm": 0.0014592206571251154, - "learning_rate": 0.00019999885425499048, - "loss": 46.0, - "step": 9472 - }, - { - "epoch": 1.5255445066226498, - "grad_norm": 0.00301287230104208, - "learning_rate": 0.00019999885401279996, - "loss": 46.0, - "step": 9473 - }, - { - "epoch": 1.5257055437014373, - "grad_norm": 0.00235157017596066, - "learning_rate": 0.00019999885377058386, - "loss": 46.0, - "step": 9474 - }, - { - "epoch": 1.5258665807802245, - "grad_norm": 0.0006280430243350565, - "learning_rate": 0.00019999885352834214, - "loss": 46.0, - "step": 9475 - }, - { - "epoch": 1.526027617859012, - "grad_norm": 0.0027024049777537584, - "learning_rate": 0.0001999988532860748, - "loss": 46.0, - "step": 9476 - }, - { - "epoch": 1.5261886549377994, - "grad_norm": 0.002077752025797963, - "learning_rate": 0.0001999988530437819, - "loss": 46.0, - "step": 9477 - }, - { - "epoch": 1.5263496920165869, - "grad_norm": 0.0024714572355151176, - "learning_rate": 0.00019999885280146343, - "loss": 46.0, - "step": 9478 - }, - { - "epoch": 1.5265107290953743, - "grad_norm": 0.0029144366271793842, - "learning_rate": 0.00019999885255911934, - "loss": 46.0, - "step": 9479 - }, - { - "epoch": 1.5266717661741616, - "grad_norm": 0.0006407216424122453, - "learning_rate": 0.00019999885231674965, - "loss": 46.0, - "step": 9480 - }, - { - "epoch": 1.526832803252949, - "grad_norm": 0.002878450322896242, - "learning_rate": 0.00019999885207435436, - "loss": 46.0, - "step": 9481 - }, - { - "epoch": 1.5269938403317362, - "grad_norm": 0.0013184280833229423, - "learning_rate": 0.0001999988518319335, - "loss": 46.0, - "step": 9482 - }, - { - "epoch": 1.5271548774105237, - "grad_norm": 0.0007456211023963988, - "learning_rate": 0.000199998851589487, - "loss": 46.0, - "step": 9483 - }, - { - "epoch": 1.5273159144893111, - "grad_norm": 0.001577394432388246, - "learning_rate": 0.00019999885134701497, - "loss": 46.0, - "step": 9484 - }, - { - "epoch": 1.5274769515680986, - "grad_norm": 0.004111071117222309, - "learning_rate": 0.0001999988511045173, - "loss": 46.0, - "step": 9485 - }, - { - "epoch": 1.527637988646886, - "grad_norm": 0.0010693949880078435, - "learning_rate": 0.00019999885086199407, - "loss": 46.0, - "step": 9486 - }, - { - "epoch": 1.5277990257256735, - "grad_norm": 0.0005185760091990232, - "learning_rate": 0.00019999885061944522, - "loss": 46.0, - "step": 9487 - }, - { - "epoch": 1.5279600628044607, - "grad_norm": 0.0025632947217673063, - "learning_rate": 0.00019999885037687076, - "loss": 46.0, - "step": 9488 - }, - { - "epoch": 1.528121099883248, - "grad_norm": 0.0009062702883966267, - "learning_rate": 0.00019999885013427074, - "loss": 46.0, - "step": 9489 - }, - { - "epoch": 1.5282821369620354, - "grad_norm": 0.0017577996477484703, - "learning_rate": 0.0001999988498916451, - "loss": 46.0, - "step": 9490 - }, - { - "epoch": 1.5284431740408229, - "grad_norm": 0.0030216483864933252, - "learning_rate": 0.00019999884964899388, - "loss": 46.0, - "step": 9491 - }, - { - "epoch": 1.5286042111196103, - "grad_norm": 0.0011699993629008532, - "learning_rate": 0.00019999884940631704, - "loss": 46.0, - "step": 9492 - }, - { - "epoch": 1.5287652481983978, - "grad_norm": 0.0007909003761596978, - "learning_rate": 0.00019999884916361462, - "loss": 46.0, - "step": 9493 - }, - { - "epoch": 1.5289262852771852, - "grad_norm": 0.009436006657779217, - "learning_rate": 0.00019999884892088664, - "loss": 46.0, - "step": 9494 - }, - { - "epoch": 1.5290873223559724, - "grad_norm": 0.0005345451063476503, - "learning_rate": 0.000199998848678133, - "loss": 46.0, - "step": 9495 - }, - { - "epoch": 1.52924835943476, - "grad_norm": 0.0019343576859682798, - "learning_rate": 0.00019999884843535382, - "loss": 46.0, - "step": 9496 - }, - { - "epoch": 1.5294093965135471, - "grad_norm": 0.0012227301485836506, - "learning_rate": 0.00019999884819254903, - "loss": 46.0, - "step": 9497 - }, - { - "epoch": 1.5295704335923346, - "grad_norm": 0.0024370127357542515, - "learning_rate": 0.00019999884794971867, - "loss": 46.0, - "step": 9498 - }, - { - "epoch": 1.529731470671122, - "grad_norm": 0.005993218161165714, - "learning_rate": 0.00019999884770686266, - "loss": 46.0, - "step": 9499 - }, - { - "epoch": 1.5298925077499095, - "grad_norm": 0.0016420215833932161, - "learning_rate": 0.00019999884746398108, - "loss": 46.0, - "step": 9500 - }, - { - "epoch": 1.530053544828697, - "grad_norm": 0.0007157622021622956, - "learning_rate": 0.00019999884722107393, - "loss": 46.0, - "step": 9501 - }, - { - "epoch": 1.5302145819074842, - "grad_norm": 0.002407926134765148, - "learning_rate": 0.00019999884697814116, - "loss": 46.0, - "step": 9502 - }, - { - "epoch": 1.5303756189862716, - "grad_norm": 0.002988303080201149, - "learning_rate": 0.0001999988467351828, - "loss": 46.0, - "step": 9503 - }, - { - "epoch": 1.5305366560650588, - "grad_norm": 0.006468224339187145, - "learning_rate": 0.00019999884649219882, - "loss": 46.0, - "step": 9504 - }, - { - "epoch": 1.5306976931438463, - "grad_norm": 0.0013437657617032528, - "learning_rate": 0.0001999988462491893, - "loss": 46.0, - "step": 9505 - }, - { - "epoch": 1.5308587302226337, - "grad_norm": 0.0013888109242543578, - "learning_rate": 0.00019999884600615416, - "loss": 46.0, - "step": 9506 - }, - { - "epoch": 1.5310197673014212, - "grad_norm": 0.0010514655150473118, - "learning_rate": 0.0001999988457630934, - "loss": 46.0, - "step": 9507 - }, - { - "epoch": 1.5311808043802086, - "grad_norm": 0.00038760315510444343, - "learning_rate": 0.00019999884552000707, - "loss": 46.0, - "step": 9508 - }, - { - "epoch": 1.531341841458996, - "grad_norm": 0.005011890549212694, - "learning_rate": 0.00019999884527689514, - "loss": 46.0, - "step": 9509 - }, - { - "epoch": 1.5315028785377833, - "grad_norm": 0.002129156840965152, - "learning_rate": 0.00019999884503375763, - "loss": 46.0, - "step": 9510 - }, - { - "epoch": 1.5316639156165706, - "grad_norm": 0.0015337747754529119, - "learning_rate": 0.0001999988447905945, - "loss": 46.0, - "step": 9511 - }, - { - "epoch": 1.531824952695358, - "grad_norm": 0.007746582850813866, - "learning_rate": 0.00019999884454740578, - "loss": 46.0, - "step": 9512 - }, - { - "epoch": 1.5319859897741455, - "grad_norm": 0.008065969683229923, - "learning_rate": 0.00019999884430419148, - "loss": 46.0, - "step": 9513 - }, - { - "epoch": 1.532147026852933, - "grad_norm": 0.0006683963001705706, - "learning_rate": 0.00019999884406095156, - "loss": 46.0, - "step": 9514 - }, - { - "epoch": 1.5323080639317204, - "grad_norm": 0.0038896435871720314, - "learning_rate": 0.00019999884381768609, - "loss": 46.0, - "step": 9515 - }, - { - "epoch": 1.5324691010105078, - "grad_norm": 0.011683257296681404, - "learning_rate": 0.000199998843574395, - "loss": 46.0, - "step": 9516 - }, - { - "epoch": 1.532630138089295, - "grad_norm": 0.0010450986446812749, - "learning_rate": 0.0001999988433310783, - "loss": 46.0, - "step": 9517 - }, - { - "epoch": 1.5327911751680825, - "grad_norm": 0.000758933019824326, - "learning_rate": 0.00019999884308773602, - "loss": 46.0, - "step": 9518 - }, - { - "epoch": 1.5329522122468697, - "grad_norm": 0.0019322397420182824, - "learning_rate": 0.00019999884284436817, - "loss": 46.0, - "step": 9519 - }, - { - "epoch": 1.5331132493256572, - "grad_norm": 0.00167573313228786, - "learning_rate": 0.00019999884260097468, - "loss": 46.0, - "step": 9520 - }, - { - "epoch": 1.5332742864044446, - "grad_norm": 0.0021413343492895365, - "learning_rate": 0.00019999884235755562, - "loss": 46.0, - "step": 9521 - }, - { - "epoch": 1.533435323483232, - "grad_norm": 0.0012244421523064375, - "learning_rate": 0.00019999884211411098, - "loss": 46.0, - "step": 9522 - }, - { - "epoch": 1.5335963605620195, - "grad_norm": 0.0019074471201747656, - "learning_rate": 0.0001999988418706407, - "loss": 46.0, - "step": 9523 - }, - { - "epoch": 1.5337573976408068, - "grad_norm": 0.001719399238936603, - "learning_rate": 0.00019999884162714485, - "loss": 46.0, - "step": 9524 - }, - { - "epoch": 1.5339184347195942, - "grad_norm": 0.005042642820626497, - "learning_rate": 0.00019999884138362342, - "loss": 46.0, - "step": 9525 - }, - { - "epoch": 1.5340794717983814, - "grad_norm": 0.0006284856353886425, - "learning_rate": 0.00019999884114007638, - "loss": 46.0, - "step": 9526 - }, - { - "epoch": 1.534240508877169, - "grad_norm": 0.004295391030609608, - "learning_rate": 0.00019999884089650375, - "loss": 46.0, - "step": 9527 - }, - { - "epoch": 1.5344015459559563, - "grad_norm": 0.0026151149068027735, - "learning_rate": 0.00019999884065290556, - "loss": 46.0, - "step": 9528 - }, - { - "epoch": 1.5345625830347438, - "grad_norm": 0.002352813957259059, - "learning_rate": 0.00019999884040928172, - "loss": 46.0, - "step": 9529 - }, - { - "epoch": 1.5347236201135313, - "grad_norm": 0.001912680920213461, - "learning_rate": 0.0001999988401656323, - "loss": 46.0, - "step": 9530 - }, - { - "epoch": 1.5348846571923185, - "grad_norm": 0.0016547852428629994, - "learning_rate": 0.0001999988399219573, - "loss": 46.0, - "step": 9531 - }, - { - "epoch": 1.535045694271106, - "grad_norm": 0.003520425409078598, - "learning_rate": 0.0001999988396782567, - "loss": 46.0, - "step": 9532 - }, - { - "epoch": 1.5352067313498932, - "grad_norm": 0.0017291458789259195, - "learning_rate": 0.00019999883943453052, - "loss": 46.0, - "step": 9533 - }, - { - "epoch": 1.5353677684286806, - "grad_norm": 0.0028484223876148462, - "learning_rate": 0.00019999883919077872, - "loss": 46.0, - "step": 9534 - }, - { - "epoch": 1.535528805507468, - "grad_norm": 0.0013635712675750256, - "learning_rate": 0.00019999883894700134, - "loss": 46.0, - "step": 9535 - }, - { - "epoch": 1.5356898425862555, - "grad_norm": 0.0014535625232383609, - "learning_rate": 0.00019999883870319834, - "loss": 46.0, - "step": 9536 - }, - { - "epoch": 1.535850879665043, - "grad_norm": 0.0004540627123787999, - "learning_rate": 0.00019999883845936976, - "loss": 46.0, - "step": 9537 - }, - { - "epoch": 1.5360119167438304, - "grad_norm": 0.01284743007272482, - "learning_rate": 0.00019999883821551558, - "loss": 46.0, - "step": 9538 - }, - { - "epoch": 1.5361729538226176, - "grad_norm": 0.007350571919232607, - "learning_rate": 0.0001999988379716358, - "loss": 46.0, - "step": 9539 - }, - { - "epoch": 1.536333990901405, - "grad_norm": 0.008967571891844273, - "learning_rate": 0.00019999883772773048, - "loss": 46.0, - "step": 9540 - }, - { - "epoch": 1.5364950279801923, - "grad_norm": 0.0023639057762920856, - "learning_rate": 0.00019999883748379952, - "loss": 46.0, - "step": 9541 - }, - { - "epoch": 1.5366560650589798, - "grad_norm": 0.0007659141556359828, - "learning_rate": 0.00019999883723984297, - "loss": 46.0, - "step": 9542 - }, - { - "epoch": 1.5368171021377672, - "grad_norm": 0.00257940124720335, - "learning_rate": 0.0001999988369958608, - "loss": 46.0, - "step": 9543 - }, - { - "epoch": 1.5369781392165547, - "grad_norm": 0.003213014453649521, - "learning_rate": 0.0001999988367518531, - "loss": 46.0, - "step": 9544 - }, - { - "epoch": 1.5371391762953421, - "grad_norm": 0.000548528681974858, - "learning_rate": 0.00019999883650781975, - "loss": 46.0, - "step": 9545 - }, - { - "epoch": 1.5373002133741294, - "grad_norm": 0.005622244440019131, - "learning_rate": 0.00019999883626376083, - "loss": 46.0, - "step": 9546 - }, - { - "epoch": 1.5374612504529168, - "grad_norm": 0.001243532751686871, - "learning_rate": 0.0001999988360196763, - "loss": 46.0, - "step": 9547 - }, - { - "epoch": 1.537622287531704, - "grad_norm": 0.0014249305240809917, - "learning_rate": 0.00019999883577556616, - "loss": 46.0, - "step": 9548 - }, - { - "epoch": 1.5377833246104915, - "grad_norm": 0.0013568312861025333, - "learning_rate": 0.00019999883553143048, - "loss": 46.0, - "step": 9549 - }, - { - "epoch": 1.537944361689279, - "grad_norm": 0.003219452453777194, - "learning_rate": 0.00019999883528726918, - "loss": 46.0, - "step": 9550 - }, - { - "epoch": 1.5381053987680664, - "grad_norm": 0.0025497402530163527, - "learning_rate": 0.00019999883504308227, - "loss": 46.0, - "step": 9551 - }, - { - "epoch": 1.5382664358468539, - "grad_norm": 0.0031076620798557997, - "learning_rate": 0.00019999883479886977, - "loss": 46.0, - "step": 9552 - }, - { - "epoch": 1.538427472925641, - "grad_norm": 0.0052823834121227264, - "learning_rate": 0.00019999883455463168, - "loss": 46.0, - "step": 9553 - }, - { - "epoch": 1.5385885100044285, - "grad_norm": 0.004734147805720568, - "learning_rate": 0.00019999883431036798, - "loss": 46.0, - "step": 9554 - }, - { - "epoch": 1.5387495470832158, - "grad_norm": 0.002411802299320698, - "learning_rate": 0.00019999883406607874, - "loss": 46.0, - "step": 9555 - }, - { - "epoch": 1.5389105841620032, - "grad_norm": 0.0026178339030593634, - "learning_rate": 0.00019999883382176386, - "loss": 46.0, - "step": 9556 - }, - { - "epoch": 1.5390716212407907, - "grad_norm": 0.0033464953303337097, - "learning_rate": 0.0001999988335774234, - "loss": 46.0, - "step": 9557 - }, - { - "epoch": 1.5392326583195781, - "grad_norm": 0.002232455415651202, - "learning_rate": 0.00019999883333305732, - "loss": 46.0, - "step": 9558 - }, - { - "epoch": 1.5393936953983656, - "grad_norm": 0.0008804902900010347, - "learning_rate": 0.00019999883308866566, - "loss": 46.0, - "step": 9559 - }, - { - "epoch": 1.539554732477153, - "grad_norm": 0.003191632218658924, - "learning_rate": 0.0001999988328442484, - "loss": 46.0, - "step": 9560 - }, - { - "epoch": 1.5397157695559403, - "grad_norm": 0.0005228768568485975, - "learning_rate": 0.00019999883259980557, - "loss": 46.0, - "step": 9561 - }, - { - "epoch": 1.5398768066347275, - "grad_norm": 0.00464447308331728, - "learning_rate": 0.0001999988323553371, - "loss": 46.0, - "step": 9562 - }, - { - "epoch": 1.540037843713515, - "grad_norm": 0.000984556507319212, - "learning_rate": 0.0001999988321108431, - "loss": 46.0, - "step": 9563 - }, - { - "epoch": 1.5401988807923024, - "grad_norm": 0.007177683990448713, - "learning_rate": 0.00019999883186632347, - "loss": 46.0, - "step": 9564 - }, - { - "epoch": 1.5403599178710898, - "grad_norm": 0.0015082447789609432, - "learning_rate": 0.00019999883162177823, - "loss": 46.0, - "step": 9565 - }, - { - "epoch": 1.5405209549498773, - "grad_norm": 0.002229714998975396, - "learning_rate": 0.00019999883137720743, - "loss": 46.0, - "step": 9566 - }, - { - "epoch": 1.5406819920286647, - "grad_norm": 0.002325141802430153, - "learning_rate": 0.000199998831132611, - "loss": 46.0, - "step": 9567 - }, - { - "epoch": 1.540843029107452, - "grad_norm": 0.004455997608602047, - "learning_rate": 0.00019999883088798898, - "loss": 46.0, - "step": 9568 - }, - { - "epoch": 1.5410040661862394, - "grad_norm": 0.008108694106340408, - "learning_rate": 0.0001999988306433414, - "loss": 46.0, - "step": 9569 - }, - { - "epoch": 1.5411651032650266, - "grad_norm": 0.012046033516526222, - "learning_rate": 0.0001999988303986682, - "loss": 46.0, - "step": 9570 - }, - { - "epoch": 1.541326140343814, - "grad_norm": 0.0017901815008372068, - "learning_rate": 0.00019999883015396942, - "loss": 46.0, - "step": 9571 - }, - { - "epoch": 1.5414871774226016, - "grad_norm": 0.0019001057371497154, - "learning_rate": 0.000199998829909245, - "loss": 46.0, - "step": 9572 - }, - { - "epoch": 1.541648214501389, - "grad_norm": 0.011951982975006104, - "learning_rate": 0.00019999882966449504, - "loss": 46.0, - "step": 9573 - }, - { - "epoch": 1.5418092515801765, - "grad_norm": 0.003459338331595063, - "learning_rate": 0.0001999988294197195, - "loss": 46.0, - "step": 9574 - }, - { - "epoch": 1.5419702886589637, - "grad_norm": 0.005500278435647488, - "learning_rate": 0.00019999882917491827, - "loss": 46.0, - "step": 9575 - }, - { - "epoch": 1.5421313257377511, - "grad_norm": 0.0008652671240270138, - "learning_rate": 0.00019999882893009154, - "loss": 46.0, - "step": 9576 - }, - { - "epoch": 1.5422923628165384, - "grad_norm": 0.0032363201025873423, - "learning_rate": 0.00019999882868523917, - "loss": 46.0, - "step": 9577 - }, - { - "epoch": 1.5424533998953258, - "grad_norm": 0.0017805654788389802, - "learning_rate": 0.0001999988284403612, - "loss": 46.0, - "step": 9578 - }, - { - "epoch": 1.5426144369741133, - "grad_norm": 0.00463515892624855, - "learning_rate": 0.00019999882819545767, - "loss": 46.0, - "step": 9579 - }, - { - "epoch": 1.5427754740529007, - "grad_norm": 0.0010887961834669113, - "learning_rate": 0.00019999882795052854, - "loss": 46.0, - "step": 9580 - }, - { - "epoch": 1.5429365111316882, - "grad_norm": 0.0014403262175619602, - "learning_rate": 0.0001999988277055738, - "loss": 46.0, - "step": 9581 - }, - { - "epoch": 1.5430975482104756, - "grad_norm": 0.002026051515713334, - "learning_rate": 0.00019999882746059346, - "loss": 46.0, - "step": 9582 - }, - { - "epoch": 1.5432585852892629, - "grad_norm": 0.0019245789153501391, - "learning_rate": 0.00019999882721558754, - "loss": 46.0, - "step": 9583 - }, - { - "epoch": 1.54341962236805, - "grad_norm": 0.007637613452970982, - "learning_rate": 0.000199998826970556, - "loss": 46.0, - "step": 9584 - }, - { - "epoch": 1.5435806594468375, - "grad_norm": 0.0010679109254851937, - "learning_rate": 0.0001999988267254989, - "loss": 46.0, - "step": 9585 - }, - { - "epoch": 1.543741696525625, - "grad_norm": 0.0013544211396947503, - "learning_rate": 0.0001999988264804162, - "loss": 46.0, - "step": 9586 - }, - { - "epoch": 1.5439027336044124, - "grad_norm": 0.0016832927940413356, - "learning_rate": 0.00019999882623530788, - "loss": 46.0, - "step": 9587 - }, - { - "epoch": 1.5440637706831999, - "grad_norm": 0.0006674944306723773, - "learning_rate": 0.000199998825990174, - "loss": 46.0, - "step": 9588 - }, - { - "epoch": 1.5442248077619873, - "grad_norm": 0.0005326775135472417, - "learning_rate": 0.0001999988257450145, - "loss": 46.0, - "step": 9589 - }, - { - "epoch": 1.5443858448407746, - "grad_norm": 0.0010344121837988496, - "learning_rate": 0.00019999882549982941, - "loss": 46.0, - "step": 9590 - }, - { - "epoch": 1.544546881919562, - "grad_norm": 0.0017514858627691865, - "learning_rate": 0.00019999882525461872, - "loss": 46.0, - "step": 9591 - }, - { - "epoch": 1.5447079189983492, - "grad_norm": 0.0007412286940962076, - "learning_rate": 0.00019999882500938246, - "loss": 46.0, - "step": 9592 - }, - { - "epoch": 1.5448689560771367, - "grad_norm": 0.004125567153096199, - "learning_rate": 0.00019999882476412058, - "loss": 46.0, - "step": 9593 - }, - { - "epoch": 1.5450299931559242, - "grad_norm": 0.0019025934161618352, - "learning_rate": 0.00019999882451883312, - "loss": 46.0, - "step": 9594 - }, - { - "epoch": 1.5451910302347116, - "grad_norm": 0.0009014655952341855, - "learning_rate": 0.00019999882427352002, - "loss": 46.0, - "step": 9595 - }, - { - "epoch": 1.545352067313499, - "grad_norm": 0.008022756315767765, - "learning_rate": 0.0001999988240281814, - "loss": 46.0, - "step": 9596 - }, - { - "epoch": 1.5455131043922863, - "grad_norm": 0.0005562002188526094, - "learning_rate": 0.00019999882378281714, - "loss": 46.0, - "step": 9597 - }, - { - "epoch": 1.5456741414710737, - "grad_norm": 0.002666367683559656, - "learning_rate": 0.0001999988235374273, - "loss": 46.0, - "step": 9598 - }, - { - "epoch": 1.545835178549861, - "grad_norm": 0.0017994075315073133, - "learning_rate": 0.00019999882329201185, - "loss": 46.0, - "step": 9599 - }, - { - "epoch": 1.5459962156286484, - "grad_norm": 0.0017747669480741024, - "learning_rate": 0.00019999882304657082, - "loss": 46.0, - "step": 9600 - }, - { - "epoch": 1.5461572527074359, - "grad_norm": 0.002589545911177993, - "learning_rate": 0.0001999988228011042, - "loss": 46.0, - "step": 9601 - }, - { - "epoch": 1.5463182897862233, - "grad_norm": 0.006421798840165138, - "learning_rate": 0.00019999882255561198, - "loss": 46.0, - "step": 9602 - }, - { - "epoch": 1.5464793268650108, - "grad_norm": 0.00528734689578414, - "learning_rate": 0.00019999882231009415, - "loss": 46.0, - "step": 9603 - }, - { - "epoch": 1.5466403639437982, - "grad_norm": 0.0019034610595554113, - "learning_rate": 0.00019999882206455074, - "loss": 46.0, - "step": 9604 - }, - { - "epoch": 1.5468014010225855, - "grad_norm": 0.004726993851363659, - "learning_rate": 0.00019999882181898174, - "loss": 46.0, - "step": 9605 - }, - { - "epoch": 1.5469624381013727, - "grad_norm": 0.0008454433409497142, - "learning_rate": 0.00019999882157338715, - "loss": 46.0, - "step": 9606 - }, - { - "epoch": 1.5471234751801601, - "grad_norm": 0.009155957959592342, - "learning_rate": 0.00019999882132776692, - "loss": 46.0, - "step": 9607 - }, - { - "epoch": 1.5472845122589476, - "grad_norm": 0.003743691137060523, - "learning_rate": 0.00019999882108212114, - "loss": 46.0, - "step": 9608 - }, - { - "epoch": 1.547445549337735, - "grad_norm": 0.0019337142584845424, - "learning_rate": 0.00019999882083644973, - "loss": 46.0, - "step": 9609 - }, - { - "epoch": 1.5476065864165225, - "grad_norm": 0.0013208661694079638, - "learning_rate": 0.00019999882059075277, - "loss": 46.0, - "step": 9610 - }, - { - "epoch": 1.54776762349531, - "grad_norm": 0.0032328011002391577, - "learning_rate": 0.00019999882034503022, - "loss": 46.0, - "step": 9611 - }, - { - "epoch": 1.5479286605740972, - "grad_norm": 0.011548959650099277, - "learning_rate": 0.00019999882009928203, - "loss": 46.0, - "step": 9612 - }, - { - "epoch": 1.5480896976528846, - "grad_norm": 0.0015373054193332791, - "learning_rate": 0.00019999881985350828, - "loss": 46.0, - "step": 9613 - }, - { - "epoch": 1.5482507347316719, - "grad_norm": 0.001842934056185186, - "learning_rate": 0.0001999988196077089, - "loss": 46.0, - "step": 9614 - }, - { - "epoch": 1.5484117718104593, - "grad_norm": 0.00206050300039351, - "learning_rate": 0.00019999881936188396, - "loss": 46.0, - "step": 9615 - }, - { - "epoch": 1.5485728088892468, - "grad_norm": 0.0033201349433511496, - "learning_rate": 0.0001999988191160334, - "loss": 46.0, - "step": 9616 - }, - { - "epoch": 1.5487338459680342, - "grad_norm": 0.0005659116432070732, - "learning_rate": 0.0001999988188701573, - "loss": 46.0, - "step": 9617 - }, - { - "epoch": 1.5488948830468217, - "grad_norm": 0.0024309554137289524, - "learning_rate": 0.00019999881862425555, - "loss": 46.0, - "step": 9618 - }, - { - "epoch": 1.5490559201256089, - "grad_norm": 0.0032668341882526875, - "learning_rate": 0.00019999881837832822, - "loss": 46.0, - "step": 9619 - }, - { - "epoch": 1.5492169572043963, - "grad_norm": 0.0016206871950998902, - "learning_rate": 0.00019999881813237527, - "loss": 46.0, - "step": 9620 - }, - { - "epoch": 1.5493779942831836, - "grad_norm": 0.006536832544952631, - "learning_rate": 0.00019999881788639677, - "loss": 46.0, - "step": 9621 - }, - { - "epoch": 1.549539031361971, - "grad_norm": 0.0012820035917684436, - "learning_rate": 0.00019999881764039263, - "loss": 46.0, - "step": 9622 - }, - { - "epoch": 1.5497000684407585, - "grad_norm": 0.002206459641456604, - "learning_rate": 0.00019999881739436295, - "loss": 46.0, - "step": 9623 - }, - { - "epoch": 1.549861105519546, - "grad_norm": 0.003899957751855254, - "learning_rate": 0.00019999881714830763, - "loss": 46.0, - "step": 9624 - }, - { - "epoch": 1.5500221425983334, - "grad_norm": 0.0033586008939892054, - "learning_rate": 0.00019999881690222675, - "loss": 46.0, - "step": 9625 - }, - { - "epoch": 1.5501831796771206, - "grad_norm": 0.00246395543217659, - "learning_rate": 0.00019999881665612023, - "loss": 46.0, - "step": 9626 - }, - { - "epoch": 1.550344216755908, - "grad_norm": 0.003294206690043211, - "learning_rate": 0.00019999881640998815, - "loss": 46.0, - "step": 9627 - }, - { - "epoch": 1.5505052538346953, - "grad_norm": 0.0024017507676035166, - "learning_rate": 0.00019999881616383048, - "loss": 46.0, - "step": 9628 - }, - { - "epoch": 1.5506662909134827, - "grad_norm": 0.006382984574884176, - "learning_rate": 0.0001999988159176472, - "loss": 46.0, - "step": 9629 - }, - { - "epoch": 1.5508273279922702, - "grad_norm": 0.0005004876293241978, - "learning_rate": 0.00019999881567143833, - "loss": 46.0, - "step": 9630 - }, - { - "epoch": 1.5509883650710576, - "grad_norm": 0.0020565080922096968, - "learning_rate": 0.00019999881542520385, - "loss": 46.0, - "step": 9631 - }, - { - "epoch": 1.551149402149845, - "grad_norm": 0.002605983754619956, - "learning_rate": 0.0001999988151789438, - "loss": 46.0, - "step": 9632 - }, - { - "epoch": 1.5513104392286325, - "grad_norm": 0.0009897048585116863, - "learning_rate": 0.00019999881493265814, - "loss": 46.0, - "step": 9633 - }, - { - "epoch": 1.5514714763074198, - "grad_norm": 0.0012678259517997503, - "learning_rate": 0.00019999881468634693, - "loss": 46.0, - "step": 9634 - }, - { - "epoch": 1.5516325133862072, - "grad_norm": 0.0029944973066449165, - "learning_rate": 0.00019999881444001004, - "loss": 46.0, - "step": 9635 - }, - { - "epoch": 1.5517935504649945, - "grad_norm": 0.0017019726801663637, - "learning_rate": 0.00019999881419364762, - "loss": 46.0, - "step": 9636 - }, - { - "epoch": 1.551954587543782, - "grad_norm": 0.001605618861503899, - "learning_rate": 0.0001999988139472596, - "loss": 46.0, - "step": 9637 - }, - { - "epoch": 1.5521156246225694, - "grad_norm": 0.0018793237395584583, - "learning_rate": 0.00019999881370084594, - "loss": 46.0, - "step": 9638 - }, - { - "epoch": 1.5522766617013568, - "grad_norm": 0.001324295881204307, - "learning_rate": 0.00019999881345440673, - "loss": 46.0, - "step": 9639 - }, - { - "epoch": 1.5524376987801443, - "grad_norm": 0.0011678725713863969, - "learning_rate": 0.0001999988132079419, - "loss": 46.0, - "step": 9640 - }, - { - "epoch": 1.5525987358589315, - "grad_norm": 0.002261652611196041, - "learning_rate": 0.0001999988129614515, - "loss": 46.0, - "step": 9641 - }, - { - "epoch": 1.552759772937719, - "grad_norm": 0.0017068805173039436, - "learning_rate": 0.0001999988127149355, - "loss": 46.0, - "step": 9642 - }, - { - "epoch": 1.5529208100165062, - "grad_norm": 0.003284177742898464, - "learning_rate": 0.0001999988124683939, - "loss": 46.0, - "step": 9643 - }, - { - "epoch": 1.5530818470952936, - "grad_norm": 0.0024580410681664944, - "learning_rate": 0.0001999988122218267, - "loss": 46.0, - "step": 9644 - }, - { - "epoch": 1.553242884174081, - "grad_norm": 0.0008174455724656582, - "learning_rate": 0.0001999988119752339, - "loss": 46.0, - "step": 9645 - }, - { - "epoch": 1.5534039212528685, - "grad_norm": 0.0011248092632740736, - "learning_rate": 0.0001999988117286155, - "loss": 46.0, - "step": 9646 - }, - { - "epoch": 1.553564958331656, - "grad_norm": 0.00452102767303586, - "learning_rate": 0.00019999881148197155, - "loss": 46.0, - "step": 9647 - }, - { - "epoch": 1.5537259954104432, - "grad_norm": 0.0009618006879463792, - "learning_rate": 0.00019999881123530198, - "loss": 46.0, - "step": 9648 - }, - { - "epoch": 1.5538870324892307, - "grad_norm": 0.003927244339138269, - "learning_rate": 0.0001999988109886068, - "loss": 46.0, - "step": 9649 - }, - { - "epoch": 1.5540480695680179, - "grad_norm": 0.0007460684282705188, - "learning_rate": 0.00019999881074188604, - "loss": 46.0, - "step": 9650 - }, - { - "epoch": 1.5542091066468053, - "grad_norm": 0.005137612111866474, - "learning_rate": 0.00019999881049513968, - "loss": 46.0, - "step": 9651 - }, - { - "epoch": 1.5543701437255928, - "grad_norm": 0.0016791753005236387, - "learning_rate": 0.00019999881024836773, - "loss": 46.0, - "step": 9652 - }, - { - "epoch": 1.5545311808043802, - "grad_norm": 0.0033985706977546215, - "learning_rate": 0.0001999988100015702, - "loss": 46.0, - "step": 9653 - }, - { - "epoch": 1.5546922178831677, - "grad_norm": 0.004292998928576708, - "learning_rate": 0.00019999880975474704, - "loss": 46.0, - "step": 9654 - }, - { - "epoch": 1.5548532549619551, - "grad_norm": 0.0011493455385789275, - "learning_rate": 0.0001999988095078983, - "loss": 46.0, - "step": 9655 - }, - { - "epoch": 1.5550142920407424, - "grad_norm": 0.0006883305031806231, - "learning_rate": 0.000199998809261024, - "loss": 46.0, - "step": 9656 - }, - { - "epoch": 1.5551753291195298, - "grad_norm": 0.0016404492780566216, - "learning_rate": 0.00019999880901412406, - "loss": 46.0, - "step": 9657 - }, - { - "epoch": 1.555336366198317, - "grad_norm": 0.0007315294933505356, - "learning_rate": 0.00019999880876719854, - "loss": 46.0, - "step": 9658 - }, - { - "epoch": 1.5554974032771045, - "grad_norm": 0.0032106267753988504, - "learning_rate": 0.00019999880852024745, - "loss": 46.0, - "step": 9659 - }, - { - "epoch": 1.555658440355892, - "grad_norm": 0.002301374450325966, - "learning_rate": 0.00019999880827327072, - "loss": 46.0, - "step": 9660 - }, - { - "epoch": 1.5558194774346794, - "grad_norm": 0.0032942492980509996, - "learning_rate": 0.00019999880802626843, - "loss": 46.0, - "step": 9661 - }, - { - "epoch": 1.5559805145134669, - "grad_norm": 0.001327955280430615, - "learning_rate": 0.00019999880777924053, - "loss": 46.0, - "step": 9662 - }, - { - "epoch": 1.556141551592254, - "grad_norm": 0.0009405059390701354, - "learning_rate": 0.00019999880753218704, - "loss": 46.0, - "step": 9663 - }, - { - "epoch": 1.5563025886710415, - "grad_norm": 0.0016414924757555127, - "learning_rate": 0.00019999880728510796, - "loss": 46.0, - "step": 9664 - }, - { - "epoch": 1.5564636257498288, - "grad_norm": 0.0015177715104073286, - "learning_rate": 0.00019999880703800327, - "loss": 46.0, - "step": 9665 - }, - { - "epoch": 1.5566246628286162, - "grad_norm": 0.005273567046970129, - "learning_rate": 0.000199998806790873, - "loss": 46.0, - "step": 9666 - }, - { - "epoch": 1.5567856999074037, - "grad_norm": 0.0012911235680803657, - "learning_rate": 0.00019999880654371713, - "loss": 46.0, - "step": 9667 - }, - { - "epoch": 1.5569467369861911, - "grad_norm": 0.00324858445674181, - "learning_rate": 0.00019999880629653567, - "loss": 46.0, - "step": 9668 - }, - { - "epoch": 1.5571077740649786, - "grad_norm": 0.0009071313543245196, - "learning_rate": 0.0001999988060493286, - "loss": 46.0, - "step": 9669 - }, - { - "epoch": 1.5572688111437658, - "grad_norm": 0.0013265524758026004, - "learning_rate": 0.00019999880580209593, - "loss": 46.0, - "step": 9670 - }, - { - "epoch": 1.5574298482225533, - "grad_norm": 0.0018497625133022666, - "learning_rate": 0.0001999988055548377, - "loss": 46.0, - "step": 9671 - }, - { - "epoch": 1.5575908853013405, - "grad_norm": 0.0012562641641125083, - "learning_rate": 0.00019999880530755386, - "loss": 46.0, - "step": 9672 - }, - { - "epoch": 1.557751922380128, - "grad_norm": 0.0012631615391001105, - "learning_rate": 0.0001999988050602444, - "loss": 46.0, - "step": 9673 - }, - { - "epoch": 1.5579129594589154, - "grad_norm": 0.0006363785942085087, - "learning_rate": 0.00019999880481290938, - "loss": 46.0, - "step": 9674 - }, - { - "epoch": 1.5580739965377028, - "grad_norm": 0.004513740073889494, - "learning_rate": 0.00019999880456554877, - "loss": 46.0, - "step": 9675 - }, - { - "epoch": 1.5582350336164903, - "grad_norm": 0.0013083830708637834, - "learning_rate": 0.00019999880431816253, - "loss": 46.0, - "step": 9676 - }, - { - "epoch": 1.5583960706952777, - "grad_norm": 0.01600608602166176, - "learning_rate": 0.00019999880407075074, - "loss": 46.0, - "step": 9677 - }, - { - "epoch": 1.558557107774065, - "grad_norm": 0.002192708197981119, - "learning_rate": 0.00019999880382331334, - "loss": 46.0, - "step": 9678 - }, - { - "epoch": 1.5587181448528522, - "grad_norm": 0.005927729420363903, - "learning_rate": 0.0001999988035758503, - "loss": 46.0, - "step": 9679 - }, - { - "epoch": 1.5588791819316397, - "grad_norm": 0.0029181777499616146, - "learning_rate": 0.0001999988033283617, - "loss": 46.0, - "step": 9680 - }, - { - "epoch": 1.559040219010427, - "grad_norm": 0.011076760478317738, - "learning_rate": 0.00019999880308084751, - "loss": 46.0, - "step": 9681 - }, - { - "epoch": 1.5592012560892146, - "grad_norm": 0.003319684648886323, - "learning_rate": 0.00019999880283330773, - "loss": 46.0, - "step": 9682 - }, - { - "epoch": 1.559362293168002, - "grad_norm": 0.0007327769417315722, - "learning_rate": 0.00019999880258574236, - "loss": 46.0, - "step": 9683 - }, - { - "epoch": 1.5595233302467895, - "grad_norm": 0.0017283943016082048, - "learning_rate": 0.00019999880233815138, - "loss": 46.0, - "step": 9684 - }, - { - "epoch": 1.5596843673255767, - "grad_norm": 0.003965584561228752, - "learning_rate": 0.00019999880209053478, - "loss": 46.0, - "step": 9685 - }, - { - "epoch": 1.5598454044043641, - "grad_norm": 0.000998963718302548, - "learning_rate": 0.00019999880184289263, - "loss": 46.0, - "step": 9686 - }, - { - "epoch": 1.5600064414831514, - "grad_norm": 0.0010566264390945435, - "learning_rate": 0.00019999880159522485, - "loss": 46.0, - "step": 9687 - }, - { - "epoch": 1.5601674785619388, - "grad_norm": 0.004201866220682859, - "learning_rate": 0.00019999880134753152, - "loss": 46.0, - "step": 9688 - }, - { - "epoch": 1.5603285156407263, - "grad_norm": 0.006156921852380037, - "learning_rate": 0.00019999880109981258, - "loss": 46.0, - "step": 9689 - }, - { - "epoch": 1.5604895527195137, - "grad_norm": 0.003583103884011507, - "learning_rate": 0.00019999880085206804, - "loss": 46.0, - "step": 9690 - }, - { - "epoch": 1.5606505897983012, - "grad_norm": 0.002841728273779154, - "learning_rate": 0.00019999880060429787, - "loss": 46.0, - "step": 9691 - }, - { - "epoch": 1.5608116268770884, - "grad_norm": 0.0031647998839616776, - "learning_rate": 0.00019999880035650213, - "loss": 46.0, - "step": 9692 - }, - { - "epoch": 1.5609726639558759, - "grad_norm": 0.006603090558201075, - "learning_rate": 0.0001999988001086808, - "loss": 46.0, - "step": 9693 - }, - { - "epoch": 1.561133701034663, - "grad_norm": 0.0007188520394265652, - "learning_rate": 0.0001999987998608339, - "loss": 46.0, - "step": 9694 - }, - { - "epoch": 1.5612947381134505, - "grad_norm": 0.0038769152015447617, - "learning_rate": 0.00019999879961296138, - "loss": 46.0, - "step": 9695 - }, - { - "epoch": 1.561455775192238, - "grad_norm": 0.006570107769221067, - "learning_rate": 0.00019999879936506324, - "loss": 46.0, - "step": 9696 - }, - { - "epoch": 1.5616168122710254, - "grad_norm": 0.0012102453038096428, - "learning_rate": 0.00019999879911713957, - "loss": 46.0, - "step": 9697 - }, - { - "epoch": 1.561777849349813, - "grad_norm": 0.0004778045986313373, - "learning_rate": 0.00019999879886919026, - "loss": 46.0, - "step": 9698 - }, - { - "epoch": 1.5619388864286003, - "grad_norm": 0.0005573790404014289, - "learning_rate": 0.0001999987986212154, - "loss": 46.0, - "step": 9699 - }, - { - "epoch": 1.5620999235073876, - "grad_norm": 0.0006665913970209658, - "learning_rate": 0.00019999879837321487, - "loss": 46.0, - "step": 9700 - }, - { - "epoch": 1.5622609605861748, - "grad_norm": 0.0038049796130508184, - "learning_rate": 0.00019999879812518877, - "loss": 46.0, - "step": 9701 - }, - { - "epoch": 1.5624219976649623, - "grad_norm": 0.007736711297184229, - "learning_rate": 0.0001999987978771371, - "loss": 46.0, - "step": 9702 - }, - { - "epoch": 1.5625830347437497, - "grad_norm": 0.0007463959627784789, - "learning_rate": 0.0001999987976290598, - "loss": 46.0, - "step": 9703 - }, - { - "epoch": 1.5627440718225372, - "grad_norm": 0.0018275681650266051, - "learning_rate": 0.00019999879738095695, - "loss": 46.0, - "step": 9704 - }, - { - "epoch": 1.5629051089013246, - "grad_norm": 0.0004914948367513716, - "learning_rate": 0.0001999987971328285, - "loss": 46.0, - "step": 9705 - }, - { - "epoch": 1.563066145980112, - "grad_norm": 0.003845184575766325, - "learning_rate": 0.00019999879688467444, - "loss": 46.0, - "step": 9706 - }, - { - "epoch": 1.5632271830588993, - "grad_norm": 0.0013122610980644822, - "learning_rate": 0.00019999879663649479, - "loss": 46.0, - "step": 9707 - }, - { - "epoch": 1.5633882201376867, - "grad_norm": 0.0017416197806596756, - "learning_rate": 0.00019999879638828952, - "loss": 46.0, - "step": 9708 - }, - { - "epoch": 1.563549257216474, - "grad_norm": 0.003517310367897153, - "learning_rate": 0.0001999987961400587, - "loss": 46.0, - "step": 9709 - }, - { - "epoch": 1.5637102942952614, - "grad_norm": 0.0009275232441723347, - "learning_rate": 0.00019999879589180228, - "loss": 46.0, - "step": 9710 - }, - { - "epoch": 1.5638713313740489, - "grad_norm": 0.0020001984667032957, - "learning_rate": 0.00019999879564352023, - "loss": 46.0, - "step": 9711 - }, - { - "epoch": 1.5640323684528363, - "grad_norm": 0.0015724546974524856, - "learning_rate": 0.00019999879539521262, - "loss": 46.0, - "step": 9712 - }, - { - "epoch": 1.5641934055316238, - "grad_norm": 0.0006613938021473587, - "learning_rate": 0.00019999879514687942, - "loss": 46.0, - "step": 9713 - }, - { - "epoch": 1.564354442610411, - "grad_norm": 0.0020729731768369675, - "learning_rate": 0.00019999879489852057, - "loss": 46.0, - "step": 9714 - }, - { - "epoch": 1.5645154796891985, - "grad_norm": 0.000949891924392432, - "learning_rate": 0.00019999879465013617, - "loss": 46.0, - "step": 9715 - }, - { - "epoch": 1.5646765167679857, - "grad_norm": 0.000744186865631491, - "learning_rate": 0.00019999879440172618, - "loss": 46.0, - "step": 9716 - }, - { - "epoch": 1.5648375538467731, - "grad_norm": 0.0011828161077573895, - "learning_rate": 0.00019999879415329058, - "loss": 46.0, - "step": 9717 - }, - { - "epoch": 1.5649985909255606, - "grad_norm": 0.0024074027314782143, - "learning_rate": 0.0001999987939048294, - "loss": 46.0, - "step": 9718 - }, - { - "epoch": 1.565159628004348, - "grad_norm": 0.003987540956586599, - "learning_rate": 0.0001999987936563426, - "loss": 46.0, - "step": 9719 - }, - { - "epoch": 1.5653206650831355, - "grad_norm": 0.004735024180263281, - "learning_rate": 0.00019999879340783022, - "loss": 46.0, - "step": 9720 - }, - { - "epoch": 1.5654817021619227, - "grad_norm": 0.0016514173476025462, - "learning_rate": 0.00019999879315929224, - "loss": 46.0, - "step": 9721 - }, - { - "epoch": 1.5656427392407102, - "grad_norm": 0.002796730725094676, - "learning_rate": 0.00019999879291072867, - "loss": 46.0, - "step": 9722 - }, - { - "epoch": 1.5658037763194974, - "grad_norm": 0.0004002567729912698, - "learning_rate": 0.0001999987926621395, - "loss": 46.0, - "step": 9723 - }, - { - "epoch": 1.5659648133982849, - "grad_norm": 0.0028831178788095713, - "learning_rate": 0.00019999879241352475, - "loss": 46.0, - "step": 9724 - }, - { - "epoch": 1.5661258504770723, - "grad_norm": 0.0030676613096147776, - "learning_rate": 0.00019999879216488437, - "loss": 46.0, - "step": 9725 - }, - { - "epoch": 1.5662868875558598, - "grad_norm": 0.0012627679388970137, - "learning_rate": 0.00019999879191621842, - "loss": 46.0, - "step": 9726 - }, - { - "epoch": 1.5664479246346472, - "grad_norm": 0.0024922697339206934, - "learning_rate": 0.0001999987916675269, - "loss": 46.0, - "step": 9727 - }, - { - "epoch": 1.5666089617134347, - "grad_norm": 0.006966681219637394, - "learning_rate": 0.00019999879141880975, - "loss": 46.0, - "step": 9728 - }, - { - "epoch": 1.566769998792222, - "grad_norm": 0.001992269651964307, - "learning_rate": 0.00019999879117006702, - "loss": 46.0, - "step": 9729 - }, - { - "epoch": 1.5669310358710093, - "grad_norm": 0.0008069348405115306, - "learning_rate": 0.00019999879092129867, - "loss": 46.0, - "step": 9730 - }, - { - "epoch": 1.5670920729497966, - "grad_norm": 0.0012568749953061342, - "learning_rate": 0.00019999879067250474, - "loss": 46.0, - "step": 9731 - }, - { - "epoch": 1.567253110028584, - "grad_norm": 0.0009445696487091482, - "learning_rate": 0.00019999879042368525, - "loss": 46.0, - "step": 9732 - }, - { - "epoch": 1.5674141471073715, - "grad_norm": 0.003530106507241726, - "learning_rate": 0.00019999879017484014, - "loss": 46.0, - "step": 9733 - }, - { - "epoch": 1.567575184186159, - "grad_norm": 0.0036179046146571636, - "learning_rate": 0.00019999878992596942, - "loss": 46.0, - "step": 9734 - }, - { - "epoch": 1.5677362212649464, - "grad_norm": 0.0049944715574383736, - "learning_rate": 0.00019999878967707314, - "loss": 46.0, - "step": 9735 - }, - { - "epoch": 1.5678972583437336, - "grad_norm": 0.0012217058101668954, - "learning_rate": 0.00019999878942815124, - "loss": 46.0, - "step": 9736 - }, - { - "epoch": 1.568058295422521, - "grad_norm": 0.010000333189964294, - "learning_rate": 0.00019999878917920376, - "loss": 46.0, - "step": 9737 - }, - { - "epoch": 1.5682193325013083, - "grad_norm": 0.002759035909548402, - "learning_rate": 0.00019999878893023064, - "loss": 46.0, - "step": 9738 - }, - { - "epoch": 1.5683803695800957, - "grad_norm": 0.0054483008570969105, - "learning_rate": 0.00019999878868123198, - "loss": 46.0, - "step": 9739 - }, - { - "epoch": 1.5685414066588832, - "grad_norm": 0.0006330199539661407, - "learning_rate": 0.0001999987884322077, - "loss": 46.0, - "step": 9740 - }, - { - "epoch": 1.5687024437376706, - "grad_norm": 0.006719228811562061, - "learning_rate": 0.00019999878818315785, - "loss": 46.0, - "step": 9741 - }, - { - "epoch": 1.568863480816458, - "grad_norm": 0.0036212641280144453, - "learning_rate": 0.00019999878793408238, - "loss": 46.0, - "step": 9742 - }, - { - "epoch": 1.5690245178952453, - "grad_norm": 0.005796676501631737, - "learning_rate": 0.00019999878768498132, - "loss": 46.0, - "step": 9743 - }, - { - "epoch": 1.5691855549740328, - "grad_norm": 0.010220407508313656, - "learning_rate": 0.0001999987874358547, - "loss": 46.0, - "step": 9744 - }, - { - "epoch": 1.56934659205282, - "grad_norm": 0.011911019682884216, - "learning_rate": 0.00019999878718670244, - "loss": 46.0, - "step": 9745 - }, - { - "epoch": 1.5695076291316075, - "grad_norm": 0.0013834377750754356, - "learning_rate": 0.00019999878693752462, - "loss": 46.0, - "step": 9746 - }, - { - "epoch": 1.569668666210395, - "grad_norm": 0.003457516198977828, - "learning_rate": 0.00019999878668832115, - "loss": 46.0, - "step": 9747 - }, - { - "epoch": 1.5698297032891824, - "grad_norm": 0.001415478065609932, - "learning_rate": 0.00019999878643909213, - "loss": 46.0, - "step": 9748 - }, - { - "epoch": 1.5699907403679698, - "grad_norm": 0.001333848456852138, - "learning_rate": 0.0001999987861898375, - "loss": 46.0, - "step": 9749 - }, - { - "epoch": 1.5701517774467573, - "grad_norm": 0.004237016197293997, - "learning_rate": 0.0001999987859405573, - "loss": 46.0, - "step": 9750 - }, - { - "epoch": 1.5703128145255445, - "grad_norm": 0.0006624659872613847, - "learning_rate": 0.00019999878569125148, - "loss": 46.0, - "step": 9751 - }, - { - "epoch": 1.570473851604332, - "grad_norm": 0.0010238528484478593, - "learning_rate": 0.00019999878544192009, - "loss": 46.0, - "step": 9752 - }, - { - "epoch": 1.5706348886831192, - "grad_norm": 0.0013221821282058954, - "learning_rate": 0.00019999878519256305, - "loss": 46.0, - "step": 9753 - }, - { - "epoch": 1.5707959257619066, - "grad_norm": 0.0014520084951072931, - "learning_rate": 0.00019999878494318047, - "loss": 46.0, - "step": 9754 - }, - { - "epoch": 1.570956962840694, - "grad_norm": 0.00646643340587616, - "learning_rate": 0.00019999878469377226, - "loss": 46.0, - "step": 9755 - }, - { - "epoch": 1.5711179999194815, - "grad_norm": 0.00045624279300682247, - "learning_rate": 0.0001999987844443385, - "loss": 46.0, - "step": 9756 - }, - { - "epoch": 1.571279036998269, - "grad_norm": 0.0010332841193303466, - "learning_rate": 0.00019999878419487912, - "loss": 46.0, - "step": 9757 - }, - { - "epoch": 1.5714400740770562, - "grad_norm": 0.0020471017342060804, - "learning_rate": 0.00019999878394539415, - "loss": 46.0, - "step": 9758 - }, - { - "epoch": 1.5716011111558437, - "grad_norm": 0.002583115128800273, - "learning_rate": 0.00019999878369588356, - "loss": 46.0, - "step": 9759 - }, - { - "epoch": 1.571762148234631, - "grad_norm": 0.0017519983230158687, - "learning_rate": 0.00019999878344634738, - "loss": 46.0, - "step": 9760 - }, - { - "epoch": 1.5719231853134183, - "grad_norm": 0.005189927294850349, - "learning_rate": 0.00019999878319678564, - "loss": 46.0, - "step": 9761 - }, - { - "epoch": 1.5720842223922058, - "grad_norm": 0.0008023937698453665, - "learning_rate": 0.0001999987829471983, - "loss": 46.0, - "step": 9762 - }, - { - "epoch": 1.5722452594709933, - "grad_norm": 0.002099481411278248, - "learning_rate": 0.00019999878269758535, - "loss": 46.0, - "step": 9763 - }, - { - "epoch": 1.5724062965497807, - "grad_norm": 0.0011411972809582949, - "learning_rate": 0.0001999987824479468, - "loss": 46.0, - "step": 9764 - }, - { - "epoch": 1.572567333628568, - "grad_norm": 0.0011199337895959616, - "learning_rate": 0.00019999878219828269, - "loss": 46.0, - "step": 9765 - }, - { - "epoch": 1.5727283707073554, - "grad_norm": 0.00040268953307531774, - "learning_rate": 0.00019999878194859293, - "loss": 46.0, - "step": 9766 - }, - { - "epoch": 1.5728894077861426, - "grad_norm": 0.0030387945007532835, - "learning_rate": 0.00019999878169887762, - "loss": 46.0, - "step": 9767 - }, - { - "epoch": 1.57305044486493, - "grad_norm": 0.0015354889910668135, - "learning_rate": 0.0001999987814491367, - "loss": 46.0, - "step": 9768 - }, - { - "epoch": 1.5732114819437175, - "grad_norm": 0.007252680603414774, - "learning_rate": 0.00019999878119937017, - "loss": 46.0, - "step": 9769 - }, - { - "epoch": 1.573372519022505, - "grad_norm": 0.0009251062292605639, - "learning_rate": 0.00019999878094957807, - "loss": 46.0, - "step": 9770 - }, - { - "epoch": 1.5735335561012924, - "grad_norm": 0.0029202389996498823, - "learning_rate": 0.00019999878069976035, - "loss": 46.0, - "step": 9771 - }, - { - "epoch": 1.5736945931800799, - "grad_norm": 0.0019986468832939863, - "learning_rate": 0.00019999878044991707, - "loss": 46.0, - "step": 9772 - }, - { - "epoch": 1.573855630258867, - "grad_norm": 0.0016948707634583116, - "learning_rate": 0.00019999878020004815, - "loss": 46.0, - "step": 9773 - }, - { - "epoch": 1.5740166673376543, - "grad_norm": 0.0024093349929898977, - "learning_rate": 0.00019999877995015368, - "loss": 46.0, - "step": 9774 - }, - { - "epoch": 1.5741777044164418, - "grad_norm": 0.0013166608987376094, - "learning_rate": 0.0001999987797002336, - "loss": 46.0, - "step": 9775 - }, - { - "epoch": 1.5743387414952292, - "grad_norm": 0.0019574237521737814, - "learning_rate": 0.0001999987794502879, - "loss": 46.0, - "step": 9776 - }, - { - "epoch": 1.5744997785740167, - "grad_norm": 0.01026909425854683, - "learning_rate": 0.00019999877920031666, - "loss": 46.0, - "step": 9777 - }, - { - "epoch": 1.5746608156528041, - "grad_norm": 0.007019911427050829, - "learning_rate": 0.0001999987789503198, - "loss": 46.0, - "step": 9778 - }, - { - "epoch": 1.5748218527315916, - "grad_norm": 0.0005509941256605089, - "learning_rate": 0.00019999877870029734, - "loss": 46.0, - "step": 9779 - }, - { - "epoch": 1.5749828898103788, - "grad_norm": 0.0015382322017103434, - "learning_rate": 0.00019999877845024928, - "loss": 46.0, - "step": 9780 - }, - { - "epoch": 1.5751439268891663, - "grad_norm": 0.0008280586916953325, - "learning_rate": 0.00019999877820017564, - "loss": 46.0, - "step": 9781 - }, - { - "epoch": 1.5753049639679535, - "grad_norm": 0.0020458255894482136, - "learning_rate": 0.00019999877795007638, - "loss": 46.0, - "step": 9782 - }, - { - "epoch": 1.575466001046741, - "grad_norm": 0.0025958814658224583, - "learning_rate": 0.00019999877769995153, - "loss": 46.0, - "step": 9783 - }, - { - "epoch": 1.5756270381255284, - "grad_norm": 0.002383159939199686, - "learning_rate": 0.0001999987774498011, - "loss": 46.0, - "step": 9784 - }, - { - "epoch": 1.5757880752043159, - "grad_norm": 0.002497895387932658, - "learning_rate": 0.00019999877719962508, - "loss": 46.0, - "step": 9785 - }, - { - "epoch": 1.5759491122831033, - "grad_norm": 0.0009028394124470651, - "learning_rate": 0.00019999877694942345, - "loss": 46.0, - "step": 9786 - }, - { - "epoch": 1.5761101493618905, - "grad_norm": 0.002269017044454813, - "learning_rate": 0.00019999877669919626, - "loss": 46.0, - "step": 9787 - }, - { - "epoch": 1.576271186440678, - "grad_norm": 0.0007879043114371598, - "learning_rate": 0.00019999877644894342, - "loss": 46.0, - "step": 9788 - }, - { - "epoch": 1.5764322235194652, - "grad_norm": 0.0005099507397972047, - "learning_rate": 0.00019999877619866503, - "loss": 46.0, - "step": 9789 - }, - { - "epoch": 1.5765932605982527, - "grad_norm": 0.0065828729420900345, - "learning_rate": 0.00019999877594836102, - "loss": 46.0, - "step": 9790 - }, - { - "epoch": 1.5767542976770401, - "grad_norm": 0.0021500701550394297, - "learning_rate": 0.00019999877569803142, - "loss": 46.0, - "step": 9791 - }, - { - "epoch": 1.5769153347558276, - "grad_norm": 0.0006644924287684262, - "learning_rate": 0.00019999877544767624, - "loss": 46.0, - "step": 9792 - }, - { - "epoch": 1.577076371834615, - "grad_norm": 0.008343099616467953, - "learning_rate": 0.00019999877519729547, - "loss": 46.0, - "step": 9793 - }, - { - "epoch": 1.5772374089134025, - "grad_norm": 0.0004977814387530088, - "learning_rate": 0.00019999877494688905, - "loss": 46.0, - "step": 9794 - }, - { - "epoch": 1.5773984459921897, - "grad_norm": 0.0005223865737207234, - "learning_rate": 0.00019999877469645708, - "loss": 46.0, - "step": 9795 - }, - { - "epoch": 1.577559483070977, - "grad_norm": 0.004047384485602379, - "learning_rate": 0.00019999877444599952, - "loss": 46.0, - "step": 9796 - }, - { - "epoch": 1.5777205201497644, - "grad_norm": 0.0023688289802521467, - "learning_rate": 0.00019999877419551635, - "loss": 46.0, - "step": 9797 - }, - { - "epoch": 1.5778815572285518, - "grad_norm": 0.00135643663816154, - "learning_rate": 0.00019999877394500761, - "loss": 46.0, - "step": 9798 - }, - { - "epoch": 1.5780425943073393, - "grad_norm": 0.003825664985924959, - "learning_rate": 0.00019999877369447324, - "loss": 46.0, - "step": 9799 - }, - { - "epoch": 1.5782036313861267, - "grad_norm": 0.001462132902815938, - "learning_rate": 0.0001999987734439133, - "loss": 46.0, - "step": 9800 - }, - { - "epoch": 1.5783646684649142, - "grad_norm": 0.0008735788869671524, - "learning_rate": 0.00019999877319332775, - "loss": 46.0, - "step": 9801 - }, - { - "epoch": 1.5785257055437014, - "grad_norm": 0.0017034619813784957, - "learning_rate": 0.00019999877294271662, - "loss": 46.0, - "step": 9802 - }, - { - "epoch": 1.5786867426224889, - "grad_norm": 0.0009953982662409544, - "learning_rate": 0.00019999877269207992, - "loss": 46.0, - "step": 9803 - }, - { - "epoch": 1.578847779701276, - "grad_norm": 0.00047335383715108037, - "learning_rate": 0.00019999877244141758, - "loss": 46.0, - "step": 9804 - }, - { - "epoch": 1.5790088167800636, - "grad_norm": 0.0025080873165279627, - "learning_rate": 0.00019999877219072966, - "loss": 46.0, - "step": 9805 - }, - { - "epoch": 1.579169853858851, - "grad_norm": 0.0016478647012263536, - "learning_rate": 0.00019999877194001615, - "loss": 46.0, - "step": 9806 - }, - { - "epoch": 1.5793308909376385, - "grad_norm": 0.010133449919521809, - "learning_rate": 0.00019999877168927705, - "loss": 46.0, - "step": 9807 - }, - { - "epoch": 1.579491928016426, - "grad_norm": 0.0004886648966930807, - "learning_rate": 0.00019999877143851233, - "loss": 46.0, - "step": 9808 - }, - { - "epoch": 1.5796529650952131, - "grad_norm": 0.0027399531099945307, - "learning_rate": 0.00019999877118772203, - "loss": 46.0, - "step": 9809 - }, - { - "epoch": 1.5798140021740006, - "grad_norm": 0.0018911331426352262, - "learning_rate": 0.00019999877093690614, - "loss": 46.0, - "step": 9810 - }, - { - "epoch": 1.5799750392527878, - "grad_norm": 0.000624898646492511, - "learning_rate": 0.00019999877068606464, - "loss": 46.0, - "step": 9811 - }, - { - "epoch": 1.5801360763315753, - "grad_norm": 0.0026830527931451797, - "learning_rate": 0.00019999877043519758, - "loss": 46.0, - "step": 9812 - }, - { - "epoch": 1.5802971134103627, - "grad_norm": 0.0003536730364430696, - "learning_rate": 0.0001999987701843049, - "loss": 46.0, - "step": 9813 - }, - { - "epoch": 1.5804581504891502, - "grad_norm": 0.0011075574439018965, - "learning_rate": 0.00019999876993338664, - "loss": 46.0, - "step": 9814 - }, - { - "epoch": 1.5806191875679376, - "grad_norm": 0.0031218486838042736, - "learning_rate": 0.00019999876968244276, - "loss": 46.0, - "step": 9815 - }, - { - "epoch": 1.580780224646725, - "grad_norm": 0.0012519750744104385, - "learning_rate": 0.00019999876943147332, - "loss": 46.0, - "step": 9816 - }, - { - "epoch": 1.5809412617255123, - "grad_norm": 0.001903881086036563, - "learning_rate": 0.00019999876918047827, - "loss": 46.0, - "step": 9817 - }, - { - "epoch": 1.5811022988042995, - "grad_norm": 0.004829671699553728, - "learning_rate": 0.0001999987689294576, - "loss": 46.0, - "step": 9818 - }, - { - "epoch": 1.581263335883087, - "grad_norm": 0.0009480537264607847, - "learning_rate": 0.00019999876867841137, - "loss": 46.0, - "step": 9819 - }, - { - "epoch": 1.5814243729618744, - "grad_norm": 0.000697803741786629, - "learning_rate": 0.00019999876842733953, - "loss": 46.0, - "step": 9820 - }, - { - "epoch": 1.5815854100406619, - "grad_norm": 0.0010910104028880596, - "learning_rate": 0.0001999987681762421, - "loss": 46.0, - "step": 9821 - }, - { - "epoch": 1.5817464471194493, - "grad_norm": 0.002191560808569193, - "learning_rate": 0.00019999876792511906, - "loss": 46.0, - "step": 9822 - }, - { - "epoch": 1.5819074841982368, - "grad_norm": 0.0026845159009099007, - "learning_rate": 0.00019999876767397046, - "loss": 46.0, - "step": 9823 - }, - { - "epoch": 1.582068521277024, - "grad_norm": 0.0026525049470365047, - "learning_rate": 0.00019999876742279624, - "loss": 46.0, - "step": 9824 - }, - { - "epoch": 1.5822295583558115, - "grad_norm": 0.0050405478104949, - "learning_rate": 0.00019999876717159644, - "loss": 46.0, - "step": 9825 - }, - { - "epoch": 1.5823905954345987, - "grad_norm": 0.0020936548244208097, - "learning_rate": 0.00019999876692037102, - "loss": 46.0, - "step": 9826 - }, - { - "epoch": 1.5825516325133862, - "grad_norm": 0.0021683350205421448, - "learning_rate": 0.00019999876666912, - "loss": 46.0, - "step": 9827 - }, - { - "epoch": 1.5827126695921736, - "grad_norm": 0.00042703395592980087, - "learning_rate": 0.00019999876641784342, - "loss": 46.0, - "step": 9828 - }, - { - "epoch": 1.582873706670961, - "grad_norm": 0.0012084603076800704, - "learning_rate": 0.00019999876616654124, - "loss": 46.0, - "step": 9829 - }, - { - "epoch": 1.5830347437497485, - "grad_norm": 0.001400305307470262, - "learning_rate": 0.00019999876591521347, - "loss": 46.0, - "step": 9830 - }, - { - "epoch": 1.5831957808285357, - "grad_norm": 0.001301633776165545, - "learning_rate": 0.0001999987656638601, - "loss": 46.0, - "step": 9831 - }, - { - "epoch": 1.5833568179073232, - "grad_norm": 0.012440150603652, - "learning_rate": 0.0001999987654124811, - "loss": 46.0, - "step": 9832 - }, - { - "epoch": 1.5835178549861104, - "grad_norm": 0.001563405618071556, - "learning_rate": 0.00019999876516107654, - "loss": 46.0, - "step": 9833 - }, - { - "epoch": 1.5836788920648979, - "grad_norm": 0.0009780620457604527, - "learning_rate": 0.00019999876490964637, - "loss": 46.0, - "step": 9834 - }, - { - "epoch": 1.5838399291436853, - "grad_norm": 0.006246987264603376, - "learning_rate": 0.00019999876465819064, - "loss": 46.0, - "step": 9835 - }, - { - "epoch": 1.5840009662224728, - "grad_norm": 0.001995635684579611, - "learning_rate": 0.00019999876440670927, - "loss": 46.0, - "step": 9836 - }, - { - "epoch": 1.5841620033012602, - "grad_norm": 0.0004036130558233708, - "learning_rate": 0.00019999876415520234, - "loss": 46.0, - "step": 9837 - }, - { - "epoch": 1.5843230403800475, - "grad_norm": 0.0013316834811121225, - "learning_rate": 0.0001999987639036698, - "loss": 46.0, - "step": 9838 - }, - { - "epoch": 1.584484077458835, - "grad_norm": 0.0018691255245357752, - "learning_rate": 0.00019999876365211166, - "loss": 46.0, - "step": 9839 - }, - { - "epoch": 1.5846451145376221, - "grad_norm": 0.003369741840288043, - "learning_rate": 0.00019999876340052794, - "loss": 46.0, - "step": 9840 - }, - { - "epoch": 1.5848061516164096, - "grad_norm": 0.0050245048478245735, - "learning_rate": 0.00019999876314891858, - "loss": 46.0, - "step": 9841 - }, - { - "epoch": 1.584967188695197, - "grad_norm": 0.0013695969246327877, - "learning_rate": 0.0001999987628972837, - "loss": 46.0, - "step": 9842 - }, - { - "epoch": 1.5851282257739845, - "grad_norm": 0.0031018375884741545, - "learning_rate": 0.00019999876264562317, - "loss": 46.0, - "step": 9843 - }, - { - "epoch": 1.585289262852772, - "grad_norm": 0.008670821785926819, - "learning_rate": 0.00019999876239393708, - "loss": 46.0, - "step": 9844 - }, - { - "epoch": 1.5854502999315594, - "grad_norm": 0.001297230483032763, - "learning_rate": 0.00019999876214222537, - "loss": 46.0, - "step": 9845 - }, - { - "epoch": 1.5856113370103466, - "grad_norm": 0.005896645598113537, - "learning_rate": 0.00019999876189048807, - "loss": 46.0, - "step": 9846 - }, - { - "epoch": 1.585772374089134, - "grad_norm": 0.0035727997310459614, - "learning_rate": 0.00019999876163872518, - "loss": 46.0, - "step": 9847 - }, - { - "epoch": 1.5859334111679213, - "grad_norm": 0.000984722631983459, - "learning_rate": 0.00019999876138693669, - "loss": 46.0, - "step": 9848 - }, - { - "epoch": 1.5860944482467088, - "grad_norm": 0.005690367426723242, - "learning_rate": 0.00019999876113512263, - "loss": 46.0, - "step": 9849 - }, - { - "epoch": 1.5862554853254962, - "grad_norm": 0.0014734865399077535, - "learning_rate": 0.00019999876088328295, - "loss": 46.0, - "step": 9850 - }, - { - "epoch": 1.5864165224042837, - "grad_norm": 0.0023802241776138544, - "learning_rate": 0.00019999876063141767, - "loss": 46.0, - "step": 9851 - }, - { - "epoch": 1.586577559483071, - "grad_norm": 0.0004882747307419777, - "learning_rate": 0.00019999876037952682, - "loss": 46.0, - "step": 9852 - }, - { - "epoch": 1.5867385965618583, - "grad_norm": 0.0012015477987006307, - "learning_rate": 0.00019999876012761036, - "loss": 46.0, - "step": 9853 - }, - { - "epoch": 1.5868996336406458, - "grad_norm": 0.003879109164699912, - "learning_rate": 0.0001999987598756683, - "loss": 46.0, - "step": 9854 - }, - { - "epoch": 1.587060670719433, - "grad_norm": 0.011290845461189747, - "learning_rate": 0.00019999875962370064, - "loss": 46.0, - "step": 9855 - }, - { - "epoch": 1.5872217077982205, - "grad_norm": 0.000992082292214036, - "learning_rate": 0.00019999875937170742, - "loss": 46.0, - "step": 9856 - }, - { - "epoch": 1.587382744877008, - "grad_norm": 0.0017628453206270933, - "learning_rate": 0.00019999875911968858, - "loss": 46.0, - "step": 9857 - }, - { - "epoch": 1.5875437819557954, - "grad_norm": 0.0018435249803587794, - "learning_rate": 0.00019999875886764413, - "loss": 46.0, - "step": 9858 - }, - { - "epoch": 1.5877048190345828, - "grad_norm": 0.006634776014834642, - "learning_rate": 0.00019999875861557415, - "loss": 46.0, - "step": 9859 - }, - { - "epoch": 1.58786585611337, - "grad_norm": 0.0014659801963716745, - "learning_rate": 0.0001999987583634785, - "loss": 46.0, - "step": 9860 - }, - { - "epoch": 1.5880268931921575, - "grad_norm": 0.00515702273696661, - "learning_rate": 0.00019999875811135728, - "loss": 46.0, - "step": 9861 - }, - { - "epoch": 1.5881879302709447, - "grad_norm": 0.007606877014040947, - "learning_rate": 0.0001999987578592105, - "loss": 46.0, - "step": 9862 - }, - { - "epoch": 1.5883489673497322, - "grad_norm": 0.0006977077573537827, - "learning_rate": 0.0001999987576070381, - "loss": 46.0, - "step": 9863 - }, - { - "epoch": 1.5885100044285196, - "grad_norm": 0.006941099185496569, - "learning_rate": 0.0001999987573548401, - "loss": 46.0, - "step": 9864 - }, - { - "epoch": 1.588671041507307, - "grad_norm": 0.0014213366666808724, - "learning_rate": 0.00019999875710261647, - "loss": 46.0, - "step": 9865 - }, - { - "epoch": 1.5888320785860945, - "grad_norm": 0.0023262694012373686, - "learning_rate": 0.0001999987568503673, - "loss": 46.0, - "step": 9866 - }, - { - "epoch": 1.588993115664882, - "grad_norm": 0.0031400443986058235, - "learning_rate": 0.00019999875659809253, - "loss": 46.0, - "step": 9867 - }, - { - "epoch": 1.5891541527436692, - "grad_norm": 0.0010237874230369925, - "learning_rate": 0.00019999875634579215, - "loss": 46.0, - "step": 9868 - }, - { - "epoch": 1.5893151898224567, - "grad_norm": 0.0031135983299463987, - "learning_rate": 0.0001999987560934662, - "loss": 46.0, - "step": 9869 - }, - { - "epoch": 1.589476226901244, - "grad_norm": 0.007396283093839884, - "learning_rate": 0.00019999875584111464, - "loss": 46.0, - "step": 9870 - }, - { - "epoch": 1.5896372639800314, - "grad_norm": 0.0020296175498515368, - "learning_rate": 0.00019999875558873744, - "loss": 46.0, - "step": 9871 - }, - { - "epoch": 1.5897983010588188, - "grad_norm": 0.0007125104311853647, - "learning_rate": 0.0001999987553363347, - "loss": 46.0, - "step": 9872 - }, - { - "epoch": 1.5899593381376063, - "grad_norm": 0.0018272243905812502, - "learning_rate": 0.00019999875508390635, - "loss": 46.0, - "step": 9873 - }, - { - "epoch": 1.5901203752163937, - "grad_norm": 0.0014895439380779862, - "learning_rate": 0.0001999987548314524, - "loss": 46.0, - "step": 9874 - }, - { - "epoch": 1.590281412295181, - "grad_norm": 0.001105980365537107, - "learning_rate": 0.00019999875457897285, - "loss": 46.0, - "step": 9875 - }, - { - "epoch": 1.5904424493739684, - "grad_norm": 0.0046920799650251865, - "learning_rate": 0.00019999875432646772, - "loss": 46.0, - "step": 9876 - }, - { - "epoch": 1.5906034864527556, - "grad_norm": 0.0036386162973940372, - "learning_rate": 0.00019999875407393703, - "loss": 46.0, - "step": 9877 - }, - { - "epoch": 1.590764523531543, - "grad_norm": 0.002501716371625662, - "learning_rate": 0.0001999987538213807, - "loss": 46.0, - "step": 9878 - }, - { - "epoch": 1.5909255606103305, - "grad_norm": 0.006380944512784481, - "learning_rate": 0.00019999875356879878, - "loss": 46.0, - "step": 9879 - }, - { - "epoch": 1.591086597689118, - "grad_norm": 0.00047685258323326707, - "learning_rate": 0.00019999875331619125, - "loss": 46.0, - "step": 9880 - }, - { - "epoch": 1.5912476347679054, - "grad_norm": 0.0011004656553268433, - "learning_rate": 0.00019999875306355816, - "loss": 46.0, - "step": 9881 - }, - { - "epoch": 1.5914086718466927, - "grad_norm": 0.0031154248863458633, - "learning_rate": 0.00019999875281089945, - "loss": 46.0, - "step": 9882 - }, - { - "epoch": 1.59156970892548, - "grad_norm": 0.002049099886789918, - "learning_rate": 0.00019999875255821516, - "loss": 46.0, - "step": 9883 - }, - { - "epoch": 1.5917307460042673, - "grad_norm": 0.0016456427983939648, - "learning_rate": 0.00019999875230550528, - "loss": 46.0, - "step": 9884 - }, - { - "epoch": 1.5918917830830548, - "grad_norm": 0.00288618472404778, - "learning_rate": 0.00019999875205276978, - "loss": 46.0, - "step": 9885 - }, - { - "epoch": 1.5920528201618422, - "grad_norm": 0.0011149515630677342, - "learning_rate": 0.0001999987518000087, - "loss": 46.0, - "step": 9886 - }, - { - "epoch": 1.5922138572406297, - "grad_norm": 0.0018803334096446633, - "learning_rate": 0.00019999875154722206, - "loss": 46.0, - "step": 9887 - }, - { - "epoch": 1.5923748943194171, - "grad_norm": 0.0047470806166529655, - "learning_rate": 0.00019999875129440977, - "loss": 46.0, - "step": 9888 - }, - { - "epoch": 1.5925359313982046, - "grad_norm": 0.0011762600624933839, - "learning_rate": 0.0001999987510415719, - "loss": 46.0, - "step": 9889 - }, - { - "epoch": 1.5926969684769918, - "grad_norm": 0.0022575596813112497, - "learning_rate": 0.00019999875078870844, - "loss": 46.0, - "step": 9890 - }, - { - "epoch": 1.592858005555779, - "grad_norm": 0.0043528713285923, - "learning_rate": 0.0001999987505358194, - "loss": 46.0, - "step": 9891 - }, - { - "epoch": 1.5930190426345665, - "grad_norm": 0.00403574900701642, - "learning_rate": 0.00019999875028290477, - "loss": 46.0, - "step": 9892 - }, - { - "epoch": 1.593180079713354, - "grad_norm": 0.0010333933169022202, - "learning_rate": 0.00019999875002996452, - "loss": 46.0, - "step": 9893 - }, - { - "epoch": 1.5933411167921414, - "grad_norm": 0.00641175452619791, - "learning_rate": 0.00019999874977699868, - "loss": 46.0, - "step": 9894 - }, - { - "epoch": 1.5935021538709289, - "grad_norm": 0.0007846063235774636, - "learning_rate": 0.00019999874952400726, - "loss": 46.0, - "step": 9895 - }, - { - "epoch": 1.5936631909497163, - "grad_norm": 0.003928713966161013, - "learning_rate": 0.00019999874927099023, - "loss": 46.0, - "step": 9896 - }, - { - "epoch": 1.5938242280285035, - "grad_norm": 0.0083048976957798, - "learning_rate": 0.0001999987490179476, - "loss": 46.0, - "step": 9897 - }, - { - "epoch": 1.593985265107291, - "grad_norm": 0.003947540186345577, - "learning_rate": 0.0001999987487648794, - "loss": 46.0, - "step": 9898 - }, - { - "epoch": 1.5941463021860782, - "grad_norm": 0.0018085397314280272, - "learning_rate": 0.0001999987485117856, - "loss": 46.0, - "step": 9899 - }, - { - "epoch": 1.5943073392648657, - "grad_norm": 0.0025369005743414164, - "learning_rate": 0.00019999874825866621, - "loss": 46.0, - "step": 9900 - }, - { - "epoch": 1.5944683763436531, - "grad_norm": 0.0011926913866773248, - "learning_rate": 0.00019999874800552122, - "loss": 46.0, - "step": 9901 - }, - { - "epoch": 1.5946294134224406, - "grad_norm": 0.0006960754981264472, - "learning_rate": 0.0001999987477523506, - "loss": 46.0, - "step": 9902 - }, - { - "epoch": 1.594790450501228, - "grad_norm": 0.0019860018510371447, - "learning_rate": 0.00019999874749915443, - "loss": 46.0, - "step": 9903 - }, - { - "epoch": 1.5949514875800153, - "grad_norm": 0.0046670278534293175, - "learning_rate": 0.00019999874724593264, - "loss": 46.0, - "step": 9904 - }, - { - "epoch": 1.5951125246588027, - "grad_norm": 0.004139245953410864, - "learning_rate": 0.00019999874699268527, - "loss": 46.0, - "step": 9905 - }, - { - "epoch": 1.59527356173759, - "grad_norm": 0.01147120725363493, - "learning_rate": 0.0001999987467394123, - "loss": 46.0, - "step": 9906 - }, - { - "epoch": 1.5954345988163774, - "grad_norm": 0.0006726644351147115, - "learning_rate": 0.00019999874648611373, - "loss": 46.0, - "step": 9907 - }, - { - "epoch": 1.5955956358951648, - "grad_norm": 0.0037660591769963503, - "learning_rate": 0.00019999874623278957, - "loss": 46.0, - "step": 9908 - }, - { - "epoch": 1.5957566729739523, - "grad_norm": 0.004735119640827179, - "learning_rate": 0.00019999874597943982, - "loss": 46.0, - "step": 9909 - }, - { - "epoch": 1.5959177100527397, - "grad_norm": 0.0009075236739590764, - "learning_rate": 0.0001999987457260645, - "loss": 46.0, - "step": 9910 - }, - { - "epoch": 1.5960787471315272, - "grad_norm": 0.004769476596266031, - "learning_rate": 0.00019999874547266356, - "loss": 46.0, - "step": 9911 - }, - { - "epoch": 1.5962397842103144, - "grad_norm": 0.0024767101276665926, - "learning_rate": 0.000199998745219237, - "loss": 46.0, - "step": 9912 - }, - { - "epoch": 1.5964008212891017, - "grad_norm": 0.00627156812697649, - "learning_rate": 0.00019999874496578487, - "loss": 46.0, - "step": 9913 - }, - { - "epoch": 1.596561858367889, - "grad_norm": 0.0011349557898938656, - "learning_rate": 0.00019999874471230715, - "loss": 46.0, - "step": 9914 - }, - { - "epoch": 1.5967228954466766, - "grad_norm": 0.0034325974993407726, - "learning_rate": 0.00019999874445880385, - "loss": 46.0, - "step": 9915 - }, - { - "epoch": 1.596883932525464, - "grad_norm": 0.003224226413294673, - "learning_rate": 0.0001999987442052749, - "loss": 46.0, - "step": 9916 - }, - { - "epoch": 1.5970449696042515, - "grad_norm": 0.00788784772157669, - "learning_rate": 0.0001999987439517204, - "loss": 46.0, - "step": 9917 - }, - { - "epoch": 1.597206006683039, - "grad_norm": 0.0029907876159995794, - "learning_rate": 0.00019999874369814032, - "loss": 46.0, - "step": 9918 - }, - { - "epoch": 1.5973670437618261, - "grad_norm": 0.0005910294130444527, - "learning_rate": 0.0001999987434445346, - "loss": 46.0, - "step": 9919 - }, - { - "epoch": 1.5975280808406136, - "grad_norm": 0.002190010854974389, - "learning_rate": 0.0001999987431909033, - "loss": 46.0, - "step": 9920 - }, - { - "epoch": 1.5976891179194008, - "grad_norm": 0.0011697777081280947, - "learning_rate": 0.00019999874293724642, - "loss": 46.0, - "step": 9921 - }, - { - "epoch": 1.5978501549981883, - "grad_norm": 0.004586322233080864, - "learning_rate": 0.00019999874268356393, - "loss": 46.0, - "step": 9922 - }, - { - "epoch": 1.5980111920769757, - "grad_norm": 0.0015874715754762292, - "learning_rate": 0.00019999874242985585, - "loss": 46.0, - "step": 9923 - }, - { - "epoch": 1.5981722291557632, - "grad_norm": 0.0033024463336914778, - "learning_rate": 0.0001999987421761222, - "loss": 46.0, - "step": 9924 - }, - { - "epoch": 1.5983332662345506, - "grad_norm": 0.0003507027868181467, - "learning_rate": 0.00019999874192236293, - "loss": 46.0, - "step": 9925 - }, - { - "epoch": 1.5984943033133379, - "grad_norm": 0.002473590662702918, - "learning_rate": 0.00019999874166857807, - "loss": 46.0, - "step": 9926 - }, - { - "epoch": 1.5986553403921253, - "grad_norm": 0.008915458805859089, - "learning_rate": 0.0001999987414147676, - "loss": 46.0, - "step": 9927 - }, - { - "epoch": 1.5988163774709125, - "grad_norm": 0.003729333635419607, - "learning_rate": 0.00019999874116093157, - "loss": 46.0, - "step": 9928 - }, - { - "epoch": 1.5989774145497, - "grad_norm": 0.001410358352586627, - "learning_rate": 0.0001999987409070699, - "loss": 46.0, - "step": 9929 - }, - { - "epoch": 1.5991384516284874, - "grad_norm": 0.002386793028563261, - "learning_rate": 0.0001999987406531827, - "loss": 46.0, - "step": 9930 - }, - { - "epoch": 1.599299488707275, - "grad_norm": 0.003954610321670771, - "learning_rate": 0.00019999874039926984, - "loss": 46.0, - "step": 9931 - }, - { - "epoch": 1.5994605257860623, - "grad_norm": 0.013110685162246227, - "learning_rate": 0.00019999874014533142, - "loss": 46.0, - "step": 9932 - }, - { - "epoch": 1.5996215628648496, - "grad_norm": 0.006606991868466139, - "learning_rate": 0.00019999873989136742, - "loss": 46.0, - "step": 9933 - }, - { - "epoch": 1.599782599943637, - "grad_norm": 0.005766377784311771, - "learning_rate": 0.0001999987396373778, - "loss": 46.0, - "step": 9934 - }, - { - "epoch": 1.5999436370224243, - "grad_norm": 0.0013292814837768674, - "learning_rate": 0.00019999873938336257, - "loss": 46.0, - "step": 9935 - }, - { - "epoch": 1.6001046741012117, - "grad_norm": 0.0008554612868465483, - "learning_rate": 0.0001999987391293218, - "loss": 46.0, - "step": 9936 - }, - { - "epoch": 1.6002657111799992, - "grad_norm": 0.0012830928899347782, - "learning_rate": 0.00019999873887525536, - "loss": 46.0, - "step": 9937 - }, - { - "epoch": 1.6004267482587866, - "grad_norm": 0.004417533054947853, - "learning_rate": 0.0001999987386211634, - "loss": 46.0, - "step": 9938 - }, - { - "epoch": 1.600587785337574, - "grad_norm": 0.00232595088891685, - "learning_rate": 0.00019999873836704581, - "loss": 46.0, - "step": 9939 - }, - { - "epoch": 1.6007488224163615, - "grad_norm": 0.004983017686754465, - "learning_rate": 0.00019999873811290262, - "loss": 46.0, - "step": 9940 - }, - { - "epoch": 1.6009098594951487, - "grad_norm": 0.0014203991740942001, - "learning_rate": 0.00019999873785873383, - "loss": 46.0, - "step": 9941 - }, - { - "epoch": 1.6010708965739362, - "grad_norm": 0.001022123615257442, - "learning_rate": 0.00019999873760453946, - "loss": 46.0, - "step": 9942 - }, - { - "epoch": 1.6012319336527234, - "grad_norm": 0.0029021401423960924, - "learning_rate": 0.0001999987373503195, - "loss": 46.0, - "step": 9943 - }, - { - "epoch": 1.6013929707315109, - "grad_norm": 0.0009786970913410187, - "learning_rate": 0.00019999873709607393, - "loss": 46.0, - "step": 9944 - }, - { - "epoch": 1.6015540078102983, - "grad_norm": 0.000947914901189506, - "learning_rate": 0.00019999873684180277, - "loss": 46.0, - "step": 9945 - }, - { - "epoch": 1.6017150448890858, - "grad_norm": 0.0036358211655169725, - "learning_rate": 0.00019999873658750603, - "loss": 46.0, - "step": 9946 - }, - { - "epoch": 1.6018760819678732, - "grad_norm": 0.004785642959177494, - "learning_rate": 0.00019999873633318367, - "loss": 46.0, - "step": 9947 - }, - { - "epoch": 1.6020371190466605, - "grad_norm": 0.0014921730617061257, - "learning_rate": 0.00019999873607883575, - "loss": 46.0, - "step": 9948 - }, - { - "epoch": 1.602198156125448, - "grad_norm": 0.004147863946855068, - "learning_rate": 0.0001999987358244622, - "loss": 46.0, - "step": 9949 - }, - { - "epoch": 1.6023591932042351, - "grad_norm": 0.001713430043309927, - "learning_rate": 0.0001999987355700631, - "loss": 46.0, - "step": 9950 - }, - { - "epoch": 1.6025202302830226, - "grad_norm": 0.0037970435805618763, - "learning_rate": 0.00019999873531563835, - "loss": 46.0, - "step": 9951 - }, - { - "epoch": 1.60268126736181, - "grad_norm": 0.00548350578173995, - "learning_rate": 0.00019999873506118806, - "loss": 46.0, - "step": 9952 - }, - { - "epoch": 1.6028423044405975, - "grad_norm": 0.0020618895068764687, - "learning_rate": 0.00019999873480671215, - "loss": 46.0, - "step": 9953 - }, - { - "epoch": 1.603003341519385, - "grad_norm": 0.0013701700372621417, - "learning_rate": 0.00019999873455221062, - "loss": 46.0, - "step": 9954 - }, - { - "epoch": 1.6031643785981722, - "grad_norm": 0.0019201809773221612, - "learning_rate": 0.0001999987342976835, - "loss": 46.0, - "step": 9955 - }, - { - "epoch": 1.6033254156769596, - "grad_norm": 0.0008332083816640079, - "learning_rate": 0.0001999987340431308, - "loss": 46.0, - "step": 9956 - }, - { - "epoch": 1.6034864527557469, - "grad_norm": 0.0005230630631558597, - "learning_rate": 0.00019999873378855255, - "loss": 46.0, - "step": 9957 - }, - { - "epoch": 1.6036474898345343, - "grad_norm": 0.0019583487883210182, - "learning_rate": 0.00019999873353394865, - "loss": 46.0, - "step": 9958 - }, - { - "epoch": 1.6038085269133218, - "grad_norm": 0.001861340133473277, - "learning_rate": 0.00019999873327931916, - "loss": 46.0, - "step": 9959 - }, - { - "epoch": 1.6039695639921092, - "grad_norm": 0.00693413196131587, - "learning_rate": 0.00019999873302466409, - "loss": 46.0, - "step": 9960 - }, - { - "epoch": 1.6041306010708967, - "grad_norm": 0.0014955116203054786, - "learning_rate": 0.00019999873276998342, - "loss": 46.0, - "step": 9961 - }, - { - "epoch": 1.6042916381496841, - "grad_norm": 0.0011993676889687777, - "learning_rate": 0.00019999873251527717, - "loss": 46.0, - "step": 9962 - }, - { - "epoch": 1.6044526752284713, - "grad_norm": 0.0018362307455390692, - "learning_rate": 0.0001999987322605453, - "loss": 46.0, - "step": 9963 - }, - { - "epoch": 1.6046137123072588, - "grad_norm": 0.0010144029511138797, - "learning_rate": 0.00019999873200578786, - "loss": 46.0, - "step": 9964 - }, - { - "epoch": 1.604774749386046, - "grad_norm": 0.0015136083820834756, - "learning_rate": 0.00019999873175100482, - "loss": 46.0, - "step": 9965 - }, - { - "epoch": 1.6049357864648335, - "grad_norm": 0.0010237840469926596, - "learning_rate": 0.00019999873149619617, - "loss": 46.0, - "step": 9966 - }, - { - "epoch": 1.605096823543621, - "grad_norm": 0.0012998317833989859, - "learning_rate": 0.00019999873124136193, - "loss": 46.0, - "step": 9967 - }, - { - "epoch": 1.6052578606224084, - "grad_norm": 0.0008958845282904804, - "learning_rate": 0.00019999873098650207, - "loss": 46.0, - "step": 9968 - }, - { - "epoch": 1.6054188977011958, - "grad_norm": 0.005237857345491648, - "learning_rate": 0.0001999987307316167, - "loss": 46.0, - "step": 9969 - }, - { - "epoch": 1.605579934779983, - "grad_norm": 0.0029390875715762377, - "learning_rate": 0.00019999873047670566, - "loss": 46.0, - "step": 9970 - }, - { - "epoch": 1.6057409718587705, - "grad_norm": 0.00178729637991637, - "learning_rate": 0.00019999873022176904, - "loss": 46.0, - "step": 9971 - }, - { - "epoch": 1.6059020089375577, - "grad_norm": 0.0023640634026378393, - "learning_rate": 0.00019999872996680681, - "loss": 46.0, - "step": 9972 - }, - { - "epoch": 1.6060630460163452, - "grad_norm": 0.0033357671927660704, - "learning_rate": 0.00019999872971181902, - "loss": 46.0, - "step": 9973 - }, - { - "epoch": 1.6062240830951326, - "grad_norm": 0.0024236023891717196, - "learning_rate": 0.00019999872945680562, - "loss": 46.0, - "step": 9974 - }, - { - "epoch": 1.60638512017392, - "grad_norm": 0.0005014078342355788, - "learning_rate": 0.00019999872920176663, - "loss": 46.0, - "step": 9975 - }, - { - "epoch": 1.6065461572527076, - "grad_norm": 0.007713499013334513, - "learning_rate": 0.00019999872894670205, - "loss": 46.0, - "step": 9976 - }, - { - "epoch": 1.6067071943314948, - "grad_norm": 0.0033008288592100143, - "learning_rate": 0.00019999872869161186, - "loss": 46.0, - "step": 9977 - }, - { - "epoch": 1.6068682314102822, - "grad_norm": 0.005757942795753479, - "learning_rate": 0.0001999987284364961, - "loss": 46.0, - "step": 9978 - }, - { - "epoch": 1.6070292684890695, - "grad_norm": 0.003667326644062996, - "learning_rate": 0.0001999987281813547, - "loss": 46.0, - "step": 9979 - }, - { - "epoch": 1.607190305567857, - "grad_norm": 0.002007820876315236, - "learning_rate": 0.00019999872792618776, - "loss": 46.0, - "step": 9980 - }, - { - "epoch": 1.6073513426466444, - "grad_norm": 0.006188342813402414, - "learning_rate": 0.0001999987276709952, - "loss": 46.0, - "step": 9981 - }, - { - "epoch": 1.6075123797254318, - "grad_norm": 0.003902670694515109, - "learning_rate": 0.00019999872741577703, - "loss": 46.0, - "step": 9982 - }, - { - "epoch": 1.6076734168042193, - "grad_norm": 0.001671084901317954, - "learning_rate": 0.0001999987271605333, - "loss": 46.0, - "step": 9983 - }, - { - "epoch": 1.6078344538830067, - "grad_norm": 0.004128395114094019, - "learning_rate": 0.00019999872690526393, - "loss": 46.0, - "step": 9984 - }, - { - "epoch": 1.607995490961794, - "grad_norm": 0.005472517106682062, - "learning_rate": 0.00019999872664996902, - "loss": 46.0, - "step": 9985 - }, - { - "epoch": 1.6081565280405812, - "grad_norm": 0.0011782434303313494, - "learning_rate": 0.00019999872639464848, - "loss": 46.0, - "step": 9986 - }, - { - "epoch": 1.6083175651193686, - "grad_norm": 0.0008484771242365241, - "learning_rate": 0.00019999872613930234, - "loss": 46.0, - "step": 9987 - }, - { - "epoch": 1.608478602198156, - "grad_norm": 0.001118054729886353, - "learning_rate": 0.00019999872588393063, - "loss": 46.0, - "step": 9988 - }, - { - "epoch": 1.6086396392769435, - "grad_norm": 0.0008306379313580692, - "learning_rate": 0.0001999987256285333, - "loss": 46.0, - "step": 9989 - }, - { - "epoch": 1.608800676355731, - "grad_norm": 0.000769666163250804, - "learning_rate": 0.0001999987253731104, - "loss": 46.0, - "step": 9990 - }, - { - "epoch": 1.6089617134345184, - "grad_norm": 0.002219146117568016, - "learning_rate": 0.0001999987251176619, - "loss": 46.0, - "step": 9991 - }, - { - "epoch": 1.6091227505133057, - "grad_norm": 0.003068450838327408, - "learning_rate": 0.0001999987248621878, - "loss": 46.0, - "step": 9992 - }, - { - "epoch": 1.6092837875920931, - "grad_norm": 0.0009180534398183227, - "learning_rate": 0.0001999987246066881, - "loss": 46.0, - "step": 9993 - }, - { - "epoch": 1.6094448246708803, - "grad_norm": 0.0008969181799329817, - "learning_rate": 0.00019999872435116282, - "loss": 46.0, - "step": 9994 - }, - { - "epoch": 1.6096058617496678, - "grad_norm": 0.0014274612767621875, - "learning_rate": 0.00019999872409561193, - "loss": 46.0, - "step": 9995 - }, - { - "epoch": 1.6097668988284553, - "grad_norm": 0.002303382847458124, - "learning_rate": 0.00019999872384003547, - "loss": 46.0, - "step": 9996 - }, - { - "epoch": 1.6099279359072427, - "grad_norm": 0.0036043638829141855, - "learning_rate": 0.0001999987235844334, - "loss": 46.0, - "step": 9997 - }, - { - "epoch": 1.6100889729860302, - "grad_norm": 0.002218181500211358, - "learning_rate": 0.0001999987233288057, - "loss": 46.0, - "step": 9998 - }, - { - "epoch": 1.6102500100648174, - "grad_norm": 0.0015294477343559265, - "learning_rate": 0.00019999872307315247, - "loss": 46.0, - "step": 9999 - }, - { - "epoch": 1.6104110471436048, - "grad_norm": 0.0011496507795527577, - "learning_rate": 0.0001999987228174736, - "loss": 46.0, - "step": 10000 - }, - { - "epoch": 1.610572084222392, - "grad_norm": 0.003749253461137414, - "learning_rate": 0.00019999872256176916, - "loss": 46.0, - "step": 10001 - }, - { - "epoch": 1.6107331213011795, - "grad_norm": 0.006967192515730858, - "learning_rate": 0.00019999872230603912, - "loss": 46.0, - "step": 10002 - }, - { - "epoch": 1.610894158379967, - "grad_norm": 0.003556340467184782, - "learning_rate": 0.00019999872205028347, - "loss": 46.0, - "step": 10003 - }, - { - "epoch": 1.6110551954587544, - "grad_norm": 0.002457259688526392, - "learning_rate": 0.00019999872179450224, - "loss": 46.0, - "step": 10004 - }, - { - "epoch": 1.6112162325375419, - "grad_norm": 0.0020998804830014706, - "learning_rate": 0.00019999872153869541, - "loss": 46.0, - "step": 10005 - }, - { - "epoch": 1.6113772696163293, - "grad_norm": 0.0010032132267951965, - "learning_rate": 0.00019999872128286298, - "loss": 46.0, - "step": 10006 - }, - { - "epoch": 1.6115383066951166, - "grad_norm": 0.0012266855919733644, - "learning_rate": 0.00019999872102700495, - "loss": 46.0, - "step": 10007 - }, - { - "epoch": 1.6116993437739038, - "grad_norm": 0.0028242762200534344, - "learning_rate": 0.00019999872077112137, - "loss": 46.0, - "step": 10008 - }, - { - "epoch": 1.6118603808526912, - "grad_norm": 0.0016357668209820986, - "learning_rate": 0.00019999872051521217, - "loss": 46.0, - "step": 10009 - }, - { - "epoch": 1.6120214179314787, - "grad_norm": 0.0021341571118682623, - "learning_rate": 0.00019999872025927733, - "loss": 46.0, - "step": 10010 - }, - { - "epoch": 1.6121824550102661, - "grad_norm": 0.002838290296494961, - "learning_rate": 0.00019999872000331693, - "loss": 46.0, - "step": 10011 - }, - { - "epoch": 1.6123434920890536, - "grad_norm": 0.0021008311305195093, - "learning_rate": 0.00019999871974733094, - "loss": 46.0, - "step": 10012 - }, - { - "epoch": 1.612504529167841, - "grad_norm": 0.0010792015818879008, - "learning_rate": 0.00019999871949131937, - "loss": 46.0, - "step": 10013 - }, - { - "epoch": 1.6126655662466283, - "grad_norm": 0.008334732614457607, - "learning_rate": 0.0001999987192352822, - "loss": 46.0, - "step": 10014 - }, - { - "epoch": 1.6128266033254157, - "grad_norm": 0.017220793291926384, - "learning_rate": 0.0001999987189792194, - "loss": 46.0, - "step": 10015 - }, - { - "epoch": 1.612987640404203, - "grad_norm": 0.0006704721599817276, - "learning_rate": 0.00019999871872313106, - "loss": 46.0, - "step": 10016 - }, - { - "epoch": 1.6131486774829904, - "grad_norm": 0.0012759979581460357, - "learning_rate": 0.0001999987184670171, - "loss": 46.0, - "step": 10017 - }, - { - "epoch": 1.6133097145617779, - "grad_norm": 0.001640747650526464, - "learning_rate": 0.00019999871821087752, - "loss": 46.0, - "step": 10018 - }, - { - "epoch": 1.6134707516405653, - "grad_norm": 0.001990459393709898, - "learning_rate": 0.0001999987179547124, - "loss": 46.0, - "step": 10019 - }, - { - "epoch": 1.6136317887193528, - "grad_norm": 0.004355066921561956, - "learning_rate": 0.00019999871769852163, - "loss": 46.0, - "step": 10020 - }, - { - "epoch": 1.61379282579814, - "grad_norm": 0.006216387264430523, - "learning_rate": 0.0001999987174423053, - "loss": 46.0, - "step": 10021 - }, - { - "epoch": 1.6139538628769274, - "grad_norm": 0.0019049688708037138, - "learning_rate": 0.00019999871718606334, - "loss": 46.0, - "step": 10022 - }, - { - "epoch": 1.6141148999557147, - "grad_norm": 0.0012330982135608792, - "learning_rate": 0.0001999987169297958, - "loss": 46.0, - "step": 10023 - }, - { - "epoch": 1.6142759370345021, - "grad_norm": 0.004897539969533682, - "learning_rate": 0.0001999987166735027, - "loss": 46.0, - "step": 10024 - }, - { - "epoch": 1.6144369741132896, - "grad_norm": 0.0013105187099426985, - "learning_rate": 0.00019999871641718396, - "loss": 46.0, - "step": 10025 - }, - { - "epoch": 1.614598011192077, - "grad_norm": 0.002436260925605893, - "learning_rate": 0.00019999871616083965, - "loss": 46.0, - "step": 10026 - }, - { - "epoch": 1.6147590482708645, - "grad_norm": 0.0014236794086173177, - "learning_rate": 0.00019999871590446974, - "loss": 46.0, - "step": 10027 - }, - { - "epoch": 1.614920085349652, - "grad_norm": 0.002821186790242791, - "learning_rate": 0.00019999871564807425, - "loss": 46.0, - "step": 10028 - }, - { - "epoch": 1.6150811224284392, - "grad_norm": 0.00046754733193665743, - "learning_rate": 0.00019999871539165315, - "loss": 46.0, - "step": 10029 - }, - { - "epoch": 1.6152421595072264, - "grad_norm": 0.0026387618854641914, - "learning_rate": 0.00019999871513520648, - "loss": 46.0, - "step": 10030 - }, - { - "epoch": 1.6154031965860138, - "grad_norm": 0.0010120307561010122, - "learning_rate": 0.00019999871487873415, - "loss": 46.0, - "step": 10031 - }, - { - "epoch": 1.6155642336648013, - "grad_norm": 0.004986585583537817, - "learning_rate": 0.00019999871462223628, - "loss": 46.0, - "step": 10032 - }, - { - "epoch": 1.6157252707435887, - "grad_norm": 0.006543920375406742, - "learning_rate": 0.0001999987143657128, - "loss": 46.0, - "step": 10033 - }, - { - "epoch": 1.6158863078223762, - "grad_norm": 0.0034259026870131493, - "learning_rate": 0.00019999871410916376, - "loss": 46.0, - "step": 10034 - }, - { - "epoch": 1.6160473449011636, - "grad_norm": 0.000730679021216929, - "learning_rate": 0.00019999871385258908, - "loss": 46.0, - "step": 10035 - }, - { - "epoch": 1.6162083819799509, - "grad_norm": 0.0012241887161508203, - "learning_rate": 0.00019999871359598884, - "loss": 46.0, - "step": 10036 - }, - { - "epoch": 1.6163694190587383, - "grad_norm": 0.0061356136575341225, - "learning_rate": 0.00019999871333936295, - "loss": 46.0, - "step": 10037 - }, - { - "epoch": 1.6165304561375256, - "grad_norm": 0.0018628608668223023, - "learning_rate": 0.0001999987130827115, - "loss": 46.0, - "step": 10038 - }, - { - "epoch": 1.616691493216313, - "grad_norm": 0.0005156780825927854, - "learning_rate": 0.00019999871282603445, - "loss": 46.0, - "step": 10039 - }, - { - "epoch": 1.6168525302951005, - "grad_norm": 0.0021635459270328283, - "learning_rate": 0.00019999871256933186, - "loss": 46.0, - "step": 10040 - }, - { - "epoch": 1.617013567373888, - "grad_norm": 0.0024970651138573885, - "learning_rate": 0.0001999987123126036, - "loss": 46.0, - "step": 10041 - }, - { - "epoch": 1.6171746044526754, - "grad_norm": 0.0034847150091081858, - "learning_rate": 0.00019999871205584978, - "loss": 46.0, - "step": 10042 - }, - { - "epoch": 1.6173356415314626, - "grad_norm": 0.009560378268361092, - "learning_rate": 0.00019999871179907035, - "loss": 46.0, - "step": 10043 - }, - { - "epoch": 1.61749667861025, - "grad_norm": 0.000757599831558764, - "learning_rate": 0.00019999871154226533, - "loss": 46.0, - "step": 10044 - }, - { - "epoch": 1.6176577156890373, - "grad_norm": 0.0008367106202058494, - "learning_rate": 0.00019999871128543472, - "loss": 46.0, - "step": 10045 - }, - { - "epoch": 1.6178187527678247, - "grad_norm": 0.0029868390411138535, - "learning_rate": 0.00019999871102857852, - "loss": 46.0, - "step": 10046 - }, - { - "epoch": 1.6179797898466122, - "grad_norm": 0.0024790638126432896, - "learning_rate": 0.00019999871077169674, - "loss": 46.0, - "step": 10047 - }, - { - "epoch": 1.6181408269253996, - "grad_norm": 0.003104433650150895, - "learning_rate": 0.00019999871051478934, - "loss": 46.0, - "step": 10048 - }, - { - "epoch": 1.618301864004187, - "grad_norm": 0.0014766334788873792, - "learning_rate": 0.00019999871025785636, - "loss": 46.0, - "step": 10049 - }, - { - "epoch": 1.6184629010829743, - "grad_norm": 0.0014610042562708259, - "learning_rate": 0.00019999871000089776, - "loss": 46.0, - "step": 10050 - }, - { - "epoch": 1.6186239381617618, - "grad_norm": 0.0005733121070079505, - "learning_rate": 0.0001999987097439136, - "loss": 46.0, - "step": 10051 - }, - { - "epoch": 1.618784975240549, - "grad_norm": 0.013445157557725906, - "learning_rate": 0.0001999987094869038, - "loss": 46.0, - "step": 10052 - }, - { - "epoch": 1.6189460123193364, - "grad_norm": 0.0010347983334213495, - "learning_rate": 0.00019999870922986844, - "loss": 46.0, - "step": 10053 - }, - { - "epoch": 1.6191070493981239, - "grad_norm": 0.0031378509011119604, - "learning_rate": 0.00019999870897280747, - "loss": 46.0, - "step": 10054 - }, - { - "epoch": 1.6192680864769113, - "grad_norm": 0.0025368474889546633, - "learning_rate": 0.00019999870871572093, - "loss": 46.0, - "step": 10055 - }, - { - "epoch": 1.6194291235556988, - "grad_norm": 0.001810396439395845, - "learning_rate": 0.00019999870845860879, - "loss": 46.0, - "step": 10056 - }, - { - "epoch": 1.6195901606344862, - "grad_norm": 0.004318727646023035, - "learning_rate": 0.00019999870820147102, - "loss": 46.0, - "step": 10057 - }, - { - "epoch": 1.6197511977132735, - "grad_norm": 0.004343224223703146, - "learning_rate": 0.0001999987079443077, - "loss": 46.0, - "step": 10058 - }, - { - "epoch": 1.619912234792061, - "grad_norm": 0.010591312311589718, - "learning_rate": 0.00019999870768711876, - "loss": 46.0, - "step": 10059 - }, - { - "epoch": 1.6200732718708482, - "grad_norm": 0.002126262290403247, - "learning_rate": 0.00019999870742990424, - "loss": 46.0, - "step": 10060 - }, - { - "epoch": 1.6202343089496356, - "grad_norm": 0.002966416534036398, - "learning_rate": 0.0001999987071726641, - "loss": 46.0, - "step": 10061 - }, - { - "epoch": 1.620395346028423, - "grad_norm": 0.0011163183953613043, - "learning_rate": 0.0001999987069153984, - "loss": 46.0, - "step": 10062 - }, - { - "epoch": 1.6205563831072105, - "grad_norm": 0.00035459158243611455, - "learning_rate": 0.0001999987066581071, - "loss": 46.0, - "step": 10063 - }, - { - "epoch": 1.620717420185998, - "grad_norm": 0.0022138794884085655, - "learning_rate": 0.00019999870640079016, - "loss": 46.0, - "step": 10064 - }, - { - "epoch": 1.6208784572647852, - "grad_norm": 0.00615911278873682, - "learning_rate": 0.00019999870614344768, - "loss": 46.0, - "step": 10065 - }, - { - "epoch": 1.6210394943435726, - "grad_norm": 0.0027898841071873903, - "learning_rate": 0.00019999870588607957, - "loss": 46.0, - "step": 10066 - }, - { - "epoch": 1.6212005314223599, - "grad_norm": 0.004338650498539209, - "learning_rate": 0.00019999870562868589, - "loss": 46.0, - "step": 10067 - }, - { - "epoch": 1.6213615685011473, - "grad_norm": 0.001677638734690845, - "learning_rate": 0.00019999870537126658, - "loss": 46.0, - "step": 10068 - }, - { - "epoch": 1.6215226055799348, - "grad_norm": 0.0007829164969734848, - "learning_rate": 0.00019999870511382172, - "loss": 46.0, - "step": 10069 - }, - { - "epoch": 1.6216836426587222, - "grad_norm": 0.002520563080906868, - "learning_rate": 0.00019999870485635124, - "loss": 46.0, - "step": 10070 - }, - { - "epoch": 1.6218446797375097, - "grad_norm": 0.004305894486606121, - "learning_rate": 0.00019999870459885518, - "loss": 46.0, - "step": 10071 - }, - { - "epoch": 1.622005716816297, - "grad_norm": 0.0006340468535199761, - "learning_rate": 0.00019999870434133353, - "loss": 46.0, - "step": 10072 - }, - { - "epoch": 1.6221667538950844, - "grad_norm": 0.0066359075717628, - "learning_rate": 0.00019999870408378626, - "loss": 46.0, - "step": 10073 - }, - { - "epoch": 1.6223277909738716, - "grad_norm": 0.0027352499309927225, - "learning_rate": 0.0001999987038262134, - "loss": 46.0, - "step": 10074 - }, - { - "epoch": 1.622488828052659, - "grad_norm": 0.0009624132653698325, - "learning_rate": 0.00019999870356861497, - "loss": 46.0, - "step": 10075 - }, - { - "epoch": 1.6226498651314465, - "grad_norm": 0.0006998457829467952, - "learning_rate": 0.0001999987033109909, - "loss": 46.0, - "step": 10076 - }, - { - "epoch": 1.622810902210234, - "grad_norm": 0.0012244939571246505, - "learning_rate": 0.00019999870305334127, - "loss": 46.0, - "step": 10077 - }, - { - "epoch": 1.6229719392890214, - "grad_norm": 0.005225862842053175, - "learning_rate": 0.00019999870279566607, - "loss": 46.0, - "step": 10078 - }, - { - "epoch": 1.6231329763678088, - "grad_norm": 0.0021019408013671637, - "learning_rate": 0.00019999870253796523, - "loss": 46.0, - "step": 10079 - }, - { - "epoch": 1.623294013446596, - "grad_norm": 0.0016421145992353559, - "learning_rate": 0.0001999987022802388, - "loss": 46.0, - "step": 10080 - }, - { - "epoch": 1.6234550505253833, - "grad_norm": 0.004970487207174301, - "learning_rate": 0.0001999987020224868, - "loss": 46.0, - "step": 10081 - }, - { - "epoch": 1.6236160876041708, - "grad_norm": 0.0025287049356848, - "learning_rate": 0.00019999870176470917, - "loss": 46.0, - "step": 10082 - }, - { - "epoch": 1.6237771246829582, - "grad_norm": 0.0009500497253611684, - "learning_rate": 0.00019999870150690598, - "loss": 46.0, - "step": 10083 - }, - { - "epoch": 1.6239381617617457, - "grad_norm": 0.004572180565446615, - "learning_rate": 0.0001999987012490772, - "loss": 46.0, - "step": 10084 - }, - { - "epoch": 1.624099198840533, - "grad_norm": 0.0036276737228035927, - "learning_rate": 0.0001999987009912228, - "loss": 46.0, - "step": 10085 - }, - { - "epoch": 1.6242602359193206, - "grad_norm": 0.003646471770480275, - "learning_rate": 0.0001999987007333428, - "loss": 46.0, - "step": 10086 - }, - { - "epoch": 1.6244212729981078, - "grad_norm": 0.003856019349768758, - "learning_rate": 0.00019999870047543724, - "loss": 46.0, - "step": 10087 - }, - { - "epoch": 1.6245823100768952, - "grad_norm": 0.001449517090804875, - "learning_rate": 0.00019999870021750605, - "loss": 46.0, - "step": 10088 - }, - { - "epoch": 1.6247433471556825, - "grad_norm": 0.002414804883301258, - "learning_rate": 0.00019999869995954928, - "loss": 46.0, - "step": 10089 - }, - { - "epoch": 1.62490438423447, - "grad_norm": 0.0013962793163955212, - "learning_rate": 0.00019999869970156693, - "loss": 46.0, - "step": 10090 - }, - { - "epoch": 1.6250654213132574, - "grad_norm": 0.001952785300090909, - "learning_rate": 0.00019999869944355898, - "loss": 46.0, - "step": 10091 - }, - { - "epoch": 1.6252264583920448, - "grad_norm": 0.0030772138852626085, - "learning_rate": 0.0001999986991855254, - "loss": 46.0, - "step": 10092 - }, - { - "epoch": 1.6253874954708323, - "grad_norm": 0.009740648791193962, - "learning_rate": 0.00019999869892746626, - "loss": 46.0, - "step": 10093 - }, - { - "epoch": 1.6255485325496195, - "grad_norm": 0.004067290108650923, - "learning_rate": 0.00019999869866938152, - "loss": 46.0, - "step": 10094 - }, - { - "epoch": 1.625709569628407, - "grad_norm": 0.0013128085993230343, - "learning_rate": 0.00019999869841127118, - "loss": 46.0, - "step": 10095 - }, - { - "epoch": 1.6258706067071942, - "grad_norm": 0.005055727902799845, - "learning_rate": 0.00019999869815313524, - "loss": 46.0, - "step": 10096 - }, - { - "epoch": 1.6260316437859816, - "grad_norm": 0.00602863309904933, - "learning_rate": 0.00019999869789497372, - "loss": 46.0, - "step": 10097 - }, - { - "epoch": 1.626192680864769, - "grad_norm": 0.004003756679594517, - "learning_rate": 0.00019999869763678662, - "loss": 46.0, - "step": 10098 - }, - { - "epoch": 1.6263537179435565, - "grad_norm": 0.001058023888617754, - "learning_rate": 0.0001999986973785739, - "loss": 46.0, - "step": 10099 - }, - { - "epoch": 1.626514755022344, - "grad_norm": 0.0008028603042475879, - "learning_rate": 0.00019999869712033558, - "loss": 46.0, - "step": 10100 - }, - { - "epoch": 1.6266757921011314, - "grad_norm": 0.00355736305937171, - "learning_rate": 0.0001999986968620717, - "loss": 46.0, - "step": 10101 - }, - { - "epoch": 1.6268368291799187, - "grad_norm": 0.0012655669124796987, - "learning_rate": 0.00019999869660378218, - "loss": 46.0, - "step": 10102 - }, - { - "epoch": 1.626997866258706, - "grad_norm": 0.0006586768431589007, - "learning_rate": 0.00019999869634546708, - "loss": 46.0, - "step": 10103 - }, - { - "epoch": 1.6271589033374934, - "grad_norm": 0.001410821219906211, - "learning_rate": 0.00019999869608712637, - "loss": 46.0, - "step": 10104 - }, - { - "epoch": 1.6273199404162808, - "grad_norm": 0.0027455328963696957, - "learning_rate": 0.00019999869582876012, - "loss": 46.0, - "step": 10105 - }, - { - "epoch": 1.6274809774950683, - "grad_norm": 0.004590548574924469, - "learning_rate": 0.00019999869557036824, - "loss": 46.0, - "step": 10106 - }, - { - "epoch": 1.6276420145738557, - "grad_norm": 0.0038277723360806704, - "learning_rate": 0.00019999869531195076, - "loss": 46.0, - "step": 10107 - }, - { - "epoch": 1.6278030516526432, - "grad_norm": 0.006106822285801172, - "learning_rate": 0.0001999986950535077, - "loss": 46.0, - "step": 10108 - }, - { - "epoch": 1.6279640887314304, - "grad_norm": 0.0021642930805683136, - "learning_rate": 0.00019999869479503903, - "loss": 46.0, - "step": 10109 - }, - { - "epoch": 1.6281251258102178, - "grad_norm": 0.004288241732865572, - "learning_rate": 0.0001999986945365448, - "loss": 46.0, - "step": 10110 - }, - { - "epoch": 1.628286162889005, - "grad_norm": 0.0019926796667277813, - "learning_rate": 0.00019999869427802494, - "loss": 46.0, - "step": 10111 - }, - { - "epoch": 1.6284471999677925, - "grad_norm": 0.001437365310266614, - "learning_rate": 0.00019999869401947948, - "loss": 46.0, - "step": 10112 - }, - { - "epoch": 1.62860823704658, - "grad_norm": 0.0014885510317981243, - "learning_rate": 0.00019999869376090845, - "loss": 46.0, - "step": 10113 - }, - { - "epoch": 1.6287692741253674, - "grad_norm": 0.0024821716360747814, - "learning_rate": 0.00019999869350231182, - "loss": 46.0, - "step": 10114 - }, - { - "epoch": 1.6289303112041549, - "grad_norm": 0.001589068560861051, - "learning_rate": 0.0001999986932436896, - "loss": 46.0, - "step": 10115 - }, - { - "epoch": 1.629091348282942, - "grad_norm": 0.0037374033126980066, - "learning_rate": 0.00019999869298504178, - "loss": 46.0, - "step": 10116 - }, - { - "epoch": 1.6292523853617296, - "grad_norm": 0.000879802624695003, - "learning_rate": 0.00019999869272636835, - "loss": 46.0, - "step": 10117 - }, - { - "epoch": 1.6294134224405168, - "grad_norm": 0.004092895891517401, - "learning_rate": 0.00019999869246766934, - "loss": 46.0, - "step": 10118 - }, - { - "epoch": 1.6295744595193042, - "grad_norm": 0.002839423716068268, - "learning_rate": 0.00019999869220894474, - "loss": 46.0, - "step": 10119 - }, - { - "epoch": 1.6297354965980917, - "grad_norm": 0.0038170686457306147, - "learning_rate": 0.00019999869195019452, - "loss": 46.0, - "step": 10120 - }, - { - "epoch": 1.6298965336768791, - "grad_norm": 0.0006704450352117419, - "learning_rate": 0.00019999869169141875, - "loss": 46.0, - "step": 10121 - }, - { - "epoch": 1.6300575707556666, - "grad_norm": 0.004688435699790716, - "learning_rate": 0.00019999869143261733, - "loss": 46.0, - "step": 10122 - }, - { - "epoch": 1.630218607834454, - "grad_norm": 0.0006784223951399326, - "learning_rate": 0.00019999869117379038, - "loss": 46.0, - "step": 10123 - }, - { - "epoch": 1.6303796449132413, - "grad_norm": 0.0009879088029265404, - "learning_rate": 0.0001999986909149378, - "loss": 46.0, - "step": 10124 - }, - { - "epoch": 1.6305406819920285, - "grad_norm": 0.0014195271069183946, - "learning_rate": 0.0001999986906560596, - "loss": 46.0, - "step": 10125 - }, - { - "epoch": 1.630701719070816, - "grad_norm": 0.002923595020547509, - "learning_rate": 0.00019999869039715585, - "loss": 46.0, - "step": 10126 - }, - { - "epoch": 1.6308627561496034, - "grad_norm": 0.0018372135236859322, - "learning_rate": 0.00019999869013822647, - "loss": 46.0, - "step": 10127 - }, - { - "epoch": 1.6310237932283909, - "grad_norm": 0.003369334153831005, - "learning_rate": 0.00019999868987927153, - "loss": 46.0, - "step": 10128 - }, - { - "epoch": 1.6311848303071783, - "grad_norm": 0.0015605166554450989, - "learning_rate": 0.00019999868962029094, - "loss": 46.0, - "step": 10129 - }, - { - "epoch": 1.6313458673859658, - "grad_norm": 0.011558981612324715, - "learning_rate": 0.00019999868936128483, - "loss": 46.0, - "step": 10130 - }, - { - "epoch": 1.631506904464753, - "grad_norm": 0.003711790544912219, - "learning_rate": 0.00019999868910225308, - "loss": 46.0, - "step": 10131 - }, - { - "epoch": 1.6316679415435404, - "grad_norm": 0.008824239484965801, - "learning_rate": 0.00019999868884319576, - "loss": 46.0, - "step": 10132 - }, - { - "epoch": 1.6318289786223277, - "grad_norm": 0.001055867993272841, - "learning_rate": 0.00019999868858411283, - "loss": 46.0, - "step": 10133 - }, - { - "epoch": 1.6319900157011151, - "grad_norm": 0.008462686091661453, - "learning_rate": 0.0001999986883250043, - "loss": 46.0, - "step": 10134 - }, - { - "epoch": 1.6321510527799026, - "grad_norm": 0.0019918810576200485, - "learning_rate": 0.00019999868806587018, - "loss": 46.0, - "step": 10135 - }, - { - "epoch": 1.63231208985869, - "grad_norm": 0.0006822144496254623, - "learning_rate": 0.00019999868780671046, - "loss": 46.0, - "step": 10136 - }, - { - "epoch": 1.6324731269374775, - "grad_norm": 0.0006239225040189922, - "learning_rate": 0.00019999868754752516, - "loss": 46.0, - "step": 10137 - }, - { - "epoch": 1.6326341640162647, - "grad_norm": 0.0004670976777561009, - "learning_rate": 0.00019999868728831426, - "loss": 46.0, - "step": 10138 - }, - { - "epoch": 1.6327952010950522, - "grad_norm": 0.002166206017136574, - "learning_rate": 0.00019999868702907776, - "loss": 46.0, - "step": 10139 - }, - { - "epoch": 1.6329562381738394, - "grad_norm": 0.0006342603010125458, - "learning_rate": 0.0001999986867698157, - "loss": 46.0, - "step": 10140 - }, - { - "epoch": 1.6331172752526268, - "grad_norm": 0.000780830392614007, - "learning_rate": 0.00019999868651052798, - "loss": 46.0, - "step": 10141 - }, - { - "epoch": 1.6332783123314143, - "grad_norm": 0.005827606189996004, - "learning_rate": 0.0001999986862512147, - "loss": 46.0, - "step": 10142 - }, - { - "epoch": 1.6334393494102017, - "grad_norm": 0.0009667090489529073, - "learning_rate": 0.00019999868599187583, - "loss": 46.0, - "step": 10143 - }, - { - "epoch": 1.6336003864889892, - "grad_norm": 0.0012722114333882928, - "learning_rate": 0.00019999868573251136, - "loss": 46.0, - "step": 10144 - }, - { - "epoch": 1.6337614235677764, - "grad_norm": 0.0021969599183648825, - "learning_rate": 0.00019999868547312128, - "loss": 46.0, - "step": 10145 - }, - { - "epoch": 1.6339224606465639, - "grad_norm": 0.00491515826433897, - "learning_rate": 0.00019999868521370563, - "loss": 46.0, - "step": 10146 - }, - { - "epoch": 1.634083497725351, - "grad_norm": 0.0008893043850548565, - "learning_rate": 0.00019999868495426437, - "loss": 46.0, - "step": 10147 - }, - { - "epoch": 1.6342445348041386, - "grad_norm": 0.0010812173131853342, - "learning_rate": 0.00019999868469479753, - "loss": 46.0, - "step": 10148 - }, - { - "epoch": 1.634405571882926, - "grad_norm": 0.008252987638115883, - "learning_rate": 0.0001999986844353051, - "loss": 46.0, - "step": 10149 - }, - { - "epoch": 1.6345666089617135, - "grad_norm": 0.0012162404600530863, - "learning_rate": 0.00019999868417578705, - "loss": 46.0, - "step": 10150 - }, - { - "epoch": 1.634727646040501, - "grad_norm": 0.0004576060746330768, - "learning_rate": 0.0001999986839162434, - "loss": 46.0, - "step": 10151 - }, - { - "epoch": 1.6348886831192884, - "grad_norm": 0.0017532332567498088, - "learning_rate": 0.0001999986836566742, - "loss": 46.0, - "step": 10152 - }, - { - "epoch": 1.6350497201980756, - "grad_norm": 0.0017380168428644538, - "learning_rate": 0.00019999868339707936, - "loss": 46.0, - "step": 10153 - }, - { - "epoch": 1.635210757276863, - "grad_norm": 0.0017242757603526115, - "learning_rate": 0.00019999868313745896, - "loss": 46.0, - "step": 10154 - }, - { - "epoch": 1.6353717943556503, - "grad_norm": 0.004668163601309061, - "learning_rate": 0.00019999868287781295, - "loss": 46.0, - "step": 10155 - }, - { - "epoch": 1.6355328314344377, - "grad_norm": 0.003891122993081808, - "learning_rate": 0.00019999868261814135, - "loss": 46.0, - "step": 10156 - }, - { - "epoch": 1.6356938685132252, - "grad_norm": 0.000941638310905546, - "learning_rate": 0.00019999868235844414, - "loss": 46.0, - "step": 10157 - }, - { - "epoch": 1.6358549055920126, - "grad_norm": 0.0007921447395347059, - "learning_rate": 0.00019999868209872137, - "loss": 46.0, - "step": 10158 - }, - { - "epoch": 1.6360159426708, - "grad_norm": 0.003627803875133395, - "learning_rate": 0.00019999868183897296, - "loss": 46.0, - "step": 10159 - }, - { - "epoch": 1.6361769797495873, - "grad_norm": 0.004973144736140966, - "learning_rate": 0.00019999868157919898, - "loss": 46.0, - "step": 10160 - }, - { - "epoch": 1.6363380168283748, - "grad_norm": 0.0053692711517214775, - "learning_rate": 0.00019999868131939942, - "loss": 46.0, - "step": 10161 - }, - { - "epoch": 1.636499053907162, - "grad_norm": 0.0034927669912576675, - "learning_rate": 0.00019999868105957425, - "loss": 46.0, - "step": 10162 - }, - { - "epoch": 1.6366600909859494, - "grad_norm": 0.001765651279129088, - "learning_rate": 0.00019999868079972349, - "loss": 46.0, - "step": 10163 - }, - { - "epoch": 1.636821128064737, - "grad_norm": 0.001500483718700707, - "learning_rate": 0.0001999986805398471, - "loss": 46.0, - "step": 10164 - }, - { - "epoch": 1.6369821651435243, - "grad_norm": 0.0013322671875357628, - "learning_rate": 0.00019999868027994517, - "loss": 46.0, - "step": 10165 - }, - { - "epoch": 1.6371432022223118, - "grad_norm": 0.0007708729826845229, - "learning_rate": 0.00019999868002001762, - "loss": 46.0, - "step": 10166 - }, - { - "epoch": 1.637304239301099, - "grad_norm": 0.005682768300175667, - "learning_rate": 0.00019999867976006449, - "loss": 46.0, - "step": 10167 - }, - { - "epoch": 1.6374652763798865, - "grad_norm": 0.0011884463019669056, - "learning_rate": 0.00019999867950008576, - "loss": 46.0, - "step": 10168 - }, - { - "epoch": 1.6376263134586737, - "grad_norm": 0.002665251726284623, - "learning_rate": 0.0001999986792400814, - "loss": 46.0, - "step": 10169 - }, - { - "epoch": 1.6377873505374612, - "grad_norm": 0.0014309600228443742, - "learning_rate": 0.00019999867898005147, - "loss": 46.0, - "step": 10170 - }, - { - "epoch": 1.6379483876162486, - "grad_norm": 0.0068282983265817165, - "learning_rate": 0.00019999867871999596, - "loss": 46.0, - "step": 10171 - }, - { - "epoch": 1.638109424695036, - "grad_norm": 0.0024610208347439766, - "learning_rate": 0.00019999867845991483, - "loss": 46.0, - "step": 10172 - }, - { - "epoch": 1.6382704617738235, - "grad_norm": 0.0019890815019607544, - "learning_rate": 0.00019999867819980814, - "loss": 46.0, - "step": 10173 - }, - { - "epoch": 1.638431498852611, - "grad_norm": 0.005978357046842575, - "learning_rate": 0.0001999986779396758, - "loss": 46.0, - "step": 10174 - }, - { - "epoch": 1.6385925359313982, - "grad_norm": 0.000870869611389935, - "learning_rate": 0.00019999867767951792, - "loss": 46.0, - "step": 10175 - }, - { - "epoch": 1.6387535730101856, - "grad_norm": 0.0015010355273261666, - "learning_rate": 0.00019999867741933442, - "loss": 46.0, - "step": 10176 - }, - { - "epoch": 1.6389146100889729, - "grad_norm": 0.0017040945822373033, - "learning_rate": 0.00019999867715912535, - "loss": 46.0, - "step": 10177 - }, - { - "epoch": 1.6390756471677603, - "grad_norm": 0.0031766793690621853, - "learning_rate": 0.00019999867689889068, - "loss": 46.0, - "step": 10178 - }, - { - "epoch": 1.6392366842465478, - "grad_norm": 0.006567544303834438, - "learning_rate": 0.00019999867663863036, - "loss": 46.0, - "step": 10179 - }, - { - "epoch": 1.6393977213253352, - "grad_norm": 0.0015725432895123959, - "learning_rate": 0.0001999986763783445, - "loss": 46.0, - "step": 10180 - }, - { - "epoch": 1.6395587584041227, - "grad_norm": 0.005981299560517073, - "learning_rate": 0.000199998676118033, - "loss": 46.0, - "step": 10181 - }, - { - "epoch": 1.63971979548291, - "grad_norm": 0.0015052786329761147, - "learning_rate": 0.00019999867585769599, - "loss": 46.0, - "step": 10182 - }, - { - "epoch": 1.6398808325616974, - "grad_norm": 0.004097385797649622, - "learning_rate": 0.00019999867559733332, - "loss": 46.0, - "step": 10183 - }, - { - "epoch": 1.6400418696404846, - "grad_norm": 0.006585290189832449, - "learning_rate": 0.00019999867533694506, - "loss": 46.0, - "step": 10184 - }, - { - "epoch": 1.640202906719272, - "grad_norm": 0.0008066860609687865, - "learning_rate": 0.00019999867507653122, - "loss": 46.0, - "step": 10185 - }, - { - "epoch": 1.6403639437980595, - "grad_norm": 0.0019000923493877053, - "learning_rate": 0.0001999986748160918, - "loss": 46.0, - "step": 10186 - }, - { - "epoch": 1.640524980876847, - "grad_norm": 0.0015420460840687156, - "learning_rate": 0.00019999867455562675, - "loss": 46.0, - "step": 10187 - }, - { - "epoch": 1.6406860179556344, - "grad_norm": 0.006298865657299757, - "learning_rate": 0.00019999867429513612, - "loss": 46.0, - "step": 10188 - }, - { - "epoch": 1.6408470550344216, - "grad_norm": 0.0009785126894712448, - "learning_rate": 0.0001999986740346199, - "loss": 46.0, - "step": 10189 - }, - { - "epoch": 1.641008092113209, - "grad_norm": 0.007423033472150564, - "learning_rate": 0.0001999986737740781, - "loss": 46.0, - "step": 10190 - }, - { - "epoch": 1.6411691291919963, - "grad_norm": 0.0024120002053678036, - "learning_rate": 0.00019999867351351067, - "loss": 46.0, - "step": 10191 - }, - { - "epoch": 1.6413301662707838, - "grad_norm": 0.005326361861079931, - "learning_rate": 0.00019999867325291767, - "loss": 46.0, - "step": 10192 - }, - { - "epoch": 1.6414912033495712, - "grad_norm": 0.0005744045483879745, - "learning_rate": 0.00019999867299229907, - "loss": 46.0, - "step": 10193 - }, - { - "epoch": 1.6416522404283587, - "grad_norm": 0.0020154910162091255, - "learning_rate": 0.00019999867273165487, - "loss": 46.0, - "step": 10194 - }, - { - "epoch": 1.6418132775071461, - "grad_norm": 0.0012288293801248074, - "learning_rate": 0.00019999867247098507, - "loss": 46.0, - "step": 10195 - }, - { - "epoch": 1.6419743145859336, - "grad_norm": 0.0016620673704892397, - "learning_rate": 0.0001999986722102897, - "loss": 46.0, - "step": 10196 - }, - { - "epoch": 1.6421353516647208, - "grad_norm": 0.00709503935649991, - "learning_rate": 0.0001999986719495687, - "loss": 46.0, - "step": 10197 - }, - { - "epoch": 1.642296388743508, - "grad_norm": 0.010401827283203602, - "learning_rate": 0.00019999867168882214, - "loss": 46.0, - "step": 10198 - }, - { - "epoch": 1.6424574258222955, - "grad_norm": 0.0028349391650408506, - "learning_rate": 0.00019999867142804996, - "loss": 46.0, - "step": 10199 - }, - { - "epoch": 1.642618462901083, - "grad_norm": 0.00155677052680403, - "learning_rate": 0.0001999986711672522, - "loss": 46.0, - "step": 10200 - }, - { - "epoch": 1.6427794999798704, - "grad_norm": 0.001592310145497322, - "learning_rate": 0.00019999867090642886, - "loss": 46.0, - "step": 10201 - }, - { - "epoch": 1.6429405370586578, - "grad_norm": 0.0013410663232207298, - "learning_rate": 0.0001999986706455799, - "loss": 46.0, - "step": 10202 - }, - { - "epoch": 1.6431015741374453, - "grad_norm": 0.004570495802909136, - "learning_rate": 0.00019999867038470535, - "loss": 46.0, - "step": 10203 - }, - { - "epoch": 1.6432626112162325, - "grad_norm": 0.0069716391153633595, - "learning_rate": 0.0001999986701238052, - "loss": 46.0, - "step": 10204 - }, - { - "epoch": 1.64342364829502, - "grad_norm": 0.0009891677182167768, - "learning_rate": 0.00019999866986287947, - "loss": 46.0, - "step": 10205 - }, - { - "epoch": 1.6435846853738072, - "grad_norm": 0.001915937289595604, - "learning_rate": 0.00019999866960192814, - "loss": 46.0, - "step": 10206 - }, - { - "epoch": 1.6437457224525946, - "grad_norm": 0.004006378818303347, - "learning_rate": 0.00019999866934095121, - "loss": 46.0, - "step": 10207 - }, - { - "epoch": 1.643906759531382, - "grad_norm": 0.001221159240230918, - "learning_rate": 0.00019999866907994868, - "loss": 46.0, - "step": 10208 - }, - { - "epoch": 1.6440677966101696, - "grad_norm": 0.0025894588325172663, - "learning_rate": 0.0001999986688189206, - "loss": 46.0, - "step": 10209 - }, - { - "epoch": 1.644228833688957, - "grad_norm": 0.0018897461704909801, - "learning_rate": 0.0001999986685578669, - "loss": 46.0, - "step": 10210 - }, - { - "epoch": 1.6443898707677442, - "grad_norm": 0.0026194225065410137, - "learning_rate": 0.00019999866829678757, - "loss": 46.0, - "step": 10211 - }, - { - "epoch": 1.6445509078465317, - "grad_norm": 0.0016701591666787863, - "learning_rate": 0.00019999866803568268, - "loss": 46.0, - "step": 10212 - }, - { - "epoch": 1.644711944925319, - "grad_norm": 0.007778188679367304, - "learning_rate": 0.00019999866777455219, - "loss": 46.0, - "step": 10213 - }, - { - "epoch": 1.6448729820041064, - "grad_norm": 0.009426028467714787, - "learning_rate": 0.0001999986675133961, - "loss": 46.0, - "step": 10214 - }, - { - "epoch": 1.6450340190828938, - "grad_norm": 0.002500662812963128, - "learning_rate": 0.00019999866725221442, - "loss": 46.0, - "step": 10215 - }, - { - "epoch": 1.6451950561616813, - "grad_norm": 0.0014767794637009501, - "learning_rate": 0.00019999866699100714, - "loss": 46.0, - "step": 10216 - }, - { - "epoch": 1.6453560932404687, - "grad_norm": 0.0012752891052514315, - "learning_rate": 0.0001999986667297743, - "loss": 46.0, - "step": 10217 - }, - { - "epoch": 1.6455171303192562, - "grad_norm": 0.005201061721891165, - "learning_rate": 0.0001999986664685158, - "loss": 46.0, - "step": 10218 - }, - { - "epoch": 1.6456781673980434, - "grad_norm": 0.0018691556761041284, - "learning_rate": 0.00019999866620723172, - "loss": 46.0, - "step": 10219 - }, - { - "epoch": 1.6458392044768306, - "grad_norm": 0.0005572540685534477, - "learning_rate": 0.00019999866594592208, - "loss": 46.0, - "step": 10220 - }, - { - "epoch": 1.646000241555618, - "grad_norm": 0.0027833315543830395, - "learning_rate": 0.00019999866568458683, - "loss": 46.0, - "step": 10221 - }, - { - "epoch": 1.6461612786344055, - "grad_norm": 0.00598552543669939, - "learning_rate": 0.000199998665423226, - "loss": 46.0, - "step": 10222 - }, - { - "epoch": 1.646322315713193, - "grad_norm": 0.00397255877032876, - "learning_rate": 0.00019999866516183957, - "loss": 46.0, - "step": 10223 - }, - { - "epoch": 1.6464833527919804, - "grad_norm": 0.0010870033875107765, - "learning_rate": 0.00019999866490042753, - "loss": 46.0, - "step": 10224 - }, - { - "epoch": 1.6466443898707679, - "grad_norm": 0.0025970637798309326, - "learning_rate": 0.0001999986646389899, - "loss": 46.0, - "step": 10225 - }, - { - "epoch": 1.6468054269495551, - "grad_norm": 0.0015767483273521066, - "learning_rate": 0.00019999866437752669, - "loss": 46.0, - "step": 10226 - }, - { - "epoch": 1.6469664640283426, - "grad_norm": 0.001275165006518364, - "learning_rate": 0.00019999866411603786, - "loss": 46.0, - "step": 10227 - }, - { - "epoch": 1.6471275011071298, - "grad_norm": 0.0018989950185641646, - "learning_rate": 0.00019999866385452347, - "loss": 46.0, - "step": 10228 - }, - { - "epoch": 1.6472885381859173, - "grad_norm": 0.003920397255569696, - "learning_rate": 0.00019999866359298344, - "loss": 46.0, - "step": 10229 - }, - { - "epoch": 1.6474495752647047, - "grad_norm": 0.01568520814180374, - "learning_rate": 0.00019999866333141785, - "loss": 46.0, - "step": 10230 - }, - { - "epoch": 1.6476106123434922, - "grad_norm": 0.0010440874611958861, - "learning_rate": 0.00019999866306982667, - "loss": 46.0, - "step": 10231 - }, - { - "epoch": 1.6477716494222796, - "grad_norm": 0.0030033974908292294, - "learning_rate": 0.00019999866280820988, - "loss": 46.0, - "step": 10232 - }, - { - "epoch": 1.6479326865010668, - "grad_norm": 0.0017947761807590723, - "learning_rate": 0.0001999986625465675, - "loss": 46.0, - "step": 10233 - }, - { - "epoch": 1.6480937235798543, - "grad_norm": 0.0037991649005562067, - "learning_rate": 0.0001999986622848995, - "loss": 46.0, - "step": 10234 - }, - { - "epoch": 1.6482547606586415, - "grad_norm": 0.0007412537233904004, - "learning_rate": 0.00019999866202320593, - "loss": 46.0, - "step": 10235 - }, - { - "epoch": 1.648415797737429, - "grad_norm": 0.0010383485350757837, - "learning_rate": 0.0001999986617614868, - "loss": 46.0, - "step": 10236 - }, - { - "epoch": 1.6485768348162164, - "grad_norm": 0.003825170686468482, - "learning_rate": 0.000199998661499742, - "loss": 46.0, - "step": 10237 - }, - { - "epoch": 1.6487378718950039, - "grad_norm": 0.0006131341215223074, - "learning_rate": 0.00019999866123797166, - "loss": 46.0, - "step": 10238 - }, - { - "epoch": 1.6488989089737913, - "grad_norm": 0.0015460059512406588, - "learning_rate": 0.0001999986609761757, - "loss": 46.0, - "step": 10239 - }, - { - "epoch": 1.6490599460525786, - "grad_norm": 0.0004370794340502471, - "learning_rate": 0.00019999866071435416, - "loss": 46.0, - "step": 10240 - }, - { - "epoch": 1.649220983131366, - "grad_norm": 0.0009183295769616961, - "learning_rate": 0.00019999866045250703, - "loss": 46.0, - "step": 10241 - }, - { - "epoch": 1.6493820202101532, - "grad_norm": 0.0026633599773049355, - "learning_rate": 0.0001999986601906343, - "loss": 46.0, - "step": 10242 - }, - { - "epoch": 1.6495430572889407, - "grad_norm": 0.0006786429439671338, - "learning_rate": 0.00019999865992873596, - "loss": 46.0, - "step": 10243 - }, - { - "epoch": 1.6497040943677281, - "grad_norm": 0.0035209462512284517, - "learning_rate": 0.00019999865966681207, - "loss": 46.0, - "step": 10244 - }, - { - "epoch": 1.6498651314465156, - "grad_norm": 0.0015742137329652905, - "learning_rate": 0.00019999865940486253, - "loss": 46.0, - "step": 10245 - }, - { - "epoch": 1.650026168525303, - "grad_norm": 0.0005557678523473442, - "learning_rate": 0.0001999986591428874, - "loss": 46.0, - "step": 10246 - }, - { - "epoch": 1.6501872056040905, - "grad_norm": 0.0043942974880337715, - "learning_rate": 0.0001999986588808867, - "loss": 46.0, - "step": 10247 - }, - { - "epoch": 1.6503482426828777, - "grad_norm": 0.001118987798690796, - "learning_rate": 0.00019999865861886044, - "loss": 46.0, - "step": 10248 - }, - { - "epoch": 1.6505092797616652, - "grad_norm": 0.002809450961649418, - "learning_rate": 0.00019999865835680853, - "loss": 46.0, - "step": 10249 - }, - { - "epoch": 1.6506703168404524, - "grad_norm": 0.002697139512747526, - "learning_rate": 0.000199998658094731, - "loss": 46.0, - "step": 10250 - }, - { - "epoch": 1.6508313539192399, - "grad_norm": 0.007467437069863081, - "learning_rate": 0.00019999865783262795, - "loss": 46.0, - "step": 10251 - }, - { - "epoch": 1.6509923909980273, - "grad_norm": 0.002739156596362591, - "learning_rate": 0.00019999865757049928, - "loss": 46.0, - "step": 10252 - }, - { - "epoch": 1.6511534280768148, - "grad_norm": 0.005171090364456177, - "learning_rate": 0.000199998657308345, - "loss": 46.0, - "step": 10253 - }, - { - "epoch": 1.6513144651556022, - "grad_norm": 0.0052896603010594845, - "learning_rate": 0.00019999865704616512, - "loss": 46.0, - "step": 10254 - }, - { - "epoch": 1.6514755022343894, - "grad_norm": 0.0020279455929994583, - "learning_rate": 0.00019999865678395966, - "loss": 46.0, - "step": 10255 - }, - { - "epoch": 1.6516365393131769, - "grad_norm": 0.0008441900718025863, - "learning_rate": 0.00019999865652172862, - "loss": 46.0, - "step": 10256 - }, - { - "epoch": 1.6517975763919641, - "grad_norm": 0.0030460739508271217, - "learning_rate": 0.00019999865625947195, - "loss": 46.0, - "step": 10257 - }, - { - "epoch": 1.6519586134707516, - "grad_norm": 0.0014858118956908584, - "learning_rate": 0.00019999865599718973, - "loss": 46.0, - "step": 10258 - }, - { - "epoch": 1.652119650549539, - "grad_norm": 0.0009504615445621312, - "learning_rate": 0.00019999865573488184, - "loss": 46.0, - "step": 10259 - }, - { - "epoch": 1.6522806876283265, - "grad_norm": 0.0033596823923289776, - "learning_rate": 0.00019999865547254842, - "loss": 46.0, - "step": 10260 - }, - { - "epoch": 1.652441724707114, - "grad_norm": 0.0036205481737852097, - "learning_rate": 0.00019999865521018939, - "loss": 46.0, - "step": 10261 - }, - { - "epoch": 1.6526027617859012, - "grad_norm": 0.00516926683485508, - "learning_rate": 0.0001999986549478048, - "loss": 46.0, - "step": 10262 - }, - { - "epoch": 1.6527637988646886, - "grad_norm": 0.0014452082104980946, - "learning_rate": 0.00019999865468539455, - "loss": 46.0, - "step": 10263 - }, - { - "epoch": 1.6529248359434758, - "grad_norm": 0.0010142697719857097, - "learning_rate": 0.00019999865442295875, - "loss": 46.0, - "step": 10264 - }, - { - "epoch": 1.6530858730222633, - "grad_norm": 0.0022039192263036966, - "learning_rate": 0.00019999865416049734, - "loss": 46.0, - "step": 10265 - }, - { - "epoch": 1.6532469101010507, - "grad_norm": 0.00158783420920372, - "learning_rate": 0.00019999865389801037, - "loss": 46.0, - "step": 10266 - }, - { - "epoch": 1.6534079471798382, - "grad_norm": 0.006714479066431522, - "learning_rate": 0.00019999865363549776, - "loss": 46.0, - "step": 10267 - }, - { - "epoch": 1.6535689842586256, - "grad_norm": 0.001299915136769414, - "learning_rate": 0.00019999865337295956, - "loss": 46.0, - "step": 10268 - }, - { - "epoch": 1.653730021337413, - "grad_norm": 0.0018057352863252163, - "learning_rate": 0.00019999865311039577, - "loss": 46.0, - "step": 10269 - }, - { - "epoch": 1.6538910584162003, - "grad_norm": 0.002297317376360297, - "learning_rate": 0.0001999986528478064, - "loss": 46.0, - "step": 10270 - }, - { - "epoch": 1.6540520954949878, - "grad_norm": 0.004488641861826181, - "learning_rate": 0.0001999986525851914, - "loss": 46.0, - "step": 10271 - }, - { - "epoch": 1.654213132573775, - "grad_norm": 0.00581676559522748, - "learning_rate": 0.00019999865232255085, - "loss": 46.0, - "step": 10272 - }, - { - "epoch": 1.6543741696525625, - "grad_norm": 0.0016572366002947092, - "learning_rate": 0.0001999986520598847, - "loss": 46.0, - "step": 10273 - }, - { - "epoch": 1.65453520673135, - "grad_norm": 0.002199936658143997, - "learning_rate": 0.00019999865179719294, - "loss": 46.0, - "step": 10274 - }, - { - "epoch": 1.6546962438101374, - "grad_norm": 0.0015104350168257952, - "learning_rate": 0.00019999865153447557, - "loss": 46.0, - "step": 10275 - }, - { - "epoch": 1.6548572808889248, - "grad_norm": 0.0017150352941825986, - "learning_rate": 0.00019999865127173262, - "loss": 46.0, - "step": 10276 - }, - { - "epoch": 1.655018317967712, - "grad_norm": 0.001269400236196816, - "learning_rate": 0.00019999865100896408, - "loss": 46.0, - "step": 10277 - }, - { - "epoch": 1.6551793550464995, - "grad_norm": 0.0008260617032647133, - "learning_rate": 0.00019999865074616995, - "loss": 46.0, - "step": 10278 - }, - { - "epoch": 1.6553403921252867, - "grad_norm": 0.0034680834505707026, - "learning_rate": 0.0001999986504833502, - "loss": 46.0, - "step": 10279 - }, - { - "epoch": 1.6555014292040742, - "grad_norm": 0.002518394961953163, - "learning_rate": 0.0001999986502205049, - "loss": 46.0, - "step": 10280 - }, - { - "epoch": 1.6556624662828616, - "grad_norm": 0.0015852086944505572, - "learning_rate": 0.00019999864995763397, - "loss": 46.0, - "step": 10281 - }, - { - "epoch": 1.655823503361649, - "grad_norm": 0.0008194456459023058, - "learning_rate": 0.00019999864969473747, - "loss": 46.0, - "step": 10282 - }, - { - "epoch": 1.6559845404404365, - "grad_norm": 0.0013457053573802114, - "learning_rate": 0.00019999864943181535, - "loss": 46.0, - "step": 10283 - }, - { - "epoch": 1.6561455775192238, - "grad_norm": 0.0010190659668296576, - "learning_rate": 0.00019999864916886765, - "loss": 46.0, - "step": 10284 - }, - { - "epoch": 1.6563066145980112, - "grad_norm": 0.00438708858564496, - "learning_rate": 0.00019999864890589436, - "loss": 46.0, - "step": 10285 - }, - { - "epoch": 1.6564676516767984, - "grad_norm": 0.0016959699569270015, - "learning_rate": 0.00019999864864289545, - "loss": 46.0, - "step": 10286 - }, - { - "epoch": 1.6566286887555859, - "grad_norm": 0.0015905431937426329, - "learning_rate": 0.00019999864837987096, - "loss": 46.0, - "step": 10287 - }, - { - "epoch": 1.6567897258343733, - "grad_norm": 0.00187137839384377, - "learning_rate": 0.00019999864811682088, - "loss": 46.0, - "step": 10288 - }, - { - "epoch": 1.6569507629131608, - "grad_norm": 0.0037138783372938633, - "learning_rate": 0.00019999864785374524, - "loss": 46.0, - "step": 10289 - }, - { - "epoch": 1.6571117999919482, - "grad_norm": 0.0038526931311935186, - "learning_rate": 0.00019999864759064396, - "loss": 46.0, - "step": 10290 - }, - { - "epoch": 1.6572728370707357, - "grad_norm": 0.004028433468192816, - "learning_rate": 0.0001999986473275171, - "loss": 46.0, - "step": 10291 - }, - { - "epoch": 1.657433874149523, - "grad_norm": 0.0028323526494205, - "learning_rate": 0.00019999864706436463, - "loss": 46.0, - "step": 10292 - }, - { - "epoch": 1.6575949112283102, - "grad_norm": 0.002596818143501878, - "learning_rate": 0.00019999864680118656, - "loss": 46.0, - "step": 10293 - }, - { - "epoch": 1.6577559483070976, - "grad_norm": 0.0006851371726952493, - "learning_rate": 0.00019999864653798293, - "loss": 46.0, - "step": 10294 - }, - { - "epoch": 1.657916985385885, - "grad_norm": 0.0004906481481157243, - "learning_rate": 0.00019999864627475366, - "loss": 46.0, - "step": 10295 - }, - { - "epoch": 1.6580780224646725, - "grad_norm": 0.001583976554684341, - "learning_rate": 0.00019999864601149883, - "loss": 46.0, - "step": 10296 - }, - { - "epoch": 1.65823905954346, - "grad_norm": 0.0004942187806591392, - "learning_rate": 0.0001999986457482184, - "loss": 46.0, - "step": 10297 - }, - { - "epoch": 1.6584000966222474, - "grad_norm": 0.001061291666701436, - "learning_rate": 0.00019999864548491235, - "loss": 46.0, - "step": 10298 - }, - { - "epoch": 1.6585611337010346, - "grad_norm": 0.006979250814765692, - "learning_rate": 0.00019999864522158073, - "loss": 46.0, - "step": 10299 - }, - { - "epoch": 1.658722170779822, - "grad_norm": 0.002171772997826338, - "learning_rate": 0.00019999864495822352, - "loss": 46.0, - "step": 10300 - }, - { - "epoch": 1.6588832078586093, - "grad_norm": 0.0016173458425328135, - "learning_rate": 0.00019999864469484073, - "loss": 46.0, - "step": 10301 - }, - { - "epoch": 1.6590442449373968, - "grad_norm": 0.0008664227789267898, - "learning_rate": 0.00019999864443143232, - "loss": 46.0, - "step": 10302 - }, - { - "epoch": 1.6592052820161842, - "grad_norm": 0.008758303709328175, - "learning_rate": 0.00019999864416799833, - "loss": 46.0, - "step": 10303 - }, - { - "epoch": 1.6593663190949717, - "grad_norm": 0.002159829018637538, - "learning_rate": 0.00019999864390453872, - "loss": 46.0, - "step": 10304 - }, - { - "epoch": 1.6595273561737591, - "grad_norm": 0.0011274589924141765, - "learning_rate": 0.00019999864364105355, - "loss": 46.0, - "step": 10305 - }, - { - "epoch": 1.6596883932525464, - "grad_norm": 0.002077885903418064, - "learning_rate": 0.00019999864337754276, - "loss": 46.0, - "step": 10306 - }, - { - "epoch": 1.6598494303313338, - "grad_norm": 0.0023729309905320406, - "learning_rate": 0.0001999986431140064, - "loss": 46.0, - "step": 10307 - }, - { - "epoch": 1.660010467410121, - "grad_norm": 0.0013342833844944835, - "learning_rate": 0.0001999986428504444, - "loss": 46.0, - "step": 10308 - }, - { - "epoch": 1.6601715044889085, - "grad_norm": 0.0010401697363704443, - "learning_rate": 0.00019999864258685683, - "loss": 46.0, - "step": 10309 - }, - { - "epoch": 1.660332541567696, - "grad_norm": 0.0008103396976366639, - "learning_rate": 0.00019999864232324367, - "loss": 46.0, - "step": 10310 - }, - { - "epoch": 1.6604935786464834, - "grad_norm": 0.00264181406237185, - "learning_rate": 0.00019999864205960492, - "loss": 46.0, - "step": 10311 - }, - { - "epoch": 1.6606546157252708, - "grad_norm": 0.0016249459004029632, - "learning_rate": 0.00019999864179594056, - "loss": 46.0, - "step": 10312 - }, - { - "epoch": 1.6608156528040583, - "grad_norm": 0.0004028141556773335, - "learning_rate": 0.00019999864153225064, - "loss": 46.0, - "step": 10313 - }, - { - "epoch": 1.6609766898828455, - "grad_norm": 0.002409035572782159, - "learning_rate": 0.0001999986412685351, - "loss": 46.0, - "step": 10314 - }, - { - "epoch": 1.6611377269616328, - "grad_norm": 0.002102794824168086, - "learning_rate": 0.00019999864100479395, - "loss": 46.0, - "step": 10315 - }, - { - "epoch": 1.6612987640404202, - "grad_norm": 0.002698926953598857, - "learning_rate": 0.00019999864074102724, - "loss": 46.0, - "step": 10316 - }, - { - "epoch": 1.6614598011192077, - "grad_norm": 0.006986147724092007, - "learning_rate": 0.00019999864047723492, - "loss": 46.0, - "step": 10317 - }, - { - "epoch": 1.661620838197995, - "grad_norm": 0.0012697026832029223, - "learning_rate": 0.00019999864021341698, - "loss": 46.0, - "step": 10318 - }, - { - "epoch": 1.6617818752767826, - "grad_norm": 0.0062049999833106995, - "learning_rate": 0.00019999863994957348, - "loss": 46.0, - "step": 10319 - }, - { - "epoch": 1.66194291235557, - "grad_norm": 0.008781444281339645, - "learning_rate": 0.00019999863968570437, - "loss": 46.0, - "step": 10320 - }, - { - "epoch": 1.6621039494343572, - "grad_norm": 0.0037513836286962032, - "learning_rate": 0.00019999863942180967, - "loss": 46.0, - "step": 10321 - }, - { - "epoch": 1.6622649865131447, - "grad_norm": 0.007861552760004997, - "learning_rate": 0.00019999863915788935, - "loss": 46.0, - "step": 10322 - }, - { - "epoch": 1.662426023591932, - "grad_norm": 0.004364061634987593, - "learning_rate": 0.00019999863889394348, - "loss": 46.0, - "step": 10323 - }, - { - "epoch": 1.6625870606707194, - "grad_norm": 0.0011777520412579179, - "learning_rate": 0.00019999863862997202, - "loss": 46.0, - "step": 10324 - }, - { - "epoch": 1.6627480977495068, - "grad_norm": 0.0009405204909853637, - "learning_rate": 0.00019999863836597491, - "loss": 46.0, - "step": 10325 - }, - { - "epoch": 1.6629091348282943, - "grad_norm": 0.0006122518097981811, - "learning_rate": 0.00019999863810195225, - "loss": 46.0, - "step": 10326 - }, - { - "epoch": 1.6630701719070817, - "grad_norm": 0.005554147996008396, - "learning_rate": 0.000199998637837904, - "loss": 46.0, - "step": 10327 - }, - { - "epoch": 1.663231208985869, - "grad_norm": 0.001578698051162064, - "learning_rate": 0.00019999863757383014, - "loss": 46.0, - "step": 10328 - }, - { - "epoch": 1.6633922460646564, - "grad_norm": 0.0014352636644616723, - "learning_rate": 0.00019999863730973068, - "loss": 46.0, - "step": 10329 - }, - { - "epoch": 1.6635532831434436, - "grad_norm": 0.0026615578681230545, - "learning_rate": 0.00019999863704560562, - "loss": 46.0, - "step": 10330 - }, - { - "epoch": 1.663714320222231, - "grad_norm": 0.0030296051409095526, - "learning_rate": 0.000199998636781455, - "loss": 46.0, - "step": 10331 - }, - { - "epoch": 1.6638753573010185, - "grad_norm": 0.0016279906267300248, - "learning_rate": 0.00019999863651727875, - "loss": 46.0, - "step": 10332 - }, - { - "epoch": 1.664036394379806, - "grad_norm": 0.003992682788521051, - "learning_rate": 0.0001999986362530769, - "loss": 46.0, - "step": 10333 - }, - { - "epoch": 1.6641974314585934, - "grad_norm": 0.000827592913992703, - "learning_rate": 0.00019999863598884948, - "loss": 46.0, - "step": 10334 - }, - { - "epoch": 1.664358468537381, - "grad_norm": 0.0023400168865919113, - "learning_rate": 0.00019999863572459642, - "loss": 46.0, - "step": 10335 - }, - { - "epoch": 1.6645195056161681, - "grad_norm": 0.0009639761410653591, - "learning_rate": 0.00019999863546031783, - "loss": 46.0, - "step": 10336 - }, - { - "epoch": 1.6646805426949554, - "grad_norm": 0.005953082349151373, - "learning_rate": 0.0001999986351960136, - "loss": 46.0, - "step": 10337 - }, - { - "epoch": 1.6648415797737428, - "grad_norm": 0.0012322901748120785, - "learning_rate": 0.0001999986349316838, - "loss": 46.0, - "step": 10338 - }, - { - "epoch": 1.6650026168525303, - "grad_norm": 0.0005517352255992591, - "learning_rate": 0.0001999986346673284, - "loss": 46.0, - "step": 10339 - }, - { - "epoch": 1.6651636539313177, - "grad_norm": 0.0006666937842965126, - "learning_rate": 0.00019999863440294742, - "loss": 46.0, - "step": 10340 - }, - { - "epoch": 1.6653246910101052, - "grad_norm": 0.0027998704463243484, - "learning_rate": 0.00019999863413854084, - "loss": 46.0, - "step": 10341 - }, - { - "epoch": 1.6654857280888926, - "grad_norm": 0.008945444598793983, - "learning_rate": 0.00019999863387410862, - "loss": 46.0, - "step": 10342 - }, - { - "epoch": 1.6656467651676798, - "grad_norm": 0.00030072429217398167, - "learning_rate": 0.00019999863360965086, - "loss": 46.0, - "step": 10343 - }, - { - "epoch": 1.6658078022464673, - "grad_norm": 0.002474376931786537, - "learning_rate": 0.00019999863334516747, - "loss": 46.0, - "step": 10344 - }, - { - "epoch": 1.6659688393252545, - "grad_norm": 0.0035807553213089705, - "learning_rate": 0.0001999986330806585, - "loss": 46.0, - "step": 10345 - }, - { - "epoch": 1.666129876404042, - "grad_norm": 0.0034403803292661905, - "learning_rate": 0.00019999863281612394, - "loss": 46.0, - "step": 10346 - }, - { - "epoch": 1.6662909134828294, - "grad_norm": 0.001939290203154087, - "learning_rate": 0.0001999986325515638, - "loss": 46.0, - "step": 10347 - }, - { - "epoch": 1.6664519505616169, - "grad_norm": 0.0046937232837080956, - "learning_rate": 0.00019999863228697802, - "loss": 46.0, - "step": 10348 - }, - { - "epoch": 1.6666129876404043, - "grad_norm": 0.002770220162346959, - "learning_rate": 0.00019999863202236668, - "loss": 46.0, - "step": 10349 - }, - { - "epoch": 1.6667740247191916, - "grad_norm": 0.0027611434925347567, - "learning_rate": 0.00019999863175772974, - "loss": 46.0, - "step": 10350 - }, - { - "epoch": 1.666935061797979, - "grad_norm": 0.0010709353955462575, - "learning_rate": 0.0001999986314930672, - "loss": 46.0, - "step": 10351 - }, - { - "epoch": 1.6670960988767662, - "grad_norm": 0.0037821882870048285, - "learning_rate": 0.00019999863122837906, - "loss": 46.0, - "step": 10352 - }, - { - "epoch": 1.6672571359555537, - "grad_norm": 0.005217619240283966, - "learning_rate": 0.00019999863096366535, - "loss": 46.0, - "step": 10353 - }, - { - "epoch": 1.6674181730343411, - "grad_norm": 0.004672898445278406, - "learning_rate": 0.00019999863069892603, - "loss": 46.0, - "step": 10354 - }, - { - "epoch": 1.6675792101131286, - "grad_norm": 0.0007431456469930708, - "learning_rate": 0.00019999863043416112, - "loss": 46.0, - "step": 10355 - }, - { - "epoch": 1.667740247191916, - "grad_norm": 0.003903965000063181, - "learning_rate": 0.0001999986301693706, - "loss": 46.0, - "step": 10356 - }, - { - "epoch": 1.6679012842707033, - "grad_norm": 0.002124632941558957, - "learning_rate": 0.00019999862990455452, - "loss": 46.0, - "step": 10357 - }, - { - "epoch": 1.6680623213494907, - "grad_norm": 0.0014969630865380168, - "learning_rate": 0.0001999986296397128, - "loss": 46.0, - "step": 10358 - }, - { - "epoch": 1.668223358428278, - "grad_norm": 0.0021574695128947496, - "learning_rate": 0.0001999986293748455, - "loss": 46.0, - "step": 10359 - }, - { - "epoch": 1.6683843955070654, - "grad_norm": 0.0008957255049608648, - "learning_rate": 0.00019999862910995261, - "loss": 46.0, - "step": 10360 - }, - { - "epoch": 1.6685454325858529, - "grad_norm": 0.001666371594183147, - "learning_rate": 0.00019999862884503416, - "loss": 46.0, - "step": 10361 - }, - { - "epoch": 1.6687064696646403, - "grad_norm": 0.0006663691601715982, - "learning_rate": 0.00019999862858009006, - "loss": 46.0, - "step": 10362 - }, - { - "epoch": 1.6688675067434278, - "grad_norm": 0.0009327695588581264, - "learning_rate": 0.00019999862831512042, - "loss": 46.0, - "step": 10363 - }, - { - "epoch": 1.6690285438222152, - "grad_norm": 0.002431015018373728, - "learning_rate": 0.00019999862805012515, - "loss": 46.0, - "step": 10364 - }, - { - "epoch": 1.6691895809010024, - "grad_norm": 0.0015025166794657707, - "learning_rate": 0.0001999986277851043, - "loss": 46.0, - "step": 10365 - }, - { - "epoch": 1.66935061797979, - "grad_norm": 0.001371413585729897, - "learning_rate": 0.0001999986275200578, - "loss": 46.0, - "step": 10366 - }, - { - "epoch": 1.6695116550585771, - "grad_norm": 0.003519990248605609, - "learning_rate": 0.00019999862725498578, - "loss": 46.0, - "step": 10367 - }, - { - "epoch": 1.6696726921373646, - "grad_norm": 0.006053016055375338, - "learning_rate": 0.00019999862698988813, - "loss": 46.0, - "step": 10368 - }, - { - "epoch": 1.669833729216152, - "grad_norm": 0.005175014957785606, - "learning_rate": 0.0001999986267247649, - "loss": 46.0, - "step": 10369 - }, - { - "epoch": 1.6699947662949395, - "grad_norm": 0.0027856845408678055, - "learning_rate": 0.00019999862645961607, - "loss": 46.0, - "step": 10370 - }, - { - "epoch": 1.670155803373727, - "grad_norm": 0.003092190483585, - "learning_rate": 0.00019999862619444163, - "loss": 46.0, - "step": 10371 - }, - { - "epoch": 1.6703168404525142, - "grad_norm": 0.001366854296065867, - "learning_rate": 0.0001999986259292416, - "loss": 46.0, - "step": 10372 - }, - { - "epoch": 1.6704778775313016, - "grad_norm": 0.002011434640735388, - "learning_rate": 0.00019999862566401602, - "loss": 46.0, - "step": 10373 - }, - { - "epoch": 1.6706389146100888, - "grad_norm": 0.0028051792178303003, - "learning_rate": 0.0001999986253987648, - "loss": 46.0, - "step": 10374 - }, - { - "epoch": 1.6707999516888763, - "grad_norm": 0.003266475861892104, - "learning_rate": 0.000199998625133488, - "loss": 46.0, - "step": 10375 - }, - { - "epoch": 1.6709609887676637, - "grad_norm": 0.0028007877990603447, - "learning_rate": 0.0001999986248681856, - "loss": 46.0, - "step": 10376 - }, - { - "epoch": 1.6711220258464512, - "grad_norm": 0.00100128713529557, - "learning_rate": 0.00019999862460285759, - "loss": 46.0, - "step": 10377 - }, - { - "epoch": 1.6712830629252386, - "grad_norm": 0.0011375052854418755, - "learning_rate": 0.000199998624337504, - "loss": 46.0, - "step": 10378 - }, - { - "epoch": 1.6714441000040259, - "grad_norm": 0.0004539019719231874, - "learning_rate": 0.00019999862407212482, - "loss": 46.0, - "step": 10379 - }, - { - "epoch": 1.6716051370828133, - "grad_norm": 0.0016809777589514852, - "learning_rate": 0.00019999862380672004, - "loss": 46.0, - "step": 10380 - }, - { - "epoch": 1.6717661741616006, - "grad_norm": 0.0006973856943659484, - "learning_rate": 0.00019999862354128965, - "loss": 46.0, - "step": 10381 - }, - { - "epoch": 1.671927211240388, - "grad_norm": 0.0005322683718986809, - "learning_rate": 0.0001999986232758337, - "loss": 46.0, - "step": 10382 - }, - { - "epoch": 1.6720882483191755, - "grad_norm": 0.00047642423305660486, - "learning_rate": 0.00019999862301035216, - "loss": 46.0, - "step": 10383 - }, - { - "epoch": 1.672249285397963, - "grad_norm": 0.004444230813533068, - "learning_rate": 0.00019999862274484498, - "loss": 46.0, - "step": 10384 - }, - { - "epoch": 1.6724103224767504, - "grad_norm": 0.001323769916780293, - "learning_rate": 0.00019999862247931221, - "loss": 46.0, - "step": 10385 - }, - { - "epoch": 1.6725713595555378, - "grad_norm": 0.0016513635637238622, - "learning_rate": 0.0001999986222137539, - "loss": 46.0, - "step": 10386 - }, - { - "epoch": 1.672732396634325, - "grad_norm": 0.001027752528898418, - "learning_rate": 0.00019999862194816997, - "loss": 46.0, - "step": 10387 - }, - { - "epoch": 1.6728934337131125, - "grad_norm": 0.005808922462165356, - "learning_rate": 0.00019999862168256042, - "loss": 46.0, - "step": 10388 - }, - { - "epoch": 1.6730544707918997, - "grad_norm": 0.001681674038991332, - "learning_rate": 0.0001999986214169253, - "loss": 46.0, - "step": 10389 - }, - { - "epoch": 1.6732155078706872, - "grad_norm": 0.001717392704449594, - "learning_rate": 0.00019999862115126457, - "loss": 46.0, - "step": 10390 - }, - { - "epoch": 1.6733765449494746, - "grad_norm": 0.0018831808120012283, - "learning_rate": 0.00019999862088557825, - "loss": 46.0, - "step": 10391 - }, - { - "epoch": 1.673537582028262, - "grad_norm": 0.0006793508655391634, - "learning_rate": 0.00019999862061986635, - "loss": 46.0, - "step": 10392 - }, - { - "epoch": 1.6736986191070495, - "grad_norm": 0.001540188561193645, - "learning_rate": 0.00019999862035412883, - "loss": 46.0, - "step": 10393 - }, - { - "epoch": 1.6738596561858368, - "grad_norm": 0.002044814173132181, - "learning_rate": 0.00019999862008836575, - "loss": 46.0, - "step": 10394 - }, - { - "epoch": 1.6740206932646242, - "grad_norm": 0.002085526240989566, - "learning_rate": 0.00019999861982257703, - "loss": 46.0, - "step": 10395 - }, - { - "epoch": 1.6741817303434114, - "grad_norm": 0.005129432305693626, - "learning_rate": 0.00019999861955676275, - "loss": 46.0, - "step": 10396 - }, - { - "epoch": 1.674342767422199, - "grad_norm": 0.001831444795243442, - "learning_rate": 0.00019999861929092288, - "loss": 46.0, - "step": 10397 - }, - { - "epoch": 1.6745038045009863, - "grad_norm": 0.0021805427968502045, - "learning_rate": 0.00019999861902505737, - "loss": 46.0, - "step": 10398 - }, - { - "epoch": 1.6746648415797738, - "grad_norm": 0.008991080336272717, - "learning_rate": 0.00019999861875916633, - "loss": 46.0, - "step": 10399 - }, - { - "epoch": 1.6748258786585613, - "grad_norm": 0.0030124543700367212, - "learning_rate": 0.00019999861849324965, - "loss": 46.0, - "step": 10400 - }, - { - "epoch": 1.6749869157373485, - "grad_norm": 0.0007078531198203564, - "learning_rate": 0.00019999861822730738, - "loss": 46.0, - "step": 10401 - }, - { - "epoch": 1.675147952816136, - "grad_norm": 0.0069088684394955635, - "learning_rate": 0.00019999861796133952, - "loss": 46.0, - "step": 10402 - }, - { - "epoch": 1.6753089898949232, - "grad_norm": 0.0019473673310130835, - "learning_rate": 0.00019999861769534608, - "loss": 46.0, - "step": 10403 - }, - { - "epoch": 1.6754700269737106, - "grad_norm": 0.0026252041570842266, - "learning_rate": 0.00019999861742932704, - "loss": 46.0, - "step": 10404 - }, - { - "epoch": 1.675631064052498, - "grad_norm": 0.000608101487159729, - "learning_rate": 0.0001999986171632824, - "loss": 46.0, - "step": 10405 - }, - { - "epoch": 1.6757921011312855, - "grad_norm": 0.001595841720700264, - "learning_rate": 0.00019999861689721217, - "loss": 46.0, - "step": 10406 - }, - { - "epoch": 1.675953138210073, - "grad_norm": 0.0007078467751853168, - "learning_rate": 0.00019999861663111634, - "loss": 46.0, - "step": 10407 - }, - { - "epoch": 1.6761141752888604, - "grad_norm": 0.006640944164246321, - "learning_rate": 0.0001999986163649949, - "loss": 46.0, - "step": 10408 - }, - { - "epoch": 1.6762752123676476, - "grad_norm": 0.007485799957066774, - "learning_rate": 0.0001999986160988479, - "loss": 46.0, - "step": 10409 - }, - { - "epoch": 1.6764362494464349, - "grad_norm": 0.0008861521491780877, - "learning_rate": 0.00019999861583267528, - "loss": 46.0, - "step": 10410 - }, - { - "epoch": 1.6765972865252223, - "grad_norm": 0.004999096039682627, - "learning_rate": 0.00019999861556647708, - "loss": 46.0, - "step": 10411 - }, - { - "epoch": 1.6767583236040098, - "grad_norm": 0.0010776834096759558, - "learning_rate": 0.00019999861530025327, - "loss": 46.0, - "step": 10412 - }, - { - "epoch": 1.6769193606827972, - "grad_norm": 0.00090724544133991, - "learning_rate": 0.00019999861503400387, - "loss": 46.0, - "step": 10413 - }, - { - "epoch": 1.6770803977615847, - "grad_norm": 0.0009070896776393056, - "learning_rate": 0.0001999986147677289, - "loss": 46.0, - "step": 10414 - }, - { - "epoch": 1.6772414348403721, - "grad_norm": 0.00188609235920012, - "learning_rate": 0.0001999986145014283, - "loss": 46.0, - "step": 10415 - }, - { - "epoch": 1.6774024719191594, - "grad_norm": 0.002111490350216627, - "learning_rate": 0.0001999986142351021, - "loss": 46.0, - "step": 10416 - }, - { - "epoch": 1.6775635089979468, - "grad_norm": 0.0020061295945197344, - "learning_rate": 0.00019999861396875036, - "loss": 46.0, - "step": 10417 - }, - { - "epoch": 1.677724546076734, - "grad_norm": 0.005179078318178654, - "learning_rate": 0.00019999861370237297, - "loss": 46.0, - "step": 10418 - }, - { - "epoch": 1.6778855831555215, - "grad_norm": 0.0013346009654924273, - "learning_rate": 0.00019999861343597002, - "loss": 46.0, - "step": 10419 - }, - { - "epoch": 1.678046620234309, - "grad_norm": 0.001554424874484539, - "learning_rate": 0.00019999861316954146, - "loss": 46.0, - "step": 10420 - }, - { - "epoch": 1.6782076573130964, - "grad_norm": 0.007755435537546873, - "learning_rate": 0.0001999986129030873, - "loss": 46.0, - "step": 10421 - }, - { - "epoch": 1.6783686943918839, - "grad_norm": 0.0023411880247294903, - "learning_rate": 0.00019999861263660755, - "loss": 46.0, - "step": 10422 - }, - { - "epoch": 1.678529731470671, - "grad_norm": 0.0031238319352269173, - "learning_rate": 0.00019999861237010222, - "loss": 46.0, - "step": 10423 - }, - { - "epoch": 1.6786907685494585, - "grad_norm": 0.0005404056864790618, - "learning_rate": 0.00019999861210357128, - "loss": 46.0, - "step": 10424 - }, - { - "epoch": 1.6788518056282458, - "grad_norm": 0.0013121241936460137, - "learning_rate": 0.00019999861183701473, - "loss": 46.0, - "step": 10425 - }, - { - "epoch": 1.6790128427070332, - "grad_norm": 0.002822204725816846, - "learning_rate": 0.00019999861157043262, - "loss": 46.0, - "step": 10426 - }, - { - "epoch": 1.6791738797858207, - "grad_norm": 0.009026975370943546, - "learning_rate": 0.0001999986113038249, - "loss": 46.0, - "step": 10427 - }, - { - "epoch": 1.6793349168646081, - "grad_norm": 0.002976997522637248, - "learning_rate": 0.0001999986110371916, - "loss": 46.0, - "step": 10428 - }, - { - "epoch": 1.6794959539433956, - "grad_norm": 0.0013816198334097862, - "learning_rate": 0.00019999861077053268, - "loss": 46.0, - "step": 10429 - }, - { - "epoch": 1.679656991022183, - "grad_norm": 0.002703386591747403, - "learning_rate": 0.0001999986105038482, - "loss": 46.0, - "step": 10430 - }, - { - "epoch": 1.6798180281009703, - "grad_norm": 0.001805583480745554, - "learning_rate": 0.0001999986102371381, - "loss": 46.0, - "step": 10431 - }, - { - "epoch": 1.6799790651797575, - "grad_norm": 0.0015338781522586942, - "learning_rate": 0.00019999860997040237, - "loss": 46.0, - "step": 10432 - }, - { - "epoch": 1.680140102258545, - "grad_norm": 0.0004922823281958699, - "learning_rate": 0.0001999986097036411, - "loss": 46.0, - "step": 10433 - }, - { - "epoch": 1.6803011393373324, - "grad_norm": 0.00591637659817934, - "learning_rate": 0.00019999860943685423, - "loss": 46.0, - "step": 10434 - }, - { - "epoch": 1.6804621764161198, - "grad_norm": 0.0025271170306950808, - "learning_rate": 0.00019999860917004173, - "loss": 46.0, - "step": 10435 - }, - { - "epoch": 1.6806232134949073, - "grad_norm": 0.0028867062646895647, - "learning_rate": 0.0001999986089032037, - "loss": 46.0, - "step": 10436 - }, - { - "epoch": 1.6807842505736947, - "grad_norm": 0.002870681695640087, - "learning_rate": 0.00019999860863634, - "loss": 46.0, - "step": 10437 - }, - { - "epoch": 1.680945287652482, - "grad_norm": 0.0017795058665797114, - "learning_rate": 0.00019999860836945074, - "loss": 46.0, - "step": 10438 - }, - { - "epoch": 1.6811063247312694, - "grad_norm": 0.004960164427757263, - "learning_rate": 0.0001999986081025359, - "loss": 46.0, - "step": 10439 - }, - { - "epoch": 1.6812673618100566, - "grad_norm": 0.005027100909501314, - "learning_rate": 0.00019999860783559545, - "loss": 46.0, - "step": 10440 - }, - { - "epoch": 1.681428398888844, - "grad_norm": 0.0020708946976810694, - "learning_rate": 0.0001999986075686294, - "loss": 46.0, - "step": 10441 - }, - { - "epoch": 1.6815894359676316, - "grad_norm": 0.002186340279877186, - "learning_rate": 0.00019999860730163778, - "loss": 46.0, - "step": 10442 - }, - { - "epoch": 1.681750473046419, - "grad_norm": 0.0005775339668616652, - "learning_rate": 0.00019999860703462052, - "loss": 46.0, - "step": 10443 - }, - { - "epoch": 1.6819115101252065, - "grad_norm": 0.010404218919575214, - "learning_rate": 0.0001999986067675777, - "loss": 46.0, - "step": 10444 - }, - { - "epoch": 1.6820725472039937, - "grad_norm": 0.0047836159355938435, - "learning_rate": 0.00019999860650050927, - "loss": 46.0, - "step": 10445 - }, - { - "epoch": 1.6822335842827811, - "grad_norm": 0.0027169575914740562, - "learning_rate": 0.00019999860623341528, - "loss": 46.0, - "step": 10446 - }, - { - "epoch": 1.6823946213615684, - "grad_norm": 0.003834177041426301, - "learning_rate": 0.00019999860596629567, - "loss": 46.0, - "step": 10447 - }, - { - "epoch": 1.6825556584403558, - "grad_norm": 0.0018635675078257918, - "learning_rate": 0.00019999860569915048, - "loss": 46.0, - "step": 10448 - }, - { - "epoch": 1.6827166955191433, - "grad_norm": 0.003225313499569893, - "learning_rate": 0.00019999860543197965, - "loss": 46.0, - "step": 10449 - }, - { - "epoch": 1.6828777325979307, - "grad_norm": 0.005864087026566267, - "learning_rate": 0.00019999860516478325, - "loss": 46.0, - "step": 10450 - }, - { - "epoch": 1.6830387696767182, - "grad_norm": 0.0051914844661951065, - "learning_rate": 0.00019999860489756127, - "loss": 46.0, - "step": 10451 - }, - { - "epoch": 1.6831998067555054, - "grad_norm": 0.003226950066164136, - "learning_rate": 0.0001999986046303137, - "loss": 46.0, - "step": 10452 - }, - { - "epoch": 1.6833608438342929, - "grad_norm": 0.002338156569749117, - "learning_rate": 0.00019999860436304052, - "loss": 46.0, - "step": 10453 - }, - { - "epoch": 1.68352188091308, - "grad_norm": 0.0019909224938601255, - "learning_rate": 0.00019999860409574172, - "loss": 46.0, - "step": 10454 - }, - { - "epoch": 1.6836829179918675, - "grad_norm": 0.001400559674948454, - "learning_rate": 0.0001999986038284174, - "loss": 46.0, - "step": 10455 - }, - { - "epoch": 1.683843955070655, - "grad_norm": 0.0006413241499103606, - "learning_rate": 0.00019999860356106742, - "loss": 46.0, - "step": 10456 - }, - { - "epoch": 1.6840049921494424, - "grad_norm": 0.0011588013730943203, - "learning_rate": 0.0001999986032936919, - "loss": 46.0, - "step": 10457 - }, - { - "epoch": 1.6841660292282299, - "grad_norm": 0.0029584101866930723, - "learning_rate": 0.00019999860302629072, - "loss": 46.0, - "step": 10458 - }, - { - "epoch": 1.6843270663070173, - "grad_norm": 0.0014233908150345087, - "learning_rate": 0.00019999860275886399, - "loss": 46.0, - "step": 10459 - }, - { - "epoch": 1.6844881033858046, - "grad_norm": 0.0007274019299075007, - "learning_rate": 0.00019999860249141164, - "loss": 46.0, - "step": 10460 - }, - { - "epoch": 1.684649140464592, - "grad_norm": 0.002970034023746848, - "learning_rate": 0.0001999986022239337, - "loss": 46.0, - "step": 10461 - }, - { - "epoch": 1.6848101775433793, - "grad_norm": 0.0008836840861476958, - "learning_rate": 0.00019999860195643016, - "loss": 46.0, - "step": 10462 - }, - { - "epoch": 1.6849712146221667, - "grad_norm": 0.00044969565351493657, - "learning_rate": 0.00019999860168890105, - "loss": 46.0, - "step": 10463 - }, - { - "epoch": 1.6851322517009542, - "grad_norm": 0.010037108324468136, - "learning_rate": 0.00019999860142134635, - "loss": 46.0, - "step": 10464 - }, - { - "epoch": 1.6852932887797416, - "grad_norm": 0.0020440032240003347, - "learning_rate": 0.00019999860115376604, - "loss": 46.0, - "step": 10465 - }, - { - "epoch": 1.685454325858529, - "grad_norm": 0.005088374018669128, - "learning_rate": 0.00019999860088616012, - "loss": 46.0, - "step": 10466 - }, - { - "epoch": 1.6856153629373163, - "grad_norm": 0.0041874367743730545, - "learning_rate": 0.00019999860061852863, - "loss": 46.0, - "step": 10467 - }, - { - "epoch": 1.6857764000161037, - "grad_norm": 0.0022942230571061373, - "learning_rate": 0.00019999860035087153, - "loss": 46.0, - "step": 10468 - }, - { - "epoch": 1.685937437094891, - "grad_norm": 0.005675806198269129, - "learning_rate": 0.00019999860008318885, - "loss": 46.0, - "step": 10469 - }, - { - "epoch": 1.6860984741736784, - "grad_norm": 0.0020505287684500217, - "learning_rate": 0.00019999859981548057, - "loss": 46.0, - "step": 10470 - }, - { - "epoch": 1.6862595112524659, - "grad_norm": 0.0015413566725328565, - "learning_rate": 0.0001999985995477467, - "loss": 46.0, - "step": 10471 - }, - { - "epoch": 1.6864205483312533, - "grad_norm": 0.0045183924958109856, - "learning_rate": 0.0001999985992799872, - "loss": 46.0, - "step": 10472 - }, - { - "epoch": 1.6865815854100408, - "grad_norm": 0.009042619727551937, - "learning_rate": 0.00019999859901220215, - "loss": 46.0, - "step": 10473 - }, - { - "epoch": 1.686742622488828, - "grad_norm": 0.004377848003059626, - "learning_rate": 0.00019999859874439147, - "loss": 46.0, - "step": 10474 - }, - { - "epoch": 1.6869036595676155, - "grad_norm": 0.0050247712060809135, - "learning_rate": 0.0001999985984765552, - "loss": 46.0, - "step": 10475 - }, - { - "epoch": 1.6870646966464027, - "grad_norm": 0.006816830951720476, - "learning_rate": 0.0001999985982086934, - "loss": 46.0, - "step": 10476 - }, - { - "epoch": 1.6872257337251901, - "grad_norm": 0.0011554453521966934, - "learning_rate": 0.00019999859794080592, - "loss": 46.0, - "step": 10477 - }, - { - "epoch": 1.6873867708039776, - "grad_norm": 0.001298525370657444, - "learning_rate": 0.00019999859767289287, - "loss": 46.0, - "step": 10478 - }, - { - "epoch": 1.687547807882765, - "grad_norm": 0.007545560132712126, - "learning_rate": 0.00019999859740495426, - "loss": 46.0, - "step": 10479 - }, - { - "epoch": 1.6877088449615525, - "grad_norm": 0.0017515876097604632, - "learning_rate": 0.00019999859713699003, - "loss": 46.0, - "step": 10480 - }, - { - "epoch": 1.68786988204034, - "grad_norm": 0.005050517152994871, - "learning_rate": 0.00019999859686900022, - "loss": 46.0, - "step": 10481 - }, - { - "epoch": 1.6880309191191272, - "grad_norm": 0.0007013885769993067, - "learning_rate": 0.0001999985966009848, - "loss": 46.0, - "step": 10482 - }, - { - "epoch": 1.6881919561979146, - "grad_norm": 0.007724381051957607, - "learning_rate": 0.00019999859633294378, - "loss": 46.0, - "step": 10483 - }, - { - "epoch": 1.6883529932767019, - "grad_norm": 0.0010990517912432551, - "learning_rate": 0.00019999859606487718, - "loss": 46.0, - "step": 10484 - }, - { - "epoch": 1.6885140303554893, - "grad_norm": 0.0032372481655329466, - "learning_rate": 0.00019999859579678496, - "loss": 46.0, - "step": 10485 - }, - { - "epoch": 1.6886750674342768, - "grad_norm": 0.0037654205225408077, - "learning_rate": 0.00019999859552866718, - "loss": 46.0, - "step": 10486 - }, - { - "epoch": 1.6888361045130642, - "grad_norm": 0.004126657731831074, - "learning_rate": 0.0001999985952605238, - "loss": 46.0, - "step": 10487 - }, - { - "epoch": 1.6889971415918517, - "grad_norm": 0.005669938400387764, - "learning_rate": 0.00019999859499235482, - "loss": 46.0, - "step": 10488 - }, - { - "epoch": 1.6891581786706389, - "grad_norm": 0.007425690069794655, - "learning_rate": 0.00019999859472416022, - "loss": 46.0, - "step": 10489 - }, - { - "epoch": 1.6893192157494263, - "grad_norm": 0.0020629935897886753, - "learning_rate": 0.00019999859445594004, - "loss": 46.0, - "step": 10490 - }, - { - "epoch": 1.6894802528282136, - "grad_norm": 0.005646908655762672, - "learning_rate": 0.00019999859418769428, - "loss": 46.0, - "step": 10491 - }, - { - "epoch": 1.689641289907001, - "grad_norm": 0.004508323967456818, - "learning_rate": 0.00019999859391942292, - "loss": 46.0, - "step": 10492 - }, - { - "epoch": 1.6898023269857885, - "grad_norm": 0.0023845683317631483, - "learning_rate": 0.00019999859365112593, - "loss": 46.0, - "step": 10493 - }, - { - "epoch": 1.689963364064576, - "grad_norm": 0.003487972542643547, - "learning_rate": 0.0001999985933828034, - "loss": 46.0, - "step": 10494 - }, - { - "epoch": 1.6901244011433634, - "grad_norm": 0.0010903405491262674, - "learning_rate": 0.00019999859311445526, - "loss": 46.0, - "step": 10495 - }, - { - "epoch": 1.6902854382221506, - "grad_norm": 0.0012227409752085805, - "learning_rate": 0.0001999985928460815, - "loss": 46.0, - "step": 10496 - }, - { - "epoch": 1.690446475300938, - "grad_norm": 0.008045082911849022, - "learning_rate": 0.0001999985925776822, - "loss": 46.0, - "step": 10497 - }, - { - "epoch": 1.6906075123797253, - "grad_norm": 0.004485879093408585, - "learning_rate": 0.00019999859230925723, - "loss": 46.0, - "step": 10498 - }, - { - "epoch": 1.6907685494585127, - "grad_norm": 0.009232739917933941, - "learning_rate": 0.00019999859204080674, - "loss": 46.0, - "step": 10499 - }, - { - "epoch": 1.6909295865373002, - "grad_norm": 0.0043303947895765305, - "learning_rate": 0.0001999985917723306, - "loss": 46.0, - "step": 10500 - }, - { - "epoch": 1.6910906236160876, - "grad_norm": 0.003724894020706415, - "learning_rate": 0.0001999985915038289, - "loss": 46.0, - "step": 10501 - }, - { - "epoch": 1.691251660694875, - "grad_norm": 0.0005904401186853647, - "learning_rate": 0.0001999985912353016, - "loss": 46.0, - "step": 10502 - }, - { - "epoch": 1.6914126977736625, - "grad_norm": 0.004251156002283096, - "learning_rate": 0.0001999985909667487, - "loss": 46.0, - "step": 10503 - }, - { - "epoch": 1.6915737348524498, - "grad_norm": 0.001525015919469297, - "learning_rate": 0.0001999985906981702, - "loss": 46.0, - "step": 10504 - }, - { - "epoch": 1.691734771931237, - "grad_norm": 0.0038381016347557306, - "learning_rate": 0.0001999985904295661, - "loss": 46.0, - "step": 10505 - }, - { - "epoch": 1.6918958090100245, - "grad_norm": 0.0017822892405092716, - "learning_rate": 0.00019999859016093643, - "loss": 46.0, - "step": 10506 - }, - { - "epoch": 1.692056846088812, - "grad_norm": 0.004787055775523186, - "learning_rate": 0.00019999858989228113, - "loss": 46.0, - "step": 10507 - }, - { - "epoch": 1.6922178831675994, - "grad_norm": 0.0019879417959600687, - "learning_rate": 0.00019999858962360025, - "loss": 46.0, - "step": 10508 - }, - { - "epoch": 1.6923789202463868, - "grad_norm": 0.0044783358462154865, - "learning_rate": 0.00019999858935489378, - "loss": 46.0, - "step": 10509 - }, - { - "epoch": 1.6925399573251743, - "grad_norm": 0.003443249501287937, - "learning_rate": 0.00019999858908616172, - "loss": 46.0, - "step": 10510 - }, - { - "epoch": 1.6927009944039615, - "grad_norm": 0.0019783645402640104, - "learning_rate": 0.00019999858881740407, - "loss": 46.0, - "step": 10511 - }, - { - "epoch": 1.692862031482749, - "grad_norm": 0.0028253672644495964, - "learning_rate": 0.00019999858854862081, - "loss": 46.0, - "step": 10512 - }, - { - "epoch": 1.6930230685615362, - "grad_norm": 0.0017712815897539258, - "learning_rate": 0.00019999858827981197, - "loss": 46.0, - "step": 10513 - }, - { - "epoch": 1.6931841056403236, - "grad_norm": 0.0016477230237796903, - "learning_rate": 0.00019999858801097753, - "loss": 46.0, - "step": 10514 - }, - { - "epoch": 1.693345142719111, - "grad_norm": 0.0048409923911094666, - "learning_rate": 0.0001999985877421175, - "loss": 46.0, - "step": 10515 - }, - { - "epoch": 1.6935061797978985, - "grad_norm": 0.005189493298530579, - "learning_rate": 0.00019999858747323185, - "loss": 46.0, - "step": 10516 - }, - { - "epoch": 1.693667216876686, - "grad_norm": 0.0038858221378177404, - "learning_rate": 0.00019999858720432063, - "loss": 46.0, - "step": 10517 - }, - { - "epoch": 1.6938282539554732, - "grad_norm": 0.008748218417167664, - "learning_rate": 0.0001999985869353838, - "loss": 46.0, - "step": 10518 - }, - { - "epoch": 1.6939892910342607, - "grad_norm": 0.0016101914225146174, - "learning_rate": 0.0001999985866664214, - "loss": 46.0, - "step": 10519 - }, - { - "epoch": 1.6941503281130479, - "grad_norm": 0.006393904332071543, - "learning_rate": 0.00019999858639743338, - "loss": 46.0, - "step": 10520 - }, - { - "epoch": 1.6943113651918353, - "grad_norm": 0.003465829649940133, - "learning_rate": 0.00019999858612841978, - "loss": 46.0, - "step": 10521 - }, - { - "epoch": 1.6944724022706228, - "grad_norm": 0.0013447541277855635, - "learning_rate": 0.00019999858585938057, - "loss": 46.0, - "step": 10522 - }, - { - "epoch": 1.6946334393494102, - "grad_norm": 0.014294285327196121, - "learning_rate": 0.0001999985855903158, - "loss": 46.0, - "step": 10523 - }, - { - "epoch": 1.6947944764281977, - "grad_norm": 0.0008617123239673674, - "learning_rate": 0.00019999858532122539, - "loss": 46.0, - "step": 10524 - }, - { - "epoch": 1.6949555135069851, - "grad_norm": 0.001656767213717103, - "learning_rate": 0.0001999985850521094, - "loss": 46.0, - "step": 10525 - }, - { - "epoch": 1.6951165505857724, - "grad_norm": 0.0006299621309153736, - "learning_rate": 0.00019999858478296785, - "loss": 46.0, - "step": 10526 - }, - { - "epoch": 1.6952775876645596, - "grad_norm": 0.00292469235137105, - "learning_rate": 0.0001999985845138007, - "loss": 46.0, - "step": 10527 - }, - { - "epoch": 1.695438624743347, - "grad_norm": 0.00276189879514277, - "learning_rate": 0.0001999985842446079, - "loss": 46.0, - "step": 10528 - }, - { - "epoch": 1.6955996618221345, - "grad_norm": 0.0007011765846982598, - "learning_rate": 0.00019999858397538956, - "loss": 46.0, - "step": 10529 - }, - { - "epoch": 1.695760698900922, - "grad_norm": 0.003778912825509906, - "learning_rate": 0.0001999985837061456, - "loss": 46.0, - "step": 10530 - }, - { - "epoch": 1.6959217359797094, - "grad_norm": 0.0007657675887458026, - "learning_rate": 0.00019999858343687605, - "loss": 46.0, - "step": 10531 - }, - { - "epoch": 1.6960827730584969, - "grad_norm": 0.002568767871707678, - "learning_rate": 0.0001999985831675809, - "loss": 46.0, - "step": 10532 - }, - { - "epoch": 1.696243810137284, - "grad_norm": 0.0007921484066173434, - "learning_rate": 0.00019999858289826018, - "loss": 46.0, - "step": 10533 - }, - { - "epoch": 1.6964048472160715, - "grad_norm": 0.001671319711022079, - "learning_rate": 0.00019999858262891384, - "loss": 46.0, - "step": 10534 - }, - { - "epoch": 1.6965658842948588, - "grad_norm": 0.0014859768562018871, - "learning_rate": 0.00019999858235954192, - "loss": 46.0, - "step": 10535 - }, - { - "epoch": 1.6967269213736462, - "grad_norm": 0.0006414958625100553, - "learning_rate": 0.0001999985820901444, - "loss": 46.0, - "step": 10536 - }, - { - "epoch": 1.6968879584524337, - "grad_norm": 0.013287168927490711, - "learning_rate": 0.0001999985818207213, - "loss": 46.0, - "step": 10537 - }, - { - "epoch": 1.6970489955312211, - "grad_norm": 0.0016671044286340475, - "learning_rate": 0.00019999858155127256, - "loss": 46.0, - "step": 10538 - }, - { - "epoch": 1.6972100326100086, - "grad_norm": 0.005191594362258911, - "learning_rate": 0.00019999858128179828, - "loss": 46.0, - "step": 10539 - }, - { - "epoch": 1.6973710696887958, - "grad_norm": 0.0026636270340532064, - "learning_rate": 0.00019999858101229836, - "loss": 46.0, - "step": 10540 - }, - { - "epoch": 1.6975321067675833, - "grad_norm": 0.006815551780164242, - "learning_rate": 0.00019999858074277286, - "loss": 46.0, - "step": 10541 - }, - { - "epoch": 1.6976931438463705, - "grad_norm": 0.0030364496633410454, - "learning_rate": 0.0001999985804732218, - "loss": 46.0, - "step": 10542 - }, - { - "epoch": 1.697854180925158, - "grad_norm": 0.004536924418061972, - "learning_rate": 0.0001999985802036451, - "loss": 46.0, - "step": 10543 - }, - { - "epoch": 1.6980152180039454, - "grad_norm": 0.0014559825649484992, - "learning_rate": 0.00019999857993404282, - "loss": 46.0, - "step": 10544 - }, - { - "epoch": 1.6981762550827328, - "grad_norm": 0.0029906032141298056, - "learning_rate": 0.00019999857966441494, - "loss": 46.0, - "step": 10545 - }, - { - "epoch": 1.6983372921615203, - "grad_norm": 0.0011763200163841248, - "learning_rate": 0.0001999985793947615, - "loss": 46.0, - "step": 10546 - }, - { - "epoch": 1.6984983292403077, - "grad_norm": 0.0030931145884096622, - "learning_rate": 0.00019999857912508242, - "loss": 46.0, - "step": 10547 - }, - { - "epoch": 1.698659366319095, - "grad_norm": 0.0013452148996293545, - "learning_rate": 0.00019999857885537775, - "loss": 46.0, - "step": 10548 - }, - { - "epoch": 1.6988204033978822, - "grad_norm": 0.003104675794020295, - "learning_rate": 0.00019999857858564752, - "loss": 46.0, - "step": 10549 - }, - { - "epoch": 1.6989814404766697, - "grad_norm": 0.002120691817253828, - "learning_rate": 0.00019999857831589168, - "loss": 46.0, - "step": 10550 - }, - { - "epoch": 1.699142477555457, - "grad_norm": 0.0011800124775618315, - "learning_rate": 0.00019999857804611025, - "loss": 46.0, - "step": 10551 - }, - { - "epoch": 1.6993035146342446, - "grad_norm": 0.001063268049620092, - "learning_rate": 0.0001999985777763032, - "loss": 46.0, - "step": 10552 - }, - { - "epoch": 1.699464551713032, - "grad_norm": 0.0006542173214256763, - "learning_rate": 0.00019999857750647057, - "loss": 46.0, - "step": 10553 - }, - { - "epoch": 1.6996255887918195, - "grad_norm": 0.0008364635286852717, - "learning_rate": 0.00019999857723661238, - "loss": 46.0, - "step": 10554 - }, - { - "epoch": 1.6997866258706067, - "grad_norm": 0.0070970249362289906, - "learning_rate": 0.00019999857696672852, - "loss": 46.0, - "step": 10555 - }, - { - "epoch": 1.6999476629493941, - "grad_norm": 0.002407848834991455, - "learning_rate": 0.00019999857669681913, - "loss": 46.0, - "step": 10556 - }, - { - "epoch": 1.7001087000281814, - "grad_norm": 0.0021727916318923235, - "learning_rate": 0.00019999857642688412, - "loss": 46.0, - "step": 10557 - }, - { - "epoch": 1.7002697371069688, - "grad_norm": 0.001900134957395494, - "learning_rate": 0.00019999857615692353, - "loss": 46.0, - "step": 10558 - }, - { - "epoch": 1.7004307741857563, - "grad_norm": 0.0013372318353503942, - "learning_rate": 0.00019999857588693735, - "loss": 46.0, - "step": 10559 - }, - { - "epoch": 1.7005918112645437, - "grad_norm": 0.0026397083420306444, - "learning_rate": 0.00019999857561692555, - "loss": 46.0, - "step": 10560 - }, - { - "epoch": 1.7007528483433312, - "grad_norm": 0.003049772698432207, - "learning_rate": 0.00019999857534688814, - "loss": 46.0, - "step": 10561 - }, - { - "epoch": 1.7009138854221184, - "grad_norm": 0.0014704653294757009, - "learning_rate": 0.00019999857507682517, - "loss": 46.0, - "step": 10562 - }, - { - "epoch": 1.7010749225009059, - "grad_norm": 0.0013092717854306102, - "learning_rate": 0.0001999985748067366, - "loss": 46.0, - "step": 10563 - }, - { - "epoch": 1.701235959579693, - "grad_norm": 0.0011484537972137332, - "learning_rate": 0.00019999857453662244, - "loss": 46.0, - "step": 10564 - }, - { - "epoch": 1.7013969966584805, - "grad_norm": 0.006104714702814817, - "learning_rate": 0.00019999857426648268, - "loss": 46.0, - "step": 10565 - }, - { - "epoch": 1.701558033737268, - "grad_norm": 0.0030514909885823727, - "learning_rate": 0.0001999985739963173, - "loss": 46.0, - "step": 10566 - }, - { - "epoch": 1.7017190708160554, - "grad_norm": 0.0008539797854609787, - "learning_rate": 0.00019999857372612637, - "loss": 46.0, - "step": 10567 - }, - { - "epoch": 1.701880107894843, - "grad_norm": 0.002890487667173147, - "learning_rate": 0.00019999857345590982, - "loss": 46.0, - "step": 10568 - }, - { - "epoch": 1.7020411449736301, - "grad_norm": 0.0019331686198711395, - "learning_rate": 0.0001999985731856677, - "loss": 46.0, - "step": 10569 - }, - { - "epoch": 1.7022021820524176, - "grad_norm": 0.0006012069643475115, - "learning_rate": 0.00019999857291539997, - "loss": 46.0, - "step": 10570 - }, - { - "epoch": 1.7023632191312048, - "grad_norm": 0.005402646958827972, - "learning_rate": 0.00019999857264510663, - "loss": 46.0, - "step": 10571 - }, - { - "epoch": 1.7025242562099923, - "grad_norm": 0.0021092521492391825, - "learning_rate": 0.0001999985723747877, - "loss": 46.0, - "step": 10572 - }, - { - "epoch": 1.7026852932887797, - "grad_norm": 0.001945003285072744, - "learning_rate": 0.0001999985721044432, - "loss": 46.0, - "step": 10573 - }, - { - "epoch": 1.7028463303675672, - "grad_norm": 0.002139392076060176, - "learning_rate": 0.00019999857183407307, - "loss": 46.0, - "step": 10574 - }, - { - "epoch": 1.7030073674463546, - "grad_norm": 0.000649197434540838, - "learning_rate": 0.00019999857156367736, - "loss": 46.0, - "step": 10575 - }, - { - "epoch": 1.703168404525142, - "grad_norm": 0.002186570083722472, - "learning_rate": 0.0001999985712932561, - "loss": 46.0, - "step": 10576 - }, - { - "epoch": 1.7033294416039293, - "grad_norm": 0.0016852536937221885, - "learning_rate": 0.00019999857102280917, - "loss": 46.0, - "step": 10577 - }, - { - "epoch": 1.7034904786827167, - "grad_norm": 0.0052175563760101795, - "learning_rate": 0.00019999857075233667, - "loss": 46.0, - "step": 10578 - }, - { - "epoch": 1.703651515761504, - "grad_norm": 0.004924371372908354, - "learning_rate": 0.00019999857048183859, - "loss": 46.0, - "step": 10579 - }, - { - "epoch": 1.7038125528402914, - "grad_norm": 0.002078639343380928, - "learning_rate": 0.0001999985702113149, - "loss": 46.0, - "step": 10580 - }, - { - "epoch": 1.7039735899190789, - "grad_norm": 0.0028434270061552525, - "learning_rate": 0.00019999856994076565, - "loss": 46.0, - "step": 10581 - }, - { - "epoch": 1.7041346269978663, - "grad_norm": 0.003825762076303363, - "learning_rate": 0.00019999856967019077, - "loss": 46.0, - "step": 10582 - }, - { - "epoch": 1.7042956640766538, - "grad_norm": 0.0025177013594657183, - "learning_rate": 0.0001999985693995903, - "loss": 46.0, - "step": 10583 - }, - { - "epoch": 1.704456701155441, - "grad_norm": 0.00609858613461256, - "learning_rate": 0.00019999856912896426, - "loss": 46.0, - "step": 10584 - }, - { - "epoch": 1.7046177382342285, - "grad_norm": 0.0009502783650532365, - "learning_rate": 0.0001999985688583126, - "loss": 46.0, - "step": 10585 - }, - { - "epoch": 1.7047787753130157, - "grad_norm": 0.003186178160831332, - "learning_rate": 0.00019999856858763534, - "loss": 46.0, - "step": 10586 - }, - { - "epoch": 1.7049398123918031, - "grad_norm": 0.0022999586071819067, - "learning_rate": 0.0001999985683169325, - "loss": 46.0, - "step": 10587 - }, - { - "epoch": 1.7051008494705906, - "grad_norm": 0.0005596989649347961, - "learning_rate": 0.00019999856804620407, - "loss": 46.0, - "step": 10588 - }, - { - "epoch": 1.705261886549378, - "grad_norm": 0.0008181995945051312, - "learning_rate": 0.00019999856777545003, - "loss": 46.0, - "step": 10589 - }, - { - "epoch": 1.7054229236281655, - "grad_norm": 0.0013957306509837508, - "learning_rate": 0.0001999985675046704, - "loss": 46.0, - "step": 10590 - }, - { - "epoch": 1.7055839607069527, - "grad_norm": 0.002393920673057437, - "learning_rate": 0.00019999856723386522, - "loss": 46.0, - "step": 10591 - }, - { - "epoch": 1.7057449977857402, - "grad_norm": 0.0023064424749463797, - "learning_rate": 0.0001999985669630344, - "loss": 46.0, - "step": 10592 - }, - { - "epoch": 1.7059060348645274, - "grad_norm": 0.0005696122534573078, - "learning_rate": 0.00019999856669217797, - "loss": 46.0, - "step": 10593 - }, - { - "epoch": 1.7060670719433149, - "grad_norm": 0.0012736358912661672, - "learning_rate": 0.000199998566421296, - "loss": 46.0, - "step": 10594 - }, - { - "epoch": 1.7062281090221023, - "grad_norm": 0.0030530868098139763, - "learning_rate": 0.00019999856615038838, - "loss": 46.0, - "step": 10595 - }, - { - "epoch": 1.7063891461008898, - "grad_norm": 0.005021310411393642, - "learning_rate": 0.0001999985658794552, - "loss": 46.0, - "step": 10596 - }, - { - "epoch": 1.7065501831796772, - "grad_norm": 0.002605863381177187, - "learning_rate": 0.00019999856560849638, - "loss": 46.0, - "step": 10597 - }, - { - "epoch": 1.7067112202584647, - "grad_norm": 0.0021888704504817724, - "learning_rate": 0.00019999856533751203, - "loss": 46.0, - "step": 10598 - }, - { - "epoch": 1.706872257337252, - "grad_norm": 0.0046259211376309395, - "learning_rate": 0.00019999856506650206, - "loss": 46.0, - "step": 10599 - }, - { - "epoch": 1.7070332944160393, - "grad_norm": 0.0022350698709487915, - "learning_rate": 0.00019999856479546648, - "loss": 46.0, - "step": 10600 - }, - { - "epoch": 1.7071943314948266, - "grad_norm": 0.002283544046804309, - "learning_rate": 0.0001999985645244053, - "loss": 46.0, - "step": 10601 - }, - { - "epoch": 1.707355368573614, - "grad_norm": 0.007390520069748163, - "learning_rate": 0.00019999856425331855, - "loss": 46.0, - "step": 10602 - }, - { - "epoch": 1.7075164056524015, - "grad_norm": 0.0028412521351128817, - "learning_rate": 0.0001999985639822062, - "loss": 46.0, - "step": 10603 - }, - { - "epoch": 1.707677442731189, - "grad_norm": 0.0016974268946796656, - "learning_rate": 0.00019999856371106825, - "loss": 46.0, - "step": 10604 - }, - { - "epoch": 1.7078384798099764, - "grad_norm": 0.0005231903051026165, - "learning_rate": 0.00019999856343990474, - "loss": 46.0, - "step": 10605 - }, - { - "epoch": 1.7079995168887636, - "grad_norm": 0.0020515720825642347, - "learning_rate": 0.00019999856316871558, - "loss": 46.0, - "step": 10606 - }, - { - "epoch": 1.708160553967551, - "grad_norm": 0.0014094403013586998, - "learning_rate": 0.00019999856289750086, - "loss": 46.0, - "step": 10607 - }, - { - "epoch": 1.7083215910463383, - "grad_norm": 0.0012496472336351871, - "learning_rate": 0.0001999985626262605, - "loss": 46.0, - "step": 10608 - }, - { - "epoch": 1.7084826281251257, - "grad_norm": 0.0014657059218734503, - "learning_rate": 0.0001999985623549946, - "loss": 46.0, - "step": 10609 - }, - { - "epoch": 1.7086436652039132, - "grad_norm": 0.0004291498626116663, - "learning_rate": 0.0001999985620837031, - "loss": 46.0, - "step": 10610 - }, - { - "epoch": 1.7088047022827006, - "grad_norm": 0.006984542589634657, - "learning_rate": 0.00019999856181238598, - "loss": 46.0, - "step": 10611 - }, - { - "epoch": 1.708965739361488, - "grad_norm": 0.0013984456891193986, - "learning_rate": 0.00019999856154104327, - "loss": 46.0, - "step": 10612 - }, - { - "epoch": 1.7091267764402753, - "grad_norm": 0.0006348855094984174, - "learning_rate": 0.00019999856126967497, - "loss": 46.0, - "step": 10613 - }, - { - "epoch": 1.7092878135190628, - "grad_norm": 0.00048029652680270374, - "learning_rate": 0.0001999985609982811, - "loss": 46.0, - "step": 10614 - }, - { - "epoch": 1.70944885059785, - "grad_norm": 0.001243694918230176, - "learning_rate": 0.0001999985607268616, - "loss": 46.0, - "step": 10615 - }, - { - "epoch": 1.7096098876766375, - "grad_norm": 0.0006153068970888853, - "learning_rate": 0.0001999985604554165, - "loss": 46.0, - "step": 10616 - }, - { - "epoch": 1.709770924755425, - "grad_norm": 0.010551068931818008, - "learning_rate": 0.00019999856018394584, - "loss": 46.0, - "step": 10617 - }, - { - "epoch": 1.7099319618342124, - "grad_norm": 0.003209968563169241, - "learning_rate": 0.00019999855991244958, - "loss": 46.0, - "step": 10618 - }, - { - "epoch": 1.7100929989129998, - "grad_norm": 0.0038249429780989885, - "learning_rate": 0.0001999985596409277, - "loss": 46.0, - "step": 10619 - }, - { - "epoch": 1.7102540359917873, - "grad_norm": 0.0031154875177890062, - "learning_rate": 0.00019999855936938025, - "loss": 46.0, - "step": 10620 - }, - { - "epoch": 1.7104150730705745, - "grad_norm": 0.004832983948290348, - "learning_rate": 0.0001999985590978072, - "loss": 46.0, - "step": 10621 - }, - { - "epoch": 1.7105761101493617, - "grad_norm": 0.0009924547048285604, - "learning_rate": 0.00019999855882620854, - "loss": 46.0, - "step": 10622 - }, - { - "epoch": 1.7107371472281492, - "grad_norm": 0.0009576721931807697, - "learning_rate": 0.00019999855855458432, - "loss": 46.0, - "step": 10623 - }, - { - "epoch": 1.7108981843069366, - "grad_norm": 0.0005204388871788979, - "learning_rate": 0.00019999855828293445, - "loss": 46.0, - "step": 10624 - }, - { - "epoch": 1.711059221385724, - "grad_norm": 0.001394303166307509, - "learning_rate": 0.00019999855801125903, - "loss": 46.0, - "step": 10625 - }, - { - "epoch": 1.7112202584645115, - "grad_norm": 0.004019347485154867, - "learning_rate": 0.000199998557739558, - "loss": 46.0, - "step": 10626 - }, - { - "epoch": 1.711381295543299, - "grad_norm": 0.00600590743124485, - "learning_rate": 0.0001999985574678314, - "loss": 46.0, - "step": 10627 - }, - { - "epoch": 1.7115423326220862, - "grad_norm": 0.004105504136532545, - "learning_rate": 0.00019999855719607918, - "loss": 46.0, - "step": 10628 - }, - { - "epoch": 1.7117033697008737, - "grad_norm": 0.0021854748483747244, - "learning_rate": 0.00019999855692430136, - "loss": 46.0, - "step": 10629 - }, - { - "epoch": 1.711864406779661, - "grad_norm": 0.004823281895369291, - "learning_rate": 0.00019999855665249795, - "loss": 46.0, - "step": 10630 - }, - { - "epoch": 1.7120254438584483, - "grad_norm": 0.003068856429308653, - "learning_rate": 0.00019999855638066895, - "loss": 46.0, - "step": 10631 - }, - { - "epoch": 1.7121864809372358, - "grad_norm": 0.0004929940914735198, - "learning_rate": 0.00019999855610881439, - "loss": 46.0, - "step": 10632 - }, - { - "epoch": 1.7123475180160233, - "grad_norm": 0.0009507111390121281, - "learning_rate": 0.00019999855583693418, - "loss": 46.0, - "step": 10633 - }, - { - "epoch": 1.7125085550948107, - "grad_norm": 0.0011723112547770143, - "learning_rate": 0.0001999985555650284, - "loss": 46.0, - "step": 10634 - }, - { - "epoch": 1.712669592173598, - "grad_norm": 0.0032061573583632708, - "learning_rate": 0.00019999855529309702, - "loss": 46.0, - "step": 10635 - }, - { - "epoch": 1.7128306292523854, - "grad_norm": 0.003046361729502678, - "learning_rate": 0.00019999855502114006, - "loss": 46.0, - "step": 10636 - }, - { - "epoch": 1.7129916663311726, - "grad_norm": 0.00501189474016428, - "learning_rate": 0.0001999985547491575, - "loss": 46.0, - "step": 10637 - }, - { - "epoch": 1.71315270340996, - "grad_norm": 0.004618125967681408, - "learning_rate": 0.0001999985544771493, - "loss": 46.0, - "step": 10638 - }, - { - "epoch": 1.7133137404887475, - "grad_norm": 0.0027390005998313427, - "learning_rate": 0.00019999855420511556, - "loss": 46.0, - "step": 10639 - }, - { - "epoch": 1.713474777567535, - "grad_norm": 0.007732787169516087, - "learning_rate": 0.00019999855393305622, - "loss": 46.0, - "step": 10640 - }, - { - "epoch": 1.7136358146463224, - "grad_norm": 0.002800698159262538, - "learning_rate": 0.0001999985536609713, - "loss": 46.0, - "step": 10641 - }, - { - "epoch": 1.7137968517251099, - "grad_norm": 0.0013721325667575002, - "learning_rate": 0.00019999855338886073, - "loss": 46.0, - "step": 10642 - }, - { - "epoch": 1.713957888803897, - "grad_norm": 0.0018444042652845383, - "learning_rate": 0.0001999985531167246, - "loss": 46.0, - "step": 10643 - }, - { - "epoch": 1.7141189258826843, - "grad_norm": 0.0048058126121759415, - "learning_rate": 0.00019999855284456289, - "loss": 46.0, - "step": 10644 - }, - { - "epoch": 1.7142799629614718, - "grad_norm": 0.0027314776089042425, - "learning_rate": 0.00019999855257237556, - "loss": 46.0, - "step": 10645 - }, - { - "epoch": 1.7144410000402592, - "grad_norm": 0.00786896888166666, - "learning_rate": 0.00019999855230016264, - "loss": 46.0, - "step": 10646 - }, - { - "epoch": 1.7146020371190467, - "grad_norm": 0.005011396948248148, - "learning_rate": 0.00019999855202792414, - "loss": 46.0, - "step": 10647 - }, - { - "epoch": 1.7147630741978341, - "grad_norm": 0.003180519910529256, - "learning_rate": 0.00019999855175566002, - "loss": 46.0, - "step": 10648 - }, - { - "epoch": 1.7149241112766216, - "grad_norm": 0.015887390822172165, - "learning_rate": 0.00019999855148337031, - "loss": 46.0, - "step": 10649 - }, - { - "epoch": 1.7150851483554088, - "grad_norm": 0.002253420650959015, - "learning_rate": 0.00019999855121105502, - "loss": 46.0, - "step": 10650 - }, - { - "epoch": 1.7152461854341963, - "grad_norm": 0.0015703889075666666, - "learning_rate": 0.00019999855093871417, - "loss": 46.0, - "step": 10651 - }, - { - "epoch": 1.7154072225129835, - "grad_norm": 0.002088170498609543, - "learning_rate": 0.00019999855066634768, - "loss": 46.0, - "step": 10652 - }, - { - "epoch": 1.715568259591771, - "grad_norm": 0.004208439029753208, - "learning_rate": 0.0001999985503939556, - "loss": 46.0, - "step": 10653 - }, - { - "epoch": 1.7157292966705584, - "grad_norm": 0.0012495253467932343, - "learning_rate": 0.00019999855012153793, - "loss": 46.0, - "step": 10654 - }, - { - "epoch": 1.7158903337493459, - "grad_norm": 0.014561749994754791, - "learning_rate": 0.00019999854984909465, - "loss": 46.0, - "step": 10655 - }, - { - "epoch": 1.7160513708281333, - "grad_norm": 0.0022735148668289185, - "learning_rate": 0.00019999854957662578, - "loss": 46.0, - "step": 10656 - }, - { - "epoch": 1.7162124079069205, - "grad_norm": 0.0007662269636057317, - "learning_rate": 0.00019999854930413132, - "loss": 46.0, - "step": 10657 - }, - { - "epoch": 1.716373444985708, - "grad_norm": 0.010263331234455109, - "learning_rate": 0.00019999854903161128, - "loss": 46.0, - "step": 10658 - }, - { - "epoch": 1.7165344820644952, - "grad_norm": 0.0019299895502626896, - "learning_rate": 0.00019999854875906562, - "loss": 46.0, - "step": 10659 - }, - { - "epoch": 1.7166955191432827, - "grad_norm": 0.006850752513855696, - "learning_rate": 0.0001999985484864944, - "loss": 46.0, - "step": 10660 - }, - { - "epoch": 1.7168565562220701, - "grad_norm": 0.00429815286770463, - "learning_rate": 0.00019999854821389757, - "loss": 46.0, - "step": 10661 - }, - { - "epoch": 1.7170175933008576, - "grad_norm": 0.003795732744038105, - "learning_rate": 0.00019999854794127512, - "loss": 46.0, - "step": 10662 - }, - { - "epoch": 1.717178630379645, - "grad_norm": 0.0024037258699536324, - "learning_rate": 0.00019999854766862712, - "loss": 46.0, - "step": 10663 - }, - { - "epoch": 1.7173396674584323, - "grad_norm": 0.0025079180486500263, - "learning_rate": 0.0001999985473959535, - "loss": 46.0, - "step": 10664 - }, - { - "epoch": 1.7175007045372197, - "grad_norm": 0.005466458387672901, - "learning_rate": 0.00019999854712325429, - "loss": 46.0, - "step": 10665 - }, - { - "epoch": 1.717661741616007, - "grad_norm": 0.001966609386727214, - "learning_rate": 0.0001999985468505295, - "loss": 46.0, - "step": 10666 - }, - { - "epoch": 1.7178227786947944, - "grad_norm": 0.0025278415996581316, - "learning_rate": 0.00019999854657777908, - "loss": 46.0, - "step": 10667 - }, - { - "epoch": 1.7179838157735818, - "grad_norm": 0.002257103566080332, - "learning_rate": 0.00019999854630500308, - "loss": 46.0, - "step": 10668 - }, - { - "epoch": 1.7181448528523693, - "grad_norm": 0.0006267820135690272, - "learning_rate": 0.0001999985460322015, - "loss": 46.0, - "step": 10669 - }, - { - "epoch": 1.7183058899311567, - "grad_norm": 0.0011026536813005805, - "learning_rate": 0.0001999985457593743, - "loss": 46.0, - "step": 10670 - }, - { - "epoch": 1.7184669270099442, - "grad_norm": 0.0006493310211226344, - "learning_rate": 0.00019999854548652152, - "loss": 46.0, - "step": 10671 - }, - { - "epoch": 1.7186279640887314, - "grad_norm": 0.0011368111008778214, - "learning_rate": 0.00019999854521364317, - "loss": 46.0, - "step": 10672 - }, - { - "epoch": 1.7187890011675189, - "grad_norm": 0.003450807649642229, - "learning_rate": 0.00019999854494073919, - "loss": 46.0, - "step": 10673 - }, - { - "epoch": 1.718950038246306, - "grad_norm": 0.0024938916321843863, - "learning_rate": 0.0001999985446678096, - "loss": 46.0, - "step": 10674 - }, - { - "epoch": 1.7191110753250936, - "grad_norm": 0.0014378870837390423, - "learning_rate": 0.00019999854439485448, - "loss": 46.0, - "step": 10675 - }, - { - "epoch": 1.719272112403881, - "grad_norm": 0.005106337834149599, - "learning_rate": 0.0001999985441218737, - "loss": 46.0, - "step": 10676 - }, - { - "epoch": 1.7194331494826685, - "grad_norm": 0.001010688953101635, - "learning_rate": 0.00019999854384886737, - "loss": 46.0, - "step": 10677 - }, - { - "epoch": 1.719594186561456, - "grad_norm": 0.005435951519757509, - "learning_rate": 0.00019999854357583542, - "loss": 46.0, - "step": 10678 - }, - { - "epoch": 1.7197552236402431, - "grad_norm": 0.0010438169119879603, - "learning_rate": 0.0001999985433027779, - "loss": 46.0, - "step": 10679 - }, - { - "epoch": 1.7199162607190306, - "grad_norm": 0.006695493124425411, - "learning_rate": 0.00019999854302969476, - "loss": 46.0, - "step": 10680 - }, - { - "epoch": 1.7200772977978178, - "grad_norm": 0.0007165994611568749, - "learning_rate": 0.00019999854275658602, - "loss": 46.0, - "step": 10681 - }, - { - "epoch": 1.7202383348766053, - "grad_norm": 0.0049116904847323895, - "learning_rate": 0.00019999854248345172, - "loss": 46.0, - "step": 10682 - }, - { - "epoch": 1.7203993719553927, - "grad_norm": 0.0026281452737748623, - "learning_rate": 0.0001999985422102918, - "loss": 46.0, - "step": 10683 - }, - { - "epoch": 1.7205604090341802, - "grad_norm": 0.0010164750274270773, - "learning_rate": 0.0001999985419371063, - "loss": 46.0, - "step": 10684 - }, - { - "epoch": 1.7207214461129676, - "grad_norm": 0.0014132176293060184, - "learning_rate": 0.0001999985416638952, - "loss": 46.0, - "step": 10685 - }, - { - "epoch": 1.7208824831917549, - "grad_norm": 0.0012437221594154835, - "learning_rate": 0.0001999985413906585, - "loss": 46.0, - "step": 10686 - }, - { - "epoch": 1.7210435202705423, - "grad_norm": 0.0027496113907545805, - "learning_rate": 0.00019999854111739623, - "loss": 46.0, - "step": 10687 - }, - { - "epoch": 1.7212045573493295, - "grad_norm": 0.004544076509773731, - "learning_rate": 0.00019999854084410833, - "loss": 46.0, - "step": 10688 - }, - { - "epoch": 1.721365594428117, - "grad_norm": 0.004530207719653845, - "learning_rate": 0.00019999854057079484, - "loss": 46.0, - "step": 10689 - }, - { - "epoch": 1.7215266315069044, - "grad_norm": 0.0020138320978730917, - "learning_rate": 0.00019999854029745576, - "loss": 46.0, - "step": 10690 - }, - { - "epoch": 1.7216876685856919, - "grad_norm": 0.006559364497661591, - "learning_rate": 0.0001999985400240911, - "loss": 46.0, - "step": 10691 - }, - { - "epoch": 1.7218487056644793, - "grad_norm": 0.0025726009625941515, - "learning_rate": 0.00019999853975070085, - "loss": 46.0, - "step": 10692 - }, - { - "epoch": 1.7220097427432668, - "grad_norm": 0.0027490509673953056, - "learning_rate": 0.00019999853947728498, - "loss": 46.0, - "step": 10693 - }, - { - "epoch": 1.722170779822054, - "grad_norm": 0.002343369647860527, - "learning_rate": 0.00019999853920384353, - "loss": 46.0, - "step": 10694 - }, - { - "epoch": 1.7223318169008415, - "grad_norm": 0.002913028234615922, - "learning_rate": 0.0001999985389303765, - "loss": 46.0, - "step": 10695 - }, - { - "epoch": 1.7224928539796287, - "grad_norm": 0.0008392426534555852, - "learning_rate": 0.00019999853865688386, - "loss": 46.0, - "step": 10696 - }, - { - "epoch": 1.7226538910584162, - "grad_norm": 0.0016698619583621621, - "learning_rate": 0.00019999853838336562, - "loss": 46.0, - "step": 10697 - }, - { - "epoch": 1.7228149281372036, - "grad_norm": 0.0011312097776681185, - "learning_rate": 0.00019999853810982176, - "loss": 46.0, - "step": 10698 - }, - { - "epoch": 1.722975965215991, - "grad_norm": 0.0017419848591089249, - "learning_rate": 0.00019999853783625235, - "loss": 46.0, - "step": 10699 - }, - { - "epoch": 1.7231370022947785, - "grad_norm": 0.001562235876917839, - "learning_rate": 0.00019999853756265734, - "loss": 46.0, - "step": 10700 - }, - { - "epoch": 1.7232980393735657, - "grad_norm": 0.0017675141571089625, - "learning_rate": 0.00019999853728903673, - "loss": 46.0, - "step": 10701 - }, - { - "epoch": 1.7234590764523532, - "grad_norm": 0.006568699609488249, - "learning_rate": 0.0001999985370153905, - "loss": 46.0, - "step": 10702 - }, - { - "epoch": 1.7236201135311404, - "grad_norm": 0.0073175267316401005, - "learning_rate": 0.0001999985367417187, - "loss": 46.0, - "step": 10703 - }, - { - "epoch": 1.7237811506099279, - "grad_norm": 0.0012807713355869055, - "learning_rate": 0.00019999853646802132, - "loss": 46.0, - "step": 10704 - }, - { - "epoch": 1.7239421876887153, - "grad_norm": 0.003323088865727186, - "learning_rate": 0.00019999853619429833, - "loss": 46.0, - "step": 10705 - }, - { - "epoch": 1.7241032247675028, - "grad_norm": 0.0092667480930686, - "learning_rate": 0.00019999853592054972, - "loss": 46.0, - "step": 10706 - }, - { - "epoch": 1.7242642618462902, - "grad_norm": 0.0019082697108387947, - "learning_rate": 0.00019999853564677556, - "loss": 46.0, - "step": 10707 - }, - { - "epoch": 1.7244252989250775, - "grad_norm": 0.0026916007045656443, - "learning_rate": 0.00019999853537297577, - "loss": 46.0, - "step": 10708 - }, - { - "epoch": 1.724586336003865, - "grad_norm": 0.0018295894842594862, - "learning_rate": 0.0001999985350991504, - "loss": 46.0, - "step": 10709 - }, - { - "epoch": 1.7247473730826521, - "grad_norm": 0.0038157927338033915, - "learning_rate": 0.00019999853482529948, - "loss": 46.0, - "step": 10710 - }, - { - "epoch": 1.7249084101614396, - "grad_norm": 0.0014430490555241704, - "learning_rate": 0.0001999985345514229, - "loss": 46.0, - "step": 10711 - }, - { - "epoch": 1.725069447240227, - "grad_norm": 0.0020202582236379385, - "learning_rate": 0.00019999853427752075, - "loss": 46.0, - "step": 10712 - }, - { - "epoch": 1.7252304843190145, - "grad_norm": 0.0012450660578906536, - "learning_rate": 0.00019999853400359298, - "loss": 46.0, - "step": 10713 - }, - { - "epoch": 1.725391521397802, - "grad_norm": 0.00481125945225358, - "learning_rate": 0.00019999853372963965, - "loss": 46.0, - "step": 10714 - }, - { - "epoch": 1.7255525584765894, - "grad_norm": 0.002589769195765257, - "learning_rate": 0.0001999985334556607, - "loss": 46.0, - "step": 10715 - }, - { - "epoch": 1.7257135955553766, - "grad_norm": 0.0039285593666136265, - "learning_rate": 0.00019999853318165617, - "loss": 46.0, - "step": 10716 - }, - { - "epoch": 1.7258746326341639, - "grad_norm": 0.004839100409299135, - "learning_rate": 0.00019999853290762607, - "loss": 46.0, - "step": 10717 - }, - { - "epoch": 1.7260356697129513, - "grad_norm": 0.001381782116368413, - "learning_rate": 0.00019999853263357034, - "loss": 46.0, - "step": 10718 - }, - { - "epoch": 1.7261967067917388, - "grad_norm": 0.0013794592814520001, - "learning_rate": 0.00019999853235948902, - "loss": 46.0, - "step": 10719 - }, - { - "epoch": 1.7263577438705262, - "grad_norm": 0.0008513790089637041, - "learning_rate": 0.0001999985320853821, - "loss": 46.0, - "step": 10720 - }, - { - "epoch": 1.7265187809493137, - "grad_norm": 0.0006822136929258704, - "learning_rate": 0.0001999985318112496, - "loss": 46.0, - "step": 10721 - }, - { - "epoch": 1.726679818028101, - "grad_norm": 0.006813152693212032, - "learning_rate": 0.00019999853153709153, - "loss": 46.0, - "step": 10722 - }, - { - "epoch": 1.7268408551068883, - "grad_norm": 0.0026363907381892204, - "learning_rate": 0.00019999853126290783, - "loss": 46.0, - "step": 10723 - }, - { - "epoch": 1.7270018921856758, - "grad_norm": 0.002446510596200824, - "learning_rate": 0.00019999853098869852, - "loss": 46.0, - "step": 10724 - }, - { - "epoch": 1.727162929264463, - "grad_norm": 0.0021651689894497395, - "learning_rate": 0.00019999853071446367, - "loss": 46.0, - "step": 10725 - }, - { - "epoch": 1.7273239663432505, - "grad_norm": 0.004140754695981741, - "learning_rate": 0.0001999985304402032, - "loss": 46.0, - "step": 10726 - }, - { - "epoch": 1.727485003422038, - "grad_norm": 0.006260409951210022, - "learning_rate": 0.0001999985301659171, - "loss": 46.0, - "step": 10727 - }, - { - "epoch": 1.7276460405008254, - "grad_norm": 0.0015275570331141353, - "learning_rate": 0.00019999852989160543, - "loss": 46.0, - "step": 10728 - }, - { - "epoch": 1.7278070775796128, - "grad_norm": 0.0005883629783056676, - "learning_rate": 0.00019999852961726818, - "loss": 46.0, - "step": 10729 - }, - { - "epoch": 1.7279681146584, - "grad_norm": 0.005475928541272879, - "learning_rate": 0.00019999852934290532, - "loss": 46.0, - "step": 10730 - }, - { - "epoch": 1.7281291517371875, - "grad_norm": 0.0029752531554549932, - "learning_rate": 0.0001999985290685169, - "loss": 46.0, - "step": 10731 - }, - { - "epoch": 1.7282901888159747, - "grad_norm": 0.0016609854064881802, - "learning_rate": 0.00019999852879410283, - "loss": 46.0, - "step": 10732 - }, - { - "epoch": 1.7284512258947622, - "grad_norm": 0.003141415538266301, - "learning_rate": 0.00019999852851966318, - "loss": 46.0, - "step": 10733 - }, - { - "epoch": 1.7286122629735496, - "grad_norm": 0.0017880770610645413, - "learning_rate": 0.00019999852824519797, - "loss": 46.0, - "step": 10734 - }, - { - "epoch": 1.728773300052337, - "grad_norm": 0.0016692924546077847, - "learning_rate": 0.00019999852797070714, - "loss": 46.0, - "step": 10735 - }, - { - "epoch": 1.7289343371311245, - "grad_norm": 0.0047713397070765495, - "learning_rate": 0.00019999852769619073, - "loss": 46.0, - "step": 10736 - }, - { - "epoch": 1.729095374209912, - "grad_norm": 0.007905526086688042, - "learning_rate": 0.0001999985274216487, - "loss": 46.0, - "step": 10737 - }, - { - "epoch": 1.7292564112886992, - "grad_norm": 0.004555199295282364, - "learning_rate": 0.0001999985271470811, - "loss": 46.0, - "step": 10738 - }, - { - "epoch": 1.7294174483674865, - "grad_norm": 0.0021585929207503796, - "learning_rate": 0.0001999985268724879, - "loss": 46.0, - "step": 10739 - }, - { - "epoch": 1.729578485446274, - "grad_norm": 0.0022551454603672028, - "learning_rate": 0.00019999852659786908, - "loss": 46.0, - "step": 10740 - }, - { - "epoch": 1.7297395225250614, - "grad_norm": 0.005573060363531113, - "learning_rate": 0.0001999985263232247, - "loss": 46.0, - "step": 10741 - }, - { - "epoch": 1.7299005596038488, - "grad_norm": 0.001217327662743628, - "learning_rate": 0.0001999985260485547, - "loss": 46.0, - "step": 10742 - }, - { - "epoch": 1.7300615966826363, - "grad_norm": 0.0032988889142870903, - "learning_rate": 0.0001999985257738591, - "loss": 46.0, - "step": 10743 - }, - { - "epoch": 1.7302226337614237, - "grad_norm": 0.0018557668663561344, - "learning_rate": 0.00019999852549913794, - "loss": 46.0, - "step": 10744 - }, - { - "epoch": 1.730383670840211, - "grad_norm": 0.0011874701594933867, - "learning_rate": 0.00019999852522439117, - "loss": 46.0, - "step": 10745 - }, - { - "epoch": 1.7305447079189984, - "grad_norm": 0.003213569289073348, - "learning_rate": 0.00019999852494961883, - "loss": 46.0, - "step": 10746 - }, - { - "epoch": 1.7307057449977856, - "grad_norm": 0.0032414908055216074, - "learning_rate": 0.00019999852467482085, - "loss": 46.0, - "step": 10747 - }, - { - "epoch": 1.730866782076573, - "grad_norm": 0.001104968716390431, - "learning_rate": 0.00019999852439999728, - "loss": 46.0, - "step": 10748 - }, - { - "epoch": 1.7310278191553605, - "grad_norm": 0.0014070621691644192, - "learning_rate": 0.00019999852412514815, - "loss": 46.0, - "step": 10749 - }, - { - "epoch": 1.731188856234148, - "grad_norm": 0.0006176167516969144, - "learning_rate": 0.0001999985238502734, - "loss": 46.0, - "step": 10750 - }, - { - "epoch": 1.7313498933129354, - "grad_norm": 0.002404885832220316, - "learning_rate": 0.00019999852357537306, - "loss": 46.0, - "step": 10751 - }, - { - "epoch": 1.7315109303917227, - "grad_norm": 0.0027448267210274935, - "learning_rate": 0.00019999852330044714, - "loss": 46.0, - "step": 10752 - }, - { - "epoch": 1.73167196747051, - "grad_norm": 0.0029412193689495325, - "learning_rate": 0.0001999985230254956, - "loss": 46.0, - "step": 10753 - }, - { - "epoch": 1.7318330045492973, - "grad_norm": 0.006089497357606888, - "learning_rate": 0.0001999985227505185, - "loss": 46.0, - "step": 10754 - }, - { - "epoch": 1.7319940416280848, - "grad_norm": 0.0033259836491197348, - "learning_rate": 0.0001999985224755158, - "loss": 46.0, - "step": 10755 - }, - { - "epoch": 1.7321550787068722, - "grad_norm": 0.004637986421585083, - "learning_rate": 0.00019999852220048747, - "loss": 46.0, - "step": 10756 - }, - { - "epoch": 1.7323161157856597, - "grad_norm": 0.0025347708724439144, - "learning_rate": 0.00019999852192543357, - "loss": 46.0, - "step": 10757 - }, - { - "epoch": 1.7324771528644471, - "grad_norm": 0.0022497184108942747, - "learning_rate": 0.00019999852165035408, - "loss": 46.0, - "step": 10758 - }, - { - "epoch": 1.7326381899432344, - "grad_norm": 0.00305104348808527, - "learning_rate": 0.00019999852137524897, - "loss": 46.0, - "step": 10759 - }, - { - "epoch": 1.7327992270220218, - "grad_norm": 0.0047032530419528484, - "learning_rate": 0.00019999852110011827, - "loss": 46.0, - "step": 10760 - }, - { - "epoch": 1.732960264100809, - "grad_norm": 0.003722802270203829, - "learning_rate": 0.000199998520824962, - "loss": 46.0, - "step": 10761 - }, - { - "epoch": 1.7331213011795965, - "grad_norm": 0.006887431256473064, - "learning_rate": 0.00019999852054978013, - "loss": 46.0, - "step": 10762 - }, - { - "epoch": 1.733282338258384, - "grad_norm": 0.0011110524646937847, - "learning_rate": 0.00019999852027457264, - "loss": 46.0, - "step": 10763 - }, - { - "epoch": 1.7334433753371714, - "grad_norm": 0.0016281026182696223, - "learning_rate": 0.00019999851999933957, - "loss": 46.0, - "step": 10764 - }, - { - "epoch": 1.7336044124159589, - "grad_norm": 0.0025362223386764526, - "learning_rate": 0.00019999851972408092, - "loss": 46.0, - "step": 10765 - }, - { - "epoch": 1.7337654494947463, - "grad_norm": 0.0022491959389299154, - "learning_rate": 0.00019999851944879664, - "loss": 46.0, - "step": 10766 - }, - { - "epoch": 1.7339264865735335, - "grad_norm": 0.001708249794319272, - "learning_rate": 0.0001999985191734868, - "loss": 46.0, - "step": 10767 - }, - { - "epoch": 1.734087523652321, - "grad_norm": 0.0020848913118243217, - "learning_rate": 0.00019999851889815137, - "loss": 46.0, - "step": 10768 - }, - { - "epoch": 1.7342485607311082, - "grad_norm": 0.01726636476814747, - "learning_rate": 0.0001999985186227903, - "loss": 46.0, - "step": 10769 - }, - { - "epoch": 1.7344095978098957, - "grad_norm": 0.0009378970135003328, - "learning_rate": 0.0001999985183474037, - "loss": 46.0, - "step": 10770 - }, - { - "epoch": 1.7345706348886831, - "grad_norm": 0.0011219424195587635, - "learning_rate": 0.00019999851807199145, - "loss": 46.0, - "step": 10771 - }, - { - "epoch": 1.7347316719674706, - "grad_norm": 0.0044549452140927315, - "learning_rate": 0.00019999851779655363, - "loss": 46.0, - "step": 10772 - }, - { - "epoch": 1.734892709046258, - "grad_norm": 0.010219248943030834, - "learning_rate": 0.00019999851752109022, - "loss": 46.0, - "step": 10773 - }, - { - "epoch": 1.7350537461250453, - "grad_norm": 0.0013460562331601977, - "learning_rate": 0.0001999985172456012, - "loss": 46.0, - "step": 10774 - }, - { - "epoch": 1.7352147832038327, - "grad_norm": 0.0018596885493025184, - "learning_rate": 0.0001999985169700866, - "loss": 46.0, - "step": 10775 - }, - { - "epoch": 1.73537582028262, - "grad_norm": 0.0012383024441078305, - "learning_rate": 0.0001999985166945464, - "loss": 46.0, - "step": 10776 - }, - { - "epoch": 1.7355368573614074, - "grad_norm": 0.005149056203663349, - "learning_rate": 0.0001999985164189806, - "loss": 46.0, - "step": 10777 - }, - { - "epoch": 1.7356978944401948, - "grad_norm": 0.0019533196464180946, - "learning_rate": 0.00019999851614338924, - "loss": 46.0, - "step": 10778 - }, - { - "epoch": 1.7358589315189823, - "grad_norm": 0.0037077427841722965, - "learning_rate": 0.00019999851586777222, - "loss": 46.0, - "step": 10779 - }, - { - "epoch": 1.7360199685977697, - "grad_norm": 0.0005321336211636662, - "learning_rate": 0.00019999851559212965, - "loss": 46.0, - "step": 10780 - }, - { - "epoch": 1.736181005676557, - "grad_norm": 0.001478381222113967, - "learning_rate": 0.0001999985153164615, - "loss": 46.0, - "step": 10781 - }, - { - "epoch": 1.7363420427553444, - "grad_norm": 0.0036501639988273382, - "learning_rate": 0.00019999851504076772, - "loss": 46.0, - "step": 10782 - }, - { - "epoch": 1.7365030798341317, - "grad_norm": 0.00226794695481658, - "learning_rate": 0.00019999851476504836, - "loss": 46.0, - "step": 10783 - }, - { - "epoch": 1.736664116912919, - "grad_norm": 0.0012095397105440497, - "learning_rate": 0.0001999985144893034, - "loss": 46.0, - "step": 10784 - }, - { - "epoch": 1.7368251539917066, - "grad_norm": 0.0053879451006650925, - "learning_rate": 0.00019999851421353287, - "loss": 46.0, - "step": 10785 - }, - { - "epoch": 1.736986191070494, - "grad_norm": 0.0015686068218201399, - "learning_rate": 0.0001999985139377367, - "loss": 46.0, - "step": 10786 - }, - { - "epoch": 1.7371472281492815, - "grad_norm": 0.00041315285488963127, - "learning_rate": 0.00019999851366191496, - "loss": 46.0, - "step": 10787 - }, - { - "epoch": 1.737308265228069, - "grad_norm": 0.0007580140954814851, - "learning_rate": 0.00019999851338606763, - "loss": 46.0, - "step": 10788 - }, - { - "epoch": 1.7374693023068561, - "grad_norm": 0.004159773234277964, - "learning_rate": 0.0001999985131101947, - "loss": 46.0, - "step": 10789 - }, - { - "epoch": 1.7376303393856436, - "grad_norm": 0.00813358649611473, - "learning_rate": 0.0001999985128342962, - "loss": 46.0, - "step": 10790 - }, - { - "epoch": 1.7377913764644308, - "grad_norm": 0.0037237240467220545, - "learning_rate": 0.00019999851255837205, - "loss": 46.0, - "step": 10791 - }, - { - "epoch": 1.7379524135432183, - "grad_norm": 0.0012467398773878813, - "learning_rate": 0.00019999851228242235, - "loss": 46.0, - "step": 10792 - }, - { - "epoch": 1.7381134506220057, - "grad_norm": 0.01693449728190899, - "learning_rate": 0.00019999851200644704, - "loss": 46.0, - "step": 10793 - }, - { - "epoch": 1.7382744877007932, - "grad_norm": 0.0006624520756304264, - "learning_rate": 0.00019999851173044614, - "loss": 46.0, - "step": 10794 - }, - { - "epoch": 1.7384355247795806, - "grad_norm": 0.0037024826742708683, - "learning_rate": 0.00019999851145441965, - "loss": 46.0, - "step": 10795 - }, - { - "epoch": 1.7385965618583679, - "grad_norm": 0.0020319244358688593, - "learning_rate": 0.00019999851117836754, - "loss": 46.0, - "step": 10796 - }, - { - "epoch": 1.7387575989371553, - "grad_norm": 0.0014004985569044948, - "learning_rate": 0.00019999851090228988, - "loss": 46.0, - "step": 10797 - }, - { - "epoch": 1.7389186360159425, - "grad_norm": 0.006149820983409882, - "learning_rate": 0.00019999851062618658, - "loss": 46.0, - "step": 10798 - }, - { - "epoch": 1.73907967309473, - "grad_norm": 0.001070016180165112, - "learning_rate": 0.0001999985103500577, - "loss": 46.0, - "step": 10799 - }, - { - "epoch": 1.7392407101735174, - "grad_norm": 0.0007530535804107785, - "learning_rate": 0.00019999851007390323, - "loss": 46.0, - "step": 10800 - }, - { - "epoch": 1.739401747252305, - "grad_norm": 0.003215379547327757, - "learning_rate": 0.0001999985097977232, - "loss": 46.0, - "step": 10801 - }, - { - "epoch": 1.7395627843310923, - "grad_norm": 0.004696669988334179, - "learning_rate": 0.00019999850952151754, - "loss": 46.0, - "step": 10802 - }, - { - "epoch": 1.7397238214098796, - "grad_norm": 0.004864111077040434, - "learning_rate": 0.00019999850924528627, - "loss": 46.0, - "step": 10803 - }, - { - "epoch": 1.739884858488667, - "grad_norm": 0.00274386047385633, - "learning_rate": 0.00019999850896902942, - "loss": 46.0, - "step": 10804 - }, - { - "epoch": 1.7400458955674543, - "grad_norm": 0.0027007407043129206, - "learning_rate": 0.000199998508692747, - "loss": 46.0, - "step": 10805 - }, - { - "epoch": 1.7402069326462417, - "grad_norm": 0.0010712225921452045, - "learning_rate": 0.00019999850841643895, - "loss": 46.0, - "step": 10806 - }, - { - "epoch": 1.7403679697250292, - "grad_norm": 0.001988667994737625, - "learning_rate": 0.00019999850814010533, - "loss": 46.0, - "step": 10807 - }, - { - "epoch": 1.7405290068038166, - "grad_norm": 0.0022766145411878824, - "learning_rate": 0.0001999985078637461, - "loss": 46.0, - "step": 10808 - }, - { - "epoch": 1.740690043882604, - "grad_norm": 0.0021920788567513227, - "learning_rate": 0.0001999985075873613, - "loss": 46.0, - "step": 10809 - }, - { - "epoch": 1.7408510809613915, - "grad_norm": 0.006652799900621176, - "learning_rate": 0.00019999850731095088, - "loss": 46.0, - "step": 10810 - }, - { - "epoch": 1.7410121180401787, - "grad_norm": 0.002119033597409725, - "learning_rate": 0.00019999850703451486, - "loss": 46.0, - "step": 10811 - }, - { - "epoch": 1.741173155118966, - "grad_norm": 0.0011126274475827813, - "learning_rate": 0.00019999850675805325, - "loss": 46.0, - "step": 10812 - }, - { - "epoch": 1.7413341921977534, - "grad_norm": 0.0020380797795951366, - "learning_rate": 0.00019999850648156606, - "loss": 46.0, - "step": 10813 - }, - { - "epoch": 1.7414952292765409, - "grad_norm": 0.006282332818955183, - "learning_rate": 0.00019999850620505328, - "loss": 46.0, - "step": 10814 - }, - { - "epoch": 1.7416562663553283, - "grad_norm": 0.002152771456167102, - "learning_rate": 0.0001999985059285149, - "loss": 46.0, - "step": 10815 - }, - { - "epoch": 1.7418173034341158, - "grad_norm": 0.006459617521613836, - "learning_rate": 0.0001999985056519509, - "loss": 46.0, - "step": 10816 - }, - { - "epoch": 1.7419783405129032, - "grad_norm": 0.002866796450689435, - "learning_rate": 0.00019999850537536133, - "loss": 46.0, - "step": 10817 - }, - { - "epoch": 1.7421393775916905, - "grad_norm": 0.005442943423986435, - "learning_rate": 0.00019999850509874615, - "loss": 46.0, - "step": 10818 - }, - { - "epoch": 1.742300414670478, - "grad_norm": 0.001381971058435738, - "learning_rate": 0.0001999985048221054, - "loss": 46.0, - "step": 10819 - }, - { - "epoch": 1.7424614517492651, - "grad_norm": 0.002078956924378872, - "learning_rate": 0.00019999850454543902, - "loss": 46.0, - "step": 10820 - }, - { - "epoch": 1.7426224888280526, - "grad_norm": 0.0022242553532123566, - "learning_rate": 0.0001999985042687471, - "loss": 46.0, - "step": 10821 - }, - { - "epoch": 1.74278352590684, - "grad_norm": 0.0018119171727448702, - "learning_rate": 0.00019999850399202954, - "loss": 46.0, - "step": 10822 - }, - { - "epoch": 1.7429445629856275, - "grad_norm": 0.0018037857953459024, - "learning_rate": 0.00019999850371528642, - "loss": 46.0, - "step": 10823 - }, - { - "epoch": 1.743105600064415, - "grad_norm": 0.002616539131850004, - "learning_rate": 0.00019999850343851766, - "loss": 46.0, - "step": 10824 - }, - { - "epoch": 1.7432666371432022, - "grad_norm": 0.0026036249473690987, - "learning_rate": 0.00019999850316172334, - "loss": 46.0, - "step": 10825 - }, - { - "epoch": 1.7434276742219896, - "grad_norm": 0.004549722652882338, - "learning_rate": 0.0001999985028849034, - "loss": 46.0, - "step": 10826 - }, - { - "epoch": 1.7435887113007769, - "grad_norm": 0.007684149779379368, - "learning_rate": 0.00019999850260805788, - "loss": 46.0, - "step": 10827 - }, - { - "epoch": 1.7437497483795643, - "grad_norm": 0.0133134126663208, - "learning_rate": 0.00019999850233118677, - "loss": 46.0, - "step": 10828 - }, - { - "epoch": 1.7439107854583518, - "grad_norm": 0.0036709101404994726, - "learning_rate": 0.00019999850205429007, - "loss": 46.0, - "step": 10829 - }, - { - "epoch": 1.7440718225371392, - "grad_norm": 0.001921869465149939, - "learning_rate": 0.00019999850177736776, - "loss": 46.0, - "step": 10830 - }, - { - "epoch": 1.7442328596159267, - "grad_norm": 0.001875140005722642, - "learning_rate": 0.00019999850150041986, - "loss": 46.0, - "step": 10831 - }, - { - "epoch": 1.7443938966947141, - "grad_norm": 0.002384265186265111, - "learning_rate": 0.00019999850122344638, - "loss": 46.0, - "step": 10832 - }, - { - "epoch": 1.7445549337735013, - "grad_norm": 0.008249553851783276, - "learning_rate": 0.0001999985009464473, - "loss": 46.0, - "step": 10833 - }, - { - "epoch": 1.7447159708522886, - "grad_norm": 0.00331481103785336, - "learning_rate": 0.0001999985006694226, - "loss": 46.0, - "step": 10834 - }, - { - "epoch": 1.744877007931076, - "grad_norm": 0.0043636648915708065, - "learning_rate": 0.00019999850039237235, - "loss": 46.0, - "step": 10835 - }, - { - "epoch": 1.7450380450098635, - "grad_norm": 0.0020867707207798958, - "learning_rate": 0.00019999850011529646, - "loss": 46.0, - "step": 10836 - }, - { - "epoch": 1.745199082088651, - "grad_norm": 0.008378506638109684, - "learning_rate": 0.000199998499838195, - "loss": 46.0, - "step": 10837 - }, - { - "epoch": 1.7453601191674384, - "grad_norm": 0.0014831061707809567, - "learning_rate": 0.00019999849956106792, - "loss": 46.0, - "step": 10838 - }, - { - "epoch": 1.7455211562462258, - "grad_norm": 0.000990537810139358, - "learning_rate": 0.00019999849928391527, - "loss": 46.0, - "step": 10839 - }, - { - "epoch": 1.745682193325013, - "grad_norm": 0.006173607427626848, - "learning_rate": 0.00019999849900673703, - "loss": 46.0, - "step": 10840 - }, - { - "epoch": 1.7458432304038005, - "grad_norm": 0.004493341781198978, - "learning_rate": 0.0001999984987295332, - "loss": 46.0, - "step": 10841 - }, - { - "epoch": 1.7460042674825877, - "grad_norm": 0.003403579583391547, - "learning_rate": 0.00019999849845230377, - "loss": 46.0, - "step": 10842 - }, - { - "epoch": 1.7461653045613752, - "grad_norm": 0.004680973943322897, - "learning_rate": 0.00019999849817504872, - "loss": 46.0, - "step": 10843 - }, - { - "epoch": 1.7463263416401626, - "grad_norm": 0.002826865529641509, - "learning_rate": 0.0001999984978977681, - "loss": 46.0, - "step": 10844 - }, - { - "epoch": 1.74648737871895, - "grad_norm": 0.0021834252402186394, - "learning_rate": 0.0001999984976204619, - "loss": 46.0, - "step": 10845 - }, - { - "epoch": 1.7466484157977376, - "grad_norm": 0.0014757632743567228, - "learning_rate": 0.00019999849734313006, - "loss": 46.0, - "step": 10846 - }, - { - "epoch": 1.7468094528765248, - "grad_norm": 0.002079938305541873, - "learning_rate": 0.00019999849706577266, - "loss": 46.0, - "step": 10847 - }, - { - "epoch": 1.7469704899553122, - "grad_norm": 0.0010253607761114836, - "learning_rate": 0.00019999849678838967, - "loss": 46.0, - "step": 10848 - }, - { - "epoch": 1.7471315270340995, - "grad_norm": 0.005101647228002548, - "learning_rate": 0.00019999849651098107, - "loss": 46.0, - "step": 10849 - }, - { - "epoch": 1.747292564112887, - "grad_norm": 0.0032603980507701635, - "learning_rate": 0.00019999849623354688, - "loss": 46.0, - "step": 10850 - }, - { - "epoch": 1.7474536011916744, - "grad_norm": 0.013747045770287514, - "learning_rate": 0.00019999849595608708, - "loss": 46.0, - "step": 10851 - }, - { - "epoch": 1.7476146382704618, - "grad_norm": 0.00044209155021235347, - "learning_rate": 0.00019999849567860168, - "loss": 46.0, - "step": 10852 - }, - { - "epoch": 1.7477756753492493, - "grad_norm": 0.0018534163245931268, - "learning_rate": 0.00019999849540109073, - "loss": 46.0, - "step": 10853 - }, - { - "epoch": 1.7479367124280367, - "grad_norm": 0.004616633988916874, - "learning_rate": 0.00019999849512355414, - "loss": 46.0, - "step": 10854 - }, - { - "epoch": 1.748097749506824, - "grad_norm": 0.0012869354104623199, - "learning_rate": 0.00019999849484599201, - "loss": 46.0, - "step": 10855 - }, - { - "epoch": 1.7482587865856112, - "grad_norm": 0.0005482708802446723, - "learning_rate": 0.00019999849456840422, - "loss": 46.0, - "step": 10856 - }, - { - "epoch": 1.7484198236643986, - "grad_norm": 0.0009960244642570615, - "learning_rate": 0.00019999849429079087, - "loss": 46.0, - "step": 10857 - }, - { - "epoch": 1.748580860743186, - "grad_norm": 0.0022940030321478844, - "learning_rate": 0.00019999849401315195, - "loss": 46.0, - "step": 10858 - }, - { - "epoch": 1.7487418978219735, - "grad_norm": 0.0032357554882764816, - "learning_rate": 0.00019999849373548737, - "loss": 46.0, - "step": 10859 - }, - { - "epoch": 1.748902934900761, - "grad_norm": 0.0041608610190451145, - "learning_rate": 0.00019999849345779725, - "loss": 46.0, - "step": 10860 - }, - { - "epoch": 1.7490639719795484, - "grad_norm": 0.0016584380064159632, - "learning_rate": 0.00019999849318008152, - "loss": 46.0, - "step": 10861 - }, - { - "epoch": 1.7492250090583357, - "grad_norm": 0.0013155200285837054, - "learning_rate": 0.00019999849290234018, - "loss": 46.0, - "step": 10862 - }, - { - "epoch": 1.7493860461371231, - "grad_norm": 0.0018878270639106631, - "learning_rate": 0.00019999849262457327, - "loss": 46.0, - "step": 10863 - }, - { - "epoch": 1.7495470832159103, - "grad_norm": 0.0009368465980514884, - "learning_rate": 0.00019999849234678075, - "loss": 46.0, - "step": 10864 - }, - { - "epoch": 1.7497081202946978, - "grad_norm": 0.0011551823699846864, - "learning_rate": 0.00019999849206896265, - "loss": 46.0, - "step": 10865 - }, - { - "epoch": 1.7498691573734853, - "grad_norm": 0.0009309992310591042, - "learning_rate": 0.00019999849179111893, - "loss": 46.0, - "step": 10866 - }, - { - "epoch": 1.7500301944522727, - "grad_norm": 0.0018212506547570229, - "learning_rate": 0.00019999849151324965, - "loss": 46.0, - "step": 10867 - }, - { - "epoch": 1.7501912315310602, - "grad_norm": 0.004179096780717373, - "learning_rate": 0.00019999849123535475, - "loss": 46.0, - "step": 10868 - }, - { - "epoch": 1.7503522686098474, - "grad_norm": 0.003813309594988823, - "learning_rate": 0.00019999849095743427, - "loss": 46.0, - "step": 10869 - }, - { - "epoch": 1.7505133056886348, - "grad_norm": 0.004842016380280256, - "learning_rate": 0.00019999849067948818, - "loss": 46.0, - "step": 10870 - }, - { - "epoch": 1.750674342767422, - "grad_norm": 0.0022694820072501898, - "learning_rate": 0.0001999984904015165, - "loss": 46.0, - "step": 10871 - }, - { - "epoch": 1.7508353798462095, - "grad_norm": 0.00515178544446826, - "learning_rate": 0.00019999849012351922, - "loss": 46.0, - "step": 10872 - }, - { - "epoch": 1.750996416924997, - "grad_norm": 0.001147416653111577, - "learning_rate": 0.00019999848984549636, - "loss": 46.0, - "step": 10873 - }, - { - "epoch": 1.7511574540037844, - "grad_norm": 0.0023850752040743828, - "learning_rate": 0.00019999848956744792, - "loss": 46.0, - "step": 10874 - }, - { - "epoch": 1.7513184910825719, - "grad_norm": 0.002134798327460885, - "learning_rate": 0.00019999848928937386, - "loss": 46.0, - "step": 10875 - }, - { - "epoch": 1.751479528161359, - "grad_norm": 0.001114926184527576, - "learning_rate": 0.00019999848901127421, - "loss": 46.0, - "step": 10876 - }, - { - "epoch": 1.7516405652401466, - "grad_norm": 0.0016794002149254084, - "learning_rate": 0.00019999848873314898, - "loss": 46.0, - "step": 10877 - }, - { - "epoch": 1.7518016023189338, - "grad_norm": 0.0038413633592426777, - "learning_rate": 0.0001999984884549981, - "loss": 46.0, - "step": 10878 - }, - { - "epoch": 1.7519626393977212, - "grad_norm": 0.0028941063210368156, - "learning_rate": 0.00019999848817682167, - "loss": 46.0, - "step": 10879 - }, - { - "epoch": 1.7521236764765087, - "grad_norm": 0.0024616015143692493, - "learning_rate": 0.00019999848789861965, - "loss": 46.0, - "step": 10880 - }, - { - "epoch": 1.7522847135552961, - "grad_norm": 0.0035471355076879263, - "learning_rate": 0.00019999848762039204, - "loss": 46.0, - "step": 10881 - }, - { - "epoch": 1.7524457506340836, - "grad_norm": 0.0022235140204429626, - "learning_rate": 0.00019999848734213882, - "loss": 46.0, - "step": 10882 - }, - { - "epoch": 1.752606787712871, - "grad_norm": 0.0005679349415004253, - "learning_rate": 0.00019999848706386, - "loss": 46.0, - "step": 10883 - }, - { - "epoch": 1.7527678247916583, - "grad_norm": 0.0012403653236106038, - "learning_rate": 0.00019999848678555558, - "loss": 46.0, - "step": 10884 - }, - { - "epoch": 1.7529288618704457, - "grad_norm": 0.005488630384206772, - "learning_rate": 0.00019999848650722562, - "loss": 46.0, - "step": 10885 - }, - { - "epoch": 1.753089898949233, - "grad_norm": 0.006263222545385361, - "learning_rate": 0.00019999848622887, - "loss": 46.0, - "step": 10886 - }, - { - "epoch": 1.7532509360280204, - "grad_norm": 0.0037260418757796288, - "learning_rate": 0.0001999984859504888, - "loss": 46.0, - "step": 10887 - }, - { - "epoch": 1.7534119731068079, - "grad_norm": 0.0018899147398769855, - "learning_rate": 0.00019999848567208204, - "loss": 46.0, - "step": 10888 - }, - { - "epoch": 1.7535730101855953, - "grad_norm": 0.006363023538142443, - "learning_rate": 0.00019999848539364968, - "loss": 46.0, - "step": 10889 - }, - { - "epoch": 1.7537340472643828, - "grad_norm": 0.0022546444088220596, - "learning_rate": 0.0001999984851151917, - "loss": 46.0, - "step": 10890 - }, - { - "epoch": 1.75389508434317, - "grad_norm": 0.003685698611661792, - "learning_rate": 0.0001999984848367081, - "loss": 46.0, - "step": 10891 - }, - { - "epoch": 1.7540561214219574, - "grad_norm": 0.0036330418661236763, - "learning_rate": 0.00019999848455819896, - "loss": 46.0, - "step": 10892 - }, - { - "epoch": 1.7542171585007447, - "grad_norm": 0.007977779023349285, - "learning_rate": 0.0001999984842796642, - "loss": 46.0, - "step": 10893 - }, - { - "epoch": 1.7543781955795321, - "grad_norm": 0.004041213542222977, - "learning_rate": 0.00019999848400110385, - "loss": 46.0, - "step": 10894 - }, - { - "epoch": 1.7545392326583196, - "grad_norm": 0.006915631704032421, - "learning_rate": 0.0001999984837225179, - "loss": 46.0, - "step": 10895 - }, - { - "epoch": 1.754700269737107, - "grad_norm": 0.002794482745230198, - "learning_rate": 0.00019999848344390636, - "loss": 46.0, - "step": 10896 - }, - { - "epoch": 1.7548613068158945, - "grad_norm": 0.0004204817523714155, - "learning_rate": 0.00019999848316526925, - "loss": 46.0, - "step": 10897 - }, - { - "epoch": 1.7550223438946817, - "grad_norm": 0.0068509881384670734, - "learning_rate": 0.00019999848288660652, - "loss": 46.0, - "step": 10898 - }, - { - "epoch": 1.7551833809734692, - "grad_norm": 0.003944136202335358, - "learning_rate": 0.0001999984826079182, - "loss": 46.0, - "step": 10899 - }, - { - "epoch": 1.7553444180522564, - "grad_norm": 0.0008018723456189036, - "learning_rate": 0.00019999848232920425, - "loss": 46.0, - "step": 10900 - }, - { - "epoch": 1.7555054551310438, - "grad_norm": 0.0015046069165691733, - "learning_rate": 0.00019999848205046476, - "loss": 46.0, - "step": 10901 - }, - { - "epoch": 1.7556664922098313, - "grad_norm": 0.00173277803696692, - "learning_rate": 0.00019999848177169966, - "loss": 46.0, - "step": 10902 - }, - { - "epoch": 1.7558275292886187, - "grad_norm": 0.002506011165678501, - "learning_rate": 0.00019999848149290897, - "loss": 46.0, - "step": 10903 - }, - { - "epoch": 1.7559885663674062, - "grad_norm": 0.0020869409199804068, - "learning_rate": 0.00019999848121409267, - "loss": 46.0, - "step": 10904 - }, - { - "epoch": 1.7561496034461936, - "grad_norm": 0.001737023238092661, - "learning_rate": 0.00019999848093525078, - "loss": 46.0, - "step": 10905 - }, - { - "epoch": 1.7563106405249809, - "grad_norm": 0.002367757260799408, - "learning_rate": 0.0001999984806563833, - "loss": 46.0, - "step": 10906 - }, - { - "epoch": 1.7564716776037683, - "grad_norm": 0.0016208909219130874, - "learning_rate": 0.00019999848037749023, - "loss": 46.0, - "step": 10907 - }, - { - "epoch": 1.7566327146825556, - "grad_norm": 0.0015691753942519426, - "learning_rate": 0.00019999848009857153, - "loss": 46.0, - "step": 10908 - }, - { - "epoch": 1.756793751761343, - "grad_norm": 0.007605371531099081, - "learning_rate": 0.00019999847981962726, - "loss": 46.0, - "step": 10909 - }, - { - "epoch": 1.7569547888401305, - "grad_norm": 0.0015439819544553757, - "learning_rate": 0.0001999984795406574, - "loss": 46.0, - "step": 10910 - }, - { - "epoch": 1.757115825918918, - "grad_norm": 0.0035237285774201155, - "learning_rate": 0.00019999847926166196, - "loss": 46.0, - "step": 10911 - }, - { - "epoch": 1.7572768629977054, - "grad_norm": 0.0011734850704669952, - "learning_rate": 0.00019999847898264094, - "loss": 46.0, - "step": 10912 - }, - { - "epoch": 1.7574379000764926, - "grad_norm": 0.002724883845075965, - "learning_rate": 0.00019999847870359427, - "loss": 46.0, - "step": 10913 - }, - { - "epoch": 1.75759893715528, - "grad_norm": 0.00047419476322829723, - "learning_rate": 0.00019999847842452204, - "loss": 46.0, - "step": 10914 - }, - { - "epoch": 1.7577599742340673, - "grad_norm": 0.0021352467592805624, - "learning_rate": 0.0001999984781454242, - "loss": 46.0, - "step": 10915 - }, - { - "epoch": 1.7579210113128547, - "grad_norm": 0.005455997306853533, - "learning_rate": 0.00019999847786630076, - "loss": 46.0, - "step": 10916 - }, - { - "epoch": 1.7580820483916422, - "grad_norm": 0.005487693939357996, - "learning_rate": 0.00019999847758715174, - "loss": 46.0, - "step": 10917 - }, - { - "epoch": 1.7582430854704296, - "grad_norm": 0.0016837469302117825, - "learning_rate": 0.0001999984773079771, - "loss": 46.0, - "step": 10918 - }, - { - "epoch": 1.758404122549217, - "grad_norm": 0.0015634101582691073, - "learning_rate": 0.00019999847702877692, - "loss": 46.0, - "step": 10919 - }, - { - "epoch": 1.7585651596280043, - "grad_norm": 0.002548001706600189, - "learning_rate": 0.0001999984767495511, - "loss": 46.0, - "step": 10920 - }, - { - "epoch": 1.7587261967067918, - "grad_norm": 0.0010101523948833346, - "learning_rate": 0.0001999984764702997, - "loss": 46.0, - "step": 10921 - }, - { - "epoch": 1.758887233785579, - "grad_norm": 0.007910455577075481, - "learning_rate": 0.0001999984761910227, - "loss": 46.0, - "step": 10922 - }, - { - "epoch": 1.7590482708643664, - "grad_norm": 0.0021781001705676317, - "learning_rate": 0.0001999984759117201, - "loss": 46.0, - "step": 10923 - }, - { - "epoch": 1.7592093079431539, - "grad_norm": 0.0010348019422963262, - "learning_rate": 0.00019999847563239193, - "loss": 46.0, - "step": 10924 - }, - { - "epoch": 1.7593703450219413, - "grad_norm": 0.0014855475164949894, - "learning_rate": 0.00019999847535303816, - "loss": 46.0, - "step": 10925 - }, - { - "epoch": 1.7595313821007288, - "grad_norm": 0.004134501330554485, - "learning_rate": 0.00019999847507365878, - "loss": 46.0, - "step": 10926 - }, - { - "epoch": 1.7596924191795162, - "grad_norm": 0.0018700124928727746, - "learning_rate": 0.00019999847479425383, - "loss": 46.0, - "step": 10927 - }, - { - "epoch": 1.7598534562583035, - "grad_norm": 0.0019415926653891802, - "learning_rate": 0.00019999847451482325, - "loss": 46.0, - "step": 10928 - }, - { - "epoch": 1.7600144933370907, - "grad_norm": 0.0006833779625594616, - "learning_rate": 0.0001999984742353671, - "loss": 46.0, - "step": 10929 - }, - { - "epoch": 1.7601755304158782, - "grad_norm": 0.0006792580243200064, - "learning_rate": 0.00019999847395588534, - "loss": 46.0, - "step": 10930 - }, - { - "epoch": 1.7603365674946656, - "grad_norm": 0.0008282547350972891, - "learning_rate": 0.000199998473676378, - "loss": 46.0, - "step": 10931 - }, - { - "epoch": 1.760497604573453, - "grad_norm": 0.0020121545530855656, - "learning_rate": 0.00019999847339684506, - "loss": 46.0, - "step": 10932 - }, - { - "epoch": 1.7606586416522405, - "grad_norm": 0.0007697369437664747, - "learning_rate": 0.0001999984731172865, - "loss": 46.0, - "step": 10933 - }, - { - "epoch": 1.760819678731028, - "grad_norm": 0.0019026916706934571, - "learning_rate": 0.0001999984728377024, - "loss": 46.0, - "step": 10934 - }, - { - "epoch": 1.7609807158098152, - "grad_norm": 0.002064588014036417, - "learning_rate": 0.00019999847255809268, - "loss": 46.0, - "step": 10935 - }, - { - "epoch": 1.7611417528886026, - "grad_norm": 0.004713888745754957, - "learning_rate": 0.00019999847227845737, - "loss": 46.0, - "step": 10936 - }, - { - "epoch": 1.7613027899673899, - "grad_norm": 0.001684645307250321, - "learning_rate": 0.00019999847199879644, - "loss": 46.0, - "step": 10937 - }, - { - "epoch": 1.7614638270461773, - "grad_norm": 0.004242990631610155, - "learning_rate": 0.00019999847171910996, - "loss": 46.0, - "step": 10938 - }, - { - "epoch": 1.7616248641249648, - "grad_norm": 0.007821135222911835, - "learning_rate": 0.00019999847143939783, - "loss": 46.0, - "step": 10939 - }, - { - "epoch": 1.7617859012037522, - "grad_norm": 0.0021978523582220078, - "learning_rate": 0.00019999847115966012, - "loss": 46.0, - "step": 10940 - }, - { - "epoch": 1.7619469382825397, - "grad_norm": 0.0012232176959514618, - "learning_rate": 0.00019999847087989685, - "loss": 46.0, - "step": 10941 - }, - { - "epoch": 1.762107975361327, - "grad_norm": 0.005942869931459427, - "learning_rate": 0.00019999847060010796, - "loss": 46.0, - "step": 10942 - }, - { - "epoch": 1.7622690124401144, - "grad_norm": 0.0005504640284925699, - "learning_rate": 0.00019999847032029349, - "loss": 46.0, - "step": 10943 - }, - { - "epoch": 1.7624300495189016, - "grad_norm": 0.001276353606954217, - "learning_rate": 0.0001999984700404534, - "loss": 46.0, - "step": 10944 - }, - { - "epoch": 1.762591086597689, - "grad_norm": 0.0026126888114959, - "learning_rate": 0.00019999846976058775, - "loss": 46.0, - "step": 10945 - }, - { - "epoch": 1.7627521236764765, - "grad_norm": 0.002981804311275482, - "learning_rate": 0.00019999846948069648, - "loss": 46.0, - "step": 10946 - }, - { - "epoch": 1.762913160755264, - "grad_norm": 0.0033719688653945923, - "learning_rate": 0.00019999846920077963, - "loss": 46.0, - "step": 10947 - }, - { - "epoch": 1.7630741978340514, - "grad_norm": 0.000553334888536483, - "learning_rate": 0.00019999846892083717, - "loss": 46.0, - "step": 10948 - }, - { - "epoch": 1.7632352349128388, - "grad_norm": 0.002527834614738822, - "learning_rate": 0.00019999846864086912, - "loss": 46.0, - "step": 10949 - }, - { - "epoch": 1.763396271991626, - "grad_norm": 0.0035288031212985516, - "learning_rate": 0.0001999984683608755, - "loss": 46.0, - "step": 10950 - }, - { - "epoch": 1.7635573090704133, - "grad_norm": 0.000837587402202189, - "learning_rate": 0.00019999846808085625, - "loss": 46.0, - "step": 10951 - }, - { - "epoch": 1.7637183461492008, - "grad_norm": 0.0011087177554145455, - "learning_rate": 0.0001999984678008114, - "loss": 46.0, - "step": 10952 - }, - { - "epoch": 1.7638793832279882, - "grad_norm": 0.0007402384071610868, - "learning_rate": 0.000199998467520741, - "loss": 46.0, - "step": 10953 - }, - { - "epoch": 1.7640404203067757, - "grad_norm": 0.0013717241818085313, - "learning_rate": 0.00019999846724064497, - "loss": 46.0, - "step": 10954 - }, - { - "epoch": 1.764201457385563, - "grad_norm": 0.002512422390282154, - "learning_rate": 0.00019999846696052337, - "loss": 46.0, - "step": 10955 - }, - { - "epoch": 1.7643624944643506, - "grad_norm": 0.0009923626203089952, - "learning_rate": 0.00019999846668037615, - "loss": 46.0, - "step": 10956 - }, - { - "epoch": 1.7645235315431378, - "grad_norm": 0.0004795429122168571, - "learning_rate": 0.00019999846640020335, - "loss": 46.0, - "step": 10957 - }, - { - "epoch": 1.7646845686219252, - "grad_norm": 0.002552557270973921, - "learning_rate": 0.00019999846612000496, - "loss": 46.0, - "step": 10958 - }, - { - "epoch": 1.7648456057007125, - "grad_norm": 0.004905923269689083, - "learning_rate": 0.00019999846583978098, - "loss": 46.0, - "step": 10959 - }, - { - "epoch": 1.7650066427795, - "grad_norm": 0.0016231703339144588, - "learning_rate": 0.0001999984655595314, - "loss": 46.0, - "step": 10960 - }, - { - "epoch": 1.7651676798582874, - "grad_norm": 0.004946983885020018, - "learning_rate": 0.0001999984652792562, - "loss": 46.0, - "step": 10961 - }, - { - "epoch": 1.7653287169370748, - "grad_norm": 0.0017494576750323176, - "learning_rate": 0.00019999846499895542, - "loss": 46.0, - "step": 10962 - }, - { - "epoch": 1.7654897540158623, - "grad_norm": 0.003942117094993591, - "learning_rate": 0.00019999846471862906, - "loss": 46.0, - "step": 10963 - }, - { - "epoch": 1.7656507910946495, - "grad_norm": 0.002034541219472885, - "learning_rate": 0.0001999984644382771, - "loss": 46.0, - "step": 10964 - }, - { - "epoch": 1.765811828173437, - "grad_norm": 0.0023761694319546223, - "learning_rate": 0.00019999846415789954, - "loss": 46.0, - "step": 10965 - }, - { - "epoch": 1.7659728652522242, - "grad_norm": 0.0012503170873969793, - "learning_rate": 0.0001999984638774964, - "loss": 46.0, - "step": 10966 - }, - { - "epoch": 1.7661339023310116, - "grad_norm": 0.002663065679371357, - "learning_rate": 0.00019999846359706764, - "loss": 46.0, - "step": 10967 - }, - { - "epoch": 1.766294939409799, - "grad_norm": 0.0020416222978383303, - "learning_rate": 0.0001999984633166133, - "loss": 46.0, - "step": 10968 - }, - { - "epoch": 1.7664559764885865, - "grad_norm": 0.003008096944540739, - "learning_rate": 0.00019999846303613337, - "loss": 46.0, - "step": 10969 - }, - { - "epoch": 1.766617013567374, - "grad_norm": 0.0011060424149036407, - "learning_rate": 0.00019999846275562785, - "loss": 46.0, - "step": 10970 - }, - { - "epoch": 1.7667780506461612, - "grad_norm": 0.0020178980194032192, - "learning_rate": 0.00019999846247509672, - "loss": 46.0, - "step": 10971 - }, - { - "epoch": 1.7669390877249487, - "grad_norm": 0.0030181705951690674, - "learning_rate": 0.00019999846219454, - "loss": 46.0, - "step": 10972 - }, - { - "epoch": 1.767100124803736, - "grad_norm": 0.0021134752314537764, - "learning_rate": 0.0001999984619139577, - "loss": 46.0, - "step": 10973 - }, - { - "epoch": 1.7672611618825234, - "grad_norm": 0.0006657113553956151, - "learning_rate": 0.00019999846163334977, - "loss": 46.0, - "step": 10974 - }, - { - "epoch": 1.7674221989613108, - "grad_norm": 0.000494988402351737, - "learning_rate": 0.00019999846135271626, - "loss": 46.0, - "step": 10975 - }, - { - "epoch": 1.7675832360400983, - "grad_norm": 0.0023772960994392633, - "learning_rate": 0.00019999846107205717, - "loss": 46.0, - "step": 10976 - }, - { - "epoch": 1.7677442731188857, - "grad_norm": 0.0017018452053889632, - "learning_rate": 0.00019999846079137248, - "loss": 46.0, - "step": 10977 - }, - { - "epoch": 1.7679053101976732, - "grad_norm": 0.0007068027043715119, - "learning_rate": 0.0001999984605106622, - "loss": 46.0, - "step": 10978 - }, - { - "epoch": 1.7680663472764604, - "grad_norm": 0.0027879702392965555, - "learning_rate": 0.0001999984602299263, - "loss": 46.0, - "step": 10979 - }, - { - "epoch": 1.7682273843552478, - "grad_norm": 0.006277864333242178, - "learning_rate": 0.00019999845994916483, - "loss": 46.0, - "step": 10980 - }, - { - "epoch": 1.768388421434035, - "grad_norm": 0.003597882343456149, - "learning_rate": 0.00019999845966837775, - "loss": 46.0, - "step": 10981 - }, - { - "epoch": 1.7685494585128225, - "grad_norm": 0.0007268081535585225, - "learning_rate": 0.0001999984593875651, - "loss": 46.0, - "step": 10982 - }, - { - "epoch": 1.76871049559161, - "grad_norm": 0.0027878275141119957, - "learning_rate": 0.00019999845910672684, - "loss": 46.0, - "step": 10983 - }, - { - "epoch": 1.7688715326703974, - "grad_norm": 0.0010864431969821453, - "learning_rate": 0.000199998458825863, - "loss": 46.0, - "step": 10984 - }, - { - "epoch": 1.7690325697491849, - "grad_norm": 0.0018990386743098497, - "learning_rate": 0.00019999845854497354, - "loss": 46.0, - "step": 10985 - }, - { - "epoch": 1.769193606827972, - "grad_norm": 0.0013602948747575283, - "learning_rate": 0.00019999845826405851, - "loss": 46.0, - "step": 10986 - }, - { - "epoch": 1.7693546439067596, - "grad_norm": 0.0064102960750460625, - "learning_rate": 0.00019999845798311788, - "loss": 46.0, - "step": 10987 - }, - { - "epoch": 1.7695156809855468, - "grad_norm": 0.0028815045952796936, - "learning_rate": 0.00019999845770215163, - "loss": 46.0, - "step": 10988 - }, - { - "epoch": 1.7696767180643342, - "grad_norm": 0.0006191595457494259, - "learning_rate": 0.00019999845742115982, - "loss": 46.0, - "step": 10989 - }, - { - "epoch": 1.7698377551431217, - "grad_norm": 0.0019128312123939395, - "learning_rate": 0.0001999984571401424, - "loss": 46.0, - "step": 10990 - }, - { - "epoch": 1.7699987922219091, - "grad_norm": 0.0007664788281545043, - "learning_rate": 0.00019999845685909938, - "loss": 46.0, - "step": 10991 - }, - { - "epoch": 1.7701598293006966, - "grad_norm": 0.0071024163626134396, - "learning_rate": 0.00019999845657803076, - "loss": 46.0, - "step": 10992 - }, - { - "epoch": 1.7703208663794838, - "grad_norm": 0.001521787140518427, - "learning_rate": 0.00019999845629693657, - "loss": 46.0, - "step": 10993 - }, - { - "epoch": 1.7704819034582713, - "grad_norm": 0.0025173607282340527, - "learning_rate": 0.00019999845601581677, - "loss": 46.0, - "step": 10994 - }, - { - "epoch": 1.7706429405370585, - "grad_norm": 0.001532764988951385, - "learning_rate": 0.0001999984557346714, - "loss": 46.0, - "step": 10995 - }, - { - "epoch": 1.770803977615846, - "grad_norm": 0.007769228424876928, - "learning_rate": 0.00019999845545350039, - "loss": 46.0, - "step": 10996 - }, - { - "epoch": 1.7709650146946334, - "grad_norm": 0.0013940640492364764, - "learning_rate": 0.0001999984551723038, - "loss": 46.0, - "step": 10997 - }, - { - "epoch": 1.7711260517734209, - "grad_norm": 0.007811498362571001, - "learning_rate": 0.00019999845489108165, - "loss": 46.0, - "step": 10998 - }, - { - "epoch": 1.7712870888522083, - "grad_norm": 0.00822006817907095, - "learning_rate": 0.00019999845460983386, - "loss": 46.0, - "step": 10999 - }, - { - "epoch": 1.7714481259309958, - "grad_norm": 0.007976683788001537, - "learning_rate": 0.0001999984543285605, - "loss": 46.0, - "step": 11000 - }, - { - "epoch": 1.771609163009783, - "grad_norm": 0.005001617129892111, - "learning_rate": 0.00019999845404726155, - "loss": 46.0, - "step": 11001 - }, - { - "epoch": 1.7717702000885704, - "grad_norm": 0.001979105407372117, - "learning_rate": 0.000199998453765937, - "loss": 46.0, - "step": 11002 - }, - { - "epoch": 1.7719312371673577, - "grad_norm": 0.0013469480909407139, - "learning_rate": 0.00019999845348458686, - "loss": 46.0, - "step": 11003 - }, - { - "epoch": 1.7720922742461451, - "grad_norm": 0.0015179608017206192, - "learning_rate": 0.00019999845320321108, - "loss": 46.0, - "step": 11004 - }, - { - "epoch": 1.7722533113249326, - "grad_norm": 0.0017213443061336875, - "learning_rate": 0.00019999845292180976, - "loss": 46.0, - "step": 11005 - }, - { - "epoch": 1.77241434840372, - "grad_norm": 0.0049031986854970455, - "learning_rate": 0.0001999984526403828, - "loss": 46.0, - "step": 11006 - }, - { - "epoch": 1.7725753854825075, - "grad_norm": 0.008329499512910843, - "learning_rate": 0.00019999845235893032, - "loss": 46.0, - "step": 11007 - }, - { - "epoch": 1.7727364225612947, - "grad_norm": 0.0007381970062851906, - "learning_rate": 0.0001999984520774522, - "loss": 46.0, - "step": 11008 - }, - { - "epoch": 1.7728974596400822, - "grad_norm": 0.01034065056592226, - "learning_rate": 0.00019999845179594848, - "loss": 46.0, - "step": 11009 - }, - { - "epoch": 1.7730584967188694, - "grad_norm": 0.007265487220138311, - "learning_rate": 0.00019999845151441915, - "loss": 46.0, - "step": 11010 - }, - { - "epoch": 1.7732195337976568, - "grad_norm": 0.0008545235614292324, - "learning_rate": 0.00019999845123286426, - "loss": 46.0, - "step": 11011 - }, - { - "epoch": 1.7733805708764443, - "grad_norm": 0.0013570995070040226, - "learning_rate": 0.00019999845095128378, - "loss": 46.0, - "step": 11012 - }, - { - "epoch": 1.7735416079552317, - "grad_norm": 0.0018267452251166105, - "learning_rate": 0.00019999845066967766, - "loss": 46.0, - "step": 11013 - }, - { - "epoch": 1.7737026450340192, - "grad_norm": 0.006035581696778536, - "learning_rate": 0.00019999845038804596, - "loss": 46.0, - "step": 11014 - }, - { - "epoch": 1.7738636821128064, - "grad_norm": 0.0011682414915412664, - "learning_rate": 0.0001999984501063887, - "loss": 46.0, - "step": 11015 - }, - { - "epoch": 1.7740247191915939, - "grad_norm": 0.0011177061824128032, - "learning_rate": 0.00019999844982470584, - "loss": 46.0, - "step": 11016 - }, - { - "epoch": 1.774185756270381, - "grad_norm": 0.004796769469976425, - "learning_rate": 0.00019999844954299737, - "loss": 46.0, - "step": 11017 - }, - { - "epoch": 1.7743467933491686, - "grad_norm": 0.0036517970729619265, - "learning_rate": 0.0001999984492612633, - "loss": 46.0, - "step": 11018 - }, - { - "epoch": 1.774507830427956, - "grad_norm": 0.0033792583271861076, - "learning_rate": 0.00019999844897950362, - "loss": 46.0, - "step": 11019 - }, - { - "epoch": 1.7746688675067435, - "grad_norm": 0.002855784958228469, - "learning_rate": 0.0001999984486977184, - "loss": 46.0, - "step": 11020 - }, - { - "epoch": 1.774829904585531, - "grad_norm": 0.008407426066696644, - "learning_rate": 0.00019999844841590752, - "loss": 46.0, - "step": 11021 - }, - { - "epoch": 1.7749909416643184, - "grad_norm": 0.0003833010559901595, - "learning_rate": 0.0001999984481340711, - "loss": 46.0, - "step": 11022 - }, - { - "epoch": 1.7751519787431056, - "grad_norm": 0.0029628907795995474, - "learning_rate": 0.00019999844785220904, - "loss": 46.0, - "step": 11023 - }, - { - "epoch": 1.7753130158218928, - "grad_norm": 0.002565759001299739, - "learning_rate": 0.0001999984475703214, - "loss": 46.0, - "step": 11024 - }, - { - "epoch": 1.7754740529006803, - "grad_norm": 0.0013392409309744835, - "learning_rate": 0.00019999844728840817, - "loss": 46.0, - "step": 11025 - }, - { - "epoch": 1.7756350899794677, - "grad_norm": 0.0029403886292129755, - "learning_rate": 0.00019999844700646936, - "loss": 46.0, - "step": 11026 - }, - { - "epoch": 1.7757961270582552, - "grad_norm": 0.0050983186811208725, - "learning_rate": 0.00019999844672450494, - "loss": 46.0, - "step": 11027 - }, - { - "epoch": 1.7759571641370426, - "grad_norm": 0.0010983977699652314, - "learning_rate": 0.00019999844644251496, - "loss": 46.0, - "step": 11028 - }, - { - "epoch": 1.77611820121583, - "grad_norm": 0.0008620453882031143, - "learning_rate": 0.00019999844616049934, - "loss": 46.0, - "step": 11029 - }, - { - "epoch": 1.7762792382946173, - "grad_norm": 0.004579078871756792, - "learning_rate": 0.00019999844587845816, - "loss": 46.0, - "step": 11030 - }, - { - "epoch": 1.7764402753734048, - "grad_norm": 0.0017242144094780087, - "learning_rate": 0.00019999844559639133, - "loss": 46.0, - "step": 11031 - }, - { - "epoch": 1.776601312452192, - "grad_norm": 0.008659831248223782, - "learning_rate": 0.00019999844531429898, - "loss": 46.0, - "step": 11032 - }, - { - "epoch": 1.7767623495309794, - "grad_norm": 0.0005555757670663297, - "learning_rate": 0.00019999844503218098, - "loss": 46.0, - "step": 11033 - }, - { - "epoch": 1.776923386609767, - "grad_norm": 0.0010951929725706577, - "learning_rate": 0.00019999844475003742, - "loss": 46.0, - "step": 11034 - }, - { - "epoch": 1.7770844236885543, - "grad_norm": 0.0007955901091918349, - "learning_rate": 0.00019999844446786825, - "loss": 46.0, - "step": 11035 - }, - { - "epoch": 1.7772454607673418, - "grad_norm": 0.001042502699419856, - "learning_rate": 0.0001999984441856735, - "loss": 46.0, - "step": 11036 - }, - { - "epoch": 1.777406497846129, - "grad_norm": 0.0028987498953938484, - "learning_rate": 0.0001999984439034531, - "loss": 46.0, - "step": 11037 - }, - { - "epoch": 1.7775675349249165, - "grad_norm": 0.002884478308260441, - "learning_rate": 0.00019999844362120713, - "loss": 46.0, - "step": 11038 - }, - { - "epoch": 1.7777285720037037, - "grad_norm": 0.002744531724601984, - "learning_rate": 0.0001999984433389356, - "loss": 46.0, - "step": 11039 - }, - { - "epoch": 1.7778896090824912, - "grad_norm": 0.000942451530136168, - "learning_rate": 0.00019999844305663847, - "loss": 46.0, - "step": 11040 - }, - { - "epoch": 1.7780506461612786, - "grad_norm": 0.0004730231303256005, - "learning_rate": 0.00019999844277431573, - "loss": 46.0, - "step": 11041 - }, - { - "epoch": 1.778211683240066, - "grad_norm": 0.0009635805035941303, - "learning_rate": 0.0001999984424919674, - "loss": 46.0, - "step": 11042 - }, - { - "epoch": 1.7783727203188535, - "grad_norm": 0.000491833605337888, - "learning_rate": 0.00019999844220959347, - "loss": 46.0, - "step": 11043 - }, - { - "epoch": 1.778533757397641, - "grad_norm": 0.0017154552042484283, - "learning_rate": 0.00019999844192719395, - "loss": 46.0, - "step": 11044 - }, - { - "epoch": 1.7786947944764282, - "grad_norm": 0.0026958847884088755, - "learning_rate": 0.00019999844164476883, - "loss": 46.0, - "step": 11045 - }, - { - "epoch": 1.7788558315552154, - "grad_norm": 0.0055764829739928246, - "learning_rate": 0.00019999844136231812, - "loss": 46.0, - "step": 11046 - }, - { - "epoch": 1.7790168686340029, - "grad_norm": 0.0014100803527981043, - "learning_rate": 0.0001999984410798418, - "loss": 46.0, - "step": 11047 - }, - { - "epoch": 1.7791779057127903, - "grad_norm": 0.0010405626380816102, - "learning_rate": 0.0001999984407973399, - "loss": 46.0, - "step": 11048 - }, - { - "epoch": 1.7793389427915778, - "grad_norm": 0.0006019024294801056, - "learning_rate": 0.00019999844051481243, - "loss": 46.0, - "step": 11049 - }, - { - "epoch": 1.7794999798703652, - "grad_norm": 0.004517868161201477, - "learning_rate": 0.00019999844023225934, - "loss": 46.0, - "step": 11050 - }, - { - "epoch": 1.7796610169491527, - "grad_norm": 0.00223662331700325, - "learning_rate": 0.00019999843994968064, - "loss": 46.0, - "step": 11051 - }, - { - "epoch": 1.77982205402794, - "grad_norm": 0.0016908921534195542, - "learning_rate": 0.00019999843966707638, - "loss": 46.0, - "step": 11052 - }, - { - "epoch": 1.7799830911067274, - "grad_norm": 0.001135583152063191, - "learning_rate": 0.0001999984393844465, - "loss": 46.0, - "step": 11053 - }, - { - "epoch": 1.7801441281855146, - "grad_norm": 0.0024618771858513355, - "learning_rate": 0.00019999843910179104, - "loss": 46.0, - "step": 11054 - }, - { - "epoch": 1.780305165264302, - "grad_norm": 0.001972004771232605, - "learning_rate": 0.00019999843881911, - "loss": 46.0, - "step": 11055 - }, - { - "epoch": 1.7804662023430895, - "grad_norm": 0.0011904244311153889, - "learning_rate": 0.00019999843853640332, - "loss": 46.0, - "step": 11056 - }, - { - "epoch": 1.780627239421877, - "grad_norm": 0.004262834321707487, - "learning_rate": 0.00019999843825367107, - "loss": 46.0, - "step": 11057 - }, - { - "epoch": 1.7807882765006644, - "grad_norm": 0.0008145908359438181, - "learning_rate": 0.00019999843797091323, - "loss": 46.0, - "step": 11058 - }, - { - "epoch": 1.7809493135794516, - "grad_norm": 0.004048207774758339, - "learning_rate": 0.00019999843768812978, - "loss": 46.0, - "step": 11059 - }, - { - "epoch": 1.781110350658239, - "grad_norm": 0.003517892211675644, - "learning_rate": 0.00019999843740532076, - "loss": 46.0, - "step": 11060 - }, - { - "epoch": 1.7812713877370263, - "grad_norm": 0.000693217443767935, - "learning_rate": 0.00019999843712248613, - "loss": 46.0, - "step": 11061 - }, - { - "epoch": 1.7814324248158138, - "grad_norm": 0.0027239397168159485, - "learning_rate": 0.00019999843683962592, - "loss": 46.0, - "step": 11062 - }, - { - "epoch": 1.7815934618946012, - "grad_norm": 0.00394403887912631, - "learning_rate": 0.0001999984365567401, - "loss": 46.0, - "step": 11063 - }, - { - "epoch": 1.7817544989733887, - "grad_norm": 0.0013011100236326456, - "learning_rate": 0.00019999843627382867, - "loss": 46.0, - "step": 11064 - }, - { - "epoch": 1.7819155360521761, - "grad_norm": 0.0030322198290377855, - "learning_rate": 0.00019999843599089167, - "loss": 46.0, - "step": 11065 - }, - { - "epoch": 1.7820765731309636, - "grad_norm": 0.003137225518003106, - "learning_rate": 0.00019999843570792908, - "loss": 46.0, - "step": 11066 - }, - { - "epoch": 1.7822376102097508, - "grad_norm": 0.005587464664131403, - "learning_rate": 0.0001999984354249409, - "loss": 46.0, - "step": 11067 - }, - { - "epoch": 1.782398647288538, - "grad_norm": 0.009867976419627666, - "learning_rate": 0.0001999984351419271, - "loss": 46.0, - "step": 11068 - }, - { - "epoch": 1.7825596843673255, - "grad_norm": 0.002365303225815296, - "learning_rate": 0.00019999843485888773, - "loss": 46.0, - "step": 11069 - }, - { - "epoch": 1.782720721446113, - "grad_norm": 0.005722887814044952, - "learning_rate": 0.00019999843457582273, - "loss": 46.0, - "step": 11070 - }, - { - "epoch": 1.7828817585249004, - "grad_norm": 0.0009947649668902159, - "learning_rate": 0.00019999843429273218, - "loss": 46.0, - "step": 11071 - }, - { - "epoch": 1.7830427956036878, - "grad_norm": 0.004566850606352091, - "learning_rate": 0.00019999843400961598, - "loss": 46.0, - "step": 11072 - }, - { - "epoch": 1.7832038326824753, - "grad_norm": 0.000928681343793869, - "learning_rate": 0.00019999843372647426, - "loss": 46.0, - "step": 11073 - }, - { - "epoch": 1.7833648697612625, - "grad_norm": 0.0011853943578898907, - "learning_rate": 0.00019999843344330689, - "loss": 46.0, - "step": 11074 - }, - { - "epoch": 1.78352590684005, - "grad_norm": 0.0008863420225679874, - "learning_rate": 0.00019999843316011393, - "loss": 46.0, - "step": 11075 - }, - { - "epoch": 1.7836869439188372, - "grad_norm": 0.003711366793140769, - "learning_rate": 0.00019999843287689539, - "loss": 46.0, - "step": 11076 - }, - { - "epoch": 1.7838479809976246, - "grad_norm": 0.0014373952290043235, - "learning_rate": 0.00019999843259365125, - "loss": 46.0, - "step": 11077 - }, - { - "epoch": 1.784009018076412, - "grad_norm": 0.0012223607627674937, - "learning_rate": 0.00019999843231038154, - "loss": 46.0, - "step": 11078 - }, - { - "epoch": 1.7841700551551996, - "grad_norm": 0.0006321249529719353, - "learning_rate": 0.0001999984320270862, - "loss": 46.0, - "step": 11079 - }, - { - "epoch": 1.784331092233987, - "grad_norm": 0.003856287570670247, - "learning_rate": 0.00019999843174376526, - "loss": 46.0, - "step": 11080 - }, - { - "epoch": 1.7844921293127742, - "grad_norm": 0.004373412579298019, - "learning_rate": 0.00019999843146041878, - "loss": 46.0, - "step": 11081 - }, - { - "epoch": 1.7846531663915617, - "grad_norm": 0.001382243586704135, - "learning_rate": 0.00019999843117704665, - "loss": 46.0, - "step": 11082 - }, - { - "epoch": 1.784814203470349, - "grad_norm": 0.0038277197163552046, - "learning_rate": 0.00019999843089364895, - "loss": 46.0, - "step": 11083 - }, - { - "epoch": 1.7849752405491364, - "grad_norm": 0.005997433792799711, - "learning_rate": 0.00019999843061022565, - "loss": 46.0, - "step": 11084 - }, - { - "epoch": 1.7851362776279238, - "grad_norm": 0.001283555757254362, - "learning_rate": 0.00019999843032677674, - "loss": 46.0, - "step": 11085 - }, - { - "epoch": 1.7852973147067113, - "grad_norm": 0.0015188554534688592, - "learning_rate": 0.00019999843004330224, - "loss": 46.0, - "step": 11086 - }, - { - "epoch": 1.7854583517854987, - "grad_norm": 0.0017779256450012326, - "learning_rate": 0.00019999842975980219, - "loss": 46.0, - "step": 11087 - }, - { - "epoch": 1.785619388864286, - "grad_norm": 0.001990334363654256, - "learning_rate": 0.0001999984294762765, - "loss": 46.0, - "step": 11088 - }, - { - "epoch": 1.7857804259430734, - "grad_norm": 0.002235524356365204, - "learning_rate": 0.00019999842919272523, - "loss": 46.0, - "step": 11089 - }, - { - "epoch": 1.7859414630218606, - "grad_norm": 0.004958270117640495, - "learning_rate": 0.00019999842890914836, - "loss": 46.0, - "step": 11090 - }, - { - "epoch": 1.786102500100648, - "grad_norm": 0.007544436492025852, - "learning_rate": 0.0001999984286255459, - "loss": 46.0, - "step": 11091 - }, - { - "epoch": 1.7862635371794355, - "grad_norm": 0.0014472788898274302, - "learning_rate": 0.00019999842834191785, - "loss": 46.0, - "step": 11092 - }, - { - "epoch": 1.786424574258223, - "grad_norm": 0.0021815740037709475, - "learning_rate": 0.0001999984280582642, - "loss": 46.0, - "step": 11093 - }, - { - "epoch": 1.7865856113370104, - "grad_norm": 0.002626087050884962, - "learning_rate": 0.00019999842777458494, - "loss": 46.0, - "step": 11094 - }, - { - "epoch": 1.786746648415798, - "grad_norm": 0.0033277596812695265, - "learning_rate": 0.0001999984274908801, - "loss": 46.0, - "step": 11095 - }, - { - "epoch": 1.7869076854945851, - "grad_norm": 0.0010635490762069821, - "learning_rate": 0.00019999842720714968, - "loss": 46.0, - "step": 11096 - }, - { - "epoch": 1.7870687225733726, - "grad_norm": 0.0014558301772922277, - "learning_rate": 0.00019999842692339367, - "loss": 46.0, - "step": 11097 - }, - { - "epoch": 1.7872297596521598, - "grad_norm": 0.002081095241010189, - "learning_rate": 0.00019999842663961204, - "loss": 46.0, - "step": 11098 - }, - { - "epoch": 1.7873907967309473, - "grad_norm": 0.0071091377176344395, - "learning_rate": 0.00019999842635580478, - "loss": 46.0, - "step": 11099 - }, - { - "epoch": 1.7875518338097347, - "grad_norm": 0.0022333275992423296, - "learning_rate": 0.00019999842607197198, - "loss": 46.0, - "step": 11100 - }, - { - "epoch": 1.7877128708885222, - "grad_norm": 0.0010902582434937358, - "learning_rate": 0.0001999984257881136, - "loss": 46.0, - "step": 11101 - }, - { - "epoch": 1.7878739079673096, - "grad_norm": 0.0030951553490012884, - "learning_rate": 0.0001999984255042296, - "loss": 46.0, - "step": 11102 - }, - { - "epoch": 1.7880349450460968, - "grad_norm": 0.005409586243331432, - "learning_rate": 0.00019999842522032, - "loss": 46.0, - "step": 11103 - }, - { - "epoch": 1.7881959821248843, - "grad_norm": 0.0015288225840777159, - "learning_rate": 0.00019999842493638478, - "loss": 46.0, - "step": 11104 - }, - { - "epoch": 1.7883570192036715, - "grad_norm": 0.0022097965702414513, - "learning_rate": 0.00019999842465242402, - "loss": 46.0, - "step": 11105 - }, - { - "epoch": 1.788518056282459, - "grad_norm": 0.001397671177983284, - "learning_rate": 0.00019999842436843764, - "loss": 46.0, - "step": 11106 - }, - { - "epoch": 1.7886790933612464, - "grad_norm": 0.0022432473488152027, - "learning_rate": 0.00019999842408442565, - "loss": 46.0, - "step": 11107 - }, - { - "epoch": 1.7888401304400339, - "grad_norm": 0.0027394231874495745, - "learning_rate": 0.00019999842380038813, - "loss": 46.0, - "step": 11108 - }, - { - "epoch": 1.7890011675188213, - "grad_norm": 0.0031087223906069994, - "learning_rate": 0.00019999842351632493, - "loss": 46.0, - "step": 11109 - }, - { - "epoch": 1.7891622045976086, - "grad_norm": 0.0008448674925602973, - "learning_rate": 0.00019999842323223618, - "loss": 46.0, - "step": 11110 - }, - { - "epoch": 1.789323241676396, - "grad_norm": 0.0008297382737509906, - "learning_rate": 0.00019999842294812184, - "loss": 46.0, - "step": 11111 - }, - { - "epoch": 1.7894842787551832, - "grad_norm": 0.0034324219450354576, - "learning_rate": 0.0001999984226639819, - "loss": 46.0, - "step": 11112 - }, - { - "epoch": 1.7896453158339707, - "grad_norm": 0.0009411865612491965, - "learning_rate": 0.00019999842237981638, - "loss": 46.0, - "step": 11113 - }, - { - "epoch": 1.7898063529127581, - "grad_norm": 0.005342352204024792, - "learning_rate": 0.0001999984220956252, - "loss": 46.0, - "step": 11114 - }, - { - "epoch": 1.7899673899915456, - "grad_norm": 0.003260920522734523, - "learning_rate": 0.00019999842181140848, - "loss": 46.0, - "step": 11115 - }, - { - "epoch": 1.790128427070333, - "grad_norm": 0.000732087588403374, - "learning_rate": 0.00019999842152716618, - "loss": 46.0, - "step": 11116 - }, - { - "epoch": 1.7902894641491205, - "grad_norm": 0.0010504265083000064, - "learning_rate": 0.00019999842124289826, - "loss": 46.0, - "step": 11117 - }, - { - "epoch": 1.7904505012279077, - "grad_norm": 0.0037848721258342266, - "learning_rate": 0.00019999842095860473, - "loss": 46.0, - "step": 11118 - }, - { - "epoch": 1.7906115383066952, - "grad_norm": 0.011950554326176643, - "learning_rate": 0.0001999984206742856, - "loss": 46.0, - "step": 11119 - }, - { - "epoch": 1.7907725753854824, - "grad_norm": 0.00043025804916396737, - "learning_rate": 0.00019999842038994094, - "loss": 46.0, - "step": 11120 - }, - { - "epoch": 1.7909336124642699, - "grad_norm": 0.00383752747438848, - "learning_rate": 0.00019999842010557064, - "loss": 46.0, - "step": 11121 - }, - { - "epoch": 1.7910946495430573, - "grad_norm": 0.002125471830368042, - "learning_rate": 0.00019999841982117476, - "loss": 46.0, - "step": 11122 - }, - { - "epoch": 1.7912556866218448, - "grad_norm": 0.010294078849256039, - "learning_rate": 0.00019999841953675324, - "loss": 46.0, - "step": 11123 - }, - { - "epoch": 1.7914167237006322, - "grad_norm": 0.0018050142098218203, - "learning_rate": 0.0001999984192523062, - "loss": 46.0, - "step": 11124 - }, - { - "epoch": 1.7915777607794194, - "grad_norm": 0.014061805792152882, - "learning_rate": 0.00019999841896783352, - "loss": 46.0, - "step": 11125 - }, - { - "epoch": 1.7917387978582069, - "grad_norm": 0.004811234772205353, - "learning_rate": 0.00019999841868333524, - "loss": 46.0, - "step": 11126 - }, - { - "epoch": 1.7918998349369941, - "grad_norm": 0.016827654093503952, - "learning_rate": 0.00019999841839881137, - "loss": 46.0, - "step": 11127 - }, - { - "epoch": 1.7920608720157816, - "grad_norm": 0.001457533217035234, - "learning_rate": 0.00019999841811426191, - "loss": 46.0, - "step": 11128 - }, - { - "epoch": 1.792221909094569, - "grad_norm": 0.004937728866934776, - "learning_rate": 0.00019999841782968687, - "loss": 46.0, - "step": 11129 - }, - { - "epoch": 1.7923829461733565, - "grad_norm": 0.0046903169713914394, - "learning_rate": 0.00019999841754508624, - "loss": 46.0, - "step": 11130 - }, - { - "epoch": 1.792543983252144, - "grad_norm": 0.0005806509288959205, - "learning_rate": 0.00019999841726046, - "loss": 46.0, - "step": 11131 - }, - { - "epoch": 1.7927050203309312, - "grad_norm": 0.0009940201416611671, - "learning_rate": 0.00019999841697580814, - "loss": 46.0, - "step": 11132 - }, - { - "epoch": 1.7928660574097186, - "grad_norm": 0.004427929874509573, - "learning_rate": 0.00019999841669113074, - "loss": 46.0, - "step": 11133 - }, - { - "epoch": 1.7930270944885058, - "grad_norm": 0.010929770767688751, - "learning_rate": 0.00019999841640642768, - "loss": 46.0, - "step": 11134 - }, - { - "epoch": 1.7931881315672933, - "grad_norm": 0.000417500821640715, - "learning_rate": 0.0001999984161216991, - "loss": 46.0, - "step": 11135 - }, - { - "epoch": 1.7933491686460807, - "grad_norm": 0.0029296651482582092, - "learning_rate": 0.00019999841583694488, - "loss": 46.0, - "step": 11136 - }, - { - "epoch": 1.7935102057248682, - "grad_norm": 0.006515244022011757, - "learning_rate": 0.00019999841555216508, - "loss": 46.0, - "step": 11137 - }, - { - "epoch": 1.7936712428036556, - "grad_norm": 0.004864226095378399, - "learning_rate": 0.00019999841526735965, - "loss": 46.0, - "step": 11138 - }, - { - "epoch": 1.793832279882443, - "grad_norm": 0.0009025073959492147, - "learning_rate": 0.00019999841498252868, - "loss": 46.0, - "step": 11139 - }, - { - "epoch": 1.7939933169612303, - "grad_norm": 0.0016181219834834337, - "learning_rate": 0.00019999841469767207, - "loss": 46.0, - "step": 11140 - }, - { - "epoch": 1.7941543540400176, - "grad_norm": 0.0027067612390965223, - "learning_rate": 0.0001999984144127899, - "loss": 46.0, - "step": 11141 - }, - { - "epoch": 1.794315391118805, - "grad_norm": 0.0003472942335065454, - "learning_rate": 0.0001999984141278821, - "loss": 46.0, - "step": 11142 - }, - { - "epoch": 1.7944764281975925, - "grad_norm": 0.0009301080135628581, - "learning_rate": 0.00019999841384294874, - "loss": 46.0, - "step": 11143 - }, - { - "epoch": 1.79463746527638, - "grad_norm": 0.0020068504381924868, - "learning_rate": 0.00019999841355798975, - "loss": 46.0, - "step": 11144 - }, - { - "epoch": 1.7947985023551674, - "grad_norm": 0.0008990314672701061, - "learning_rate": 0.0001999984132730052, - "loss": 46.0, - "step": 11145 - }, - { - "epoch": 1.7949595394339548, - "grad_norm": 0.005422293208539486, - "learning_rate": 0.00019999841298799504, - "loss": 46.0, - "step": 11146 - }, - { - "epoch": 1.795120576512742, - "grad_norm": 0.0032308408990502357, - "learning_rate": 0.00019999841270295932, - "loss": 46.0, - "step": 11147 - }, - { - "epoch": 1.7952816135915295, - "grad_norm": 0.002076471922919154, - "learning_rate": 0.00019999841241789796, - "loss": 46.0, - "step": 11148 - }, - { - "epoch": 1.7954426506703167, - "grad_norm": 0.012175084091722965, - "learning_rate": 0.00019999841213281104, - "loss": 46.0, - "step": 11149 - }, - { - "epoch": 1.7956036877491042, - "grad_norm": 0.0008831815794110298, - "learning_rate": 0.00019999841184769847, - "loss": 46.0, - "step": 11150 - }, - { - "epoch": 1.7957647248278916, - "grad_norm": 0.0016825733473524451, - "learning_rate": 0.00019999841156256035, - "loss": 46.0, - "step": 11151 - }, - { - "epoch": 1.795925761906679, - "grad_norm": 0.001760553102940321, - "learning_rate": 0.0001999984112773966, - "loss": 46.0, - "step": 11152 - }, - { - "epoch": 1.7960867989854665, - "grad_norm": 0.0017452806932851672, - "learning_rate": 0.00019999841099220734, - "loss": 46.0, - "step": 11153 - }, - { - "epoch": 1.7962478360642538, - "grad_norm": 0.0019896673038601875, - "learning_rate": 0.0001999984107069924, - "loss": 46.0, - "step": 11154 - }, - { - "epoch": 1.7964088731430412, - "grad_norm": 0.008706822991371155, - "learning_rate": 0.0001999984104217519, - "loss": 46.0, - "step": 11155 - }, - { - "epoch": 1.7965699102218284, - "grad_norm": 0.0017248884541913867, - "learning_rate": 0.0001999984101364858, - "loss": 46.0, - "step": 11156 - }, - { - "epoch": 1.7967309473006159, - "grad_norm": 0.001512069022282958, - "learning_rate": 0.00019999840985119409, - "loss": 46.0, - "step": 11157 - }, - { - "epoch": 1.7968919843794033, - "grad_norm": 0.002633160911500454, - "learning_rate": 0.0001999984095658768, - "loss": 46.0, - "step": 11158 - }, - { - "epoch": 1.7970530214581908, - "grad_norm": 0.004081765189766884, - "learning_rate": 0.00019999840928053392, - "loss": 46.0, - "step": 11159 - }, - { - "epoch": 1.7972140585369782, - "grad_norm": 0.0039664460346102715, - "learning_rate": 0.00019999840899516543, - "loss": 46.0, - "step": 11160 - }, - { - "epoch": 1.7973750956157657, - "grad_norm": 0.003466918133199215, - "learning_rate": 0.00019999840870977135, - "loss": 46.0, - "step": 11161 - }, - { - "epoch": 1.797536132694553, - "grad_norm": 0.0006607816321775317, - "learning_rate": 0.00019999840842435172, - "loss": 46.0, - "step": 11162 - }, - { - "epoch": 1.7976971697733402, - "grad_norm": 0.00045749059063382447, - "learning_rate": 0.00019999840813890644, - "loss": 46.0, - "step": 11163 - }, - { - "epoch": 1.7978582068521276, - "grad_norm": 0.0013399996096268296, - "learning_rate": 0.0001999984078534356, - "loss": 46.0, - "step": 11164 - }, - { - "epoch": 1.798019243930915, - "grad_norm": 0.0010656245285645127, - "learning_rate": 0.00019999840756793912, - "loss": 46.0, - "step": 11165 - }, - { - "epoch": 1.7981802810097025, - "grad_norm": 0.0008989977068267763, - "learning_rate": 0.00019999840728241708, - "loss": 46.0, - "step": 11166 - }, - { - "epoch": 1.79834131808849, - "grad_norm": 0.003475752891972661, - "learning_rate": 0.00019999840699686945, - "loss": 46.0, - "step": 11167 - }, - { - "epoch": 1.7985023551672774, - "grad_norm": 0.006849256344139576, - "learning_rate": 0.0001999984067112962, - "loss": 46.0, - "step": 11168 - }, - { - "epoch": 1.7986633922460646, - "grad_norm": 0.003347514197230339, - "learning_rate": 0.00019999840642569738, - "loss": 46.0, - "step": 11169 - }, - { - "epoch": 1.798824429324852, - "grad_norm": 0.0012060721637681127, - "learning_rate": 0.00019999840614007294, - "loss": 46.0, - "step": 11170 - }, - { - "epoch": 1.7989854664036393, - "grad_norm": 0.003787968773394823, - "learning_rate": 0.00019999840585442296, - "loss": 46.0, - "step": 11171 - }, - { - "epoch": 1.7991465034824268, - "grad_norm": 0.0014672133838757873, - "learning_rate": 0.00019999840556874735, - "loss": 46.0, - "step": 11172 - }, - { - "epoch": 1.7993075405612142, - "grad_norm": 0.00232651992700994, - "learning_rate": 0.00019999840528304611, - "loss": 46.0, - "step": 11173 - }, - { - "epoch": 1.7994685776400017, - "grad_norm": 0.0010064038215205073, - "learning_rate": 0.00019999840499731932, - "loss": 46.0, - "step": 11174 - }, - { - "epoch": 1.7996296147187891, - "grad_norm": 0.0007844401989132166, - "learning_rate": 0.00019999840471156692, - "loss": 46.0, - "step": 11175 - }, - { - "epoch": 1.7997906517975764, - "grad_norm": 0.00077760562999174, - "learning_rate": 0.00019999840442578895, - "loss": 46.0, - "step": 11176 - }, - { - "epoch": 1.7999516888763638, - "grad_norm": 0.0030735102482140064, - "learning_rate": 0.00019999840413998534, - "loss": 46.0, - "step": 11177 - }, - { - "epoch": 1.800112725955151, - "grad_norm": 0.0036428668536245823, - "learning_rate": 0.00019999840385415615, - "loss": 46.0, - "step": 11178 - }, - { - "epoch": 1.8002737630339385, - "grad_norm": 0.0007202904671430588, - "learning_rate": 0.0001999984035683014, - "loss": 46.0, - "step": 11179 - }, - { - "epoch": 1.800434800112726, - "grad_norm": 0.008539890870451927, - "learning_rate": 0.000199998403282421, - "loss": 46.0, - "step": 11180 - }, - { - "epoch": 1.8005958371915134, - "grad_norm": 0.008461201563477516, - "learning_rate": 0.00019999840299651507, - "loss": 46.0, - "step": 11181 - }, - { - "epoch": 1.8007568742703008, - "grad_norm": 0.004049209877848625, - "learning_rate": 0.0001999984027105835, - "loss": 46.0, - "step": 11182 - }, - { - "epoch": 1.800917911349088, - "grad_norm": 0.0008900587563402951, - "learning_rate": 0.00019999840242462634, - "loss": 46.0, - "step": 11183 - }, - { - "epoch": 1.8010789484278755, - "grad_norm": 0.0012589116813614964, - "learning_rate": 0.0001999984021386436, - "loss": 46.0, - "step": 11184 - }, - { - "epoch": 1.8012399855066628, - "grad_norm": 0.001192400697618723, - "learning_rate": 0.00019999840185263527, - "loss": 46.0, - "step": 11185 - }, - { - "epoch": 1.8014010225854502, - "grad_norm": 0.004492586944252253, - "learning_rate": 0.00019999840156660135, - "loss": 46.0, - "step": 11186 - }, - { - "epoch": 1.8015620596642377, - "grad_norm": 0.0010713412193581462, - "learning_rate": 0.00019999840128054181, - "loss": 46.0, - "step": 11187 - }, - { - "epoch": 1.801723096743025, - "grad_norm": 0.004886297509074211, - "learning_rate": 0.0001999984009944567, - "loss": 46.0, - "step": 11188 - }, - { - "epoch": 1.8018841338218126, - "grad_norm": 0.00322319520637393, - "learning_rate": 0.00019999840070834596, - "loss": 46.0, - "step": 11189 - }, - { - "epoch": 1.8020451709006, - "grad_norm": 0.0016112119192257524, - "learning_rate": 0.00019999840042220966, - "loss": 46.0, - "step": 11190 - }, - { - "epoch": 1.8022062079793872, - "grad_norm": 0.0011197234271094203, - "learning_rate": 0.00019999840013604773, - "loss": 46.0, - "step": 11191 - }, - { - "epoch": 1.8023672450581747, - "grad_norm": 0.0025336844846606255, - "learning_rate": 0.00019999839984986023, - "loss": 46.0, - "step": 11192 - }, - { - "epoch": 1.802528282136962, - "grad_norm": 0.0019435323774814606, - "learning_rate": 0.00019999839956364715, - "loss": 46.0, - "step": 11193 - }, - { - "epoch": 1.8026893192157494, - "grad_norm": 0.005620239768177271, - "learning_rate": 0.00019999839927740848, - "loss": 46.0, - "step": 11194 - }, - { - "epoch": 1.8028503562945368, - "grad_norm": 0.006061875261366367, - "learning_rate": 0.00019999839899114417, - "loss": 46.0, - "step": 11195 - }, - { - "epoch": 1.8030113933733243, - "grad_norm": 0.0011227671056985855, - "learning_rate": 0.0001999983987048543, - "loss": 46.0, - "step": 11196 - }, - { - "epoch": 1.8031724304521117, - "grad_norm": 0.00596654461696744, - "learning_rate": 0.00019999839841853883, - "loss": 46.0, - "step": 11197 - }, - { - "epoch": 1.803333467530899, - "grad_norm": 0.0009404318989254534, - "learning_rate": 0.00019999839813219773, - "loss": 46.0, - "step": 11198 - }, - { - "epoch": 1.8034945046096864, - "grad_norm": 0.0018894915701821446, - "learning_rate": 0.00019999839784583107, - "loss": 46.0, - "step": 11199 - }, - { - "epoch": 1.8036555416884736, - "grad_norm": 0.0013029336696490645, - "learning_rate": 0.00019999839755943883, - "loss": 46.0, - "step": 11200 - }, - { - "epoch": 1.803816578767261, - "grad_norm": 0.006391707342118025, - "learning_rate": 0.00019999839727302096, - "loss": 46.0, - "step": 11201 - }, - { - "epoch": 1.8039776158460485, - "grad_norm": 0.001937276916578412, - "learning_rate": 0.00019999839698657751, - "loss": 46.0, - "step": 11202 - }, - { - "epoch": 1.804138652924836, - "grad_norm": 0.004002357367426157, - "learning_rate": 0.0001999983967001085, - "loss": 46.0, - "step": 11203 - }, - { - "epoch": 1.8042996900036234, - "grad_norm": 0.0018983158515766263, - "learning_rate": 0.00019999839641361385, - "loss": 46.0, - "step": 11204 - }, - { - "epoch": 1.8044607270824107, - "grad_norm": 0.0010678655235096812, - "learning_rate": 0.00019999839612709362, - "loss": 46.0, - "step": 11205 - }, - { - "epoch": 1.8046217641611981, - "grad_norm": 0.002290706615895033, - "learning_rate": 0.0001999983958405478, - "loss": 46.0, - "step": 11206 - }, - { - "epoch": 1.8047828012399854, - "grad_norm": 0.0022672186605632305, - "learning_rate": 0.00019999839555397638, - "loss": 46.0, - "step": 11207 - }, - { - "epoch": 1.8049438383187728, - "grad_norm": 0.007677840534597635, - "learning_rate": 0.00019999839526737935, - "loss": 46.0, - "step": 11208 - }, - { - "epoch": 1.8051048753975603, - "grad_norm": 0.003321537747979164, - "learning_rate": 0.00019999839498075677, - "loss": 46.0, - "step": 11209 - }, - { - "epoch": 1.8052659124763477, - "grad_norm": 0.0005885874852538109, - "learning_rate": 0.00019999839469410854, - "loss": 46.0, - "step": 11210 - }, - { - "epoch": 1.8054269495551352, - "grad_norm": 0.0008507912280037999, - "learning_rate": 0.00019999839440743475, - "loss": 46.0, - "step": 11211 - }, - { - "epoch": 1.8055879866339226, - "grad_norm": 0.009996253065764904, - "learning_rate": 0.00019999839412073537, - "loss": 46.0, - "step": 11212 - }, - { - "epoch": 1.8057490237127098, - "grad_norm": 0.0023658068384975195, - "learning_rate": 0.00019999839383401039, - "loss": 46.0, - "step": 11213 - }, - { - "epoch": 1.8059100607914973, - "grad_norm": 0.0008899774984456599, - "learning_rate": 0.0001999983935472598, - "loss": 46.0, - "step": 11214 - }, - { - "epoch": 1.8060710978702845, - "grad_norm": 0.0015827525639906526, - "learning_rate": 0.00019999839326048362, - "loss": 46.0, - "step": 11215 - }, - { - "epoch": 1.806232134949072, - "grad_norm": 0.0017011427553370595, - "learning_rate": 0.00019999839297368187, - "loss": 46.0, - "step": 11216 - }, - { - "epoch": 1.8063931720278594, - "grad_norm": 0.0013393504777923226, - "learning_rate": 0.0001999983926868545, - "loss": 46.0, - "step": 11217 - }, - { - "epoch": 1.8065542091066469, - "grad_norm": 0.000996196991764009, - "learning_rate": 0.00019999839240000152, - "loss": 46.0, - "step": 11218 - }, - { - "epoch": 1.8067152461854343, - "grad_norm": 0.004124980419874191, - "learning_rate": 0.00019999839211312298, - "loss": 46.0, - "step": 11219 - }, - { - "epoch": 1.8068762832642216, - "grad_norm": 0.0042527890764176846, - "learning_rate": 0.00019999839182621886, - "loss": 46.0, - "step": 11220 - }, - { - "epoch": 1.807037320343009, - "grad_norm": 0.001644301344640553, - "learning_rate": 0.0001999983915392891, - "loss": 46.0, - "step": 11221 - }, - { - "epoch": 1.8071983574217962, - "grad_norm": 0.006192200351506472, - "learning_rate": 0.00019999839125233376, - "loss": 46.0, - "step": 11222 - }, - { - "epoch": 1.8073593945005837, - "grad_norm": 0.010850329883396626, - "learning_rate": 0.00019999839096535282, - "loss": 46.0, - "step": 11223 - }, - { - "epoch": 1.8075204315793711, - "grad_norm": 0.0019111887086182833, - "learning_rate": 0.0001999983906783463, - "loss": 46.0, - "step": 11224 - }, - { - "epoch": 1.8076814686581586, - "grad_norm": 0.0009883996099233627, - "learning_rate": 0.00019999839039131417, - "loss": 46.0, - "step": 11225 - }, - { - "epoch": 1.807842505736946, - "grad_norm": 0.001829432207159698, - "learning_rate": 0.00019999839010425647, - "loss": 46.0, - "step": 11226 - }, - { - "epoch": 1.8080035428157333, - "grad_norm": 0.0009486796916462481, - "learning_rate": 0.00019999838981717315, - "loss": 46.0, - "step": 11227 - }, - { - "epoch": 1.8081645798945207, - "grad_norm": 0.007451027166098356, - "learning_rate": 0.00019999838953006424, - "loss": 46.0, - "step": 11228 - }, - { - "epoch": 1.808325616973308, - "grad_norm": 0.0038440402131527662, - "learning_rate": 0.00019999838924292975, - "loss": 46.0, - "step": 11229 - }, - { - "epoch": 1.8084866540520954, - "grad_norm": 0.0021127837244421244, - "learning_rate": 0.00019999838895576967, - "loss": 46.0, - "step": 11230 - }, - { - "epoch": 1.8086476911308829, - "grad_norm": 0.0008059654501266778, - "learning_rate": 0.00019999838866858398, - "loss": 46.0, - "step": 11231 - }, - { - "epoch": 1.8088087282096703, - "grad_norm": 0.0007508113048970699, - "learning_rate": 0.0001999983883813727, - "loss": 46.0, - "step": 11232 - }, - { - "epoch": 1.8089697652884578, - "grad_norm": 0.0016701332060620189, - "learning_rate": 0.00019999838809413585, - "loss": 46.0, - "step": 11233 - }, - { - "epoch": 1.8091308023672452, - "grad_norm": 0.0005689797108061612, - "learning_rate": 0.00019999838780687337, - "loss": 46.0, - "step": 11234 - }, - { - "epoch": 1.8092918394460324, - "grad_norm": 0.0019886535592377186, - "learning_rate": 0.0001999983875195853, - "loss": 46.0, - "step": 11235 - }, - { - "epoch": 1.8094528765248197, - "grad_norm": 0.003964284434914589, - "learning_rate": 0.00019999838723227164, - "loss": 46.0, - "step": 11236 - }, - { - "epoch": 1.8096139136036071, - "grad_norm": 0.002529874909669161, - "learning_rate": 0.0001999983869449324, - "loss": 46.0, - "step": 11237 - }, - { - "epoch": 1.8097749506823946, - "grad_norm": 0.0005866981227882206, - "learning_rate": 0.00019999838665756754, - "loss": 46.0, - "step": 11238 - }, - { - "epoch": 1.809935987761182, - "grad_norm": 0.0005656818393617868, - "learning_rate": 0.0001999983863701771, - "loss": 46.0, - "step": 11239 - }, - { - "epoch": 1.8100970248399695, - "grad_norm": 0.0052567641250789165, - "learning_rate": 0.00019999838608276106, - "loss": 46.0, - "step": 11240 - }, - { - "epoch": 1.810258061918757, - "grad_norm": 0.0016020999755710363, - "learning_rate": 0.00019999838579531942, - "loss": 46.0, - "step": 11241 - }, - { - "epoch": 1.8104190989975442, - "grad_norm": 0.012540334835648537, - "learning_rate": 0.0001999983855078522, - "loss": 46.0, - "step": 11242 - }, - { - "epoch": 1.8105801360763316, - "grad_norm": 0.005992401856929064, - "learning_rate": 0.0001999983852203594, - "loss": 46.0, - "step": 11243 - }, - { - "epoch": 1.8107411731551188, - "grad_norm": 0.0042040590196847916, - "learning_rate": 0.00019999838493284098, - "loss": 46.0, - "step": 11244 - }, - { - "epoch": 1.8109022102339063, - "grad_norm": 0.0014169916976243258, - "learning_rate": 0.00019999838464529696, - "loss": 46.0, - "step": 11245 - }, - { - "epoch": 1.8110632473126937, - "grad_norm": 0.00492845568805933, - "learning_rate": 0.00019999838435772737, - "loss": 46.0, - "step": 11246 - }, - { - "epoch": 1.8112242843914812, - "grad_norm": 0.0024931305088102818, - "learning_rate": 0.00019999838407013218, - "loss": 46.0, - "step": 11247 - }, - { - "epoch": 1.8113853214702687, - "grad_norm": 0.0008138782577589154, - "learning_rate": 0.00019999838378251136, - "loss": 46.0, - "step": 11248 - }, - { - "epoch": 1.8115463585490559, - "grad_norm": 0.0016083779046311975, - "learning_rate": 0.000199998383494865, - "loss": 46.0, - "step": 11249 - }, - { - "epoch": 1.8117073956278433, - "grad_norm": 0.0009699150687083602, - "learning_rate": 0.000199998383207193, - "loss": 46.0, - "step": 11250 - }, - { - "epoch": 1.8118684327066306, - "grad_norm": 0.0062147765420377254, - "learning_rate": 0.00019999838291949543, - "loss": 46.0, - "step": 11251 - }, - { - "epoch": 1.812029469785418, - "grad_norm": 0.0003888392820954323, - "learning_rate": 0.00019999838263177227, - "loss": 46.0, - "step": 11252 - }, - { - "epoch": 1.8121905068642055, - "grad_norm": 0.0010161014506593347, - "learning_rate": 0.0001999983823440235, - "loss": 46.0, - "step": 11253 - }, - { - "epoch": 1.812351543942993, - "grad_norm": 0.0005654977867379785, - "learning_rate": 0.00019999838205624914, - "loss": 46.0, - "step": 11254 - }, - { - "epoch": 1.8125125810217804, - "grad_norm": 0.00197664275765419, - "learning_rate": 0.00019999838176844922, - "loss": 46.0, - "step": 11255 - }, - { - "epoch": 1.8126736181005678, - "grad_norm": 0.001464974251575768, - "learning_rate": 0.00019999838148062365, - "loss": 46.0, - "step": 11256 - }, - { - "epoch": 1.812834655179355, - "grad_norm": 0.0038541234098374844, - "learning_rate": 0.0001999983811927725, - "loss": 46.0, - "step": 11257 - }, - { - "epoch": 1.8129956922581423, - "grad_norm": 0.0004624953435268253, - "learning_rate": 0.00019999838090489576, - "loss": 46.0, - "step": 11258 - }, - { - "epoch": 1.8131567293369297, - "grad_norm": 0.005003497004508972, - "learning_rate": 0.00019999838061699344, - "loss": 46.0, - "step": 11259 - }, - { - "epoch": 1.8133177664157172, - "grad_norm": 0.0018303110264241695, - "learning_rate": 0.00019999838032906553, - "loss": 46.0, - "step": 11260 - }, - { - "epoch": 1.8134788034945046, - "grad_norm": 0.000560486689209938, - "learning_rate": 0.000199998380041112, - "loss": 46.0, - "step": 11261 - }, - { - "epoch": 1.813639840573292, - "grad_norm": 0.0006303409463725984, - "learning_rate": 0.0001999983797531329, - "loss": 46.0, - "step": 11262 - }, - { - "epoch": 1.8138008776520795, - "grad_norm": 0.000938627403229475, - "learning_rate": 0.00019999837946512816, - "loss": 46.0, - "step": 11263 - }, - { - "epoch": 1.8139619147308668, - "grad_norm": 0.0007530644070357084, - "learning_rate": 0.00019999837917709787, - "loss": 46.0, - "step": 11264 - }, - { - "epoch": 1.8141229518096542, - "grad_norm": 0.00154742575250566, - "learning_rate": 0.00019999837888904197, - "loss": 46.0, - "step": 11265 - }, - { - "epoch": 1.8142839888884414, - "grad_norm": 0.0017158687114715576, - "learning_rate": 0.00019999837860096048, - "loss": 46.0, - "step": 11266 - }, - { - "epoch": 1.814445025967229, - "grad_norm": 0.0024674772284924984, - "learning_rate": 0.0001999983783128534, - "loss": 46.0, - "step": 11267 - }, - { - "epoch": 1.8146060630460163, - "grad_norm": 0.0018584816716611385, - "learning_rate": 0.00019999837802472072, - "loss": 46.0, - "step": 11268 - }, - { - "epoch": 1.8147671001248038, - "grad_norm": 0.002340343315154314, - "learning_rate": 0.00019999837773656244, - "loss": 46.0, - "step": 11269 - }, - { - "epoch": 1.8149281372035913, - "grad_norm": 0.001429402851499617, - "learning_rate": 0.00019999837744837857, - "loss": 46.0, - "step": 11270 - }, - { - "epoch": 1.8150891742823785, - "grad_norm": 0.0009373221546411514, - "learning_rate": 0.0001999983771601691, - "loss": 46.0, - "step": 11271 - }, - { - "epoch": 1.815250211361166, - "grad_norm": 0.00586380809545517, - "learning_rate": 0.00019999837687193406, - "loss": 46.0, - "step": 11272 - }, - { - "epoch": 1.8154112484399532, - "grad_norm": 0.0020879872608929873, - "learning_rate": 0.0001999983765836734, - "loss": 46.0, - "step": 11273 - }, - { - "epoch": 1.8155722855187406, - "grad_norm": 0.0012953775003552437, - "learning_rate": 0.00019999837629538716, - "loss": 46.0, - "step": 11274 - }, - { - "epoch": 1.815733322597528, - "grad_norm": 0.0011106071760877967, - "learning_rate": 0.0001999983760070753, - "loss": 46.0, - "step": 11275 - }, - { - "epoch": 1.8158943596763155, - "grad_norm": 0.0014667804352939129, - "learning_rate": 0.00019999837571873784, - "loss": 46.0, - "step": 11276 - }, - { - "epoch": 1.816055396755103, - "grad_norm": 0.000782331801019609, - "learning_rate": 0.00019999837543037484, - "loss": 46.0, - "step": 11277 - }, - { - "epoch": 1.8162164338338902, - "grad_norm": 0.001211573020555079, - "learning_rate": 0.00019999837514198622, - "loss": 46.0, - "step": 11278 - }, - { - "epoch": 1.8163774709126776, - "grad_norm": 0.02746388502418995, - "learning_rate": 0.000199998374853572, - "loss": 46.0, - "step": 11279 - }, - { - "epoch": 1.8165385079914649, - "grad_norm": 0.0011048989836126566, - "learning_rate": 0.00019999837456513217, - "loss": 46.0, - "step": 11280 - }, - { - "epoch": 1.8166995450702523, - "grad_norm": 0.0009253582684323192, - "learning_rate": 0.00019999837427666677, - "loss": 46.0, - "step": 11281 - }, - { - "epoch": 1.8168605821490398, - "grad_norm": 0.0002986912149935961, - "learning_rate": 0.00019999837398817575, - "loss": 46.0, - "step": 11282 - }, - { - "epoch": 1.8170216192278272, - "grad_norm": 0.0059528895653784275, - "learning_rate": 0.00019999837369965917, - "loss": 46.0, - "step": 11283 - }, - { - "epoch": 1.8171826563066147, - "grad_norm": 0.0021651028655469418, - "learning_rate": 0.00019999837341111697, - "loss": 46.0, - "step": 11284 - }, - { - "epoch": 1.8173436933854021, - "grad_norm": 0.002069145906716585, - "learning_rate": 0.0001999983731225492, - "loss": 46.0, - "step": 11285 - }, - { - "epoch": 1.8175047304641894, - "grad_norm": 0.0011010889429599047, - "learning_rate": 0.0001999983728339558, - "loss": 46.0, - "step": 11286 - }, - { - "epoch": 1.8176657675429768, - "grad_norm": 0.0021088651847094297, - "learning_rate": 0.00019999837254533684, - "loss": 46.0, - "step": 11287 - }, - { - "epoch": 1.817826804621764, - "grad_norm": 0.0018376075895503163, - "learning_rate": 0.00019999837225669227, - "loss": 46.0, - "step": 11288 - }, - { - "epoch": 1.8179878417005515, - "grad_norm": 0.003436165861785412, - "learning_rate": 0.00019999837196802212, - "loss": 46.0, - "step": 11289 - }, - { - "epoch": 1.818148878779339, - "grad_norm": 0.001971396617591381, - "learning_rate": 0.00019999837167932635, - "loss": 46.0, - "step": 11290 - }, - { - "epoch": 1.8183099158581264, - "grad_norm": 0.0022066179662942886, - "learning_rate": 0.000199998371390605, - "loss": 46.0, - "step": 11291 - }, - { - "epoch": 1.8184709529369139, - "grad_norm": 0.0029349513351917267, - "learning_rate": 0.00019999837110185804, - "loss": 46.0, - "step": 11292 - }, - { - "epoch": 1.818631990015701, - "grad_norm": 0.0008590839570388198, - "learning_rate": 0.0001999983708130855, - "loss": 46.0, - "step": 11293 - }, - { - "epoch": 1.8187930270944885, - "grad_norm": 0.0011101976269856095, - "learning_rate": 0.00019999837052428736, - "loss": 46.0, - "step": 11294 - }, - { - "epoch": 1.8189540641732758, - "grad_norm": 0.0019354196265339851, - "learning_rate": 0.00019999837023546363, - "loss": 46.0, - "step": 11295 - }, - { - "epoch": 1.8191151012520632, - "grad_norm": 0.0041565727442502975, - "learning_rate": 0.0001999983699466143, - "loss": 46.0, - "step": 11296 - }, - { - "epoch": 1.8192761383308507, - "grad_norm": 0.0008438201039098203, - "learning_rate": 0.0001999983696577394, - "loss": 46.0, - "step": 11297 - }, - { - "epoch": 1.8194371754096381, - "grad_norm": 0.000578771170694381, - "learning_rate": 0.00019999836936883888, - "loss": 46.0, - "step": 11298 - }, - { - "epoch": 1.8195982124884256, - "grad_norm": 0.006573663093149662, - "learning_rate": 0.00019999836907991277, - "loss": 46.0, - "step": 11299 - }, - { - "epoch": 1.8197592495672128, - "grad_norm": 0.0014425547560676932, - "learning_rate": 0.00019999836879096105, - "loss": 46.0, - "step": 11300 - }, - { - "epoch": 1.8199202866460003, - "grad_norm": 0.002111180918291211, - "learning_rate": 0.00019999836850198376, - "loss": 46.0, - "step": 11301 - }, - { - "epoch": 1.8200813237247875, - "grad_norm": 0.000962501741014421, - "learning_rate": 0.00019999836821298086, - "loss": 46.0, - "step": 11302 - }, - { - "epoch": 1.820242360803575, - "grad_norm": 0.0017520018154755235, - "learning_rate": 0.00019999836792395238, - "loss": 46.0, - "step": 11303 - }, - { - "epoch": 1.8204033978823624, - "grad_norm": 0.003518523881211877, - "learning_rate": 0.0001999983676348983, - "loss": 46.0, - "step": 11304 - }, - { - "epoch": 1.8205644349611498, - "grad_norm": 0.005110064055770636, - "learning_rate": 0.00019999836734581862, - "loss": 46.0, - "step": 11305 - }, - { - "epoch": 1.8207254720399373, - "grad_norm": 0.0010770312510430813, - "learning_rate": 0.00019999836705671337, - "loss": 46.0, - "step": 11306 - }, - { - "epoch": 1.8208865091187247, - "grad_norm": 0.003938900772482157, - "learning_rate": 0.00019999836676758248, - "loss": 46.0, - "step": 11307 - }, - { - "epoch": 1.821047546197512, - "grad_norm": 0.004902741406112909, - "learning_rate": 0.00019999836647842603, - "loss": 46.0, - "step": 11308 - }, - { - "epoch": 1.8212085832762994, - "grad_norm": 0.001318879658356309, - "learning_rate": 0.000199998366189244, - "loss": 46.0, - "step": 11309 - }, - { - "epoch": 1.8213696203550866, - "grad_norm": 0.0043305763974785805, - "learning_rate": 0.00019999836590003632, - "loss": 46.0, - "step": 11310 - }, - { - "epoch": 1.821530657433874, - "grad_norm": 0.008512002415955067, - "learning_rate": 0.00019999836561080309, - "loss": 46.0, - "step": 11311 - }, - { - "epoch": 1.8216916945126616, - "grad_norm": 0.0012935369741171598, - "learning_rate": 0.00019999836532154426, - "loss": 46.0, - "step": 11312 - }, - { - "epoch": 1.821852731591449, - "grad_norm": 0.0015662142541259527, - "learning_rate": 0.00019999836503225985, - "loss": 46.0, - "step": 11313 - }, - { - "epoch": 1.8220137686702365, - "grad_norm": 0.005370223894715309, - "learning_rate": 0.0001999983647429498, - "loss": 46.0, - "step": 11314 - }, - { - "epoch": 1.8221748057490237, - "grad_norm": 0.003937610890716314, - "learning_rate": 0.00019999836445361418, - "loss": 46.0, - "step": 11315 - }, - { - "epoch": 1.8223358428278111, - "grad_norm": 0.000888905196916312, - "learning_rate": 0.00019999836416425298, - "loss": 46.0, - "step": 11316 - }, - { - "epoch": 1.8224968799065984, - "grad_norm": 0.0008690909016877413, - "learning_rate": 0.00019999836387486614, - "loss": 46.0, - "step": 11317 - }, - { - "epoch": 1.8226579169853858, - "grad_norm": 0.005930494517087936, - "learning_rate": 0.00019999836358545374, - "loss": 46.0, - "step": 11318 - }, - { - "epoch": 1.8228189540641733, - "grad_norm": 0.0005782339721918106, - "learning_rate": 0.00019999836329601575, - "loss": 46.0, - "step": 11319 - }, - { - "epoch": 1.8229799911429607, - "grad_norm": 0.006285279057919979, - "learning_rate": 0.00019999836300655218, - "loss": 46.0, - "step": 11320 - }, - { - "epoch": 1.8231410282217482, - "grad_norm": 0.0019795040134340525, - "learning_rate": 0.000199998362717063, - "loss": 46.0, - "step": 11321 - }, - { - "epoch": 1.8233020653005354, - "grad_norm": 0.0017771889688447118, - "learning_rate": 0.00019999836242754818, - "loss": 46.0, - "step": 11322 - }, - { - "epoch": 1.8234631023793229, - "grad_norm": 0.0016590761952102184, - "learning_rate": 0.00019999836213800782, - "loss": 46.0, - "step": 11323 - }, - { - "epoch": 1.82362413945811, - "grad_norm": 0.0014575679088011384, - "learning_rate": 0.00019999836184844184, - "loss": 46.0, - "step": 11324 - }, - { - "epoch": 1.8237851765368975, - "grad_norm": 0.0008977229590527713, - "learning_rate": 0.00019999836155885027, - "loss": 46.0, - "step": 11325 - }, - { - "epoch": 1.823946213615685, - "grad_norm": 0.010617556981742382, - "learning_rate": 0.00019999836126923312, - "loss": 46.0, - "step": 11326 - }, - { - "epoch": 1.8241072506944724, - "grad_norm": 0.0016906767850741744, - "learning_rate": 0.00019999836097959038, - "loss": 46.0, - "step": 11327 - }, - { - "epoch": 1.8242682877732599, - "grad_norm": 0.0035667498596012592, - "learning_rate": 0.00019999836068992203, - "loss": 46.0, - "step": 11328 - }, - { - "epoch": 1.8244293248520473, - "grad_norm": 0.009043061174452305, - "learning_rate": 0.0001999983604002281, - "loss": 46.0, - "step": 11329 - }, - { - "epoch": 1.8245903619308346, - "grad_norm": 0.007218494080007076, - "learning_rate": 0.00019999836011050856, - "loss": 46.0, - "step": 11330 - }, - { - "epoch": 1.8247513990096218, - "grad_norm": 0.0045862761326134205, - "learning_rate": 0.0001999983598207634, - "loss": 46.0, - "step": 11331 - }, - { - "epoch": 1.8249124360884093, - "grad_norm": 0.0011916913790628314, - "learning_rate": 0.0001999983595309927, - "loss": 46.0, - "step": 11332 - }, - { - "epoch": 1.8250734731671967, - "grad_norm": 0.0031784556340426207, - "learning_rate": 0.0001999983592411964, - "loss": 46.0, - "step": 11333 - }, - { - "epoch": 1.8252345102459842, - "grad_norm": 0.000653648457955569, - "learning_rate": 0.00019999835895137443, - "loss": 46.0, - "step": 11334 - }, - { - "epoch": 1.8253955473247716, - "grad_norm": 0.0008914176723919809, - "learning_rate": 0.00019999835866152697, - "loss": 46.0, - "step": 11335 - }, - { - "epoch": 1.825556584403559, - "grad_norm": 0.000840010994579643, - "learning_rate": 0.00019999835837165386, - "loss": 46.0, - "step": 11336 - }, - { - "epoch": 1.8257176214823463, - "grad_norm": 0.004974375944584608, - "learning_rate": 0.00019999835808175514, - "loss": 46.0, - "step": 11337 - }, - { - "epoch": 1.8258786585611337, - "grad_norm": 0.0030716145411133766, - "learning_rate": 0.00019999835779183086, - "loss": 46.0, - "step": 11338 - }, - { - "epoch": 1.826039695639921, - "grad_norm": 0.0022781628649681807, - "learning_rate": 0.000199998357501881, - "loss": 46.0, - "step": 11339 - }, - { - "epoch": 1.8262007327187084, - "grad_norm": 0.0011669025989249349, - "learning_rate": 0.0001999983572119055, - "loss": 46.0, - "step": 11340 - }, - { - "epoch": 1.8263617697974959, - "grad_norm": 0.003255236893892288, - "learning_rate": 0.00019999835692190442, - "loss": 46.0, - "step": 11341 - }, - { - "epoch": 1.8265228068762833, - "grad_norm": 0.0025436037685722113, - "learning_rate": 0.00019999835663187776, - "loss": 46.0, - "step": 11342 - }, - { - "epoch": 1.8266838439550708, - "grad_norm": 0.0008401311351917684, - "learning_rate": 0.00019999835634182546, - "loss": 46.0, - "step": 11343 - }, - { - "epoch": 1.826844881033858, - "grad_norm": 0.02161703258752823, - "learning_rate": 0.0001999983560517476, - "loss": 46.0, - "step": 11344 - }, - { - "epoch": 1.8270059181126455, - "grad_norm": 0.0020726663060486317, - "learning_rate": 0.00019999835576164416, - "loss": 46.0, - "step": 11345 - }, - { - "epoch": 1.8271669551914327, - "grad_norm": 0.004923627711832523, - "learning_rate": 0.0001999983554715151, - "loss": 46.0, - "step": 11346 - }, - { - "epoch": 1.8273279922702201, - "grad_norm": 0.0007343343459069729, - "learning_rate": 0.00019999835518136048, - "loss": 46.0, - "step": 11347 - }, - { - "epoch": 1.8274890293490076, - "grad_norm": 0.000806807482149452, - "learning_rate": 0.00019999835489118025, - "loss": 46.0, - "step": 11348 - }, - { - "epoch": 1.827650066427795, - "grad_norm": 0.002343647414818406, - "learning_rate": 0.0001999983546009744, - "loss": 46.0, - "step": 11349 - }, - { - "epoch": 1.8278111035065825, - "grad_norm": 0.0006349949981085956, - "learning_rate": 0.000199998354310743, - "loss": 46.0, - "step": 11350 - }, - { - "epoch": 1.82797214058537, - "grad_norm": 0.002084990032017231, - "learning_rate": 0.00019999835402048597, - "loss": 46.0, - "step": 11351 - }, - { - "epoch": 1.8281331776641572, - "grad_norm": 0.0035368534736335278, - "learning_rate": 0.00019999835373020334, - "loss": 46.0, - "step": 11352 - }, - { - "epoch": 1.8282942147429444, - "grad_norm": 0.005981109105050564, - "learning_rate": 0.0001999983534398951, - "loss": 46.0, - "step": 11353 - }, - { - "epoch": 1.8284552518217319, - "grad_norm": 0.0016671504126861691, - "learning_rate": 0.00019999835314956133, - "loss": 46.0, - "step": 11354 - }, - { - "epoch": 1.8286162889005193, - "grad_norm": 0.004126428160816431, - "learning_rate": 0.00019999835285920193, - "loss": 46.0, - "step": 11355 - }, - { - "epoch": 1.8287773259793068, - "grad_norm": 0.001696393359452486, - "learning_rate": 0.00019999835256881695, - "loss": 46.0, - "step": 11356 - }, - { - "epoch": 1.8289383630580942, - "grad_norm": 0.0052888840436935425, - "learning_rate": 0.00019999835227840635, - "loss": 46.0, - "step": 11357 - }, - { - "epoch": 1.8290994001368817, - "grad_norm": 0.002679096534848213, - "learning_rate": 0.0001999983519879702, - "loss": 46.0, - "step": 11358 - }, - { - "epoch": 1.8292604372156689, - "grad_norm": 0.0017173058586195111, - "learning_rate": 0.0001999983516975084, - "loss": 46.0, - "step": 11359 - }, - { - "epoch": 1.8294214742944563, - "grad_norm": 0.00480238813906908, - "learning_rate": 0.00019999835140702103, - "loss": 46.0, - "step": 11360 - }, - { - "epoch": 1.8295825113732436, - "grad_norm": 0.0010833420092239976, - "learning_rate": 0.00019999835111650805, - "loss": 46.0, - "step": 11361 - }, - { - "epoch": 1.829743548452031, - "grad_norm": 0.0047287773340940475, - "learning_rate": 0.00019999835082596952, - "loss": 46.0, - "step": 11362 - }, - { - "epoch": 1.8299045855308185, - "grad_norm": 0.0011713794665411115, - "learning_rate": 0.00019999835053540537, - "loss": 46.0, - "step": 11363 - }, - { - "epoch": 1.830065622609606, - "grad_norm": 0.0070471977815032005, - "learning_rate": 0.0001999983502448156, - "loss": 46.0, - "step": 11364 - }, - { - "epoch": 1.8302266596883934, - "grad_norm": 0.0023480458185076714, - "learning_rate": 0.00019999834995420028, - "loss": 46.0, - "step": 11365 - }, - { - "epoch": 1.8303876967671806, - "grad_norm": 0.0011858188081532717, - "learning_rate": 0.00019999834966355932, - "loss": 46.0, - "step": 11366 - }, - { - "epoch": 1.830548733845968, - "grad_norm": 0.0026653336826711893, - "learning_rate": 0.0001999983493728928, - "loss": 46.0, - "step": 11367 - }, - { - "epoch": 1.8307097709247553, - "grad_norm": 0.002672742586582899, - "learning_rate": 0.00019999834908220068, - "loss": 46.0, - "step": 11368 - }, - { - "epoch": 1.8308708080035427, - "grad_norm": 0.0010761015582829714, - "learning_rate": 0.00019999834879148292, - "loss": 46.0, - "step": 11369 - }, - { - "epoch": 1.8310318450823302, - "grad_norm": 0.011883526109158993, - "learning_rate": 0.00019999834850073964, - "loss": 46.0, - "step": 11370 - }, - { - "epoch": 1.8311928821611176, - "grad_norm": 0.0021962234750390053, - "learning_rate": 0.0001999983482099707, - "loss": 46.0, - "step": 11371 - }, - { - "epoch": 1.831353919239905, - "grad_norm": 0.0008276525768451393, - "learning_rate": 0.00019999834791917622, - "loss": 46.0, - "step": 11372 - }, - { - "epoch": 1.8315149563186925, - "grad_norm": 0.0004925379762426019, - "learning_rate": 0.00019999834762835615, - "loss": 46.0, - "step": 11373 - }, - { - "epoch": 1.8316759933974798, - "grad_norm": 0.0026018882635980844, - "learning_rate": 0.00019999834733751043, - "loss": 46.0, - "step": 11374 - }, - { - "epoch": 1.831837030476267, - "grad_norm": 0.0019435338908806443, - "learning_rate": 0.00019999834704663915, - "loss": 46.0, - "step": 11375 - }, - { - "epoch": 1.8319980675550545, - "grad_norm": 0.0010475931921973825, - "learning_rate": 0.00019999834675574226, - "loss": 46.0, - "step": 11376 - }, - { - "epoch": 1.832159104633842, - "grad_norm": 0.0008468086016364396, - "learning_rate": 0.0001999983464648198, - "loss": 46.0, - "step": 11377 - }, - { - "epoch": 1.8323201417126294, - "grad_norm": 0.004870264325290918, - "learning_rate": 0.00019999834617387172, - "loss": 46.0, - "step": 11378 - }, - { - "epoch": 1.8324811787914168, - "grad_norm": 0.0036125630140304565, - "learning_rate": 0.00019999834588289807, - "loss": 46.0, - "step": 11379 - }, - { - "epoch": 1.8326422158702043, - "grad_norm": 0.0034376876428723335, - "learning_rate": 0.00019999834559189877, - "loss": 46.0, - "step": 11380 - }, - { - "epoch": 1.8328032529489915, - "grad_norm": 0.0023218696005642414, - "learning_rate": 0.00019999834530087395, - "loss": 46.0, - "step": 11381 - }, - { - "epoch": 1.832964290027779, - "grad_norm": 0.0018498097779229283, - "learning_rate": 0.0001999983450098235, - "loss": 46.0, - "step": 11382 - }, - { - "epoch": 1.8331253271065662, - "grad_norm": 0.007345468737185001, - "learning_rate": 0.00019999834471874745, - "loss": 46.0, - "step": 11383 - }, - { - "epoch": 1.8332863641853536, - "grad_norm": 0.002498486777767539, - "learning_rate": 0.00019999834442764583, - "loss": 46.0, - "step": 11384 - }, - { - "epoch": 1.833447401264141, - "grad_norm": 0.003603970631957054, - "learning_rate": 0.00019999834413651858, - "loss": 46.0, - "step": 11385 - }, - { - "epoch": 1.8336084383429285, - "grad_norm": 0.0016164917033165693, - "learning_rate": 0.0001999983438453658, - "loss": 46.0, - "step": 11386 - }, - { - "epoch": 1.833769475421716, - "grad_norm": 0.002884289715439081, - "learning_rate": 0.00019999834355418733, - "loss": 46.0, - "step": 11387 - }, - { - "epoch": 1.8339305125005032, - "grad_norm": 0.004006147384643555, - "learning_rate": 0.0001999983432629833, - "loss": 46.0, - "step": 11388 - }, - { - "epoch": 1.8340915495792907, - "grad_norm": 0.0023818037007004023, - "learning_rate": 0.0001999983429717537, - "loss": 46.0, - "step": 11389 - }, - { - "epoch": 1.8342525866580779, - "grad_norm": 0.0025085010565817356, - "learning_rate": 0.0001999983426804985, - "loss": 46.0, - "step": 11390 - }, - { - "epoch": 1.8344136237368653, - "grad_norm": 0.005747461691498756, - "learning_rate": 0.00019999834238921773, - "loss": 46.0, - "step": 11391 - }, - { - "epoch": 1.8345746608156528, - "grad_norm": 0.0006192016880959272, - "learning_rate": 0.00019999834209791134, - "loss": 46.0, - "step": 11392 - }, - { - "epoch": 1.8347356978944402, - "grad_norm": 0.00795027893036604, - "learning_rate": 0.00019999834180657936, - "loss": 46.0, - "step": 11393 - }, - { - "epoch": 1.8348967349732277, - "grad_norm": 0.003281216137111187, - "learning_rate": 0.00019999834151522173, - "loss": 46.0, - "step": 11394 - }, - { - "epoch": 1.835057772052015, - "grad_norm": 0.0007629744941368699, - "learning_rate": 0.00019999834122383858, - "loss": 46.0, - "step": 11395 - }, - { - "epoch": 1.8352188091308024, - "grad_norm": 0.0009485826594755054, - "learning_rate": 0.0001999983409324298, - "loss": 46.0, - "step": 11396 - }, - { - "epoch": 1.8353798462095896, - "grad_norm": 0.006807301193475723, - "learning_rate": 0.00019999834064099545, - "loss": 46.0, - "step": 11397 - }, - { - "epoch": 1.835540883288377, - "grad_norm": 0.0008100178674794734, - "learning_rate": 0.00019999834034953548, - "loss": 46.0, - "step": 11398 - }, - { - "epoch": 1.8357019203671645, - "grad_norm": 0.0005330259446054697, - "learning_rate": 0.00019999834005804995, - "loss": 46.0, - "step": 11399 - }, - { - "epoch": 1.835862957445952, - "grad_norm": 0.004904255736619234, - "learning_rate": 0.0001999983397665388, - "loss": 46.0, - "step": 11400 - }, - { - "epoch": 1.8360239945247394, - "grad_norm": 0.003417830914258957, - "learning_rate": 0.00019999833947500204, - "loss": 46.0, - "step": 11401 - }, - { - "epoch": 1.8361850316035269, - "grad_norm": 0.0037887864746153355, - "learning_rate": 0.00019999833918343972, - "loss": 46.0, - "step": 11402 - }, - { - "epoch": 1.836346068682314, - "grad_norm": 0.003927609417587519, - "learning_rate": 0.0001999983388918518, - "loss": 46.0, - "step": 11403 - }, - { - "epoch": 1.8365071057611015, - "grad_norm": 0.0014977715909481049, - "learning_rate": 0.00019999833860023824, - "loss": 46.0, - "step": 11404 - }, - { - "epoch": 1.8366681428398888, - "grad_norm": 0.002865336136892438, - "learning_rate": 0.00019999833830859913, - "loss": 46.0, - "step": 11405 - }, - { - "epoch": 1.8368291799186762, - "grad_norm": 0.0008764376980252564, - "learning_rate": 0.0001999983380169344, - "loss": 46.0, - "step": 11406 - }, - { - "epoch": 1.8369902169974637, - "grad_norm": 0.0014379406347870827, - "learning_rate": 0.00019999833772524412, - "loss": 46.0, - "step": 11407 - }, - { - "epoch": 1.8371512540762511, - "grad_norm": 0.0012700404040515423, - "learning_rate": 0.0001999983374335282, - "loss": 46.0, - "step": 11408 - }, - { - "epoch": 1.8373122911550386, - "grad_norm": 0.0008513263310305774, - "learning_rate": 0.00019999833714178672, - "loss": 46.0, - "step": 11409 - }, - { - "epoch": 1.8374733282338258, - "grad_norm": 0.0018022852018475533, - "learning_rate": 0.00019999833685001962, - "loss": 46.0, - "step": 11410 - }, - { - "epoch": 1.8376343653126133, - "grad_norm": 0.0019342032028362155, - "learning_rate": 0.00019999833655822693, - "loss": 46.0, - "step": 11411 - }, - { - "epoch": 1.8377954023914005, - "grad_norm": 0.0008623074973002076, - "learning_rate": 0.00019999833626640866, - "loss": 46.0, - "step": 11412 - }, - { - "epoch": 1.837956439470188, - "grad_norm": 0.005737262777984142, - "learning_rate": 0.00019999833597456477, - "loss": 46.0, - "step": 11413 - }, - { - "epoch": 1.8381174765489754, - "grad_norm": 0.002845275914296508, - "learning_rate": 0.0001999983356826953, - "loss": 46.0, - "step": 11414 - }, - { - "epoch": 1.8382785136277628, - "grad_norm": 0.0012129410170018673, - "learning_rate": 0.00019999833539080023, - "loss": 46.0, - "step": 11415 - }, - { - "epoch": 1.8384395507065503, - "grad_norm": 0.006648307200521231, - "learning_rate": 0.00019999833509887959, - "loss": 46.0, - "step": 11416 - }, - { - "epoch": 1.8386005877853375, - "grad_norm": 0.002591558964923024, - "learning_rate": 0.00019999833480693332, - "loss": 46.0, - "step": 11417 - }, - { - "epoch": 1.838761624864125, - "grad_norm": 0.0012823701836168766, - "learning_rate": 0.00019999833451496147, - "loss": 46.0, - "step": 11418 - }, - { - "epoch": 1.8389226619429122, - "grad_norm": 0.004582727327942848, - "learning_rate": 0.00019999833422296403, - "loss": 46.0, - "step": 11419 - }, - { - "epoch": 1.8390836990216997, - "grad_norm": 0.0009938521543517709, - "learning_rate": 0.00019999833393094098, - "loss": 46.0, - "step": 11420 - }, - { - "epoch": 1.839244736100487, - "grad_norm": 0.007162528112530708, - "learning_rate": 0.00019999833363889237, - "loss": 46.0, - "step": 11421 - }, - { - "epoch": 1.8394057731792746, - "grad_norm": 0.0027463152073323727, - "learning_rate": 0.00019999833334681814, - "loss": 46.0, - "step": 11422 - }, - { - "epoch": 1.839566810258062, - "grad_norm": 0.0008964014705270529, - "learning_rate": 0.00019999833305471833, - "loss": 46.0, - "step": 11423 - }, - { - "epoch": 1.8397278473368495, - "grad_norm": 0.0054879747331142426, - "learning_rate": 0.0001999983327625929, - "loss": 46.0, - "step": 11424 - }, - { - "epoch": 1.8398888844156367, - "grad_norm": 0.0012115861754864454, - "learning_rate": 0.0001999983324704419, - "loss": 46.0, - "step": 11425 - }, - { - "epoch": 1.8400499214944241, - "grad_norm": 0.0011201546294614673, - "learning_rate": 0.0001999983321782653, - "loss": 46.0, - "step": 11426 - }, - { - "epoch": 1.8402109585732114, - "grad_norm": 0.0012666896218433976, - "learning_rate": 0.0001999983318860631, - "loss": 46.0, - "step": 11427 - }, - { - "epoch": 1.8403719956519988, - "grad_norm": 0.0021752959582954645, - "learning_rate": 0.0001999983315938353, - "loss": 46.0, - "step": 11428 - }, - { - "epoch": 1.8405330327307863, - "grad_norm": 0.002495809458196163, - "learning_rate": 0.00019999833130158193, - "loss": 46.0, - "step": 11429 - }, - { - "epoch": 1.8406940698095737, - "grad_norm": 0.0005240380996838212, - "learning_rate": 0.00019999833100930295, - "loss": 46.0, - "step": 11430 - }, - { - "epoch": 1.8408551068883612, - "grad_norm": 0.0048749265260994434, - "learning_rate": 0.00019999833071699836, - "loss": 46.0, - "step": 11431 - }, - { - "epoch": 1.8410161439671484, - "grad_norm": 0.009719732217490673, - "learning_rate": 0.0001999983304246682, - "loss": 46.0, - "step": 11432 - }, - { - "epoch": 1.8411771810459359, - "grad_norm": 0.0030871760100126266, - "learning_rate": 0.00019999833013231242, - "loss": 46.0, - "step": 11433 - }, - { - "epoch": 1.841338218124723, - "grad_norm": 0.0014130567433312535, - "learning_rate": 0.00019999832983993106, - "loss": 46.0, - "step": 11434 - }, - { - "epoch": 1.8414992552035105, - "grad_norm": 0.0034021991305053234, - "learning_rate": 0.00019999832954752412, - "loss": 46.0, - "step": 11435 - }, - { - "epoch": 1.841660292282298, - "grad_norm": 0.0028927046805620193, - "learning_rate": 0.00019999832925509157, - "loss": 46.0, - "step": 11436 - }, - { - "epoch": 1.8418213293610854, - "grad_norm": 0.0019391439855098724, - "learning_rate": 0.00019999832896263342, - "loss": 46.0, - "step": 11437 - }, - { - "epoch": 1.841982366439873, - "grad_norm": 0.004207367543131113, - "learning_rate": 0.00019999832867014967, - "loss": 46.0, - "step": 11438 - }, - { - "epoch": 1.8421434035186601, - "grad_norm": 0.004299505148082972, - "learning_rate": 0.00019999832837764035, - "loss": 46.0, - "step": 11439 - }, - { - "epoch": 1.8423044405974476, - "grad_norm": 0.0037303371354937553, - "learning_rate": 0.00019999832808510545, - "loss": 46.0, - "step": 11440 - }, - { - "epoch": 1.8424654776762348, - "grad_norm": 0.0047846343368291855, - "learning_rate": 0.0001999983277925449, - "loss": 46.0, - "step": 11441 - }, - { - "epoch": 1.8426265147550223, - "grad_norm": 0.0018996148137375712, - "learning_rate": 0.0001999983274999588, - "loss": 46.0, - "step": 11442 - }, - { - "epoch": 1.8427875518338097, - "grad_norm": 0.0009257634519599378, - "learning_rate": 0.00019999832720734708, - "loss": 46.0, - "step": 11443 - }, - { - "epoch": 1.8429485889125972, - "grad_norm": 0.009656638838350773, - "learning_rate": 0.0001999983269147098, - "loss": 46.0, - "step": 11444 - }, - { - "epoch": 1.8431096259913846, - "grad_norm": 0.002508472418412566, - "learning_rate": 0.00019999832662204688, - "loss": 46.0, - "step": 11445 - }, - { - "epoch": 1.843270663070172, - "grad_norm": 0.0007254515076056123, - "learning_rate": 0.00019999832632935837, - "loss": 46.0, - "step": 11446 - }, - { - "epoch": 1.8434317001489593, - "grad_norm": 0.001405279035679996, - "learning_rate": 0.0001999983260366443, - "loss": 46.0, - "step": 11447 - }, - { - "epoch": 1.8435927372277465, - "grad_norm": 0.0017582313157618046, - "learning_rate": 0.00019999832574390462, - "loss": 46.0, - "step": 11448 - }, - { - "epoch": 1.843753774306534, - "grad_norm": 0.0026010593865066767, - "learning_rate": 0.00019999832545113935, - "loss": 46.0, - "step": 11449 - }, - { - "epoch": 1.8439148113853214, - "grad_norm": 0.001315655536018312, - "learning_rate": 0.00019999832515834847, - "loss": 46.0, - "step": 11450 - }, - { - "epoch": 1.8440758484641089, - "grad_norm": 0.0004457289178390056, - "learning_rate": 0.00019999832486553202, - "loss": 46.0, - "step": 11451 - }, - { - "epoch": 1.8442368855428963, - "grad_norm": 0.0066457754001021385, - "learning_rate": 0.00019999832457268996, - "loss": 46.0, - "step": 11452 - }, - { - "epoch": 1.8443979226216838, - "grad_norm": 0.0007113823085092008, - "learning_rate": 0.0001999983242798223, - "loss": 46.0, - "step": 11453 - }, - { - "epoch": 1.844558959700471, - "grad_norm": 0.002563699847087264, - "learning_rate": 0.00019999832398692906, - "loss": 46.0, - "step": 11454 - }, - { - "epoch": 1.8447199967792585, - "grad_norm": 0.009163310751318932, - "learning_rate": 0.0001999983236940102, - "loss": 46.0, - "step": 11455 - }, - { - "epoch": 1.8448810338580457, - "grad_norm": 0.001349274767562747, - "learning_rate": 0.00019999832340106578, - "loss": 46.0, - "step": 11456 - }, - { - "epoch": 1.8450420709368331, - "grad_norm": 0.0012398994294926524, - "learning_rate": 0.00019999832310809573, - "loss": 46.0, - "step": 11457 - }, - { - "epoch": 1.8452031080156206, - "grad_norm": 0.0011812186567112803, - "learning_rate": 0.00019999832281510012, - "loss": 46.0, - "step": 11458 - }, - { - "epoch": 1.845364145094408, - "grad_norm": 0.007003973703831434, - "learning_rate": 0.0001999983225220789, - "loss": 46.0, - "step": 11459 - }, - { - "epoch": 1.8455251821731955, - "grad_norm": 0.0018861133139580488, - "learning_rate": 0.00019999832222903206, - "loss": 46.0, - "step": 11460 - }, - { - "epoch": 1.8456862192519827, - "grad_norm": 0.0014199883444234729, - "learning_rate": 0.00019999832193595967, - "loss": 46.0, - "step": 11461 - }, - { - "epoch": 1.8458472563307702, - "grad_norm": 0.0026348743122071028, - "learning_rate": 0.00019999832164286168, - "loss": 46.0, - "step": 11462 - }, - { - "epoch": 1.8460082934095574, - "grad_norm": 0.0029695951379835606, - "learning_rate": 0.00019999832134973806, - "loss": 46.0, - "step": 11463 - }, - { - "epoch": 1.8461693304883449, - "grad_norm": 0.0029272474348545074, - "learning_rate": 0.00019999832105658887, - "loss": 46.0, - "step": 11464 - }, - { - "epoch": 1.8463303675671323, - "grad_norm": 0.00149630312807858, - "learning_rate": 0.0001999983207634141, - "loss": 46.0, - "step": 11465 - }, - { - "epoch": 1.8464914046459198, - "grad_norm": 0.003263527061790228, - "learning_rate": 0.0001999983204702137, - "loss": 46.0, - "step": 11466 - }, - { - "epoch": 1.8466524417247072, - "grad_norm": 0.0008884789422154427, - "learning_rate": 0.00019999832017698774, - "loss": 46.0, - "step": 11467 - }, - { - "epoch": 1.8468134788034947, - "grad_norm": 0.0009438117849640548, - "learning_rate": 0.00019999831988373615, - "loss": 46.0, - "step": 11468 - }, - { - "epoch": 1.846974515882282, - "grad_norm": 0.0008721146732568741, - "learning_rate": 0.000199998319590459, - "loss": 46.0, - "step": 11469 - }, - { - "epoch": 1.8471355529610691, - "grad_norm": 0.004317587707191706, - "learning_rate": 0.00019999831929715624, - "loss": 46.0, - "step": 11470 - }, - { - "epoch": 1.8472965900398566, - "grad_norm": 0.003280248725786805, - "learning_rate": 0.0001999983190038279, - "loss": 46.0, - "step": 11471 - }, - { - "epoch": 1.847457627118644, - "grad_norm": 0.008520183153450489, - "learning_rate": 0.00019999831871047395, - "loss": 46.0, - "step": 11472 - }, - { - "epoch": 1.8476186641974315, - "grad_norm": 0.0033995364792644978, - "learning_rate": 0.0001999983184170944, - "loss": 46.0, - "step": 11473 - }, - { - "epoch": 1.847779701276219, - "grad_norm": 0.009308294393122196, - "learning_rate": 0.00019999831812368926, - "loss": 46.0, - "step": 11474 - }, - { - "epoch": 1.8479407383550064, - "grad_norm": 0.002007026458159089, - "learning_rate": 0.00019999831783025856, - "loss": 46.0, - "step": 11475 - }, - { - "epoch": 1.8481017754337936, - "grad_norm": 0.00215074117295444, - "learning_rate": 0.00019999831753680222, - "loss": 46.0, - "step": 11476 - }, - { - "epoch": 1.848262812512581, - "grad_norm": 0.0014058294473215938, - "learning_rate": 0.00019999831724332027, - "loss": 46.0, - "step": 11477 - }, - { - "epoch": 1.8484238495913683, - "grad_norm": 0.0022512900177389383, - "learning_rate": 0.00019999831694981278, - "loss": 46.0, - "step": 11478 - }, - { - "epoch": 1.8485848866701557, - "grad_norm": 0.009319338016211987, - "learning_rate": 0.00019999831665627968, - "loss": 46.0, - "step": 11479 - }, - { - "epoch": 1.8487459237489432, - "grad_norm": 0.004338906146585941, - "learning_rate": 0.00019999831636272094, - "loss": 46.0, - "step": 11480 - }, - { - "epoch": 1.8489069608277307, - "grad_norm": 0.00343674561008811, - "learning_rate": 0.0001999983160691367, - "loss": 46.0, - "step": 11481 - }, - { - "epoch": 1.849067997906518, - "grad_norm": 0.000801559304818511, - "learning_rate": 0.0001999983157755268, - "loss": 46.0, - "step": 11482 - }, - { - "epoch": 1.8492290349853053, - "grad_norm": 0.0018976490246132016, - "learning_rate": 0.00019999831548189127, - "loss": 46.0, - "step": 11483 - }, - { - "epoch": 1.8493900720640928, - "grad_norm": 0.001177231315523386, - "learning_rate": 0.00019999831518823023, - "loss": 46.0, - "step": 11484 - }, - { - "epoch": 1.84955110914288, - "grad_norm": 0.0013412476982921362, - "learning_rate": 0.00019999831489454355, - "loss": 46.0, - "step": 11485 - }, - { - "epoch": 1.8497121462216675, - "grad_norm": 0.006405207794159651, - "learning_rate": 0.00019999831460083126, - "loss": 46.0, - "step": 11486 - }, - { - "epoch": 1.849873183300455, - "grad_norm": 0.008552424609661102, - "learning_rate": 0.0001999983143070934, - "loss": 46.0, - "step": 11487 - }, - { - "epoch": 1.8500342203792424, - "grad_norm": 0.0073061492294073105, - "learning_rate": 0.00019999831401332994, - "loss": 46.0, - "step": 11488 - }, - { - "epoch": 1.8501952574580298, - "grad_norm": 0.0010532313026487827, - "learning_rate": 0.00019999831371954088, - "loss": 46.0, - "step": 11489 - }, - { - "epoch": 1.850356294536817, - "grad_norm": 0.0008252942352555692, - "learning_rate": 0.00019999831342572624, - "loss": 46.0, - "step": 11490 - }, - { - "epoch": 1.8505173316156045, - "grad_norm": 0.0019892535638064146, - "learning_rate": 0.00019999831313188599, - "loss": 46.0, - "step": 11491 - }, - { - "epoch": 1.8506783686943917, - "grad_norm": 0.006403793580830097, - "learning_rate": 0.00019999831283802014, - "loss": 46.0, - "step": 11492 - }, - { - "epoch": 1.8508394057731792, - "grad_norm": 0.00657274853438139, - "learning_rate": 0.00019999831254412874, - "loss": 46.0, - "step": 11493 - }, - { - "epoch": 1.8510004428519666, - "grad_norm": 0.001463776221498847, - "learning_rate": 0.0001999983122502117, - "loss": 46.0, - "step": 11494 - }, - { - "epoch": 1.851161479930754, - "grad_norm": 0.0025666444562375546, - "learning_rate": 0.0001999983119562691, - "loss": 46.0, - "step": 11495 - }, - { - "epoch": 1.8513225170095415, - "grad_norm": 0.0010501445503905416, - "learning_rate": 0.00019999831166230087, - "loss": 46.0, - "step": 11496 - }, - { - "epoch": 1.851483554088329, - "grad_norm": 0.001359405112452805, - "learning_rate": 0.00019999831136830707, - "loss": 46.0, - "step": 11497 - }, - { - "epoch": 1.8516445911671162, - "grad_norm": 0.002873091259971261, - "learning_rate": 0.00019999831107428767, - "loss": 46.0, - "step": 11498 - }, - { - "epoch": 1.8518056282459037, - "grad_norm": 0.00535928551107645, - "learning_rate": 0.00019999831078024264, - "loss": 46.0, - "step": 11499 - }, - { - "epoch": 1.851966665324691, - "grad_norm": 0.000866666785441339, - "learning_rate": 0.00019999831048617207, - "loss": 46.0, - "step": 11500 - }, - { - "epoch": 1.8521277024034783, - "grad_norm": 0.002204623306170106, - "learning_rate": 0.0001999983101920759, - "loss": 46.0, - "step": 11501 - }, - { - "epoch": 1.8522887394822658, - "grad_norm": 0.0007331451633945107, - "learning_rate": 0.0001999983098979541, - "loss": 46.0, - "step": 11502 - }, - { - "epoch": 1.8524497765610533, - "grad_norm": 0.0011486758012324572, - "learning_rate": 0.0001999983096038067, - "loss": 46.0, - "step": 11503 - }, - { - "epoch": 1.8526108136398407, - "grad_norm": 0.005773249547928572, - "learning_rate": 0.00019999830930963377, - "loss": 46.0, - "step": 11504 - }, - { - "epoch": 1.852771850718628, - "grad_norm": 0.003237711265683174, - "learning_rate": 0.00019999830901543518, - "loss": 46.0, - "step": 11505 - }, - { - "epoch": 1.8529328877974154, - "grad_norm": 0.0017491430044174194, - "learning_rate": 0.00019999830872121104, - "loss": 46.0, - "step": 11506 - }, - { - "epoch": 1.8530939248762026, - "grad_norm": 0.0005698419408872724, - "learning_rate": 0.00019999830842696128, - "loss": 46.0, - "step": 11507 - }, - { - "epoch": 1.85325496195499, - "grad_norm": 0.005310474429279566, - "learning_rate": 0.00019999830813268593, - "loss": 46.0, - "step": 11508 - }, - { - "epoch": 1.8534159990337775, - "grad_norm": 0.003777619218453765, - "learning_rate": 0.000199998307838385, - "loss": 46.0, - "step": 11509 - }, - { - "epoch": 1.853577036112565, - "grad_norm": 0.003426079172641039, - "learning_rate": 0.00019999830754405845, - "loss": 46.0, - "step": 11510 - }, - { - "epoch": 1.8537380731913524, - "grad_norm": 0.0011892326874658465, - "learning_rate": 0.00019999830724970634, - "loss": 46.0, - "step": 11511 - }, - { - "epoch": 1.8538991102701396, - "grad_norm": 0.0034862798638641834, - "learning_rate": 0.0001999983069553286, - "loss": 46.0, - "step": 11512 - }, - { - "epoch": 1.854060147348927, - "grad_norm": 0.003071578685194254, - "learning_rate": 0.00019999830666092528, - "loss": 46.0, - "step": 11513 - }, - { - "epoch": 1.8542211844277143, - "grad_norm": 0.0022352172527462244, - "learning_rate": 0.00019999830636649636, - "loss": 46.0, - "step": 11514 - }, - { - "epoch": 1.8543822215065018, - "grad_norm": 0.0010983470128849149, - "learning_rate": 0.00019999830607204188, - "loss": 46.0, - "step": 11515 - }, - { - "epoch": 1.8545432585852892, - "grad_norm": 0.012094882316887379, - "learning_rate": 0.00019999830577756175, - "loss": 46.0, - "step": 11516 - }, - { - "epoch": 1.8547042956640767, - "grad_norm": 0.0008304381044581532, - "learning_rate": 0.00019999830548305604, - "loss": 46.0, - "step": 11517 - }, - { - "epoch": 1.8548653327428641, - "grad_norm": 0.0022238020319491625, - "learning_rate": 0.00019999830518852477, - "loss": 46.0, - "step": 11518 - }, - { - "epoch": 1.8550263698216516, - "grad_norm": 0.007429752964526415, - "learning_rate": 0.00019999830489396788, - "loss": 46.0, - "step": 11519 - }, - { - "epoch": 1.8551874069004388, - "grad_norm": 0.005457818508148193, - "learning_rate": 0.0001999983045993854, - "loss": 46.0, - "step": 11520 - }, - { - "epoch": 1.8553484439792263, - "grad_norm": 0.0009096584981307387, - "learning_rate": 0.00019999830430477732, - "loss": 46.0, - "step": 11521 - }, - { - "epoch": 1.8555094810580135, - "grad_norm": 0.0026716883294284344, - "learning_rate": 0.00019999830401014364, - "loss": 46.0, - "step": 11522 - }, - { - "epoch": 1.855670518136801, - "grad_norm": 0.0003413644735701382, - "learning_rate": 0.0001999983037154844, - "loss": 46.0, - "step": 11523 - }, - { - "epoch": 1.8558315552155884, - "grad_norm": 0.0016049012774601579, - "learning_rate": 0.00019999830342079956, - "loss": 46.0, - "step": 11524 - }, - { - "epoch": 1.8559925922943759, - "grad_norm": 0.006110606715083122, - "learning_rate": 0.00019999830312608907, - "loss": 46.0, - "step": 11525 - }, - { - "epoch": 1.8561536293731633, - "grad_norm": 0.002021973952651024, - "learning_rate": 0.00019999830283135304, - "loss": 46.0, - "step": 11526 - }, - { - "epoch": 1.8563146664519505, - "grad_norm": 0.008725442923605442, - "learning_rate": 0.00019999830253659138, - "loss": 46.0, - "step": 11527 - }, - { - "epoch": 1.856475703530738, - "grad_norm": 0.0022730696946382523, - "learning_rate": 0.00019999830224180415, - "loss": 46.0, - "step": 11528 - }, - { - "epoch": 1.8566367406095252, - "grad_norm": 0.00481440918520093, - "learning_rate": 0.00019999830194699131, - "loss": 46.0, - "step": 11529 - }, - { - "epoch": 1.8567977776883127, - "grad_norm": 0.00156695069745183, - "learning_rate": 0.0001999983016521529, - "loss": 46.0, - "step": 11530 - }, - { - "epoch": 1.8569588147671001, - "grad_norm": 0.009470343589782715, - "learning_rate": 0.00019999830135728887, - "loss": 46.0, - "step": 11531 - }, - { - "epoch": 1.8571198518458876, - "grad_norm": 0.0007126287673600018, - "learning_rate": 0.00019999830106239927, - "loss": 46.0, - "step": 11532 - }, - { - "epoch": 1.857280888924675, - "grad_norm": 0.0016426616348326206, - "learning_rate": 0.00019999830076748406, - "loss": 46.0, - "step": 11533 - }, - { - "epoch": 1.8574419260034623, - "grad_norm": 0.0019434961723163724, - "learning_rate": 0.00019999830047254325, - "loss": 46.0, - "step": 11534 - }, - { - "epoch": 1.8576029630822497, - "grad_norm": 0.0013755474938079715, - "learning_rate": 0.00019999830017757687, - "loss": 46.0, - "step": 11535 - }, - { - "epoch": 1.857764000161037, - "grad_norm": 0.0008873820188455284, - "learning_rate": 0.00019999829988258486, - "loss": 46.0, - "step": 11536 - }, - { - "epoch": 1.8579250372398244, - "grad_norm": 0.004061360843479633, - "learning_rate": 0.0001999982995875673, - "loss": 46.0, - "step": 11537 - }, - { - "epoch": 1.8580860743186118, - "grad_norm": 0.0008830491569824517, - "learning_rate": 0.0001999982992925241, - "loss": 46.0, - "step": 11538 - }, - { - "epoch": 1.8582471113973993, - "grad_norm": 0.0017756131710484624, - "learning_rate": 0.00019999829899745533, - "loss": 46.0, - "step": 11539 - }, - { - "epoch": 1.8584081484761867, - "grad_norm": 0.00431841891258955, - "learning_rate": 0.00019999829870236095, - "loss": 46.0, - "step": 11540 - }, - { - "epoch": 1.8585691855549742, - "grad_norm": 0.004266228061169386, - "learning_rate": 0.000199998298407241, - "loss": 46.0, - "step": 11541 - }, - { - "epoch": 1.8587302226337614, - "grad_norm": 0.0016785580664873123, - "learning_rate": 0.00019999829811209545, - "loss": 46.0, - "step": 11542 - }, - { - "epoch": 1.8588912597125486, - "grad_norm": 0.004245802294462919, - "learning_rate": 0.00019999829781692428, - "loss": 46.0, - "step": 11543 - }, - { - "epoch": 1.859052296791336, - "grad_norm": 0.006059711799025536, - "learning_rate": 0.00019999829752172753, - "loss": 46.0, - "step": 11544 - }, - { - "epoch": 1.8592133338701236, - "grad_norm": 0.002805279800668359, - "learning_rate": 0.00019999829722650519, - "loss": 46.0, - "step": 11545 - }, - { - "epoch": 1.859374370948911, - "grad_norm": 0.003590754931792617, - "learning_rate": 0.00019999829693125726, - "loss": 46.0, - "step": 11546 - }, - { - "epoch": 1.8595354080276985, - "grad_norm": 0.0025306125171482563, - "learning_rate": 0.00019999829663598374, - "loss": 46.0, - "step": 11547 - }, - { - "epoch": 1.859696445106486, - "grad_norm": 0.0012398662511259317, - "learning_rate": 0.0001999982963406846, - "loss": 46.0, - "step": 11548 - }, - { - "epoch": 1.8598574821852731, - "grad_norm": 0.0016159494407474995, - "learning_rate": 0.00019999829604535986, - "loss": 46.0, - "step": 11549 - }, - { - "epoch": 1.8600185192640606, - "grad_norm": 0.002433615271002054, - "learning_rate": 0.00019999829575000956, - "loss": 46.0, - "step": 11550 - }, - { - "epoch": 1.8601795563428478, - "grad_norm": 0.000999464187771082, - "learning_rate": 0.00019999829545463366, - "loss": 46.0, - "step": 11551 - }, - { - "epoch": 1.8603405934216353, - "grad_norm": 0.002432487206533551, - "learning_rate": 0.00019999829515923216, - "loss": 46.0, - "step": 11552 - }, - { - "epoch": 1.8605016305004227, - "grad_norm": 0.0009705192060209811, - "learning_rate": 0.00019999829486380506, - "loss": 46.0, - "step": 11553 - }, - { - "epoch": 1.8606626675792102, - "grad_norm": 0.004051779862493277, - "learning_rate": 0.00019999829456835238, - "loss": 46.0, - "step": 11554 - }, - { - "epoch": 1.8608237046579976, - "grad_norm": 0.001016879454255104, - "learning_rate": 0.00019999829427287409, - "loss": 46.0, - "step": 11555 - }, - { - "epoch": 1.8609847417367849, - "grad_norm": 0.0016925642266869545, - "learning_rate": 0.00019999829397737023, - "loss": 46.0, - "step": 11556 - }, - { - "epoch": 1.8611457788155723, - "grad_norm": 0.004090378526598215, - "learning_rate": 0.00019999829368184073, - "loss": 46.0, - "step": 11557 - }, - { - "epoch": 1.8613068158943595, - "grad_norm": 0.005295174196362495, - "learning_rate": 0.00019999829338628568, - "loss": 46.0, - "step": 11558 - }, - { - "epoch": 1.861467852973147, - "grad_norm": 0.004252488724887371, - "learning_rate": 0.000199998293090705, - "loss": 46.0, - "step": 11559 - }, - { - "epoch": 1.8616288900519344, - "grad_norm": 0.0010820492170751095, - "learning_rate": 0.00019999829279509875, - "loss": 46.0, - "step": 11560 - }, - { - "epoch": 1.8617899271307219, - "grad_norm": 0.0027612620033323765, - "learning_rate": 0.0001999982924994669, - "loss": 46.0, - "step": 11561 - }, - { - "epoch": 1.8619509642095093, - "grad_norm": 0.002777194371446967, - "learning_rate": 0.00019999829220380947, - "loss": 46.0, - "step": 11562 - }, - { - "epoch": 1.8621120012882968, - "grad_norm": 0.0005786261754110456, - "learning_rate": 0.0001999982919081264, - "loss": 46.0, - "step": 11563 - }, - { - "epoch": 1.862273038367084, - "grad_norm": 0.0017317441524937749, - "learning_rate": 0.00019999829161241776, - "loss": 46.0, - "step": 11564 - }, - { - "epoch": 1.8624340754458713, - "grad_norm": 0.0030908421613276005, - "learning_rate": 0.00019999829131668354, - "loss": 46.0, - "step": 11565 - }, - { - "epoch": 1.8625951125246587, - "grad_norm": 0.0004570278979372233, - "learning_rate": 0.0001999982910209237, - "loss": 46.0, - "step": 11566 - }, - { - "epoch": 1.8627561496034462, - "grad_norm": 0.0009962570620700717, - "learning_rate": 0.0001999982907251383, - "loss": 46.0, - "step": 11567 - }, - { - "epoch": 1.8629171866822336, - "grad_norm": 0.0015381964622065425, - "learning_rate": 0.00019999829042932727, - "loss": 46.0, - "step": 11568 - }, - { - "epoch": 1.863078223761021, - "grad_norm": 0.0021539456211030483, - "learning_rate": 0.00019999829013349067, - "loss": 46.0, - "step": 11569 - }, - { - "epoch": 1.8632392608398085, - "grad_norm": 0.002047761343419552, - "learning_rate": 0.00019999828983762849, - "loss": 46.0, - "step": 11570 - }, - { - "epoch": 1.8634002979185957, - "grad_norm": 0.003137151710689068, - "learning_rate": 0.00019999828954174066, - "loss": 46.0, - "step": 11571 - }, - { - "epoch": 1.8635613349973832, - "grad_norm": 0.01071912981569767, - "learning_rate": 0.00019999828924582727, - "loss": 46.0, - "step": 11572 - }, - { - "epoch": 1.8637223720761704, - "grad_norm": 0.0056599159725010395, - "learning_rate": 0.00019999828894988827, - "loss": 46.0, - "step": 11573 - }, - { - "epoch": 1.8638834091549579, - "grad_norm": 0.004475565627217293, - "learning_rate": 0.0001999982886539237, - "loss": 46.0, - "step": 11574 - }, - { - "epoch": 1.8640444462337453, - "grad_norm": 0.005175105761736631, - "learning_rate": 0.00019999828835793354, - "loss": 46.0, - "step": 11575 - }, - { - "epoch": 1.8642054833125328, - "grad_norm": 0.0025757038965821266, - "learning_rate": 0.00019999828806191778, - "loss": 46.0, - "step": 11576 - }, - { - "epoch": 1.8643665203913202, - "grad_norm": 0.0015190429985523224, - "learning_rate": 0.0001999982877658764, - "loss": 46.0, - "step": 11577 - }, - { - "epoch": 1.8645275574701075, - "grad_norm": 0.0010950901778414845, - "learning_rate": 0.00019999828746980946, - "loss": 46.0, - "step": 11578 - }, - { - "epoch": 1.864688594548895, - "grad_norm": 0.0014548160834237933, - "learning_rate": 0.00019999828717371689, - "loss": 46.0, - "step": 11579 - }, - { - "epoch": 1.8648496316276821, - "grad_norm": 0.0021911978255957365, - "learning_rate": 0.00019999828687759875, - "loss": 46.0, - "step": 11580 - }, - { - "epoch": 1.8650106687064696, - "grad_norm": 0.0030266642570495605, - "learning_rate": 0.000199998286581455, - "loss": 46.0, - "step": 11581 - }, - { - "epoch": 1.865171705785257, - "grad_norm": 0.002875728067010641, - "learning_rate": 0.00019999828628528568, - "loss": 46.0, - "step": 11582 - }, - { - "epoch": 1.8653327428640445, - "grad_norm": 0.0014987209578976035, - "learning_rate": 0.00019999828598909073, - "loss": 46.0, - "step": 11583 - }, - { - "epoch": 1.865493779942832, - "grad_norm": 0.0017213376704603434, - "learning_rate": 0.00019999828569287022, - "loss": 46.0, - "step": 11584 - }, - { - "epoch": 1.8656548170216194, - "grad_norm": 0.0033472541254013777, - "learning_rate": 0.00019999828539662412, - "loss": 46.0, - "step": 11585 - }, - { - "epoch": 1.8658158541004066, - "grad_norm": 0.0066199894063174725, - "learning_rate": 0.0001999982851003524, - "loss": 46.0, - "step": 11586 - }, - { - "epoch": 1.8659768911791939, - "grad_norm": 0.0005458767409436405, - "learning_rate": 0.0001999982848040551, - "loss": 46.0, - "step": 11587 - }, - { - "epoch": 1.8661379282579813, - "grad_norm": 0.001661198097281158, - "learning_rate": 0.00019999828450773216, - "loss": 46.0, - "step": 11588 - }, - { - "epoch": 1.8662989653367688, - "grad_norm": 0.0021142465993762016, - "learning_rate": 0.00019999828421138368, - "loss": 46.0, - "step": 11589 - }, - { - "epoch": 1.8664600024155562, - "grad_norm": 0.008855815045535564, - "learning_rate": 0.0001999982839150096, - "loss": 46.0, - "step": 11590 - }, - { - "epoch": 1.8666210394943437, - "grad_norm": 0.012997549958527088, - "learning_rate": 0.0001999982836186099, - "loss": 46.0, - "step": 11591 - }, - { - "epoch": 1.866782076573131, - "grad_norm": 0.00476828683167696, - "learning_rate": 0.00019999828332218462, - "loss": 46.0, - "step": 11592 - }, - { - "epoch": 1.8669431136519183, - "grad_norm": 0.0006384350708685815, - "learning_rate": 0.00019999828302573374, - "loss": 46.0, - "step": 11593 - }, - { - "epoch": 1.8671041507307058, - "grad_norm": 0.00263122352771461, - "learning_rate": 0.0001999982827292573, - "loss": 46.0, - "step": 11594 - }, - { - "epoch": 1.867265187809493, - "grad_norm": 0.00040063809137791395, - "learning_rate": 0.00019999828243275522, - "loss": 46.0, - "step": 11595 - }, - { - "epoch": 1.8674262248882805, - "grad_norm": 0.001571177621372044, - "learning_rate": 0.00019999828213622758, - "loss": 46.0, - "step": 11596 - }, - { - "epoch": 1.867587261967068, - "grad_norm": 0.0012726279674097896, - "learning_rate": 0.0001999982818396743, - "loss": 46.0, - "step": 11597 - }, - { - "epoch": 1.8677482990458554, - "grad_norm": 0.006881889421492815, - "learning_rate": 0.00019999828154309546, - "loss": 46.0, - "step": 11598 - }, - { - "epoch": 1.8679093361246428, - "grad_norm": 0.0048666601069271564, - "learning_rate": 0.00019999828124649103, - "loss": 46.0, - "step": 11599 - }, - { - "epoch": 1.86807037320343, - "grad_norm": 0.0014073617057874799, - "learning_rate": 0.00019999828094986098, - "loss": 46.0, - "step": 11600 - }, - { - "epoch": 1.8682314102822175, - "grad_norm": 0.0013188141165301204, - "learning_rate": 0.00019999828065320535, - "loss": 46.0, - "step": 11601 - }, - { - "epoch": 1.8683924473610047, - "grad_norm": 0.005279910285025835, - "learning_rate": 0.00019999828035652413, - "loss": 46.0, - "step": 11602 - }, - { - "epoch": 1.8685534844397922, - "grad_norm": 0.0031094886362552643, - "learning_rate": 0.0001999982800598173, - "loss": 46.0, - "step": 11603 - }, - { - "epoch": 1.8687145215185796, - "grad_norm": 0.0006005127215757966, - "learning_rate": 0.0001999982797630849, - "loss": 46.0, - "step": 11604 - }, - { - "epoch": 1.868875558597367, - "grad_norm": 0.002337319077923894, - "learning_rate": 0.0001999982794663269, - "loss": 46.0, - "step": 11605 - }, - { - "epoch": 1.8690365956761545, - "grad_norm": 0.005711226258426905, - "learning_rate": 0.0001999982791695433, - "loss": 46.0, - "step": 11606 - }, - { - "epoch": 1.8691976327549418, - "grad_norm": 0.0006825447198934853, - "learning_rate": 0.0001999982788727341, - "loss": 46.0, - "step": 11607 - }, - { - "epoch": 1.8693586698337292, - "grad_norm": 0.002837394131347537, - "learning_rate": 0.00019999827857589933, - "loss": 46.0, - "step": 11608 - }, - { - "epoch": 1.8695197069125165, - "grad_norm": 0.00829993560910225, - "learning_rate": 0.00019999827827903892, - "loss": 46.0, - "step": 11609 - }, - { - "epoch": 1.869680743991304, - "grad_norm": 0.00921242218464613, - "learning_rate": 0.00019999827798215293, - "loss": 46.0, - "step": 11610 - }, - { - "epoch": 1.8698417810700914, - "grad_norm": 0.0067711821757256985, - "learning_rate": 0.00019999827768524137, - "loss": 46.0, - "step": 11611 - }, - { - "epoch": 1.8700028181488788, - "grad_norm": 0.0025815435219556093, - "learning_rate": 0.0001999982773883042, - "loss": 46.0, - "step": 11612 - }, - { - "epoch": 1.8701638552276663, - "grad_norm": 0.0024665442761033773, - "learning_rate": 0.0001999982770913414, - "loss": 46.0, - "step": 11613 - }, - { - "epoch": 1.8703248923064537, - "grad_norm": 0.0028704360593110323, - "learning_rate": 0.00019999827679435307, - "loss": 46.0, - "step": 11614 - }, - { - "epoch": 1.870485929385241, - "grad_norm": 0.0065760137513279915, - "learning_rate": 0.00019999827649733914, - "loss": 46.0, - "step": 11615 - }, - { - "epoch": 1.8706469664640284, - "grad_norm": 0.004264004994183779, - "learning_rate": 0.0001999982762002996, - "loss": 46.0, - "step": 11616 - }, - { - "epoch": 1.8708080035428156, - "grad_norm": 0.002643846906721592, - "learning_rate": 0.00019999827590323443, - "loss": 46.0, - "step": 11617 - }, - { - "epoch": 1.870969040621603, - "grad_norm": 0.0017271623946726322, - "learning_rate": 0.0001999982756061437, - "loss": 46.0, - "step": 11618 - }, - { - "epoch": 1.8711300777003905, - "grad_norm": 0.0018560613971203566, - "learning_rate": 0.00019999827530902737, - "loss": 46.0, - "step": 11619 - }, - { - "epoch": 1.871291114779178, - "grad_norm": 0.0025461779441684484, - "learning_rate": 0.00019999827501188545, - "loss": 46.0, - "step": 11620 - }, - { - "epoch": 1.8714521518579654, - "grad_norm": 0.001947728917002678, - "learning_rate": 0.00019999827471471794, - "loss": 46.0, - "step": 11621 - }, - { - "epoch": 1.8716131889367527, - "grad_norm": 0.0035788097884505987, - "learning_rate": 0.00019999827441752484, - "loss": 46.0, - "step": 11622 - }, - { - "epoch": 1.87177422601554, - "grad_norm": 0.002290220931172371, - "learning_rate": 0.00019999827412030613, - "loss": 46.0, - "step": 11623 - }, - { - "epoch": 1.8719352630943273, - "grad_norm": 0.0038459710776805878, - "learning_rate": 0.00019999827382306183, - "loss": 46.0, - "step": 11624 - }, - { - "epoch": 1.8720963001731148, - "grad_norm": 0.002096254611387849, - "learning_rate": 0.00019999827352579192, - "loss": 46.0, - "step": 11625 - }, - { - "epoch": 1.8722573372519022, - "grad_norm": 0.004677208140492439, - "learning_rate": 0.00019999827322849642, - "loss": 46.0, - "step": 11626 - }, - { - "epoch": 1.8724183743306897, - "grad_norm": 0.001063927891664207, - "learning_rate": 0.00019999827293117533, - "loss": 46.0, - "step": 11627 - }, - { - "epoch": 1.8725794114094771, - "grad_norm": 0.01260798703879118, - "learning_rate": 0.00019999827263382866, - "loss": 46.0, - "step": 11628 - }, - { - "epoch": 1.8727404484882644, - "grad_norm": 0.0033484536688774824, - "learning_rate": 0.0001999982723364564, - "loss": 46.0, - "step": 11629 - }, - { - "epoch": 1.8729014855670518, - "grad_norm": 0.005107702221721411, - "learning_rate": 0.00019999827203905852, - "loss": 46.0, - "step": 11630 - }, - { - "epoch": 1.873062522645839, - "grad_norm": 0.0005591654335148633, - "learning_rate": 0.00019999827174163506, - "loss": 46.0, - "step": 11631 - }, - { - "epoch": 1.8732235597246265, - "grad_norm": 0.002750370418652892, - "learning_rate": 0.000199998271444186, - "loss": 46.0, - "step": 11632 - }, - { - "epoch": 1.873384596803414, - "grad_norm": 0.0036735234316438437, - "learning_rate": 0.00019999827114671134, - "loss": 46.0, - "step": 11633 - }, - { - "epoch": 1.8735456338822014, - "grad_norm": 0.001277828123420477, - "learning_rate": 0.0001999982708492111, - "loss": 46.0, - "step": 11634 - }, - { - "epoch": 1.8737066709609889, - "grad_norm": 0.0029443674720823765, - "learning_rate": 0.00019999827055168528, - "loss": 46.0, - "step": 11635 - }, - { - "epoch": 1.8738677080397763, - "grad_norm": 0.0015158854657784104, - "learning_rate": 0.00019999827025413383, - "loss": 46.0, - "step": 11636 - }, - { - "epoch": 1.8740287451185635, - "grad_norm": 0.005589430220425129, - "learning_rate": 0.0001999982699565568, - "loss": 46.0, - "step": 11637 - }, - { - "epoch": 1.874189782197351, - "grad_norm": 0.001601747702807188, - "learning_rate": 0.0001999982696589542, - "loss": 46.0, - "step": 11638 - }, - { - "epoch": 1.8743508192761382, - "grad_norm": 0.002457372611388564, - "learning_rate": 0.00019999826936132595, - "loss": 46.0, - "step": 11639 - }, - { - "epoch": 1.8745118563549257, - "grad_norm": 0.0007880728226155043, - "learning_rate": 0.00019999826906367215, - "loss": 46.0, - "step": 11640 - }, - { - "epoch": 1.8746728934337131, - "grad_norm": 0.002802757080644369, - "learning_rate": 0.00019999826876599276, - "loss": 46.0, - "step": 11641 - }, - { - "epoch": 1.8748339305125006, - "grad_norm": 0.0026106261648237705, - "learning_rate": 0.00019999826846828773, - "loss": 46.0, - "step": 11642 - }, - { - "epoch": 1.874994967591288, - "grad_norm": 0.0028694793581962585, - "learning_rate": 0.00019999826817055714, - "loss": 46.0, - "step": 11643 - }, - { - "epoch": 1.8751560046700753, - "grad_norm": 0.0012493396643549204, - "learning_rate": 0.00019999826787280096, - "loss": 46.0, - "step": 11644 - }, - { - "epoch": 1.8753170417488627, - "grad_norm": 0.003053989727050066, - "learning_rate": 0.00019999826757501917, - "loss": 46.0, - "step": 11645 - }, - { - "epoch": 1.87547807882765, - "grad_norm": 0.0018995861755684018, - "learning_rate": 0.0001999982672772118, - "loss": 46.0, - "step": 11646 - }, - { - "epoch": 1.8756391159064374, - "grad_norm": 0.0034354848321527243, - "learning_rate": 0.00019999826697937882, - "loss": 46.0, - "step": 11647 - }, - { - "epoch": 1.8758001529852248, - "grad_norm": 0.002067354740574956, - "learning_rate": 0.00019999826668152024, - "loss": 46.0, - "step": 11648 - }, - { - "epoch": 1.8759611900640123, - "grad_norm": 0.002321695676073432, - "learning_rate": 0.00019999826638363607, - "loss": 46.0, - "step": 11649 - }, - { - "epoch": 1.8761222271427997, - "grad_norm": 0.0025709739420562983, - "learning_rate": 0.00019999826608572635, - "loss": 46.0, - "step": 11650 - }, - { - "epoch": 1.876283264221587, - "grad_norm": 0.001130796386860311, - "learning_rate": 0.00019999826578779098, - "loss": 46.0, - "step": 11651 - }, - { - "epoch": 1.8764443013003744, - "grad_norm": 0.0021283223759382963, - "learning_rate": 0.00019999826548983002, - "loss": 46.0, - "step": 11652 - }, - { - "epoch": 1.8766053383791617, - "grad_norm": 0.0015117021976038814, - "learning_rate": 0.0001999982651918435, - "loss": 46.0, - "step": 11653 - }, - { - "epoch": 1.876766375457949, - "grad_norm": 0.0017427293350920081, - "learning_rate": 0.00019999826489383135, - "loss": 46.0, - "step": 11654 - }, - { - "epoch": 1.8769274125367366, - "grad_norm": 0.0008957125246524811, - "learning_rate": 0.00019999826459579363, - "loss": 46.0, - "step": 11655 - }, - { - "epoch": 1.877088449615524, - "grad_norm": 0.0009859104175120592, - "learning_rate": 0.00019999826429773033, - "loss": 46.0, - "step": 11656 - }, - { - "epoch": 1.8772494866943115, - "grad_norm": 0.0010851643746718764, - "learning_rate": 0.00019999826399964138, - "loss": 46.0, - "step": 11657 - }, - { - "epoch": 1.877410523773099, - "grad_norm": 0.0011887041619047523, - "learning_rate": 0.00019999826370152687, - "loss": 46.0, - "step": 11658 - }, - { - "epoch": 1.8775715608518861, - "grad_norm": 0.0006630821735598147, - "learning_rate": 0.00019999826340338678, - "loss": 46.0, - "step": 11659 - }, - { - "epoch": 1.8777325979306734, - "grad_norm": 0.002834790386259556, - "learning_rate": 0.00019999826310522107, - "loss": 46.0, - "step": 11660 - }, - { - "epoch": 1.8778936350094608, - "grad_norm": 0.0010537327034398913, - "learning_rate": 0.00019999826280702978, - "loss": 46.0, - "step": 11661 - }, - { - "epoch": 1.8780546720882483, - "grad_norm": 0.0024477799888700247, - "learning_rate": 0.0001999982625088129, - "loss": 46.0, - "step": 11662 - }, - { - "epoch": 1.8782157091670357, - "grad_norm": 0.0026908584404736757, - "learning_rate": 0.0001999982622105704, - "loss": 46.0, - "step": 11663 - }, - { - "epoch": 1.8783767462458232, - "grad_norm": 0.001839268021285534, - "learning_rate": 0.00019999826191230234, - "loss": 46.0, - "step": 11664 - }, - { - "epoch": 1.8785377833246106, - "grad_norm": 0.0004310556105338037, - "learning_rate": 0.00019999826161400865, - "loss": 46.0, - "step": 11665 - }, - { - "epoch": 1.8786988204033979, - "grad_norm": 0.0031633267644792795, - "learning_rate": 0.0001999982613156894, - "loss": 46.0, - "step": 11666 - }, - { - "epoch": 1.8788598574821853, - "grad_norm": 0.0012949950760230422, - "learning_rate": 0.00019999826101734454, - "loss": 46.0, - "step": 11667 - }, - { - "epoch": 1.8790208945609725, - "grad_norm": 0.01011162344366312, - "learning_rate": 0.00019999826071897408, - "loss": 46.0, - "step": 11668 - }, - { - "epoch": 1.87918193163976, - "grad_norm": 0.0018819993129000068, - "learning_rate": 0.00019999826042057804, - "loss": 46.0, - "step": 11669 - }, - { - "epoch": 1.8793429687185474, - "grad_norm": 0.0006986952503211796, - "learning_rate": 0.00019999826012215638, - "loss": 46.0, - "step": 11670 - }, - { - "epoch": 1.879504005797335, - "grad_norm": 0.002481229603290558, - "learning_rate": 0.00019999825982370913, - "loss": 46.0, - "step": 11671 - }, - { - "epoch": 1.8796650428761223, - "grad_norm": 0.0022390899248421192, - "learning_rate": 0.0001999982595252363, - "loss": 46.0, - "step": 11672 - }, - { - "epoch": 1.8798260799549096, - "grad_norm": 0.0016635936917737126, - "learning_rate": 0.00019999825922673787, - "loss": 46.0, - "step": 11673 - }, - { - "epoch": 1.879987117033697, - "grad_norm": 0.0009636238100938499, - "learning_rate": 0.00019999825892821384, - "loss": 46.0, - "step": 11674 - }, - { - "epoch": 1.8801481541124843, - "grad_norm": 0.0032580525148659945, - "learning_rate": 0.0001999982586296642, - "loss": 46.0, - "step": 11675 - }, - { - "epoch": 1.8803091911912717, - "grad_norm": 0.003457412589341402, - "learning_rate": 0.000199998258331089, - "loss": 46.0, - "step": 11676 - }, - { - "epoch": 1.8804702282700592, - "grad_norm": 0.0006288232980296016, - "learning_rate": 0.0001999982580324882, - "loss": 46.0, - "step": 11677 - }, - { - "epoch": 1.8806312653488466, - "grad_norm": 0.0021243446972221136, - "learning_rate": 0.0001999982577338618, - "loss": 46.0, - "step": 11678 - }, - { - "epoch": 1.880792302427634, - "grad_norm": 0.003309336258098483, - "learning_rate": 0.00019999825743520982, - "loss": 46.0, - "step": 11679 - }, - { - "epoch": 1.8809533395064215, - "grad_norm": 0.0008861838723532856, - "learning_rate": 0.00019999825713653223, - "loss": 46.0, - "step": 11680 - }, - { - "epoch": 1.8811143765852087, - "grad_norm": 0.005014640279114246, - "learning_rate": 0.00019999825683782906, - "loss": 46.0, - "step": 11681 - }, - { - "epoch": 1.881275413663996, - "grad_norm": 0.003561594756320119, - "learning_rate": 0.00019999825653910027, - "loss": 46.0, - "step": 11682 - }, - { - "epoch": 1.8814364507427834, - "grad_norm": 0.0036646199878305197, - "learning_rate": 0.0001999982562403459, - "loss": 46.0, - "step": 11683 - }, - { - "epoch": 1.8815974878215709, - "grad_norm": 0.003349104430526495, - "learning_rate": 0.00019999825594156593, - "loss": 46.0, - "step": 11684 - }, - { - "epoch": 1.8817585249003583, - "grad_norm": 0.010918417945504189, - "learning_rate": 0.00019999825564276038, - "loss": 46.0, - "step": 11685 - }, - { - "epoch": 1.8819195619791458, - "grad_norm": 0.0031295963563024998, - "learning_rate": 0.0001999982553439292, - "loss": 46.0, - "step": 11686 - }, - { - "epoch": 1.8820805990579332, - "grad_norm": 0.0008877825457602739, - "learning_rate": 0.00019999825504507247, - "loss": 46.0, - "step": 11687 - }, - { - "epoch": 1.8822416361367205, - "grad_norm": 0.0033893361687660217, - "learning_rate": 0.0001999982547461901, - "loss": 46.0, - "step": 11688 - }, - { - "epoch": 1.882402673215508, - "grad_norm": 0.0012382828863337636, - "learning_rate": 0.00019999825444728218, - "loss": 46.0, - "step": 11689 - }, - { - "epoch": 1.8825637102942951, - "grad_norm": 0.0024679130874574184, - "learning_rate": 0.00019999825414834864, - "loss": 46.0, - "step": 11690 - }, - { - "epoch": 1.8827247473730826, - "grad_norm": 0.0005847827414982021, - "learning_rate": 0.0001999982538493895, - "loss": 46.0, - "step": 11691 - }, - { - "epoch": 1.88288578445187, - "grad_norm": 0.0009010388166643679, - "learning_rate": 0.0001999982535504048, - "loss": 46.0, - "step": 11692 - }, - { - "epoch": 1.8830468215306575, - "grad_norm": 0.003370491787791252, - "learning_rate": 0.00019999825325139447, - "loss": 46.0, - "step": 11693 - }, - { - "epoch": 1.883207858609445, - "grad_norm": 0.0011616488918662071, - "learning_rate": 0.00019999825295235855, - "loss": 46.0, - "step": 11694 - }, - { - "epoch": 1.8833688956882322, - "grad_norm": 0.006525881588459015, - "learning_rate": 0.00019999825265329705, - "loss": 46.0, - "step": 11695 - }, - { - "epoch": 1.8835299327670196, - "grad_norm": 0.001878933166153729, - "learning_rate": 0.00019999825235420996, - "loss": 46.0, - "step": 11696 - }, - { - "epoch": 1.8836909698458069, - "grad_norm": 0.0008436068892478943, - "learning_rate": 0.00019999825205509729, - "loss": 46.0, - "step": 11697 - }, - { - "epoch": 1.8838520069245943, - "grad_norm": 0.0012475269613787532, - "learning_rate": 0.00019999825175595897, - "loss": 46.0, - "step": 11698 - }, - { - "epoch": 1.8840130440033818, - "grad_norm": 0.005057751666754484, - "learning_rate": 0.0001999982514567951, - "loss": 46.0, - "step": 11699 - }, - { - "epoch": 1.8841740810821692, - "grad_norm": 0.001804113737307489, - "learning_rate": 0.0001999982511576056, - "loss": 46.0, - "step": 11700 - }, - { - "epoch": 1.8843351181609567, - "grad_norm": 0.0028276785742491484, - "learning_rate": 0.00019999825085839054, - "loss": 46.0, - "step": 11701 - }, - { - "epoch": 1.884496155239744, - "grad_norm": 0.004614020697772503, - "learning_rate": 0.00019999825055914988, - "loss": 46.0, - "step": 11702 - }, - { - "epoch": 1.8846571923185313, - "grad_norm": 0.0017730029067024589, - "learning_rate": 0.00019999825025988362, - "loss": 46.0, - "step": 11703 - }, - { - "epoch": 1.8848182293973186, - "grad_norm": 0.008341995067894459, - "learning_rate": 0.00019999824996059175, - "loss": 46.0, - "step": 11704 - }, - { - "epoch": 1.884979266476106, - "grad_norm": 0.0023817610926926136, - "learning_rate": 0.00019999824966127433, - "loss": 46.0, - "step": 11705 - }, - { - "epoch": 1.8851403035548935, - "grad_norm": 0.002262350171804428, - "learning_rate": 0.00019999824936193128, - "loss": 46.0, - "step": 11706 - }, - { - "epoch": 1.885301340633681, - "grad_norm": 0.0019864633213728666, - "learning_rate": 0.00019999824906256263, - "loss": 46.0, - "step": 11707 - }, - { - "epoch": 1.8854623777124684, - "grad_norm": 0.0007944966200739145, - "learning_rate": 0.00019999824876316838, - "loss": 46.0, - "step": 11708 - }, - { - "epoch": 1.8856234147912558, - "grad_norm": 0.0011223673354834318, - "learning_rate": 0.00019999824846374855, - "loss": 46.0, - "step": 11709 - }, - { - "epoch": 1.885784451870043, - "grad_norm": 0.002139553427696228, - "learning_rate": 0.00019999824816430316, - "loss": 46.0, - "step": 11710 - }, - { - "epoch": 1.8859454889488305, - "grad_norm": 0.003116552485153079, - "learning_rate": 0.00019999824786483213, - "loss": 46.0, - "step": 11711 - }, - { - "epoch": 1.8861065260276177, - "grad_norm": 0.0035305460914969444, - "learning_rate": 0.0001999982475653355, - "loss": 46.0, - "step": 11712 - }, - { - "epoch": 1.8862675631064052, - "grad_norm": 0.00039185149944387376, - "learning_rate": 0.0001999982472658133, - "loss": 46.0, - "step": 11713 - }, - { - "epoch": 1.8864286001851926, - "grad_norm": 0.0018278735224157572, - "learning_rate": 0.00019999824696626553, - "loss": 46.0, - "step": 11714 - }, - { - "epoch": 1.88658963726398, - "grad_norm": 0.002578949322924018, - "learning_rate": 0.00019999824666669212, - "loss": 46.0, - "step": 11715 - }, - { - "epoch": 1.8867506743427676, - "grad_norm": 0.005684688687324524, - "learning_rate": 0.00019999824636709313, - "loss": 46.0, - "step": 11716 - }, - { - "epoch": 1.8869117114215548, - "grad_norm": 0.0036906027235090733, - "learning_rate": 0.00019999824606746854, - "loss": 46.0, - "step": 11717 - }, - { - "epoch": 1.8870727485003422, - "grad_norm": 0.001963048242032528, - "learning_rate": 0.00019999824576781835, - "loss": 46.0, - "step": 11718 - }, - { - "epoch": 1.8872337855791295, - "grad_norm": 0.0016451362753286958, - "learning_rate": 0.00019999824546814262, - "loss": 46.0, - "step": 11719 - }, - { - "epoch": 1.887394822657917, - "grad_norm": 0.0032822638750076294, - "learning_rate": 0.00019999824516844124, - "loss": 46.0, - "step": 11720 - }, - { - "epoch": 1.8875558597367044, - "grad_norm": 0.0008854749030433595, - "learning_rate": 0.00019999824486871429, - "loss": 46.0, - "step": 11721 - }, - { - "epoch": 1.8877168968154918, - "grad_norm": 0.002443799050524831, - "learning_rate": 0.0001999982445689617, - "loss": 46.0, - "step": 11722 - }, - { - "epoch": 1.8878779338942793, - "grad_norm": 0.00199572229757905, - "learning_rate": 0.00019999824426918355, - "loss": 46.0, - "step": 11723 - }, - { - "epoch": 1.8880389709730665, - "grad_norm": 0.0017114938236773014, - "learning_rate": 0.00019999824396937983, - "loss": 46.0, - "step": 11724 - }, - { - "epoch": 1.888200008051854, - "grad_norm": 0.0031553099397569895, - "learning_rate": 0.0001999982436695505, - "loss": 46.0, - "step": 11725 - }, - { - "epoch": 1.8883610451306412, - "grad_norm": 0.0050039710476994514, - "learning_rate": 0.00019999824336969555, - "loss": 46.0, - "step": 11726 - }, - { - "epoch": 1.8885220822094286, - "grad_norm": 0.006236762274056673, - "learning_rate": 0.00019999824306981504, - "loss": 46.0, - "step": 11727 - }, - { - "epoch": 1.888683119288216, - "grad_norm": 0.002946816384792328, - "learning_rate": 0.00019999824276990892, - "loss": 46.0, - "step": 11728 - }, - { - "epoch": 1.8888441563670035, - "grad_norm": 0.000903732783626765, - "learning_rate": 0.0001999982424699772, - "loss": 46.0, - "step": 11729 - }, - { - "epoch": 1.889005193445791, - "grad_norm": 0.006186710204929113, - "learning_rate": 0.00019999824217001988, - "loss": 46.0, - "step": 11730 - }, - { - "epoch": 1.8891662305245784, - "grad_norm": 0.0011079182149842381, - "learning_rate": 0.00019999824187003697, - "loss": 46.0, - "step": 11731 - }, - { - "epoch": 1.8893272676033657, - "grad_norm": 0.006587904412299395, - "learning_rate": 0.00019999824157002847, - "loss": 46.0, - "step": 11732 - }, - { - "epoch": 1.8894883046821531, - "grad_norm": 0.013779995031654835, - "learning_rate": 0.00019999824126999439, - "loss": 46.0, - "step": 11733 - }, - { - "epoch": 1.8896493417609403, - "grad_norm": 0.001720408326946199, - "learning_rate": 0.00019999824096993469, - "loss": 46.0, - "step": 11734 - }, - { - "epoch": 1.8898103788397278, - "grad_norm": 0.001307257916778326, - "learning_rate": 0.00019999824066984942, - "loss": 46.0, - "step": 11735 - }, - { - "epoch": 1.8899714159185153, - "grad_norm": 0.0035038997884839773, - "learning_rate": 0.00019999824036973852, - "loss": 46.0, - "step": 11736 - }, - { - "epoch": 1.8901324529973027, - "grad_norm": 0.0012819096446037292, - "learning_rate": 0.00019999824006960206, - "loss": 46.0, - "step": 11737 - }, - { - "epoch": 1.8902934900760902, - "grad_norm": 0.0017669338267296553, - "learning_rate": 0.00019999823976943998, - "loss": 46.0, - "step": 11738 - }, - { - "epoch": 1.8904545271548774, - "grad_norm": 0.0025631729513406754, - "learning_rate": 0.00019999823946925235, - "loss": 46.0, - "step": 11739 - }, - { - "epoch": 1.8906155642336648, - "grad_norm": 0.005793641321361065, - "learning_rate": 0.00019999823916903907, - "loss": 46.0, - "step": 11740 - }, - { - "epoch": 1.890776601312452, - "grad_norm": 0.002448096638545394, - "learning_rate": 0.00019999823886880026, - "loss": 46.0, - "step": 11741 - }, - { - "epoch": 1.8909376383912395, - "grad_norm": 0.0005394391482695937, - "learning_rate": 0.00019999823856853578, - "loss": 46.0, - "step": 11742 - }, - { - "epoch": 1.891098675470027, - "grad_norm": 0.002117827069014311, - "learning_rate": 0.00019999823826824574, - "loss": 46.0, - "step": 11743 - }, - { - "epoch": 1.8912597125488144, - "grad_norm": 0.0027311628218740225, - "learning_rate": 0.00019999823796793012, - "loss": 46.0, - "step": 11744 - }, - { - "epoch": 1.8914207496276019, - "grad_norm": 0.001291807508096099, - "learning_rate": 0.0001999982376675889, - "loss": 46.0, - "step": 11745 - }, - { - "epoch": 1.891581786706389, - "grad_norm": 0.0012104653287678957, - "learning_rate": 0.00019999823736722207, - "loss": 46.0, - "step": 11746 - }, - { - "epoch": 1.8917428237851766, - "grad_norm": 0.0023579923436045647, - "learning_rate": 0.00019999823706682966, - "loss": 46.0, - "step": 11747 - }, - { - "epoch": 1.8919038608639638, - "grad_norm": 0.000674968643579632, - "learning_rate": 0.00019999823676641166, - "loss": 46.0, - "step": 11748 - }, - { - "epoch": 1.8920648979427512, - "grad_norm": 0.007668052334338427, - "learning_rate": 0.00019999823646596804, - "loss": 46.0, - "step": 11749 - }, - { - "epoch": 1.8922259350215387, - "grad_norm": 0.004024151246994734, - "learning_rate": 0.00019999823616549884, - "loss": 46.0, - "step": 11750 - }, - { - "epoch": 1.8923869721003261, - "grad_norm": 0.0013241079868748784, - "learning_rate": 0.00019999823586500405, - "loss": 46.0, - "step": 11751 - }, - { - "epoch": 1.8925480091791136, - "grad_norm": 0.0012380306143313646, - "learning_rate": 0.00019999823556448364, - "loss": 46.0, - "step": 11752 - }, - { - "epoch": 1.892709046257901, - "grad_norm": 0.0003440381260588765, - "learning_rate": 0.00019999823526393768, - "loss": 46.0, - "step": 11753 - }, - { - "epoch": 1.8928700833366883, - "grad_norm": 0.002382012316957116, - "learning_rate": 0.0001999982349633661, - "loss": 46.0, - "step": 11754 - }, - { - "epoch": 1.8930311204154755, - "grad_norm": 0.0012605522060766816, - "learning_rate": 0.00019999823466276896, - "loss": 46.0, - "step": 11755 - }, - { - "epoch": 1.893192157494263, - "grad_norm": 0.008916929364204407, - "learning_rate": 0.00019999823436214618, - "loss": 46.0, - "step": 11756 - }, - { - "epoch": 1.8933531945730504, - "grad_norm": 0.0017264783382415771, - "learning_rate": 0.00019999823406149784, - "loss": 46.0, - "step": 11757 - }, - { - "epoch": 1.8935142316518379, - "grad_norm": 0.0005386592820286751, - "learning_rate": 0.00019999823376082385, - "loss": 46.0, - "step": 11758 - }, - { - "epoch": 1.8936752687306253, - "grad_norm": 0.004780413582921028, - "learning_rate": 0.00019999823346012428, - "loss": 46.0, - "step": 11759 - }, - { - "epoch": 1.8938363058094128, - "grad_norm": 0.002630789065733552, - "learning_rate": 0.00019999823315939916, - "loss": 46.0, - "step": 11760 - }, - { - "epoch": 1.8939973428882, - "grad_norm": 0.004380307625979185, - "learning_rate": 0.00019999823285864844, - "loss": 46.0, - "step": 11761 - }, - { - "epoch": 1.8941583799669874, - "grad_norm": 0.004099314566701651, - "learning_rate": 0.00019999823255787208, - "loss": 46.0, - "step": 11762 - }, - { - "epoch": 1.8943194170457747, - "grad_norm": 0.0047810859978199005, - "learning_rate": 0.00019999823225707016, - "loss": 46.0, - "step": 11763 - }, - { - "epoch": 1.8944804541245621, - "grad_norm": 0.004029067233204842, - "learning_rate": 0.00019999823195624266, - "loss": 46.0, - "step": 11764 - }, - { - "epoch": 1.8946414912033496, - "grad_norm": 0.000969112734310329, - "learning_rate": 0.00019999823165538954, - "loss": 46.0, - "step": 11765 - }, - { - "epoch": 1.894802528282137, - "grad_norm": 0.0019878216553479433, - "learning_rate": 0.00019999823135451083, - "loss": 46.0, - "step": 11766 - }, - { - "epoch": 1.8949635653609245, - "grad_norm": 0.00527028227224946, - "learning_rate": 0.0001999982310536065, - "loss": 46.0, - "step": 11767 - }, - { - "epoch": 1.8951246024397117, - "grad_norm": 0.001325234305113554, - "learning_rate": 0.0001999982307526766, - "loss": 46.0, - "step": 11768 - }, - { - "epoch": 1.8952856395184992, - "grad_norm": 0.0025457125157117844, - "learning_rate": 0.00019999823045172113, - "loss": 46.0, - "step": 11769 - }, - { - "epoch": 1.8954466765972864, - "grad_norm": 0.0024882894940674305, - "learning_rate": 0.00019999823015074005, - "loss": 46.0, - "step": 11770 - }, - { - "epoch": 1.8956077136760738, - "grad_norm": 0.0008611671510152519, - "learning_rate": 0.00019999822984973338, - "loss": 46.0, - "step": 11771 - }, - { - "epoch": 1.8957687507548613, - "grad_norm": 0.0015332571929320693, - "learning_rate": 0.0001999982295487011, - "loss": 46.0, - "step": 11772 - }, - { - "epoch": 1.8959297878336487, - "grad_norm": 0.0038195359520614147, - "learning_rate": 0.00019999822924764323, - "loss": 46.0, - "step": 11773 - }, - { - "epoch": 1.8960908249124362, - "grad_norm": 0.0017652831738814712, - "learning_rate": 0.00019999822894655974, - "loss": 46.0, - "step": 11774 - }, - { - "epoch": 1.8962518619912236, - "grad_norm": 0.0010880673071369529, - "learning_rate": 0.0001999982286454507, - "loss": 46.0, - "step": 11775 - }, - { - "epoch": 1.8964128990700109, - "grad_norm": 0.0014009218430146575, - "learning_rate": 0.00019999822834431604, - "loss": 46.0, - "step": 11776 - }, - { - "epoch": 1.896573936148798, - "grad_norm": 0.0028121969662606716, - "learning_rate": 0.0001999982280431558, - "loss": 46.0, - "step": 11777 - }, - { - "epoch": 1.8967349732275856, - "grad_norm": 0.002201802795752883, - "learning_rate": 0.00019999822774196995, - "loss": 46.0, - "step": 11778 - }, - { - "epoch": 1.896896010306373, - "grad_norm": 0.0013814257690683007, - "learning_rate": 0.0001999982274407585, - "loss": 46.0, - "step": 11779 - }, - { - "epoch": 1.8970570473851605, - "grad_norm": 0.0011241371976211667, - "learning_rate": 0.00019999822713952147, - "loss": 46.0, - "step": 11780 - }, - { - "epoch": 1.897218084463948, - "grad_norm": 0.00313038588501513, - "learning_rate": 0.00019999822683825885, - "loss": 46.0, - "step": 11781 - }, - { - "epoch": 1.8973791215427354, - "grad_norm": 0.0045711323618888855, - "learning_rate": 0.00019999822653697064, - "loss": 46.0, - "step": 11782 - }, - { - "epoch": 1.8975401586215226, - "grad_norm": 0.0038199895061552525, - "learning_rate": 0.00019999822623565681, - "loss": 46.0, - "step": 11783 - }, - { - "epoch": 1.89770119570031, - "grad_norm": 0.0032299107406288385, - "learning_rate": 0.0001999982259343174, - "loss": 46.0, - "step": 11784 - }, - { - "epoch": 1.8978622327790973, - "grad_norm": 0.00629310542717576, - "learning_rate": 0.0001999982256329524, - "loss": 46.0, - "step": 11785 - }, - { - "epoch": 1.8980232698578847, - "grad_norm": 0.0045644245110452175, - "learning_rate": 0.00019999822533156182, - "loss": 46.0, - "step": 11786 - }, - { - "epoch": 1.8981843069366722, - "grad_norm": 0.0014680320164188743, - "learning_rate": 0.00019999822503014562, - "loss": 46.0, - "step": 11787 - }, - { - "epoch": 1.8983453440154596, - "grad_norm": 0.0023908084258437157, - "learning_rate": 0.00019999822472870383, - "loss": 46.0, - "step": 11788 - }, - { - "epoch": 1.898506381094247, - "grad_norm": 0.001071223639883101, - "learning_rate": 0.00019999822442723643, - "loss": 46.0, - "step": 11789 - }, - { - "epoch": 1.8986674181730343, - "grad_norm": 0.0025115911848843098, - "learning_rate": 0.00019999822412574347, - "loss": 46.0, - "step": 11790 - }, - { - "epoch": 1.8988284552518218, - "grad_norm": 0.0007221614359878004, - "learning_rate": 0.0001999982238242249, - "loss": 46.0, - "step": 11791 - }, - { - "epoch": 1.898989492330609, - "grad_norm": 0.0032353224232792854, - "learning_rate": 0.00019999822352268073, - "loss": 46.0, - "step": 11792 - }, - { - "epoch": 1.8991505294093964, - "grad_norm": 0.000811191217508167, - "learning_rate": 0.00019999822322111098, - "loss": 46.0, - "step": 11793 - }, - { - "epoch": 1.8993115664881839, - "grad_norm": 0.0025089846458286047, - "learning_rate": 0.00019999822291951564, - "loss": 46.0, - "step": 11794 - }, - { - "epoch": 1.8994726035669713, - "grad_norm": 0.0027704325038939714, - "learning_rate": 0.0001999982226178947, - "loss": 46.0, - "step": 11795 - }, - { - "epoch": 1.8996336406457588, - "grad_norm": 0.008282319642603397, - "learning_rate": 0.00019999822231624815, - "loss": 46.0, - "step": 11796 - }, - { - "epoch": 1.899794677724546, - "grad_norm": 0.0018473841482773423, - "learning_rate": 0.000199998222014576, - "loss": 46.0, - "step": 11797 - }, - { - "epoch": 1.8999557148033335, - "grad_norm": 0.001742801396176219, - "learning_rate": 0.0001999982217128783, - "loss": 46.0, - "step": 11798 - }, - { - "epoch": 1.9001167518821207, - "grad_norm": 0.001645389012992382, - "learning_rate": 0.00019999822141115496, - "loss": 46.0, - "step": 11799 - }, - { - "epoch": 1.9002777889609082, - "grad_norm": 0.0004979412187822163, - "learning_rate": 0.00019999822110940602, - "loss": 46.0, - "step": 11800 - }, - { - "epoch": 1.9004388260396956, - "grad_norm": 0.002033951925113797, - "learning_rate": 0.00019999822080763152, - "loss": 46.0, - "step": 11801 - }, - { - "epoch": 1.900599863118483, - "grad_norm": 0.0012474007671698928, - "learning_rate": 0.0001999982205058314, - "loss": 46.0, - "step": 11802 - }, - { - "epoch": 1.9007609001972705, - "grad_norm": 0.002050091279670596, - "learning_rate": 0.0001999982202040057, - "loss": 46.0, - "step": 11803 - }, - { - "epoch": 1.900921937276058, - "grad_norm": 0.0013502688379958272, - "learning_rate": 0.0001999982199021544, - "loss": 46.0, - "step": 11804 - }, - { - "epoch": 1.9010829743548452, - "grad_norm": 0.0049523357301950455, - "learning_rate": 0.0001999982196002775, - "loss": 46.0, - "step": 11805 - }, - { - "epoch": 1.9012440114336326, - "grad_norm": 0.0007400332833640277, - "learning_rate": 0.000199998219298375, - "loss": 46.0, - "step": 11806 - }, - { - "epoch": 1.9014050485124199, - "grad_norm": 0.0011523393914103508, - "learning_rate": 0.00019999821899644696, - "loss": 46.0, - "step": 11807 - }, - { - "epoch": 1.9015660855912073, - "grad_norm": 0.0010133797768503428, - "learning_rate": 0.00019999821869449327, - "loss": 46.0, - "step": 11808 - }, - { - "epoch": 1.9017271226699948, - "grad_norm": 0.010040556080639362, - "learning_rate": 0.000199998218392514, - "loss": 46.0, - "step": 11809 - }, - { - "epoch": 1.9018881597487822, - "grad_norm": 0.0024087845813483, - "learning_rate": 0.00019999821809050914, - "loss": 46.0, - "step": 11810 - }, - { - "epoch": 1.9020491968275697, - "grad_norm": 0.006123644299805164, - "learning_rate": 0.00019999821778847866, - "loss": 46.0, - "step": 11811 - }, - { - "epoch": 1.902210233906357, - "grad_norm": 0.006247910670936108, - "learning_rate": 0.00019999821748642265, - "loss": 46.0, - "step": 11812 - }, - { - "epoch": 1.9023712709851444, - "grad_norm": 0.0018093495164066553, - "learning_rate": 0.000199998217184341, - "loss": 46.0, - "step": 11813 - }, - { - "epoch": 1.9025323080639316, - "grad_norm": 0.0012550739338621497, - "learning_rate": 0.00019999821688223372, - "loss": 46.0, - "step": 11814 - }, - { - "epoch": 1.902693345142719, - "grad_norm": 0.0013865187065675855, - "learning_rate": 0.00019999821658010092, - "loss": 46.0, - "step": 11815 - }, - { - "epoch": 1.9028543822215065, - "grad_norm": 0.002715684939175844, - "learning_rate": 0.00019999821627794247, - "loss": 46.0, - "step": 11816 - }, - { - "epoch": 1.903015419300294, - "grad_norm": 0.001148345647379756, - "learning_rate": 0.00019999821597575844, - "loss": 46.0, - "step": 11817 - }, - { - "epoch": 1.9031764563790814, - "grad_norm": 0.003584346268326044, - "learning_rate": 0.00019999821567354885, - "loss": 46.0, - "step": 11818 - }, - { - "epoch": 1.9033374934578686, - "grad_norm": 0.004240552894771099, - "learning_rate": 0.00019999821537131362, - "loss": 46.0, - "step": 11819 - }, - { - "epoch": 1.903498530536656, - "grad_norm": 0.0011552739888429642, - "learning_rate": 0.0001999982150690528, - "loss": 46.0, - "step": 11820 - }, - { - "epoch": 1.9036595676154433, - "grad_norm": 0.0034946631640195847, - "learning_rate": 0.00019999821476676642, - "loss": 46.0, - "step": 11821 - }, - { - "epoch": 1.9038206046942308, - "grad_norm": 0.0022506811656057835, - "learning_rate": 0.00019999821446445442, - "loss": 46.0, - "step": 11822 - }, - { - "epoch": 1.9039816417730182, - "grad_norm": 0.0007530305301770568, - "learning_rate": 0.00019999821416211684, - "loss": 46.0, - "step": 11823 - }, - { - "epoch": 1.9041426788518057, - "grad_norm": 0.0020015668123960495, - "learning_rate": 0.00019999821385975364, - "loss": 46.0, - "step": 11824 - }, - { - "epoch": 1.904303715930593, - "grad_norm": 0.0006485689664259553, - "learning_rate": 0.00019999821355736486, - "loss": 46.0, - "step": 11825 - }, - { - "epoch": 1.9044647530093806, - "grad_norm": 0.00039832803304307163, - "learning_rate": 0.0001999982132549505, - "loss": 46.0, - "step": 11826 - }, - { - "epoch": 1.9046257900881678, - "grad_norm": 0.0047916872426867485, - "learning_rate": 0.0001999982129525105, - "loss": 46.0, - "step": 11827 - }, - { - "epoch": 1.9047868271669552, - "grad_norm": 0.00035810013650916517, - "learning_rate": 0.00019999821265004496, - "loss": 46.0, - "step": 11828 - }, - { - "epoch": 1.9049478642457425, - "grad_norm": 0.0009615529561415315, - "learning_rate": 0.0001999982123475538, - "loss": 46.0, - "step": 11829 - }, - { - "epoch": 1.90510890132453, - "grad_norm": 0.001052516046911478, - "learning_rate": 0.00019999821204503702, - "loss": 46.0, - "step": 11830 - }, - { - "epoch": 1.9052699384033174, - "grad_norm": 0.0010156475473195314, - "learning_rate": 0.0001999982117424947, - "loss": 46.0, - "step": 11831 - }, - { - "epoch": 1.9054309754821048, - "grad_norm": 0.0048035853542387486, - "learning_rate": 0.00019999821143992677, - "loss": 46.0, - "step": 11832 - }, - { - "epoch": 1.9055920125608923, - "grad_norm": 0.007428744807839394, - "learning_rate": 0.0001999982111373332, - "loss": 46.0, - "step": 11833 - }, - { - "epoch": 1.9057530496396795, - "grad_norm": 0.0004149885498918593, - "learning_rate": 0.00019999821083471409, - "loss": 46.0, - "step": 11834 - }, - { - "epoch": 1.905914086718467, - "grad_norm": 0.0016499670455232263, - "learning_rate": 0.00019999821053206938, - "loss": 46.0, - "step": 11835 - }, - { - "epoch": 1.9060751237972542, - "grad_norm": 0.0011334343580529094, - "learning_rate": 0.00019999821022939905, - "loss": 46.0, - "step": 11836 - }, - { - "epoch": 1.9062361608760416, - "grad_norm": 0.0021435662638396025, - "learning_rate": 0.00019999820992670314, - "loss": 46.0, - "step": 11837 - }, - { - "epoch": 1.906397197954829, - "grad_norm": 0.0036536231637001038, - "learning_rate": 0.00019999820962398164, - "loss": 46.0, - "step": 11838 - }, - { - "epoch": 1.9065582350336165, - "grad_norm": 0.005240217316895723, - "learning_rate": 0.00019999820932123453, - "loss": 46.0, - "step": 11839 - }, - { - "epoch": 1.906719272112404, - "grad_norm": 0.004845750518143177, - "learning_rate": 0.00019999820901846183, - "loss": 46.0, - "step": 11840 - }, - { - "epoch": 1.9068803091911912, - "grad_norm": 0.008263902738690376, - "learning_rate": 0.00019999820871566355, - "loss": 46.0, - "step": 11841 - }, - { - "epoch": 1.9070413462699787, - "grad_norm": 0.0008414392941631377, - "learning_rate": 0.00019999820841283964, - "loss": 46.0, - "step": 11842 - }, - { - "epoch": 1.907202383348766, - "grad_norm": 0.002106528962031007, - "learning_rate": 0.00019999820810999018, - "loss": 46.0, - "step": 11843 - }, - { - "epoch": 1.9073634204275534, - "grad_norm": 0.000593257718719542, - "learning_rate": 0.0001999982078071151, - "loss": 46.0, - "step": 11844 - }, - { - "epoch": 1.9075244575063408, - "grad_norm": 0.0013861947227269411, - "learning_rate": 0.00019999820750421442, - "loss": 46.0, - "step": 11845 - }, - { - "epoch": 1.9076854945851283, - "grad_norm": 0.0005206508794799447, - "learning_rate": 0.00019999820720128817, - "loss": 46.0, - "step": 11846 - }, - { - "epoch": 1.9078465316639157, - "grad_norm": 0.00434001674875617, - "learning_rate": 0.00019999820689833633, - "loss": 46.0, - "step": 11847 - }, - { - "epoch": 1.9080075687427032, - "grad_norm": 0.007575648836791515, - "learning_rate": 0.00019999820659535885, - "loss": 46.0, - "step": 11848 - }, - { - "epoch": 1.9081686058214904, - "grad_norm": 0.002601374639198184, - "learning_rate": 0.00019999820629235582, - "loss": 46.0, - "step": 11849 - }, - { - "epoch": 1.9083296429002776, - "grad_norm": 0.00039806030690670013, - "learning_rate": 0.0001999982059893272, - "loss": 46.0, - "step": 11850 - }, - { - "epoch": 1.908490679979065, - "grad_norm": 0.0015142896445468068, - "learning_rate": 0.00019999820568627295, - "loss": 46.0, - "step": 11851 - }, - { - "epoch": 1.9086517170578525, - "grad_norm": 0.003050045343115926, - "learning_rate": 0.00019999820538319312, - "loss": 46.0, - "step": 11852 - }, - { - "epoch": 1.90881275413664, - "grad_norm": 0.0016080980421975255, - "learning_rate": 0.00019999820508008768, - "loss": 46.0, - "step": 11853 - }, - { - "epoch": 1.9089737912154274, - "grad_norm": 0.005588393658399582, - "learning_rate": 0.00019999820477695668, - "loss": 46.0, - "step": 11854 - }, - { - "epoch": 1.9091348282942149, - "grad_norm": 0.0015004201559349895, - "learning_rate": 0.00019999820447380007, - "loss": 46.0, - "step": 11855 - }, - { - "epoch": 1.909295865373002, - "grad_norm": 0.0038349470123648643, - "learning_rate": 0.00019999820417061786, - "loss": 46.0, - "step": 11856 - }, - { - "epoch": 1.9094569024517896, - "grad_norm": 0.0018715556943789124, - "learning_rate": 0.00019999820386741007, - "loss": 46.0, - "step": 11857 - }, - { - "epoch": 1.9096179395305768, - "grad_norm": 0.0017635863041505218, - "learning_rate": 0.00019999820356417667, - "loss": 46.0, - "step": 11858 - }, - { - "epoch": 1.9097789766093642, - "grad_norm": 0.0016591364983469248, - "learning_rate": 0.00019999820326091768, - "loss": 46.0, - "step": 11859 - }, - { - "epoch": 1.9099400136881517, - "grad_norm": 0.00836408045142889, - "learning_rate": 0.0001999982029576331, - "loss": 46.0, - "step": 11860 - }, - { - "epoch": 1.9101010507669391, - "grad_norm": 0.0060034324415028095, - "learning_rate": 0.0001999982026543229, - "loss": 46.0, - "step": 11861 - }, - { - "epoch": 1.9102620878457266, - "grad_norm": 0.0016696983948349953, - "learning_rate": 0.00019999820235098713, - "loss": 46.0, - "step": 11862 - }, - { - "epoch": 1.9104231249245138, - "grad_norm": 0.009441199712455273, - "learning_rate": 0.00019999820204762576, - "loss": 46.0, - "step": 11863 - }, - { - "epoch": 1.9105841620033013, - "grad_norm": 0.0008062032284215093, - "learning_rate": 0.0001999982017442388, - "loss": 46.0, - "step": 11864 - }, - { - "epoch": 1.9107451990820885, - "grad_norm": 0.0006342515698634088, - "learning_rate": 0.00019999820144082624, - "loss": 46.0, - "step": 11865 - }, - { - "epoch": 1.910906236160876, - "grad_norm": 0.0013712665531784296, - "learning_rate": 0.0001999982011373881, - "loss": 46.0, - "step": 11866 - }, - { - "epoch": 1.9110672732396634, - "grad_norm": 0.007080291397869587, - "learning_rate": 0.00019999820083392437, - "loss": 46.0, - "step": 11867 - }, - { - "epoch": 1.9112283103184509, - "grad_norm": 0.002430324675515294, - "learning_rate": 0.000199998200530435, - "loss": 46.0, - "step": 11868 - }, - { - "epoch": 1.9113893473972383, - "grad_norm": 0.000615751720033586, - "learning_rate": 0.00019999820022692006, - "loss": 46.0, - "step": 11869 - }, - { - "epoch": 1.9115503844760258, - "grad_norm": 0.005030791740864515, - "learning_rate": 0.00019999819992337953, - "loss": 46.0, - "step": 11870 - }, - { - "epoch": 1.911711421554813, - "grad_norm": 0.0007704586023464799, - "learning_rate": 0.00019999819961981344, - "loss": 46.0, - "step": 11871 - }, - { - "epoch": 1.9118724586336002, - "grad_norm": 0.0034569334238767624, - "learning_rate": 0.0001999981993162217, - "loss": 46.0, - "step": 11872 - }, - { - "epoch": 1.9120334957123877, - "grad_norm": 0.006942248437553644, - "learning_rate": 0.0001999981990126044, - "loss": 46.0, - "step": 11873 - }, - { - "epoch": 1.9121945327911751, - "grad_norm": 0.001468125730752945, - "learning_rate": 0.0001999981987089615, - "loss": 46.0, - "step": 11874 - }, - { - "epoch": 1.9123555698699626, - "grad_norm": 0.0004956931807100773, - "learning_rate": 0.000199998198405293, - "loss": 46.0, - "step": 11875 - }, - { - "epoch": 1.91251660694875, - "grad_norm": 0.000587780843488872, - "learning_rate": 0.0001999981981015989, - "loss": 46.0, - "step": 11876 - }, - { - "epoch": 1.9126776440275375, - "grad_norm": 0.0024365379940718412, - "learning_rate": 0.0001999981977978792, - "loss": 46.0, - "step": 11877 - }, - { - "epoch": 1.9128386811063247, - "grad_norm": 0.0019610903691500425, - "learning_rate": 0.00019999819749413393, - "loss": 46.0, - "step": 11878 - }, - { - "epoch": 1.9129997181851122, - "grad_norm": 0.0007827310473658144, - "learning_rate": 0.00019999819719036306, - "loss": 46.0, - "step": 11879 - }, - { - "epoch": 1.9131607552638994, - "grad_norm": 0.0012719838414341211, - "learning_rate": 0.00019999819688656657, - "loss": 46.0, - "step": 11880 - }, - { - "epoch": 1.9133217923426868, - "grad_norm": 0.0010255719535052776, - "learning_rate": 0.00019999819658274453, - "loss": 46.0, - "step": 11881 - }, - { - "epoch": 1.9134828294214743, - "grad_norm": 0.0011179463472217321, - "learning_rate": 0.00019999819627889684, - "loss": 46.0, - "step": 11882 - }, - { - "epoch": 1.9136438665002617, - "grad_norm": 0.0008510363404639065, - "learning_rate": 0.0001999981959750236, - "loss": 46.0, - "step": 11883 - }, - { - "epoch": 1.9138049035790492, - "grad_norm": 0.0035883383825421333, - "learning_rate": 0.00019999819567112473, - "loss": 46.0, - "step": 11884 - }, - { - "epoch": 1.9139659406578364, - "grad_norm": 0.0015765272546559572, - "learning_rate": 0.0001999981953672003, - "loss": 46.0, - "step": 11885 - }, - { - "epoch": 1.9141269777366239, - "grad_norm": 0.006658013444393873, - "learning_rate": 0.00019999819506325025, - "loss": 46.0, - "step": 11886 - }, - { - "epoch": 1.914288014815411, - "grad_norm": 0.0028836934361606836, - "learning_rate": 0.0001999981947592746, - "loss": 46.0, - "step": 11887 - }, - { - "epoch": 1.9144490518941986, - "grad_norm": 0.008031723089516163, - "learning_rate": 0.0001999981944552734, - "loss": 46.0, - "step": 11888 - }, - { - "epoch": 1.914610088972986, - "grad_norm": 0.0014731878181919456, - "learning_rate": 0.0001999981941512466, - "loss": 46.0, - "step": 11889 - }, - { - "epoch": 1.9147711260517735, - "grad_norm": 0.006518073845654726, - "learning_rate": 0.00019999819384719416, - "loss": 46.0, - "step": 11890 - }, - { - "epoch": 1.914932163130561, - "grad_norm": 0.0006891588564030826, - "learning_rate": 0.00019999819354311616, - "loss": 46.0, - "step": 11891 - }, - { - "epoch": 1.9150932002093484, - "grad_norm": 0.004762472119182348, - "learning_rate": 0.00019999819323901255, - "loss": 46.0, - "step": 11892 - }, - { - "epoch": 1.9152542372881356, - "grad_norm": 0.007304450962692499, - "learning_rate": 0.00019999819293488335, - "loss": 46.0, - "step": 11893 - }, - { - "epoch": 1.9154152743669228, - "grad_norm": 0.0039252061396837234, - "learning_rate": 0.00019999819263072857, - "loss": 46.0, - "step": 11894 - }, - { - "epoch": 1.9155763114457103, - "grad_norm": 0.0012136398581787944, - "learning_rate": 0.00019999819232654814, - "loss": 46.0, - "step": 11895 - }, - { - "epoch": 1.9157373485244977, - "grad_norm": 0.0037531491834670305, - "learning_rate": 0.00019999819202234218, - "loss": 46.0, - "step": 11896 - }, - { - "epoch": 1.9158983856032852, - "grad_norm": 0.0005400461377575994, - "learning_rate": 0.0001999981917181106, - "loss": 46.0, - "step": 11897 - }, - { - "epoch": 1.9160594226820726, - "grad_norm": 0.017477765679359436, - "learning_rate": 0.00019999819141385344, - "loss": 46.0, - "step": 11898 - }, - { - "epoch": 1.91622045976086, - "grad_norm": 0.002094994531944394, - "learning_rate": 0.00019999819110957067, - "loss": 46.0, - "step": 11899 - }, - { - "epoch": 1.9163814968396473, - "grad_norm": 0.002632502233609557, - "learning_rate": 0.0001999981908052623, - "loss": 46.0, - "step": 11900 - }, - { - "epoch": 1.9165425339184348, - "grad_norm": 0.0007712795049883425, - "learning_rate": 0.00019999819050092835, - "loss": 46.0, - "step": 11901 - }, - { - "epoch": 1.916703570997222, - "grad_norm": 0.002002400578930974, - "learning_rate": 0.0001999981901965688, - "loss": 46.0, - "step": 11902 - }, - { - "epoch": 1.9168646080760094, - "grad_norm": 0.006843820679932833, - "learning_rate": 0.00019999818989218366, - "loss": 46.0, - "step": 11903 - }, - { - "epoch": 1.917025645154797, - "grad_norm": 0.0019006015500053763, - "learning_rate": 0.00019999818958777292, - "loss": 46.0, - "step": 11904 - }, - { - "epoch": 1.9171866822335843, - "grad_norm": 0.0035611672792583704, - "learning_rate": 0.00019999818928333657, - "loss": 46.0, - "step": 11905 - }, - { - "epoch": 1.9173477193123718, - "grad_norm": 0.0006191295105963945, - "learning_rate": 0.00019999818897887465, - "loss": 46.0, - "step": 11906 - }, - { - "epoch": 1.917508756391159, - "grad_norm": 0.004043434746563435, - "learning_rate": 0.00019999818867438713, - "loss": 46.0, - "step": 11907 - }, - { - "epoch": 1.9176697934699465, - "grad_norm": 0.0005644129705615342, - "learning_rate": 0.000199998188369874, - "loss": 46.0, - "step": 11908 - }, - { - "epoch": 1.9178308305487337, - "grad_norm": 0.0009777158265933394, - "learning_rate": 0.0001999981880653353, - "loss": 46.0, - "step": 11909 - }, - { - "epoch": 1.9179918676275212, - "grad_norm": 0.0033065034076571465, - "learning_rate": 0.00019999818776077102, - "loss": 46.0, - "step": 11910 - }, - { - "epoch": 1.9181529047063086, - "grad_norm": 0.003878120332956314, - "learning_rate": 0.00019999818745618112, - "loss": 46.0, - "step": 11911 - }, - { - "epoch": 1.918313941785096, - "grad_norm": 0.0016693917568773031, - "learning_rate": 0.00019999818715156563, - "loss": 46.0, - "step": 11912 - }, - { - "epoch": 1.9184749788638835, - "grad_norm": 0.0008196932612918317, - "learning_rate": 0.00019999818684692452, - "loss": 46.0, - "step": 11913 - }, - { - "epoch": 1.9186360159426707, - "grad_norm": 0.00625289510935545, - "learning_rate": 0.00019999818654225786, - "loss": 46.0, - "step": 11914 - }, - { - "epoch": 1.9187970530214582, - "grad_norm": 0.0007819284801371396, - "learning_rate": 0.00019999818623756558, - "loss": 46.0, - "step": 11915 - }, - { - "epoch": 1.9189580901002454, - "grad_norm": 0.005089349113404751, - "learning_rate": 0.00019999818593284768, - "loss": 46.0, - "step": 11916 - }, - { - "epoch": 1.9191191271790329, - "grad_norm": 0.0016359409783035517, - "learning_rate": 0.00019999818562810423, - "loss": 46.0, - "step": 11917 - }, - { - "epoch": 1.9192801642578203, - "grad_norm": 0.007776940241456032, - "learning_rate": 0.00019999818532333516, - "loss": 46.0, - "step": 11918 - }, - { - "epoch": 1.9194412013366078, - "grad_norm": 0.0012445615138858557, - "learning_rate": 0.00019999818501854053, - "loss": 46.0, - "step": 11919 - }, - { - "epoch": 1.9196022384153952, - "grad_norm": 0.012229708023369312, - "learning_rate": 0.00019999818471372026, - "loss": 46.0, - "step": 11920 - }, - { - "epoch": 1.9197632754941827, - "grad_norm": 0.0050226133316755295, - "learning_rate": 0.00019999818440887443, - "loss": 46.0, - "step": 11921 - }, - { - "epoch": 1.91992431257297, - "grad_norm": 0.0026193547528237104, - "learning_rate": 0.00019999818410400296, - "loss": 46.0, - "step": 11922 - }, - { - "epoch": 1.9200853496517574, - "grad_norm": 0.0010318810818716884, - "learning_rate": 0.00019999818379910596, - "loss": 46.0, - "step": 11923 - }, - { - "epoch": 1.9202463867305446, - "grad_norm": 0.0027994862757623196, - "learning_rate": 0.00019999818349418334, - "loss": 46.0, - "step": 11924 - }, - { - "epoch": 1.920407423809332, - "grad_norm": 0.0026863550301641226, - "learning_rate": 0.0001999981831892351, - "loss": 46.0, - "step": 11925 - }, - { - "epoch": 1.9205684608881195, - "grad_norm": 0.0015136533183977008, - "learning_rate": 0.0001999981828842613, - "loss": 46.0, - "step": 11926 - }, - { - "epoch": 1.920729497966907, - "grad_norm": 0.005652355030179024, - "learning_rate": 0.00019999818257926188, - "loss": 46.0, - "step": 11927 - }, - { - "epoch": 1.9208905350456944, - "grad_norm": 0.00524381035938859, - "learning_rate": 0.0001999981822742369, - "loss": 46.0, - "step": 11928 - }, - { - "epoch": 1.9210515721244816, - "grad_norm": 0.0036435918882489204, - "learning_rate": 0.0001999981819691863, - "loss": 46.0, - "step": 11929 - }, - { - "epoch": 1.921212609203269, - "grad_norm": 0.0010698448168113828, - "learning_rate": 0.00019999818166411008, - "loss": 46.0, - "step": 11930 - }, - { - "epoch": 1.9213736462820563, - "grad_norm": 0.003018536139279604, - "learning_rate": 0.0001999981813590083, - "loss": 46.0, - "step": 11931 - }, - { - "epoch": 1.9215346833608438, - "grad_norm": 0.0017038368387147784, - "learning_rate": 0.0001999981810538809, - "loss": 46.0, - "step": 11932 - }, - { - "epoch": 1.9216957204396312, - "grad_norm": 0.0009454243700020015, - "learning_rate": 0.00019999818074872795, - "loss": 46.0, - "step": 11933 - }, - { - "epoch": 1.9218567575184187, - "grad_norm": 0.004435803275555372, - "learning_rate": 0.00019999818044354938, - "loss": 46.0, - "step": 11934 - }, - { - "epoch": 1.9220177945972061, - "grad_norm": 0.0007561726961284876, - "learning_rate": 0.00019999818013834522, - "loss": 46.0, - "step": 11935 - }, - { - "epoch": 1.9221788316759933, - "grad_norm": 0.0016916776075959206, - "learning_rate": 0.00019999817983311547, - "loss": 46.0, - "step": 11936 - }, - { - "epoch": 1.9223398687547808, - "grad_norm": 0.0016145188128575683, - "learning_rate": 0.00019999817952786009, - "loss": 46.0, - "step": 11937 - }, - { - "epoch": 1.922500905833568, - "grad_norm": 0.0007525762193836272, - "learning_rate": 0.00019999817922257914, - "loss": 46.0, - "step": 11938 - }, - { - "epoch": 1.9226619429123555, - "grad_norm": 0.001972717000171542, - "learning_rate": 0.0001999981789172726, - "loss": 46.0, - "step": 11939 - }, - { - "epoch": 1.922822979991143, - "grad_norm": 0.001384893897920847, - "learning_rate": 0.00019999817861194048, - "loss": 46.0, - "step": 11940 - }, - { - "epoch": 1.9229840170699304, - "grad_norm": 0.0035784346982836723, - "learning_rate": 0.00019999817830658275, - "loss": 46.0, - "step": 11941 - }, - { - "epoch": 1.9231450541487178, - "grad_norm": 0.005776277277618647, - "learning_rate": 0.00019999817800119942, - "loss": 46.0, - "step": 11942 - }, - { - "epoch": 1.9233060912275053, - "grad_norm": 0.0026387604884803295, - "learning_rate": 0.0001999981776957905, - "loss": 46.0, - "step": 11943 - }, - { - "epoch": 1.9234671283062925, - "grad_norm": 0.0018318184884265065, - "learning_rate": 0.000199998177390356, - "loss": 46.0, - "step": 11944 - }, - { - "epoch": 1.92362816538508, - "grad_norm": 0.0019589755684137344, - "learning_rate": 0.00019999817708489588, - "loss": 46.0, - "step": 11945 - }, - { - "epoch": 1.9237892024638672, - "grad_norm": 0.0005852350150234997, - "learning_rate": 0.00019999817677941018, - "loss": 46.0, - "step": 11946 - }, - { - "epoch": 1.9239502395426546, - "grad_norm": 0.0018827073508873582, - "learning_rate": 0.00019999817647389887, - "loss": 46.0, - "step": 11947 - }, - { - "epoch": 1.924111276621442, - "grad_norm": 0.0016252314671874046, - "learning_rate": 0.000199998176168362, - "loss": 46.0, - "step": 11948 - }, - { - "epoch": 1.9242723137002296, - "grad_norm": 0.00324031594209373, - "learning_rate": 0.0001999981758627995, - "loss": 46.0, - "step": 11949 - }, - { - "epoch": 1.924433350779017, - "grad_norm": 0.0012846144381910563, - "learning_rate": 0.0001999981755572114, - "loss": 46.0, - "step": 11950 - }, - { - "epoch": 1.9245943878578042, - "grad_norm": 0.0031126427929848433, - "learning_rate": 0.00019999817525159774, - "loss": 46.0, - "step": 11951 - }, - { - "epoch": 1.9247554249365917, - "grad_norm": 0.0015743878902867436, - "learning_rate": 0.00019999817494595847, - "loss": 46.0, - "step": 11952 - }, - { - "epoch": 1.924916462015379, - "grad_norm": 0.0008324464433826506, - "learning_rate": 0.0001999981746402936, - "loss": 46.0, - "step": 11953 - }, - { - "epoch": 1.9250774990941664, - "grad_norm": 0.0020896082278341055, - "learning_rate": 0.00019999817433460316, - "loss": 46.0, - "step": 11954 - }, - { - "epoch": 1.9252385361729538, - "grad_norm": 0.004017847124487162, - "learning_rate": 0.0001999981740288871, - "loss": 46.0, - "step": 11955 - }, - { - "epoch": 1.9253995732517413, - "grad_norm": 0.002839105436578393, - "learning_rate": 0.00019999817372314544, - "loss": 46.0, - "step": 11956 - }, - { - "epoch": 1.9255606103305287, - "grad_norm": 0.0010540698422119021, - "learning_rate": 0.0001999981734173782, - "loss": 46.0, - "step": 11957 - }, - { - "epoch": 1.925721647409316, - "grad_norm": 0.0012023458257317543, - "learning_rate": 0.00019999817311158537, - "loss": 46.0, - "step": 11958 - }, - { - "epoch": 1.9258826844881034, - "grad_norm": 0.00403686985373497, - "learning_rate": 0.00019999817280576693, - "loss": 46.0, - "step": 11959 - }, - { - "epoch": 1.9260437215668906, - "grad_norm": 0.001765172346495092, - "learning_rate": 0.0001999981724999229, - "loss": 46.0, - "step": 11960 - }, - { - "epoch": 1.926204758645678, - "grad_norm": 0.004205659963190556, - "learning_rate": 0.0001999981721940533, - "loss": 46.0, - "step": 11961 - }, - { - "epoch": 1.9263657957244655, - "grad_norm": 0.0014108573086559772, - "learning_rate": 0.0001999981718881581, - "loss": 46.0, - "step": 11962 - }, - { - "epoch": 1.926526832803253, - "grad_norm": 0.0016446582740172744, - "learning_rate": 0.00019999817158223725, - "loss": 46.0, - "step": 11963 - }, - { - "epoch": 1.9266878698820404, - "grad_norm": 0.008473742753267288, - "learning_rate": 0.00019999817127629087, - "loss": 46.0, - "step": 11964 - }, - { - "epoch": 1.926848906960828, - "grad_norm": 0.0026354636065661907, - "learning_rate": 0.00019999817097031888, - "loss": 46.0, - "step": 11965 - }, - { - "epoch": 1.9270099440396151, - "grad_norm": 0.0021241875365376472, - "learning_rate": 0.0001999981706643213, - "loss": 46.0, - "step": 11966 - }, - { - "epoch": 1.9271709811184023, - "grad_norm": 0.0019948366098105907, - "learning_rate": 0.00019999817035829808, - "loss": 46.0, - "step": 11967 - }, - { - "epoch": 1.9273320181971898, - "grad_norm": 0.0019483129726722836, - "learning_rate": 0.00019999817005224933, - "loss": 46.0, - "step": 11968 - }, - { - "epoch": 1.9274930552759773, - "grad_norm": 0.001787984510883689, - "learning_rate": 0.00019999816974617494, - "loss": 46.0, - "step": 11969 - }, - { - "epoch": 1.9276540923547647, - "grad_norm": 0.001117595355026424, - "learning_rate": 0.00019999816944007499, - "loss": 46.0, - "step": 11970 - }, - { - "epoch": 1.9278151294335522, - "grad_norm": 0.003120437962934375, - "learning_rate": 0.00019999816913394942, - "loss": 46.0, - "step": 11971 - }, - { - "epoch": 1.9279761665123396, - "grad_norm": 0.003077798755839467, - "learning_rate": 0.00019999816882779824, - "loss": 46.0, - "step": 11972 - }, - { - "epoch": 1.9281372035911268, - "grad_norm": 0.001689390861429274, - "learning_rate": 0.0001999981685216215, - "loss": 46.0, - "step": 11973 - }, - { - "epoch": 1.9282982406699143, - "grad_norm": 0.0013024532236158848, - "learning_rate": 0.00019999816821541914, - "loss": 46.0, - "step": 11974 - }, - { - "epoch": 1.9284592777487015, - "grad_norm": 0.0056304638274014, - "learning_rate": 0.00019999816790919122, - "loss": 46.0, - "step": 11975 - }, - { - "epoch": 1.928620314827489, - "grad_norm": 0.007379127666354179, - "learning_rate": 0.0001999981676029377, - "loss": 46.0, - "step": 11976 - }, - { - "epoch": 1.9287813519062764, - "grad_norm": 0.00048339113709516823, - "learning_rate": 0.00019999816729665857, - "loss": 46.0, - "step": 11977 - }, - { - "epoch": 1.9289423889850639, - "grad_norm": 0.007068888284265995, - "learning_rate": 0.00019999816699035384, - "loss": 46.0, - "step": 11978 - }, - { - "epoch": 1.9291034260638513, - "grad_norm": 0.0009395984816364944, - "learning_rate": 0.00019999816668402352, - "loss": 46.0, - "step": 11979 - }, - { - "epoch": 1.9292644631426386, - "grad_norm": 0.0007488145492970943, - "learning_rate": 0.0001999981663776676, - "loss": 46.0, - "step": 11980 - }, - { - "epoch": 1.929425500221426, - "grad_norm": 0.0009382746065966785, - "learning_rate": 0.0001999981660712861, - "loss": 46.0, - "step": 11981 - }, - { - "epoch": 1.9295865373002132, - "grad_norm": 0.002310621552169323, - "learning_rate": 0.000199998165764879, - "loss": 46.0, - "step": 11982 - }, - { - "epoch": 1.9297475743790007, - "grad_norm": 0.0011846248526126146, - "learning_rate": 0.0001999981654584463, - "loss": 46.0, - "step": 11983 - }, - { - "epoch": 1.9299086114577881, - "grad_norm": 0.0026515538338571787, - "learning_rate": 0.00019999816515198798, - "loss": 46.0, - "step": 11984 - }, - { - "epoch": 1.9300696485365756, - "grad_norm": 0.003187221474945545, - "learning_rate": 0.00019999816484550414, - "loss": 46.0, - "step": 11985 - }, - { - "epoch": 1.930230685615363, - "grad_norm": 0.0017586504109203815, - "learning_rate": 0.00019999816453899466, - "loss": 46.0, - "step": 11986 - }, - { - "epoch": 1.9303917226941505, - "grad_norm": 0.0070524695329368114, - "learning_rate": 0.00019999816423245956, - "loss": 46.0, - "step": 11987 - }, - { - "epoch": 1.9305527597729377, - "grad_norm": 0.0031549998093396425, - "learning_rate": 0.0001999981639258989, - "loss": 46.0, - "step": 11988 - }, - { - "epoch": 1.930713796851725, - "grad_norm": 0.00520916236564517, - "learning_rate": 0.00019999816361931263, - "loss": 46.0, - "step": 11989 - }, - { - "epoch": 1.9308748339305124, - "grad_norm": 0.0011408912250772119, - "learning_rate": 0.00019999816331270077, - "loss": 46.0, - "step": 11990 - }, - { - "epoch": 1.9310358710092999, - "grad_norm": 0.001446875394321978, - "learning_rate": 0.00019999816300606332, - "loss": 46.0, - "step": 11991 - }, - { - "epoch": 1.9311969080880873, - "grad_norm": 0.0007984877447597682, - "learning_rate": 0.0001999981626994003, - "loss": 46.0, - "step": 11992 - }, - { - "epoch": 1.9313579451668748, - "grad_norm": 0.000950752233620733, - "learning_rate": 0.00019999816239271164, - "loss": 46.0, - "step": 11993 - }, - { - "epoch": 1.9315189822456622, - "grad_norm": 0.0015661190263926983, - "learning_rate": 0.0001999981620859974, - "loss": 46.0, - "step": 11994 - }, - { - "epoch": 1.9316800193244494, - "grad_norm": 0.010314003564417362, - "learning_rate": 0.00019999816177925758, - "loss": 46.0, - "step": 11995 - }, - { - "epoch": 1.931841056403237, - "grad_norm": 0.0005025442806072533, - "learning_rate": 0.00019999816147249215, - "loss": 46.0, - "step": 11996 - }, - { - "epoch": 1.9320020934820241, - "grad_norm": 0.0019186304416507483, - "learning_rate": 0.00019999816116570112, - "loss": 46.0, - "step": 11997 - }, - { - "epoch": 1.9321631305608116, - "grad_norm": 0.006390773691236973, - "learning_rate": 0.0001999981608588845, - "loss": 46.0, - "step": 11998 - }, - { - "epoch": 1.932324167639599, - "grad_norm": 0.0012256887275725603, - "learning_rate": 0.00019999816055204231, - "loss": 46.0, - "step": 11999 - }, - { - "epoch": 1.9324852047183865, - "grad_norm": 0.0008525012526661158, - "learning_rate": 0.0001999981602451745, - "loss": 46.0, - "step": 12000 - }, - { - "epoch": 1.932646241797174, - "grad_norm": 0.0026415903121232986, - "learning_rate": 0.00019999815993828113, - "loss": 46.0, - "step": 12001 - }, - { - "epoch": 1.9328072788759612, - "grad_norm": 0.0022421609610319138, - "learning_rate": 0.00019999815963136212, - "loss": 46.0, - "step": 12002 - }, - { - "epoch": 1.9329683159547486, - "grad_norm": 0.0009753041085787117, - "learning_rate": 0.00019999815932441754, - "loss": 46.0, - "step": 12003 - }, - { - "epoch": 1.9331293530335358, - "grad_norm": 0.007640308700501919, - "learning_rate": 0.00019999815901744738, - "loss": 46.0, - "step": 12004 - }, - { - "epoch": 1.9332903901123233, - "grad_norm": 0.0008999984711408615, - "learning_rate": 0.0001999981587104516, - "loss": 46.0, - "step": 12005 - }, - { - "epoch": 1.9334514271911107, - "grad_norm": 0.005782170686870813, - "learning_rate": 0.00019999815840343024, - "loss": 46.0, - "step": 12006 - }, - { - "epoch": 1.9336124642698982, - "grad_norm": 0.0008568445919081569, - "learning_rate": 0.00019999815809638327, - "loss": 46.0, - "step": 12007 - }, - { - "epoch": 1.9337735013486856, - "grad_norm": 0.002353344112634659, - "learning_rate": 0.00019999815778931073, - "loss": 46.0, - "step": 12008 - }, - { - "epoch": 1.9339345384274729, - "grad_norm": 0.00510431220754981, - "learning_rate": 0.00019999815748221255, - "loss": 46.0, - "step": 12009 - }, - { - "epoch": 1.9340955755062603, - "grad_norm": 0.006691760383546352, - "learning_rate": 0.0001999981571750888, - "loss": 46.0, - "step": 12010 - }, - { - "epoch": 1.9342566125850476, - "grad_norm": 0.0024249027483165264, - "learning_rate": 0.0001999981568679395, - "loss": 46.0, - "step": 12011 - }, - { - "epoch": 1.934417649663835, - "grad_norm": 0.0032153818756341934, - "learning_rate": 0.00019999815656076455, - "loss": 46.0, - "step": 12012 - }, - { - "epoch": 1.9345786867426225, - "grad_norm": 0.007203936576843262, - "learning_rate": 0.00019999815625356402, - "loss": 46.0, - "step": 12013 - }, - { - "epoch": 1.93473972382141, - "grad_norm": 0.0010269597405567765, - "learning_rate": 0.00019999815594633788, - "loss": 46.0, - "step": 12014 - }, - { - "epoch": 1.9349007609001974, - "grad_norm": 0.001805074862204492, - "learning_rate": 0.0001999981556390862, - "loss": 46.0, - "step": 12015 - }, - { - "epoch": 1.9350617979789848, - "grad_norm": 0.004218493588268757, - "learning_rate": 0.0001999981553318089, - "loss": 46.0, - "step": 12016 - }, - { - "epoch": 1.935222835057772, - "grad_norm": 0.0006299649248830974, - "learning_rate": 0.00019999815502450596, - "loss": 46.0, - "step": 12017 - }, - { - "epoch": 1.9353838721365595, - "grad_norm": 0.009320312179625034, - "learning_rate": 0.00019999815471717747, - "loss": 46.0, - "step": 12018 - }, - { - "epoch": 1.9355449092153467, - "grad_norm": 0.009105801582336426, - "learning_rate": 0.00019999815440982337, - "loss": 46.0, - "step": 12019 - }, - { - "epoch": 1.9357059462941342, - "grad_norm": 0.0045250155963003635, - "learning_rate": 0.0001999981541024437, - "loss": 46.0, - "step": 12020 - }, - { - "epoch": 1.9358669833729216, - "grad_norm": 0.0034765484742820263, - "learning_rate": 0.0001999981537950384, - "loss": 46.0, - "step": 12021 - }, - { - "epoch": 1.936028020451709, - "grad_norm": 0.0009283124818466604, - "learning_rate": 0.0001999981534876075, - "loss": 46.0, - "step": 12022 - }, - { - "epoch": 1.9361890575304965, - "grad_norm": 0.0021041701547801495, - "learning_rate": 0.00019999815318015105, - "loss": 46.0, - "step": 12023 - }, - { - "epoch": 1.9363500946092838, - "grad_norm": 0.001083271112293005, - "learning_rate": 0.00019999815287266898, - "loss": 46.0, - "step": 12024 - }, - { - "epoch": 1.9365111316880712, - "grad_norm": 0.0017777506727725267, - "learning_rate": 0.00019999815256516133, - "loss": 46.0, - "step": 12025 - }, - { - "epoch": 1.9366721687668584, - "grad_norm": 0.002739750314503908, - "learning_rate": 0.00019999815225762806, - "loss": 46.0, - "step": 12026 - }, - { - "epoch": 1.9368332058456459, - "grad_norm": 0.0025823721662163734, - "learning_rate": 0.0001999981519500692, - "loss": 46.0, - "step": 12027 - }, - { - "epoch": 1.9369942429244333, - "grad_norm": 0.0014158979756757617, - "learning_rate": 0.0001999981516424848, - "loss": 46.0, - "step": 12028 - }, - { - "epoch": 1.9371552800032208, - "grad_norm": 0.004590214695781469, - "learning_rate": 0.00019999815133487473, - "loss": 46.0, - "step": 12029 - }, - { - "epoch": 1.9373163170820082, - "grad_norm": 0.00962040200829506, - "learning_rate": 0.00019999815102723911, - "loss": 46.0, - "step": 12030 - }, - { - "epoch": 1.9374773541607955, - "grad_norm": 0.0015417892718687654, - "learning_rate": 0.00019999815071957788, - "loss": 46.0, - "step": 12031 - }, - { - "epoch": 1.937638391239583, - "grad_norm": 0.0013389629311859608, - "learning_rate": 0.00019999815041189104, - "loss": 46.0, - "step": 12032 - }, - { - "epoch": 1.9377994283183702, - "grad_norm": 0.0007798003498464823, - "learning_rate": 0.00019999815010417863, - "loss": 46.0, - "step": 12033 - }, - { - "epoch": 1.9379604653971576, - "grad_norm": 0.00914925429970026, - "learning_rate": 0.00019999814979644064, - "loss": 46.0, - "step": 12034 - }, - { - "epoch": 1.938121502475945, - "grad_norm": 0.001808097935281694, - "learning_rate": 0.00019999814948867703, - "loss": 46.0, - "step": 12035 - }, - { - "epoch": 1.9382825395547325, - "grad_norm": 0.0011173714883625507, - "learning_rate": 0.0001999981491808878, - "loss": 46.0, - "step": 12036 - }, - { - "epoch": 1.93844357663352, - "grad_norm": 0.001478369114920497, - "learning_rate": 0.00019999814887307303, - "loss": 46.0, - "step": 12037 - }, - { - "epoch": 1.9386046137123074, - "grad_norm": 0.005412651225924492, - "learning_rate": 0.00019999814856523266, - "loss": 46.0, - "step": 12038 - }, - { - "epoch": 1.9387656507910946, - "grad_norm": 0.005451761186122894, - "learning_rate": 0.00019999814825736665, - "loss": 46.0, - "step": 12039 - }, - { - "epoch": 1.938926687869882, - "grad_norm": 0.0007236241362988949, - "learning_rate": 0.00019999814794947508, - "loss": 46.0, - "step": 12040 - }, - { - "epoch": 1.9390877249486693, - "grad_norm": 0.00831711757928133, - "learning_rate": 0.00019999814764155792, - "loss": 46.0, - "step": 12041 - }, - { - "epoch": 1.9392487620274568, - "grad_norm": 0.01236477680504322, - "learning_rate": 0.00019999814733361515, - "loss": 46.0, - "step": 12042 - }, - { - "epoch": 1.9394097991062442, - "grad_norm": 0.005869931075721979, - "learning_rate": 0.0001999981470256468, - "loss": 46.0, - "step": 12043 - }, - { - "epoch": 1.9395708361850317, - "grad_norm": 0.001580044045113027, - "learning_rate": 0.00019999814671765282, - "loss": 46.0, - "step": 12044 - }, - { - "epoch": 1.9397318732638191, - "grad_norm": 0.003241234226152301, - "learning_rate": 0.0001999981464096333, - "loss": 46.0, - "step": 12045 - }, - { - "epoch": 1.9398929103426064, - "grad_norm": 0.0021619251929223537, - "learning_rate": 0.00019999814610158816, - "loss": 46.0, - "step": 12046 - }, - { - "epoch": 1.9400539474213938, - "grad_norm": 0.0015432409709319472, - "learning_rate": 0.0001999981457935174, - "loss": 46.0, - "step": 12047 - }, - { - "epoch": 1.940214984500181, - "grad_norm": 0.0023786805104464293, - "learning_rate": 0.00019999814548542108, - "loss": 46.0, - "step": 12048 - }, - { - "epoch": 1.9403760215789685, - "grad_norm": 0.0018266643164679408, - "learning_rate": 0.00019999814517729917, - "loss": 46.0, - "step": 12049 - }, - { - "epoch": 1.940537058657756, - "grad_norm": 0.0009219483472406864, - "learning_rate": 0.00019999814486915162, - "loss": 46.0, - "step": 12050 - }, - { - "epoch": 1.9406980957365434, - "grad_norm": 0.011202193796634674, - "learning_rate": 0.0001999981445609785, - "loss": 46.0, - "step": 12051 - }, - { - "epoch": 1.9408591328153308, - "grad_norm": 0.002483846852555871, - "learning_rate": 0.0001999981442527798, - "loss": 46.0, - "step": 12052 - }, - { - "epoch": 1.941020169894118, - "grad_norm": 0.0009753602207638323, - "learning_rate": 0.00019999814394455553, - "loss": 46.0, - "step": 12053 - }, - { - "epoch": 1.9411812069729055, - "grad_norm": 0.0028262832202017307, - "learning_rate": 0.0001999981436363056, - "loss": 46.0, - "step": 12054 - }, - { - "epoch": 1.9413422440516928, - "grad_norm": 0.0019340101862326264, - "learning_rate": 0.00019999814332803011, - "loss": 46.0, - "step": 12055 - }, - { - "epoch": 1.9415032811304802, - "grad_norm": 0.0034789543133229017, - "learning_rate": 0.00019999814301972904, - "loss": 46.0, - "step": 12056 - }, - { - "epoch": 1.9416643182092677, - "grad_norm": 0.004788337275385857, - "learning_rate": 0.00019999814271140233, - "loss": 46.0, - "step": 12057 - }, - { - "epoch": 1.941825355288055, - "grad_norm": 0.002175217494368553, - "learning_rate": 0.00019999814240305005, - "loss": 46.0, - "step": 12058 - }, - { - "epoch": 1.9419863923668426, - "grad_norm": 0.004098773468285799, - "learning_rate": 0.00019999814209467222, - "loss": 46.0, - "step": 12059 - }, - { - "epoch": 1.94214742944563, - "grad_norm": 0.006668771151453257, - "learning_rate": 0.0001999981417862687, - "loss": 46.0, - "step": 12060 - }, - { - "epoch": 1.9423084665244172, - "grad_norm": 0.0009941081516444683, - "learning_rate": 0.00019999814147783968, - "loss": 46.0, - "step": 12061 - }, - { - "epoch": 1.9424695036032045, - "grad_norm": 0.008171027526259422, - "learning_rate": 0.00019999814116938502, - "loss": 46.0, - "step": 12062 - }, - { - "epoch": 1.942630540681992, - "grad_norm": 0.0019051500130444765, - "learning_rate": 0.00019999814086090476, - "loss": 46.0, - "step": 12063 - }, - { - "epoch": 1.9427915777607794, - "grad_norm": 0.0026300756726413965, - "learning_rate": 0.0001999981405523989, - "loss": 46.0, - "step": 12064 - }, - { - "epoch": 1.9429526148395668, - "grad_norm": 0.001015787827782333, - "learning_rate": 0.0001999981402438675, - "loss": 46.0, - "step": 12065 - }, - { - "epoch": 1.9431136519183543, - "grad_norm": 0.0007785811321809888, - "learning_rate": 0.00019999813993531044, - "loss": 46.0, - "step": 12066 - }, - { - "epoch": 1.9432746889971417, - "grad_norm": 0.001544971950352192, - "learning_rate": 0.00019999813962672783, - "loss": 46.0, - "step": 12067 - }, - { - "epoch": 1.943435726075929, - "grad_norm": 0.006294172257184982, - "learning_rate": 0.00019999813931811963, - "loss": 46.0, - "step": 12068 - }, - { - "epoch": 1.9435967631547164, - "grad_norm": 0.00624111806973815, - "learning_rate": 0.0001999981390094858, - "loss": 46.0, - "step": 12069 - }, - { - "epoch": 1.9437578002335036, - "grad_norm": 0.007776613347232342, - "learning_rate": 0.0001999981387008264, - "loss": 46.0, - "step": 12070 - }, - { - "epoch": 1.943918837312291, - "grad_norm": 0.004170961678028107, - "learning_rate": 0.0001999981383921414, - "loss": 46.0, - "step": 12071 - }, - { - "epoch": 1.9440798743910785, - "grad_norm": 0.0032628020271658897, - "learning_rate": 0.0001999981380834308, - "loss": 46.0, - "step": 12072 - }, - { - "epoch": 1.944240911469866, - "grad_norm": 0.004319639410823584, - "learning_rate": 0.0001999981377746946, - "loss": 46.0, - "step": 12073 - }, - { - "epoch": 1.9444019485486534, - "grad_norm": 0.00107934873085469, - "learning_rate": 0.00019999813746593282, - "loss": 46.0, - "step": 12074 - }, - { - "epoch": 1.9445629856274407, - "grad_norm": 0.0034845848567783833, - "learning_rate": 0.00019999813715714543, - "loss": 46.0, - "step": 12075 - }, - { - "epoch": 1.9447240227062281, - "grad_norm": 0.008552225306630135, - "learning_rate": 0.00019999813684833248, - "loss": 46.0, - "step": 12076 - }, - { - "epoch": 1.9448850597850154, - "grad_norm": 0.0025991920847445726, - "learning_rate": 0.00019999813653949388, - "loss": 46.0, - "step": 12077 - }, - { - "epoch": 1.9450460968638028, - "grad_norm": 0.001660397625528276, - "learning_rate": 0.00019999813623062973, - "loss": 46.0, - "step": 12078 - }, - { - "epoch": 1.9452071339425903, - "grad_norm": 0.0020927749574184418, - "learning_rate": 0.00019999813592173996, - "loss": 46.0, - "step": 12079 - }, - { - "epoch": 1.9453681710213777, - "grad_norm": 0.001581791671924293, - "learning_rate": 0.0001999981356128246, - "loss": 46.0, - "step": 12080 - }, - { - "epoch": 1.9455292081001652, - "grad_norm": 0.0007431380799971521, - "learning_rate": 0.00019999813530388366, - "loss": 46.0, - "step": 12081 - }, - { - "epoch": 1.9456902451789526, - "grad_norm": 0.0017823704984039068, - "learning_rate": 0.00019999813499491713, - "loss": 46.0, - "step": 12082 - }, - { - "epoch": 1.9458512822577398, - "grad_norm": 0.0059769004583358765, - "learning_rate": 0.000199998134685925, - "loss": 46.0, - "step": 12083 - }, - { - "epoch": 1.946012319336527, - "grad_norm": 0.0010595706989988685, - "learning_rate": 0.00019999813437690726, - "loss": 46.0, - "step": 12084 - }, - { - "epoch": 1.9461733564153145, - "grad_norm": 0.0004557165375445038, - "learning_rate": 0.00019999813406786394, - "loss": 46.0, - "step": 12085 - }, - { - "epoch": 1.946334393494102, - "grad_norm": 0.002186733530834317, - "learning_rate": 0.000199998133758795, - "loss": 46.0, - "step": 12086 - }, - { - "epoch": 1.9464954305728894, - "grad_norm": 0.0007412717095576227, - "learning_rate": 0.00019999813344970051, - "loss": 46.0, - "step": 12087 - }, - { - "epoch": 1.9466564676516769, - "grad_norm": 0.005248753819614649, - "learning_rate": 0.0001999981331405804, - "loss": 46.0, - "step": 12088 - }, - { - "epoch": 1.9468175047304643, - "grad_norm": 0.002423160243779421, - "learning_rate": 0.00019999813283143469, - "loss": 46.0, - "step": 12089 - }, - { - "epoch": 1.9469785418092516, - "grad_norm": 0.0014704273780807853, - "learning_rate": 0.0001999981325222634, - "loss": 46.0, - "step": 12090 - }, - { - "epoch": 1.947139578888039, - "grad_norm": 0.0005859193624928594, - "learning_rate": 0.0001999981322130665, - "loss": 46.0, - "step": 12091 - }, - { - "epoch": 1.9473006159668262, - "grad_norm": 0.00194712751545012, - "learning_rate": 0.00019999813190384403, - "loss": 46.0, - "step": 12092 - }, - { - "epoch": 1.9474616530456137, - "grad_norm": 0.0008329588454216719, - "learning_rate": 0.00019999813159459596, - "loss": 46.0, - "step": 12093 - }, - { - "epoch": 1.9476226901244011, - "grad_norm": 0.001748097944073379, - "learning_rate": 0.00019999813128532227, - "loss": 46.0, - "step": 12094 - }, - { - "epoch": 1.9477837272031886, - "grad_norm": 0.001053764601238072, - "learning_rate": 0.000199998130976023, - "loss": 46.0, - "step": 12095 - }, - { - "epoch": 1.947944764281976, - "grad_norm": 0.0012390092015266418, - "learning_rate": 0.00019999813066669814, - "loss": 46.0, - "step": 12096 - }, - { - "epoch": 1.9481058013607633, - "grad_norm": 0.008858664892613888, - "learning_rate": 0.0001999981303573477, - "loss": 46.0, - "step": 12097 - }, - { - "epoch": 1.9482668384395507, - "grad_norm": 0.0015460547292605042, - "learning_rate": 0.00019999813004797164, - "loss": 46.0, - "step": 12098 - }, - { - "epoch": 1.948427875518338, - "grad_norm": 0.001218714052811265, - "learning_rate": 0.00019999812973857, - "loss": 46.0, - "step": 12099 - }, - { - "epoch": 1.9485889125971254, - "grad_norm": 0.011520106345415115, - "learning_rate": 0.00019999812942914276, - "loss": 46.0, - "step": 12100 - }, - { - "epoch": 1.9487499496759129, - "grad_norm": 0.005516184028238058, - "learning_rate": 0.0001999981291196899, - "loss": 46.0, - "step": 12101 - }, - { - "epoch": 1.9489109867547003, - "grad_norm": 0.0005748493713326752, - "learning_rate": 0.0001999981288102115, - "loss": 46.0, - "step": 12102 - }, - { - "epoch": 1.9490720238334878, - "grad_norm": 0.0038536195643246174, - "learning_rate": 0.00019999812850070745, - "loss": 46.0, - "step": 12103 - }, - { - "epoch": 1.9492330609122752, - "grad_norm": 0.008117640390992165, - "learning_rate": 0.00019999812819117784, - "loss": 46.0, - "step": 12104 - }, - { - "epoch": 1.9493940979910624, - "grad_norm": 0.0025573784951120615, - "learning_rate": 0.00019999812788162261, - "loss": 46.0, - "step": 12105 - }, - { - "epoch": 1.9495551350698497, - "grad_norm": 0.003354765707626939, - "learning_rate": 0.0001999981275720418, - "loss": 46.0, - "step": 12106 - }, - { - "epoch": 1.9497161721486371, - "grad_norm": 0.012561849318444729, - "learning_rate": 0.00019999812726243543, - "loss": 46.0, - "step": 12107 - }, - { - "epoch": 1.9498772092274246, - "grad_norm": 0.0015255310572683811, - "learning_rate": 0.00019999812695280342, - "loss": 46.0, - "step": 12108 - }, - { - "epoch": 1.950038246306212, - "grad_norm": 0.0014020310482010245, - "learning_rate": 0.00019999812664314585, - "loss": 46.0, - "step": 12109 - }, - { - "epoch": 1.9501992833849995, - "grad_norm": 0.0013271570205688477, - "learning_rate": 0.00019999812633346266, - "loss": 46.0, - "step": 12110 - }, - { - "epoch": 1.950360320463787, - "grad_norm": 0.0008281288901343942, - "learning_rate": 0.00019999812602375388, - "loss": 46.0, - "step": 12111 - }, - { - "epoch": 1.9505213575425742, - "grad_norm": 0.0006146468804217875, - "learning_rate": 0.0001999981257140195, - "loss": 46.0, - "step": 12112 - }, - { - "epoch": 1.9506823946213616, - "grad_norm": 0.001222972059622407, - "learning_rate": 0.00019999812540425955, - "loss": 46.0, - "step": 12113 - }, - { - "epoch": 1.9508434317001488, - "grad_norm": 0.0031757354736328125, - "learning_rate": 0.00019999812509447398, - "loss": 46.0, - "step": 12114 - }, - { - "epoch": 1.9510044687789363, - "grad_norm": 0.009279381483793259, - "learning_rate": 0.00019999812478466283, - "loss": 46.0, - "step": 12115 - }, - { - "epoch": 1.9511655058577237, - "grad_norm": 0.0017381719080731273, - "learning_rate": 0.0001999981244748261, - "loss": 46.0, - "step": 12116 - }, - { - "epoch": 1.9513265429365112, - "grad_norm": 0.002244340954348445, - "learning_rate": 0.00019999812416496372, - "loss": 46.0, - "step": 12117 - }, - { - "epoch": 1.9514875800152987, - "grad_norm": 0.003751095151528716, - "learning_rate": 0.0001999981238550758, - "loss": 46.0, - "step": 12118 - }, - { - "epoch": 1.9516486170940859, - "grad_norm": 0.0019959721248596907, - "learning_rate": 0.00019999812354516225, - "loss": 46.0, - "step": 12119 - }, - { - "epoch": 1.9518096541728733, - "grad_norm": 0.003004837315529585, - "learning_rate": 0.00019999812323522314, - "loss": 46.0, - "step": 12120 - }, - { - "epoch": 1.9519706912516606, - "grad_norm": 0.0034260652028024197, - "learning_rate": 0.0001999981229252584, - "loss": 46.0, - "step": 12121 - }, - { - "epoch": 1.952131728330448, - "grad_norm": 0.00870165042579174, - "learning_rate": 0.0001999981226152681, - "loss": 46.0, - "step": 12122 - }, - { - "epoch": 1.9522927654092355, - "grad_norm": 0.002273171441629529, - "learning_rate": 0.0001999981223052522, - "loss": 46.0, - "step": 12123 - }, - { - "epoch": 1.952453802488023, - "grad_norm": 0.002056877827271819, - "learning_rate": 0.00019999812199521068, - "loss": 46.0, - "step": 12124 - }, - { - "epoch": 1.9526148395668104, - "grad_norm": 0.0037193584721535444, - "learning_rate": 0.00019999812168514355, - "loss": 46.0, - "step": 12125 - }, - { - "epoch": 1.9527758766455976, - "grad_norm": 0.0007669219048693776, - "learning_rate": 0.00019999812137505088, - "loss": 46.0, - "step": 12126 - }, - { - "epoch": 1.952936913724385, - "grad_norm": 0.0026137731038033962, - "learning_rate": 0.00019999812106493258, - "loss": 46.0, - "step": 12127 - }, - { - "epoch": 1.9530979508031723, - "grad_norm": 0.0008542754221707582, - "learning_rate": 0.0001999981207547887, - "loss": 46.0, - "step": 12128 - }, - { - "epoch": 1.9532589878819597, - "grad_norm": 0.0013925344683229923, - "learning_rate": 0.00019999812044461924, - "loss": 46.0, - "step": 12129 - }, - { - "epoch": 1.9534200249607472, - "grad_norm": 0.0016747916815802455, - "learning_rate": 0.00019999812013442417, - "loss": 46.0, - "step": 12130 - }, - { - "epoch": 1.9535810620395346, - "grad_norm": 0.0005838229553773999, - "learning_rate": 0.0001999981198242035, - "loss": 46.0, - "step": 12131 - }, - { - "epoch": 1.953742099118322, - "grad_norm": 0.0108532989397645, - "learning_rate": 0.00019999811951395722, - "loss": 46.0, - "step": 12132 - }, - { - "epoch": 1.9539031361971095, - "grad_norm": 0.006761074066162109, - "learning_rate": 0.00019999811920368537, - "loss": 46.0, - "step": 12133 - }, - { - "epoch": 1.9540641732758968, - "grad_norm": 0.0003684191033244133, - "learning_rate": 0.00019999811889338792, - "loss": 46.0, - "step": 12134 - }, - { - "epoch": 1.9542252103546842, - "grad_norm": 0.007435506675392389, - "learning_rate": 0.00019999811858306487, - "loss": 46.0, - "step": 12135 - }, - { - "epoch": 1.9543862474334714, - "grad_norm": 0.0034958310425281525, - "learning_rate": 0.00019999811827271625, - "loss": 46.0, - "step": 12136 - }, - { - "epoch": 1.954547284512259, - "grad_norm": 0.0012527265353128314, - "learning_rate": 0.00019999811796234202, - "loss": 46.0, - "step": 12137 - }, - { - "epoch": 1.9547083215910463, - "grad_norm": 0.0012181499041616917, - "learning_rate": 0.00019999811765194218, - "loss": 46.0, - "step": 12138 - }, - { - "epoch": 1.9548693586698338, - "grad_norm": 0.0006066956557333469, - "learning_rate": 0.00019999811734151677, - "loss": 46.0, - "step": 12139 - }, - { - "epoch": 1.9550303957486213, - "grad_norm": 0.0031212479807436466, - "learning_rate": 0.00019999811703106575, - "loss": 46.0, - "step": 12140 - }, - { - "epoch": 1.9551914328274085, - "grad_norm": 0.000998818315565586, - "learning_rate": 0.00019999811672058915, - "loss": 46.0, - "step": 12141 - }, - { - "epoch": 1.955352469906196, - "grad_norm": 0.0007590508321300149, - "learning_rate": 0.00019999811641008695, - "loss": 46.0, - "step": 12142 - }, - { - "epoch": 1.9555135069849832, - "grad_norm": 0.00371168227866292, - "learning_rate": 0.00019999811609955912, - "loss": 46.0, - "step": 12143 - }, - { - "epoch": 1.9556745440637706, - "grad_norm": 0.0026444531977176666, - "learning_rate": 0.00019999811578900575, - "loss": 46.0, - "step": 12144 - }, - { - "epoch": 1.955835581142558, - "grad_norm": 0.0006410221103578806, - "learning_rate": 0.00019999811547842674, - "loss": 46.0, - "step": 12145 - }, - { - "epoch": 1.9559966182213455, - "grad_norm": 0.011332300491631031, - "learning_rate": 0.00019999811516782217, - "loss": 46.0, - "step": 12146 - }, - { - "epoch": 1.956157655300133, - "grad_norm": 0.0016425964422523975, - "learning_rate": 0.00019999811485719199, - "loss": 46.0, - "step": 12147 - }, - { - "epoch": 1.9563186923789202, - "grad_norm": 0.0014186163898557425, - "learning_rate": 0.00019999811454653624, - "loss": 46.0, - "step": 12148 - }, - { - "epoch": 1.9564797294577077, - "grad_norm": 0.0037449817173182964, - "learning_rate": 0.00019999811423585486, - "loss": 46.0, - "step": 12149 - }, - { - "epoch": 1.9566407665364949, - "grad_norm": 0.0006519765593111515, - "learning_rate": 0.00019999811392514789, - "loss": 46.0, - "step": 12150 - }, - { - "epoch": 1.9568018036152823, - "grad_norm": 0.0019342554733157158, - "learning_rate": 0.00019999811361441533, - "loss": 46.0, - "step": 12151 - }, - { - "epoch": 1.9569628406940698, - "grad_norm": 0.006986892782151699, - "learning_rate": 0.0001999981133036572, - "loss": 46.0, - "step": 12152 - }, - { - "epoch": 1.9571238777728572, - "grad_norm": 0.00025443563936278224, - "learning_rate": 0.00019999811299287345, - "loss": 46.0, - "step": 12153 - }, - { - "epoch": 1.9572849148516447, - "grad_norm": 0.007195987738668919, - "learning_rate": 0.00019999811268206412, - "loss": 46.0, - "step": 12154 - }, - { - "epoch": 1.9574459519304321, - "grad_norm": 0.003083311254158616, - "learning_rate": 0.0001999981123712292, - "loss": 46.0, - "step": 12155 - }, - { - "epoch": 1.9576069890092194, - "grad_norm": 0.009662473574280739, - "learning_rate": 0.00019999811206036867, - "loss": 46.0, - "step": 12156 - }, - { - "epoch": 1.9577680260880068, - "grad_norm": 0.002636327873915434, - "learning_rate": 0.00019999811174948256, - "loss": 46.0, - "step": 12157 - }, - { - "epoch": 1.957929063166794, - "grad_norm": 0.0024721070658415556, - "learning_rate": 0.0001999981114385708, - "loss": 46.0, - "step": 12158 - }, - { - "epoch": 1.9580901002455815, - "grad_norm": 0.0004560021625366062, - "learning_rate": 0.00019999811112763352, - "loss": 46.0, - "step": 12159 - }, - { - "epoch": 1.958251137324369, - "grad_norm": 0.0020225245971232653, - "learning_rate": 0.00019999811081667062, - "loss": 46.0, - "step": 12160 - }, - { - "epoch": 1.9584121744031564, - "grad_norm": 0.0010688375914469361, - "learning_rate": 0.0001999981105056821, - "loss": 46.0, - "step": 12161 - }, - { - "epoch": 1.9585732114819439, - "grad_norm": 0.0017607322661206126, - "learning_rate": 0.000199998110194668, - "loss": 46.0, - "step": 12162 - }, - { - "epoch": 1.958734248560731, - "grad_norm": 0.0012470840010792017, - "learning_rate": 0.00019999810988362832, - "loss": 46.0, - "step": 12163 - }, - { - "epoch": 1.9588952856395185, - "grad_norm": 0.0035028427373617887, - "learning_rate": 0.00019999810957256305, - "loss": 46.0, - "step": 12164 - }, - { - "epoch": 1.9590563227183058, - "grad_norm": 0.0007340355659835041, - "learning_rate": 0.0001999981092614722, - "loss": 46.0, - "step": 12165 - }, - { - "epoch": 1.9592173597970932, - "grad_norm": 0.003109299810603261, - "learning_rate": 0.00019999810895035571, - "loss": 46.0, - "step": 12166 - }, - { - "epoch": 1.9593783968758807, - "grad_norm": 0.0033639948815107346, - "learning_rate": 0.00019999810863921365, - "loss": 46.0, - "step": 12167 - }, - { - "epoch": 1.9595394339546681, - "grad_norm": 0.002353239804506302, - "learning_rate": 0.000199998108328046, - "loss": 46.0, - "step": 12168 - }, - { - "epoch": 1.9597004710334556, - "grad_norm": 0.000553739897441119, - "learning_rate": 0.00019999810801685274, - "loss": 46.0, - "step": 12169 - }, - { - "epoch": 1.9598615081122428, - "grad_norm": 0.0019046416273340583, - "learning_rate": 0.00019999810770563389, - "loss": 46.0, - "step": 12170 - }, - { - "epoch": 1.9600225451910303, - "grad_norm": 0.01497845072299242, - "learning_rate": 0.00019999810739438945, - "loss": 46.0, - "step": 12171 - }, - { - "epoch": 1.9601835822698175, - "grad_norm": 0.0008863619877956808, - "learning_rate": 0.00019999810708311942, - "loss": 46.0, - "step": 12172 - }, - { - "epoch": 1.960344619348605, - "grad_norm": 0.0016912948340177536, - "learning_rate": 0.00019999810677182378, - "loss": 46.0, - "step": 12173 - }, - { - "epoch": 1.9605056564273924, - "grad_norm": 0.001350126345641911, - "learning_rate": 0.00019999810646050256, - "loss": 46.0, - "step": 12174 - }, - { - "epoch": 1.9606666935061798, - "grad_norm": 0.0021432330831885338, - "learning_rate": 0.00019999810614915574, - "loss": 46.0, - "step": 12175 - }, - { - "epoch": 1.9608277305849673, - "grad_norm": 0.0010685812449082732, - "learning_rate": 0.00019999810583778334, - "loss": 46.0, - "step": 12176 - }, - { - "epoch": 1.9609887676637547, - "grad_norm": 0.002427290426567197, - "learning_rate": 0.00019999810552638532, - "loss": 46.0, - "step": 12177 - }, - { - "epoch": 1.961149804742542, - "grad_norm": 0.003014141460880637, - "learning_rate": 0.00019999810521496172, - "loss": 46.0, - "step": 12178 - }, - { - "epoch": 1.9613108418213292, - "grad_norm": 0.0024816745426505804, - "learning_rate": 0.0001999981049035125, - "loss": 46.0, - "step": 12179 - }, - { - "epoch": 1.9614718789001166, - "grad_norm": 0.004163929261267185, - "learning_rate": 0.00019999810459203773, - "loss": 46.0, - "step": 12180 - }, - { - "epoch": 1.961632915978904, - "grad_norm": 0.003371578874066472, - "learning_rate": 0.00019999810428053734, - "loss": 46.0, - "step": 12181 - }, - { - "epoch": 1.9617939530576916, - "grad_norm": 0.004334550350904465, - "learning_rate": 0.00019999810396901136, - "loss": 46.0, - "step": 12182 - }, - { - "epoch": 1.961954990136479, - "grad_norm": 0.0031515981536358595, - "learning_rate": 0.0001999981036574598, - "loss": 46.0, - "step": 12183 - }, - { - "epoch": 1.9621160272152665, - "grad_norm": 0.002309974981471896, - "learning_rate": 0.0001999981033458826, - "loss": 46.0, - "step": 12184 - }, - { - "epoch": 1.9622770642940537, - "grad_norm": 0.0008523603319190443, - "learning_rate": 0.00019999810303427987, - "loss": 46.0, - "step": 12185 - }, - { - "epoch": 1.9624381013728411, - "grad_norm": 0.002786514349281788, - "learning_rate": 0.0001999981027226515, - "loss": 46.0, - "step": 12186 - }, - { - "epoch": 1.9625991384516284, - "grad_norm": 0.006536405999213457, - "learning_rate": 0.00019999810241099755, - "loss": 46.0, - "step": 12187 - }, - { - "epoch": 1.9627601755304158, - "grad_norm": 0.0012258117785677314, - "learning_rate": 0.000199998102099318, - "loss": 46.0, - "step": 12188 - }, - { - "epoch": 1.9629212126092033, - "grad_norm": 0.0020775615703314543, - "learning_rate": 0.00019999810178761288, - "loss": 46.0, - "step": 12189 - }, - { - "epoch": 1.9630822496879907, - "grad_norm": 0.0036292194854468107, - "learning_rate": 0.00019999810147588215, - "loss": 46.0, - "step": 12190 - }, - { - "epoch": 1.9632432867667782, - "grad_norm": 0.0007245729211717844, - "learning_rate": 0.00019999810116412577, - "loss": 46.0, - "step": 12191 - }, - { - "epoch": 1.9634043238455654, - "grad_norm": 0.010219019837677479, - "learning_rate": 0.00019999810085234387, - "loss": 46.0, - "step": 12192 - }, - { - "epoch": 1.9635653609243529, - "grad_norm": 0.0030370333697646856, - "learning_rate": 0.00019999810054053635, - "loss": 46.0, - "step": 12193 - }, - { - "epoch": 1.96372639800314, - "grad_norm": 0.00286252424120903, - "learning_rate": 0.00019999810022870324, - "loss": 46.0, - "step": 12194 - }, - { - "epoch": 1.9638874350819275, - "grad_norm": 0.0015990732936188579, - "learning_rate": 0.00019999809991684452, - "loss": 46.0, - "step": 12195 - }, - { - "epoch": 1.964048472160715, - "grad_norm": 0.002753674518316984, - "learning_rate": 0.00019999809960496024, - "loss": 46.0, - "step": 12196 - }, - { - "epoch": 1.9642095092395024, - "grad_norm": 0.0009453110978938639, - "learning_rate": 0.00019999809929305032, - "loss": 46.0, - "step": 12197 - }, - { - "epoch": 1.96437054631829, - "grad_norm": 0.001847909647040069, - "learning_rate": 0.00019999809898111487, - "loss": 46.0, - "step": 12198 - }, - { - "epoch": 1.9645315833970773, - "grad_norm": 0.011321339756250381, - "learning_rate": 0.00019999809866915377, - "loss": 46.0, - "step": 12199 - }, - { - "epoch": 1.9646926204758646, - "grad_norm": 0.00615813909098506, - "learning_rate": 0.00019999809835716706, - "loss": 46.0, - "step": 12200 - }, - { - "epoch": 1.9648536575546518, - "grad_norm": 0.0003013201057910919, - "learning_rate": 0.00019999809804515482, - "loss": 46.0, - "step": 12201 - }, - { - "epoch": 1.9650146946334393, - "grad_norm": 0.003417704487219453, - "learning_rate": 0.00019999809773311693, - "loss": 46.0, - "step": 12202 - }, - { - "epoch": 1.9651757317122267, - "grad_norm": 0.0013419464230537415, - "learning_rate": 0.0001999980974210535, - "loss": 46.0, - "step": 12203 - }, - { - "epoch": 1.9653367687910142, - "grad_norm": 0.0008844972471706569, - "learning_rate": 0.00019999809710896443, - "loss": 46.0, - "step": 12204 - }, - { - "epoch": 1.9654978058698016, - "grad_norm": 0.0007280951831489801, - "learning_rate": 0.00019999809679684978, - "loss": 46.0, - "step": 12205 - }, - { - "epoch": 1.965658842948589, - "grad_norm": 0.0015828980831429362, - "learning_rate": 0.00019999809648470952, - "loss": 46.0, - "step": 12206 - }, - { - "epoch": 1.9658198800273763, - "grad_norm": 0.0012401886051520705, - "learning_rate": 0.0001999980961725437, - "loss": 46.0, - "step": 12207 - }, - { - "epoch": 1.9659809171061637, - "grad_norm": 0.0008402077364735305, - "learning_rate": 0.00019999809586035224, - "loss": 46.0, - "step": 12208 - }, - { - "epoch": 1.966141954184951, - "grad_norm": 0.005274999886751175, - "learning_rate": 0.00019999809554813522, - "loss": 46.0, - "step": 12209 - }, - { - "epoch": 1.9663029912637384, - "grad_norm": 0.004728623665869236, - "learning_rate": 0.00019999809523589258, - "loss": 46.0, - "step": 12210 - }, - { - "epoch": 1.9664640283425259, - "grad_norm": 0.001169433817267418, - "learning_rate": 0.00019999809492362439, - "loss": 46.0, - "step": 12211 - }, - { - "epoch": 1.9666250654213133, - "grad_norm": 0.0015337723307311535, - "learning_rate": 0.00019999809461133058, - "loss": 46.0, - "step": 12212 - }, - { - "epoch": 1.9667861025001008, - "grad_norm": 0.00455063208937645, - "learning_rate": 0.00019999809429901118, - "loss": 46.0, - "step": 12213 - }, - { - "epoch": 1.966947139578888, - "grad_norm": 0.002513535087928176, - "learning_rate": 0.0001999980939866662, - "loss": 46.0, - "step": 12214 - }, - { - "epoch": 1.9671081766576755, - "grad_norm": 0.0019140273798257113, - "learning_rate": 0.00019999809367429557, - "loss": 46.0, - "step": 12215 - }, - { - "epoch": 1.9672692137364627, - "grad_norm": 0.0010622062254697084, - "learning_rate": 0.00019999809336189938, - "loss": 46.0, - "step": 12216 - }, - { - "epoch": 1.9674302508152501, - "grad_norm": 0.0009454112150706351, - "learning_rate": 0.0001999980930494776, - "loss": 46.0, - "step": 12217 - }, - { - "epoch": 1.9675912878940376, - "grad_norm": 0.0007914511952549219, - "learning_rate": 0.00019999809273703022, - "loss": 46.0, - "step": 12218 - }, - { - "epoch": 1.967752324972825, - "grad_norm": 0.0033709832932800055, - "learning_rate": 0.00019999809242455724, - "loss": 46.0, - "step": 12219 - }, - { - "epoch": 1.9679133620516125, - "grad_norm": 0.0011022750986739993, - "learning_rate": 0.00019999809211205868, - "loss": 46.0, - "step": 12220 - }, - { - "epoch": 1.9680743991303997, - "grad_norm": 0.0025576017796993256, - "learning_rate": 0.00019999809179953453, - "loss": 46.0, - "step": 12221 - }, - { - "epoch": 1.9682354362091872, - "grad_norm": 0.0008091646595858037, - "learning_rate": 0.0001999980914869848, - "loss": 46.0, - "step": 12222 - }, - { - "epoch": 1.9683964732879744, - "grad_norm": 0.0037050878163427114, - "learning_rate": 0.00019999809117440942, - "loss": 46.0, - "step": 12223 - }, - { - "epoch": 1.9685575103667619, - "grad_norm": 0.0006190096610225737, - "learning_rate": 0.00019999809086180848, - "loss": 46.0, - "step": 12224 - }, - { - "epoch": 1.9687185474455493, - "grad_norm": 0.00044239015551283956, - "learning_rate": 0.00019999809054918193, - "loss": 46.0, - "step": 12225 - }, - { - "epoch": 1.9688795845243368, - "grad_norm": 0.0003960916365031153, - "learning_rate": 0.0001999980902365298, - "loss": 46.0, - "step": 12226 - }, - { - "epoch": 1.9690406216031242, - "grad_norm": 0.0038154316134750843, - "learning_rate": 0.0001999980899238521, - "loss": 46.0, - "step": 12227 - }, - { - "epoch": 1.9692016586819117, - "grad_norm": 0.005771647207438946, - "learning_rate": 0.00019999808961114877, - "loss": 46.0, - "step": 12228 - }, - { - "epoch": 1.969362695760699, - "grad_norm": 0.0010224670404568315, - "learning_rate": 0.00019999808929841985, - "loss": 46.0, - "step": 12229 - }, - { - "epoch": 1.9695237328394863, - "grad_norm": 0.0019001205218955874, - "learning_rate": 0.00019999808898566533, - "loss": 46.0, - "step": 12230 - }, - { - "epoch": 1.9696847699182736, - "grad_norm": 0.000559313572011888, - "learning_rate": 0.00019999808867288523, - "loss": 46.0, - "step": 12231 - }, - { - "epoch": 1.969845806997061, - "grad_norm": 0.002748290542513132, - "learning_rate": 0.0001999980883600795, - "loss": 46.0, - "step": 12232 - }, - { - "epoch": 1.9700068440758485, - "grad_norm": 0.005919093266129494, - "learning_rate": 0.00019999808804724824, - "loss": 46.0, - "step": 12233 - }, - { - "epoch": 1.970167881154636, - "grad_norm": 0.00418158108368516, - "learning_rate": 0.00019999808773439134, - "loss": 46.0, - "step": 12234 - }, - { - "epoch": 1.9703289182334234, - "grad_norm": 0.0008404531399719417, - "learning_rate": 0.00019999808742150887, - "loss": 46.0, - "step": 12235 - }, - { - "epoch": 1.9704899553122106, - "grad_norm": 0.0033515128307044506, - "learning_rate": 0.0001999980871086008, - "loss": 46.0, - "step": 12236 - }, - { - "epoch": 1.970650992390998, - "grad_norm": 0.0004960817750543356, - "learning_rate": 0.00019999808679566712, - "loss": 46.0, - "step": 12237 - }, - { - "epoch": 1.9708120294697853, - "grad_norm": 0.006968372967094183, - "learning_rate": 0.00019999808648270785, - "loss": 46.0, - "step": 12238 - }, - { - "epoch": 1.9709730665485727, - "grad_norm": 0.0009137294837273657, - "learning_rate": 0.000199998086169723, - "loss": 46.0, - "step": 12239 - }, - { - "epoch": 1.9711341036273602, - "grad_norm": 0.0008460060344077647, - "learning_rate": 0.00019999808585671253, - "loss": 46.0, - "step": 12240 - }, - { - "epoch": 1.9712951407061476, - "grad_norm": 0.010460236109793186, - "learning_rate": 0.0001999980855436765, - "loss": 46.0, - "step": 12241 - }, - { - "epoch": 1.971456177784935, - "grad_norm": 0.00291981128975749, - "learning_rate": 0.00019999808523061486, - "loss": 46.0, - "step": 12242 - }, - { - "epoch": 1.9716172148637223, - "grad_norm": 0.0007473889854736626, - "learning_rate": 0.00019999808491752763, - "loss": 46.0, - "step": 12243 - }, - { - "epoch": 1.9717782519425098, - "grad_norm": 0.0021796675864607096, - "learning_rate": 0.0001999980846044148, - "loss": 46.0, - "step": 12244 - }, - { - "epoch": 1.971939289021297, - "grad_norm": 0.005767948925495148, - "learning_rate": 0.00019999808429127636, - "loss": 46.0, - "step": 12245 - }, - { - "epoch": 1.9721003261000845, - "grad_norm": 0.0014900406822562218, - "learning_rate": 0.00019999808397811234, - "loss": 46.0, - "step": 12246 - }, - { - "epoch": 1.972261363178872, - "grad_norm": 0.0019085450330749154, - "learning_rate": 0.00019999808366492273, - "loss": 46.0, - "step": 12247 - }, - { - "epoch": 1.9724224002576594, - "grad_norm": 0.003345359116792679, - "learning_rate": 0.0001999980833517075, - "loss": 46.0, - "step": 12248 - }, - { - "epoch": 1.9725834373364468, - "grad_norm": 0.0007762960158288479, - "learning_rate": 0.00019999808303846673, - "loss": 46.0, - "step": 12249 - }, - { - "epoch": 1.9727444744152343, - "grad_norm": 0.0006640477222390473, - "learning_rate": 0.0001999980827252003, - "loss": 46.0, - "step": 12250 - }, - { - "epoch": 1.9729055114940215, - "grad_norm": 0.0009199056075885892, - "learning_rate": 0.0001999980824119083, - "loss": 46.0, - "step": 12251 - }, - { - "epoch": 1.973066548572809, - "grad_norm": 0.009267180226743221, - "learning_rate": 0.00019999808209859073, - "loss": 46.0, - "step": 12252 - }, - { - "epoch": 1.9732275856515962, - "grad_norm": 0.003825358347967267, - "learning_rate": 0.00019999808178524755, - "loss": 46.0, - "step": 12253 - }, - { - "epoch": 1.9733886227303836, - "grad_norm": 0.004644161555916071, - "learning_rate": 0.00019999808147187878, - "loss": 46.0, - "step": 12254 - }, - { - "epoch": 1.973549659809171, - "grad_norm": 0.004506648983806372, - "learning_rate": 0.00019999808115848442, - "loss": 46.0, - "step": 12255 - }, - { - "epoch": 1.9737106968879585, - "grad_norm": 0.0012973000993952155, - "learning_rate": 0.00019999808084506443, - "loss": 46.0, - "step": 12256 - }, - { - "epoch": 1.973871733966746, - "grad_norm": 0.000974331283941865, - "learning_rate": 0.00019999808053161887, - "loss": 46.0, - "step": 12257 - }, - { - "epoch": 1.9740327710455332, - "grad_norm": 0.0029749576933681965, - "learning_rate": 0.00019999808021814772, - "loss": 46.0, - "step": 12258 - }, - { - "epoch": 1.9741938081243207, - "grad_norm": 0.0019794791005551815, - "learning_rate": 0.000199998079904651, - "loss": 46.0, - "step": 12259 - }, - { - "epoch": 1.9743548452031079, - "grad_norm": 0.007509741000831127, - "learning_rate": 0.00019999807959112864, - "loss": 46.0, - "step": 12260 - }, - { - "epoch": 1.9745158822818953, - "grad_norm": 0.0055227261036634445, - "learning_rate": 0.0001999980792775807, - "loss": 46.0, - "step": 12261 - }, - { - "epoch": 1.9746769193606828, - "grad_norm": 0.0009662671363912523, - "learning_rate": 0.00019999807896400719, - "loss": 46.0, - "step": 12262 - }, - { - "epoch": 1.9748379564394702, - "grad_norm": 0.023432206362485886, - "learning_rate": 0.00019999807865040805, - "loss": 46.0, - "step": 12263 - }, - { - "epoch": 1.9749989935182577, - "grad_norm": 0.0019245572621002793, - "learning_rate": 0.00019999807833678333, - "loss": 46.0, - "step": 12264 - }, - { - "epoch": 1.975160030597045, - "grad_norm": 0.0009568097302690148, - "learning_rate": 0.00019999807802313302, - "loss": 46.0, - "step": 12265 - }, - { - "epoch": 1.9753210676758324, - "grad_norm": 0.0018147414084523916, - "learning_rate": 0.00019999807770945712, - "loss": 46.0, - "step": 12266 - }, - { - "epoch": 1.9754821047546196, - "grad_norm": 0.0030487615149468184, - "learning_rate": 0.00019999807739575564, - "loss": 46.0, - "step": 12267 - }, - { - "epoch": 1.975643141833407, - "grad_norm": 0.0027401866391301155, - "learning_rate": 0.0001999980770820285, - "loss": 46.0, - "step": 12268 - }, - { - "epoch": 1.9758041789121945, - "grad_norm": 0.0011288942769169807, - "learning_rate": 0.00019999807676827582, - "loss": 46.0, - "step": 12269 - }, - { - "epoch": 1.975965215990982, - "grad_norm": 0.0037102545611560345, - "learning_rate": 0.00019999807645449752, - "loss": 46.0, - "step": 12270 - }, - { - "epoch": 1.9761262530697694, - "grad_norm": 0.0009827904868870974, - "learning_rate": 0.00019999807614069366, - "loss": 46.0, - "step": 12271 - }, - { - "epoch": 1.9762872901485569, - "grad_norm": 0.0012922310270369053, - "learning_rate": 0.0001999980758268642, - "loss": 46.0, - "step": 12272 - }, - { - "epoch": 1.976448327227344, - "grad_norm": 0.002203173702582717, - "learning_rate": 0.0001999980755130091, - "loss": 46.0, - "step": 12273 - }, - { - "epoch": 1.9766093643061313, - "grad_norm": 0.001265895669348538, - "learning_rate": 0.00019999807519912845, - "loss": 46.0, - "step": 12274 - }, - { - "epoch": 1.9767704013849188, - "grad_norm": 0.0048488047905266285, - "learning_rate": 0.0001999980748852222, - "loss": 46.0, - "step": 12275 - }, - { - "epoch": 1.9769314384637062, - "grad_norm": 0.005234608892351389, - "learning_rate": 0.00019999807457129034, - "loss": 46.0, - "step": 12276 - }, - { - "epoch": 1.9770924755424937, - "grad_norm": 0.009345931932330132, - "learning_rate": 0.0001999980742573329, - "loss": 46.0, - "step": 12277 - }, - { - "epoch": 1.9772535126212811, - "grad_norm": 0.0040670582093298435, - "learning_rate": 0.00019999807394334985, - "loss": 46.0, - "step": 12278 - }, - { - "epoch": 1.9774145497000686, - "grad_norm": 0.0071364580653607845, - "learning_rate": 0.0001999980736293412, - "loss": 46.0, - "step": 12279 - }, - { - "epoch": 1.9775755867788558, - "grad_norm": 0.0061156777665019035, - "learning_rate": 0.00019999807331530698, - "loss": 46.0, - "step": 12280 - }, - { - "epoch": 1.9777366238576433, - "grad_norm": 0.0024092956446111202, - "learning_rate": 0.00019999807300124717, - "loss": 46.0, - "step": 12281 - }, - { - "epoch": 1.9778976609364305, - "grad_norm": 0.00545324943959713, - "learning_rate": 0.00019999807268716174, - "loss": 46.0, - "step": 12282 - }, - { - "epoch": 1.978058698015218, - "grad_norm": 0.0022237629164010286, - "learning_rate": 0.00019999807237305075, - "loss": 46.0, - "step": 12283 - }, - { - "epoch": 1.9782197350940054, - "grad_norm": 0.005282882135361433, - "learning_rate": 0.00019999807205891413, - "loss": 46.0, - "step": 12284 - }, - { - "epoch": 1.9783807721727928, - "grad_norm": 0.0026031166780740023, - "learning_rate": 0.00019999807174475194, - "loss": 46.0, - "step": 12285 - }, - { - "epoch": 1.9785418092515803, - "grad_norm": 0.002004999201744795, - "learning_rate": 0.00019999807143056413, - "loss": 46.0, - "step": 12286 - }, - { - "epoch": 1.9787028463303675, - "grad_norm": 0.0025790894869714975, - "learning_rate": 0.00019999807111635077, - "loss": 46.0, - "step": 12287 - }, - { - "epoch": 1.978863883409155, - "grad_norm": 0.0008057039813138545, - "learning_rate": 0.00019999807080211176, - "loss": 46.0, - "step": 12288 - }, - { - "epoch": 1.9790249204879422, - "grad_norm": 0.007503815460950136, - "learning_rate": 0.00019999807048784717, - "loss": 46.0, - "step": 12289 - }, - { - "epoch": 1.9791859575667297, - "grad_norm": 0.00282031437382102, - "learning_rate": 0.000199998070173557, - "loss": 46.0, - "step": 12290 - }, - { - "epoch": 1.979346994645517, - "grad_norm": 0.0022822634782642126, - "learning_rate": 0.00019999806985924123, - "loss": 46.0, - "step": 12291 - }, - { - "epoch": 1.9795080317243046, - "grad_norm": 0.008359306491911411, - "learning_rate": 0.00019999806954489987, - "loss": 46.0, - "step": 12292 - }, - { - "epoch": 1.979669068803092, - "grad_norm": 0.003279345342889428, - "learning_rate": 0.00019999806923053293, - "loss": 46.0, - "step": 12293 - }, - { - "epoch": 1.9798301058818795, - "grad_norm": 0.0025783029850572348, - "learning_rate": 0.00019999806891614038, - "loss": 46.0, - "step": 12294 - }, - { - "epoch": 1.9799911429606667, - "grad_norm": 0.0023204863537102938, - "learning_rate": 0.00019999806860172224, - "loss": 46.0, - "step": 12295 - }, - { - "epoch": 1.980152180039454, - "grad_norm": 0.0014210662338882685, - "learning_rate": 0.0001999980682872785, - "loss": 46.0, - "step": 12296 - }, - { - "epoch": 1.9803132171182414, - "grad_norm": 0.0030528847128152847, - "learning_rate": 0.00019999806797280916, - "loss": 46.0, - "step": 12297 - }, - { - "epoch": 1.9804742541970288, - "grad_norm": 0.0021437364630401134, - "learning_rate": 0.00019999806765831423, - "loss": 46.0, - "step": 12298 - }, - { - "epoch": 1.9806352912758163, - "grad_norm": 0.0025230783503502607, - "learning_rate": 0.0001999980673437937, - "loss": 46.0, - "step": 12299 - }, - { - "epoch": 1.9807963283546037, - "grad_norm": 0.0017503659473732114, - "learning_rate": 0.00019999806702924758, - "loss": 46.0, - "step": 12300 - }, - { - "epoch": 1.9809573654333912, - "grad_norm": 0.005607995670288801, - "learning_rate": 0.00019999806671467586, - "loss": 46.0, - "step": 12301 - }, - { - "epoch": 1.9811184025121784, - "grad_norm": 0.0011990922503173351, - "learning_rate": 0.00019999806640007858, - "loss": 46.0, - "step": 12302 - }, - { - "epoch": 1.9812794395909659, - "grad_norm": 0.0012647804105654359, - "learning_rate": 0.0001999980660854557, - "loss": 46.0, - "step": 12303 - }, - { - "epoch": 1.981440476669753, - "grad_norm": 0.002947992878034711, - "learning_rate": 0.0001999980657708072, - "loss": 46.0, - "step": 12304 - }, - { - "epoch": 1.9816015137485405, - "grad_norm": 0.001517716096714139, - "learning_rate": 0.00019999806545613309, - "loss": 46.0, - "step": 12305 - }, - { - "epoch": 1.981762550827328, - "grad_norm": 0.002484687604010105, - "learning_rate": 0.0001999980651414334, - "loss": 46.0, - "step": 12306 - }, - { - "epoch": 1.9819235879061154, - "grad_norm": 0.0037229699082672596, - "learning_rate": 0.00019999806482670813, - "loss": 46.0, - "step": 12307 - }, - { - "epoch": 1.982084624984903, - "grad_norm": 0.0006061809835955501, - "learning_rate": 0.00019999806451195725, - "loss": 46.0, - "step": 12308 - }, - { - "epoch": 1.9822456620636901, - "grad_norm": 0.007558826357126236, - "learning_rate": 0.00019999806419718078, - "loss": 46.0, - "step": 12309 - }, - { - "epoch": 1.9824066991424776, - "grad_norm": 0.004070258233696222, - "learning_rate": 0.00019999806388237875, - "loss": 46.0, - "step": 12310 - }, - { - "epoch": 1.9825677362212648, - "grad_norm": 0.0019721612334251404, - "learning_rate": 0.00019999806356755108, - "loss": 46.0, - "step": 12311 - }, - { - "epoch": 1.9827287733000523, - "grad_norm": 0.006729131564497948, - "learning_rate": 0.00019999806325269784, - "loss": 46.0, - "step": 12312 - }, - { - "epoch": 1.9828898103788397, - "grad_norm": 0.009025485254824162, - "learning_rate": 0.000199998062937819, - "loss": 46.0, - "step": 12313 - }, - { - "epoch": 1.9830508474576272, - "grad_norm": 0.0011525398585945368, - "learning_rate": 0.00019999806262291454, - "loss": 46.0, - "step": 12314 - }, - { - "epoch": 1.9832118845364146, - "grad_norm": 0.0012309578014537692, - "learning_rate": 0.00019999806230798451, - "loss": 46.0, - "step": 12315 - }, - { - "epoch": 1.9833729216152018, - "grad_norm": 0.004987578373402357, - "learning_rate": 0.00019999806199302888, - "loss": 46.0, - "step": 12316 - }, - { - "epoch": 1.9835339586939893, - "grad_norm": 0.0017838859930634499, - "learning_rate": 0.00019999806167804766, - "loss": 46.0, - "step": 12317 - }, - { - "epoch": 1.9836949957727765, - "grad_norm": 0.0008300473564304411, - "learning_rate": 0.00019999806136304087, - "loss": 46.0, - "step": 12318 - }, - { - "epoch": 1.983856032851564, - "grad_norm": 0.0036320278886705637, - "learning_rate": 0.00019999806104800845, - "loss": 46.0, - "step": 12319 - }, - { - "epoch": 1.9840170699303514, - "grad_norm": 0.002548553515225649, - "learning_rate": 0.00019999806073295044, - "loss": 46.0, - "step": 12320 - }, - { - "epoch": 1.9841781070091389, - "grad_norm": 0.002902851440012455, - "learning_rate": 0.00019999806041786684, - "loss": 46.0, - "step": 12321 - }, - { - "epoch": 1.9843391440879263, - "grad_norm": 0.004910650663077831, - "learning_rate": 0.00019999806010275766, - "loss": 46.0, - "step": 12322 - }, - { - "epoch": 1.9845001811667138, - "grad_norm": 0.0009098303853534162, - "learning_rate": 0.00019999805978762286, - "loss": 46.0, - "step": 12323 - }, - { - "epoch": 1.984661218245501, - "grad_norm": 0.002984803169965744, - "learning_rate": 0.0001999980594724625, - "loss": 46.0, - "step": 12324 - }, - { - "epoch": 1.9848222553242885, - "grad_norm": 0.012350688688457012, - "learning_rate": 0.0001999980591572765, - "loss": 46.0, - "step": 12325 - }, - { - "epoch": 1.9849832924030757, - "grad_norm": 0.00577581487596035, - "learning_rate": 0.00019999805884206496, - "loss": 46.0, - "step": 12326 - }, - { - "epoch": 1.9851443294818631, - "grad_norm": 0.0006859779241494834, - "learning_rate": 0.00019999805852682778, - "loss": 46.0, - "step": 12327 - }, - { - "epoch": 1.9853053665606506, - "grad_norm": 0.0006017529522068799, - "learning_rate": 0.00019999805821156502, - "loss": 46.0, - "step": 12328 - }, - { - "epoch": 1.985466403639438, - "grad_norm": 0.00808424036949873, - "learning_rate": 0.00019999805789627665, - "loss": 46.0, - "step": 12329 - }, - { - "epoch": 1.9856274407182255, - "grad_norm": 0.012796023860573769, - "learning_rate": 0.0001999980575809627, - "loss": 46.0, - "step": 12330 - }, - { - "epoch": 1.9857884777970127, - "grad_norm": 0.004385095555335283, - "learning_rate": 0.00019999805726562318, - "loss": 46.0, - "step": 12331 - }, - { - "epoch": 1.9859495148758002, - "grad_norm": 0.0036203661002218723, - "learning_rate": 0.00019999805695025802, - "loss": 46.0, - "step": 12332 - }, - { - "epoch": 1.9861105519545874, - "grad_norm": 0.008089341223239899, - "learning_rate": 0.0001999980566348673, - "loss": 46.0, - "step": 12333 - }, - { - "epoch": 1.9862715890333749, - "grad_norm": 0.00829344056546688, - "learning_rate": 0.00019999805631945095, - "loss": 46.0, - "step": 12334 - }, - { - "epoch": 1.9864326261121623, - "grad_norm": 0.0019083835650235415, - "learning_rate": 0.00019999805600400908, - "loss": 46.0, - "step": 12335 - }, - { - "epoch": 1.9865936631909498, - "grad_norm": 0.005012488458305597, - "learning_rate": 0.00019999805568854157, - "loss": 46.0, - "step": 12336 - }, - { - "epoch": 1.9867547002697372, - "grad_norm": 0.009535104967653751, - "learning_rate": 0.00019999805537304844, - "loss": 46.0, - "step": 12337 - }, - { - "epoch": 1.9869157373485244, - "grad_norm": 0.0005328900297172368, - "learning_rate": 0.00019999805505752972, - "loss": 46.0, - "step": 12338 - }, - { - "epoch": 1.987076774427312, - "grad_norm": 0.00236369576305151, - "learning_rate": 0.00019999805474198542, - "loss": 46.0, - "step": 12339 - }, - { - "epoch": 1.9872378115060991, - "grad_norm": 0.001165008987300098, - "learning_rate": 0.00019999805442641556, - "loss": 46.0, - "step": 12340 - }, - { - "epoch": 1.9873988485848866, - "grad_norm": 0.0036361468955874443, - "learning_rate": 0.00019999805411082005, - "loss": 46.0, - "step": 12341 - }, - { - "epoch": 1.987559885663674, - "grad_norm": 0.0006017803680151701, - "learning_rate": 0.00019999805379519899, - "loss": 46.0, - "step": 12342 - }, - { - "epoch": 1.9877209227424615, - "grad_norm": 0.0038714283145964146, - "learning_rate": 0.0001999980534795523, - "loss": 46.0, - "step": 12343 - }, - { - "epoch": 1.987881959821249, - "grad_norm": 0.009130291640758514, - "learning_rate": 0.00019999805316388001, - "loss": 46.0, - "step": 12344 - }, - { - "epoch": 1.9880429969000364, - "grad_norm": 0.001152730779722333, - "learning_rate": 0.0001999980528481822, - "loss": 46.0, - "step": 12345 - }, - { - "epoch": 1.9882040339788236, - "grad_norm": 0.0018681924557313323, - "learning_rate": 0.00019999805253245872, - "loss": 46.0, - "step": 12346 - }, - { - "epoch": 1.988365071057611, - "grad_norm": 0.005925884936004877, - "learning_rate": 0.00019999805221670967, - "loss": 46.0, - "step": 12347 - }, - { - "epoch": 1.9885261081363983, - "grad_norm": 0.0007004171493463218, - "learning_rate": 0.00019999805190093503, - "loss": 46.0, - "step": 12348 - }, - { - "epoch": 1.9886871452151857, - "grad_norm": 0.0013518970226868987, - "learning_rate": 0.00019999805158513477, - "loss": 46.0, - "step": 12349 - }, - { - "epoch": 1.9888481822939732, - "grad_norm": 0.0009766726288944483, - "learning_rate": 0.00019999805126930893, - "loss": 46.0, - "step": 12350 - }, - { - "epoch": 1.9890092193727607, - "grad_norm": 0.004946235101670027, - "learning_rate": 0.0001999980509534575, - "loss": 46.0, - "step": 12351 - }, - { - "epoch": 1.989170256451548, - "grad_norm": 0.0025586055126041174, - "learning_rate": 0.00019999805063758048, - "loss": 46.0, - "step": 12352 - }, - { - "epoch": 1.9893312935303353, - "grad_norm": 0.0016643865965306759, - "learning_rate": 0.00019999805032167787, - "loss": 46.0, - "step": 12353 - }, - { - "epoch": 1.9894923306091228, - "grad_norm": 0.008810807019472122, - "learning_rate": 0.00019999805000574965, - "loss": 46.0, - "step": 12354 - }, - { - "epoch": 1.98965336768791, - "grad_norm": 0.001997843850404024, - "learning_rate": 0.00019999804968979582, - "loss": 46.0, - "step": 12355 - }, - { - "epoch": 1.9898144047666975, - "grad_norm": 0.003036949783563614, - "learning_rate": 0.00019999804937381646, - "loss": 46.0, - "step": 12356 - }, - { - "epoch": 1.989975441845485, - "grad_norm": 0.0014212050009518862, - "learning_rate": 0.00019999804905781145, - "loss": 46.0, - "step": 12357 - }, - { - "epoch": 1.9901364789242724, - "grad_norm": 0.007264718413352966, - "learning_rate": 0.00019999804874178088, - "loss": 46.0, - "step": 12358 - }, - { - "epoch": 1.9902975160030598, - "grad_norm": 0.007655519060790539, - "learning_rate": 0.00019999804842572467, - "loss": 46.0, - "step": 12359 - }, - { - "epoch": 1.990458553081847, - "grad_norm": 0.00354858860373497, - "learning_rate": 0.00019999804810964288, - "loss": 46.0, - "step": 12360 - }, - { - "epoch": 1.9906195901606345, - "grad_norm": 0.001752532203681767, - "learning_rate": 0.00019999804779353552, - "loss": 46.0, - "step": 12361 - }, - { - "epoch": 1.9907806272394217, - "grad_norm": 0.01585347205400467, - "learning_rate": 0.00019999804747740255, - "loss": 46.0, - "step": 12362 - }, - { - "epoch": 1.9909416643182092, - "grad_norm": 0.0026188462506979704, - "learning_rate": 0.000199998047161244, - "loss": 46.0, - "step": 12363 - }, - { - "epoch": 1.9911027013969966, - "grad_norm": 0.0007879911572672427, - "learning_rate": 0.00019999804684505982, - "loss": 46.0, - "step": 12364 - }, - { - "epoch": 1.991263738475784, - "grad_norm": 0.011935262940824032, - "learning_rate": 0.0001999980465288501, - "loss": 46.0, - "step": 12365 - }, - { - "epoch": 1.9914247755545715, - "grad_norm": 0.00202621566131711, - "learning_rate": 0.00019999804621261474, - "loss": 46.0, - "step": 12366 - }, - { - "epoch": 1.991585812633359, - "grad_norm": 0.0012810584157705307, - "learning_rate": 0.0001999980458963538, - "loss": 46.0, - "step": 12367 - }, - { - "epoch": 1.9917468497121462, - "grad_norm": 0.011172051541507244, - "learning_rate": 0.00019999804558006726, - "loss": 46.0, - "step": 12368 - }, - { - "epoch": 1.9919078867909334, - "grad_norm": 0.0009969781385734677, - "learning_rate": 0.00019999804526375515, - "loss": 46.0, - "step": 12369 - }, - { - "epoch": 1.992068923869721, - "grad_norm": 0.014885064214468002, - "learning_rate": 0.00019999804494741743, - "loss": 46.0, - "step": 12370 - }, - { - "epoch": 1.9922299609485083, - "grad_norm": 0.007742216344922781, - "learning_rate": 0.0001999980446310541, - "loss": 46.0, - "step": 12371 - }, - { - "epoch": 1.9923909980272958, - "grad_norm": 0.004301742650568485, - "learning_rate": 0.00019999804431466517, - "loss": 46.0, - "step": 12372 - }, - { - "epoch": 1.9925520351060833, - "grad_norm": 0.0026088471058756113, - "learning_rate": 0.00019999804399825069, - "loss": 46.0, - "step": 12373 - }, - { - "epoch": 1.9927130721848707, - "grad_norm": 0.0007938767084851861, - "learning_rate": 0.00019999804368181056, - "loss": 46.0, - "step": 12374 - }, - { - "epoch": 1.992874109263658, - "grad_norm": 0.002270581666380167, - "learning_rate": 0.00019999804336534488, - "loss": 46.0, - "step": 12375 - }, - { - "epoch": 1.9930351463424454, - "grad_norm": 0.0047074127942323685, - "learning_rate": 0.00019999804304885358, - "loss": 46.0, - "step": 12376 - }, - { - "epoch": 1.9931961834212326, - "grad_norm": 0.0019312455551698804, - "learning_rate": 0.00019999804273233672, - "loss": 46.0, - "step": 12377 - }, - { - "epoch": 1.99335722050002, - "grad_norm": 0.003867735853418708, - "learning_rate": 0.00019999804241579424, - "loss": 46.0, - "step": 12378 - }, - { - "epoch": 1.9935182575788075, - "grad_norm": 0.0008616733830422163, - "learning_rate": 0.00019999804209922616, - "loss": 46.0, - "step": 12379 - }, - { - "epoch": 1.993679294657595, - "grad_norm": 0.0006523603806272149, - "learning_rate": 0.0001999980417826325, - "loss": 46.0, - "step": 12380 - }, - { - "epoch": 1.9938403317363824, - "grad_norm": 0.004674069117754698, - "learning_rate": 0.00019999804146601325, - "loss": 46.0, - "step": 12381 - }, - { - "epoch": 1.9940013688151697, - "grad_norm": 0.001432825461961329, - "learning_rate": 0.0001999980411493684, - "loss": 46.0, - "step": 12382 - }, - { - "epoch": 1.994162405893957, - "grad_norm": 0.003109590383246541, - "learning_rate": 0.00019999804083269793, - "loss": 46.0, - "step": 12383 - }, - { - "epoch": 1.9943234429727443, - "grad_norm": 0.003053095657378435, - "learning_rate": 0.00019999804051600188, - "loss": 46.0, - "step": 12384 - }, - { - "epoch": 1.9944844800515318, - "grad_norm": 0.0062919482588768005, - "learning_rate": 0.00019999804019928024, - "loss": 46.0, - "step": 12385 - }, - { - "epoch": 1.9946455171303192, - "grad_norm": 0.002199358306825161, - "learning_rate": 0.00019999803988253302, - "loss": 46.0, - "step": 12386 - }, - { - "epoch": 1.9948065542091067, - "grad_norm": 0.0048120892606675625, - "learning_rate": 0.0001999980395657602, - "loss": 46.0, - "step": 12387 - }, - { - "epoch": 1.9949675912878941, - "grad_norm": 0.0067680599167943, - "learning_rate": 0.00019999803924896178, - "loss": 46.0, - "step": 12388 - }, - { - "epoch": 1.9951286283666816, - "grad_norm": 0.0009305099956691265, - "learning_rate": 0.00019999803893213777, - "loss": 46.0, - "step": 12389 - }, - { - "epoch": 1.9952896654454688, - "grad_norm": 0.0011082387063652277, - "learning_rate": 0.00019999803861528814, - "loss": 46.0, - "step": 12390 - }, - { - "epoch": 1.995450702524256, - "grad_norm": 0.005581849720329046, - "learning_rate": 0.00019999803829841292, - "loss": 46.0, - "step": 12391 - }, - { - "epoch": 1.9956117396030435, - "grad_norm": 0.0034909637179225683, - "learning_rate": 0.00019999803798151215, - "loss": 46.0, - "step": 12392 - }, - { - "epoch": 1.995772776681831, - "grad_norm": 0.0030998988077044487, - "learning_rate": 0.00019999803766458576, - "loss": 46.0, - "step": 12393 - }, - { - "epoch": 1.9959338137606184, - "grad_norm": 0.0020426588598638773, - "learning_rate": 0.00019999803734763375, - "loss": 46.0, - "step": 12394 - }, - { - "epoch": 1.9960948508394059, - "grad_norm": 0.0010780710726976395, - "learning_rate": 0.0001999980370306562, - "loss": 46.0, - "step": 12395 - }, - { - "epoch": 1.9962558879181933, - "grad_norm": 0.0020355668384581804, - "learning_rate": 0.000199998036713653, - "loss": 46.0, - "step": 12396 - }, - { - "epoch": 1.9964169249969805, - "grad_norm": 0.00808663759380579, - "learning_rate": 0.00019999803639662425, - "loss": 46.0, - "step": 12397 - }, - { - "epoch": 1.996577962075768, - "grad_norm": 0.001027940772473812, - "learning_rate": 0.00019999803607956987, - "loss": 46.0, - "step": 12398 - }, - { - "epoch": 1.9967389991545552, - "grad_norm": 0.0017057600198313594, - "learning_rate": 0.0001999980357624899, - "loss": 46.0, - "step": 12399 - }, - { - "epoch": 1.9969000362333427, - "grad_norm": 0.0031915432773530483, - "learning_rate": 0.00019999803544538434, - "loss": 46.0, - "step": 12400 - }, - { - "epoch": 1.9970610733121301, - "grad_norm": 0.005933266133069992, - "learning_rate": 0.0001999980351282532, - "loss": 46.0, - "step": 12401 - }, - { - "epoch": 1.9972221103909176, - "grad_norm": 0.00589543953537941, - "learning_rate": 0.00019999803481109647, - "loss": 46.0, - "step": 12402 - }, - { - "epoch": 1.997383147469705, - "grad_norm": 0.003595212008804083, - "learning_rate": 0.00019999803449391413, - "loss": 46.0, - "step": 12403 - }, - { - "epoch": 1.9975441845484923, - "grad_norm": 0.005101496819406748, - "learning_rate": 0.0001999980341767062, - "loss": 46.0, - "step": 12404 - }, - { - "epoch": 1.9977052216272797, - "grad_norm": 0.0034306419547647238, - "learning_rate": 0.00019999803385947269, - "loss": 46.0, - "step": 12405 - }, - { - "epoch": 1.997866258706067, - "grad_norm": 0.003043942851945758, - "learning_rate": 0.00019999803354221355, - "loss": 46.0, - "step": 12406 - }, - { - "epoch": 1.9980272957848544, - "grad_norm": 0.0005994051462039351, - "learning_rate": 0.00019999803322492886, - "loss": 46.0, - "step": 12407 - }, - { - "epoch": 1.9981883328636418, - "grad_norm": 0.0021799085661768913, - "learning_rate": 0.00019999803290761853, - "loss": 46.0, - "step": 12408 - }, - { - "epoch": 1.9983493699424293, - "grad_norm": 0.0038078760262578726, - "learning_rate": 0.00019999803259028264, - "loss": 46.0, - "step": 12409 - }, - { - "epoch": 1.9985104070212167, - "grad_norm": 0.0029125921428203583, - "learning_rate": 0.00019999803227292113, - "loss": 46.0, - "step": 12410 - }, - { - "epoch": 1.9986714441000042, - "grad_norm": 0.011689925566315651, - "learning_rate": 0.00019999803195553404, - "loss": 46.0, - "step": 12411 - }, - { - "epoch": 1.9988324811787914, - "grad_norm": 0.00571091752499342, - "learning_rate": 0.00019999803163812135, - "loss": 46.0, - "step": 12412 - }, - { - "epoch": 1.9989935182575786, - "grad_norm": 0.00724672619253397, - "learning_rate": 0.00019999803132068306, - "loss": 46.0, - "step": 12413 - }, - { - "epoch": 1.999154555336366, - "grad_norm": 0.0006026331102475524, - "learning_rate": 0.00019999803100321918, - "loss": 46.0, - "step": 12414 - }, - { - "epoch": 1.9993155924151536, - "grad_norm": 0.008923816494643688, - "learning_rate": 0.00019999803068572973, - "loss": 46.0, - "step": 12415 - }, - { - "epoch": 1.999476629493941, - "grad_norm": 0.0019138951320201159, - "learning_rate": 0.00019999803036821465, - "loss": 46.0, - "step": 12416 - }, - { - "epoch": 1.9996376665727285, - "grad_norm": 0.004096074495464563, - "learning_rate": 0.000199998030050674, - "loss": 46.0, - "step": 12417 - }, - { - "epoch": 1.999798703651516, - "grad_norm": 0.002738706301897764, - "learning_rate": 0.00019999802973310775, - "loss": 46.0, - "step": 12418 - }, - { - "epoch": 1.9999597407303031, - "grad_norm": 0.0036281358916312456, - "learning_rate": 0.0001999980294155159, - "loss": 46.0, - "step": 12419 - }, - { - "epoch": 1.9999597407303031, - "eval_loss": 11.5, - "eval_runtime": 14.8838, - "eval_samples_per_second": 175.695, - "eval_steps_per_second": 87.881, - "step": 12419 - }, - { - "epoch": 2.0001610370787875, - "grad_norm": 0.008145570755004883, - "learning_rate": 0.00019999802909789847, - "loss": 46.0, - "step": 12420 - }, - { - "epoch": 2.000322074157575, - "grad_norm": 0.005223439075052738, - "learning_rate": 0.00019999802878025542, - "loss": 46.0, - "step": 12421 - }, - { - "epoch": 2.0004831112363624, - "grad_norm": 0.0007635240908712149, - "learning_rate": 0.0001999980284625868, - "loss": 46.0, - "step": 12422 - }, - { - "epoch": 2.0006441483151494, - "grad_norm": 0.003887373022735119, - "learning_rate": 0.0001999980281448926, - "loss": 46.0, - "step": 12423 - }, - { - "epoch": 2.000805185393937, - "grad_norm": 0.002868556184694171, - "learning_rate": 0.00019999802782717275, - "loss": 46.0, - "step": 12424 - }, - { - "epoch": 2.0009662224727243, - "grad_norm": 0.0013190106255933642, - "learning_rate": 0.00019999802750942736, - "loss": 46.0, - "step": 12425 - }, - { - "epoch": 2.0011272595515117, - "grad_norm": 0.003386542433872819, - "learning_rate": 0.00019999802719165635, - "loss": 46.0, - "step": 12426 - }, - { - "epoch": 2.001288296630299, - "grad_norm": 0.0027957663405686617, - "learning_rate": 0.00019999802687385972, - "loss": 46.0, - "step": 12427 - }, - { - "epoch": 2.0014493337090866, - "grad_norm": 0.005035643000155687, - "learning_rate": 0.00019999802655603754, - "loss": 46.0, - "step": 12428 - }, - { - "epoch": 2.001610370787874, - "grad_norm": 0.00128930585924536, - "learning_rate": 0.00019999802623818974, - "loss": 46.0, - "step": 12429 - }, - { - "epoch": 2.001771407866661, - "grad_norm": 0.002517638262361288, - "learning_rate": 0.00019999802592031638, - "loss": 46.0, - "step": 12430 - }, - { - "epoch": 2.0019324449454485, - "grad_norm": 0.0030535205733031034, - "learning_rate": 0.00019999802560241738, - "loss": 46.0, - "step": 12431 - }, - { - "epoch": 2.002093482024236, - "grad_norm": 0.003312586573883891, - "learning_rate": 0.0001999980252844928, - "loss": 46.0, - "step": 12432 - }, - { - "epoch": 2.0022545191030234, - "grad_norm": 0.002608202863484621, - "learning_rate": 0.00019999802496654265, - "loss": 46.0, - "step": 12433 - }, - { - "epoch": 2.002415556181811, - "grad_norm": 0.003110248129814863, - "learning_rate": 0.00019999802464856688, - "loss": 46.0, - "step": 12434 - }, - { - "epoch": 2.0025765932605983, - "grad_norm": 0.00047402121708728373, - "learning_rate": 0.0001999980243305655, - "loss": 46.0, - "step": 12435 - }, - { - "epoch": 2.002737630339386, - "grad_norm": 0.0006869040662422776, - "learning_rate": 0.00019999802401253857, - "loss": 46.0, - "step": 12436 - }, - { - "epoch": 2.0028986674181732, - "grad_norm": 0.0009354244684800506, - "learning_rate": 0.00019999802369448605, - "loss": 46.0, - "step": 12437 - }, - { - "epoch": 2.0030597044969602, - "grad_norm": 0.00657620606943965, - "learning_rate": 0.00019999802337640788, - "loss": 46.0, - "step": 12438 - }, - { - "epoch": 2.0032207415757477, - "grad_norm": 0.0007929457351565361, - "learning_rate": 0.00019999802305830416, - "loss": 46.0, - "step": 12439 - }, - { - "epoch": 2.003381778654535, - "grad_norm": 0.006124570500105619, - "learning_rate": 0.00019999802274017482, - "loss": 46.0, - "step": 12440 - }, - { - "epoch": 2.0035428157333226, - "grad_norm": 0.0017109365435317159, - "learning_rate": 0.00019999802242201992, - "loss": 46.0, - "step": 12441 - }, - { - "epoch": 2.00370385281211, - "grad_norm": 0.0011208091164007783, - "learning_rate": 0.00019999802210383938, - "loss": 46.0, - "step": 12442 - }, - { - "epoch": 2.0038648898908975, - "grad_norm": 0.0011709654936566949, - "learning_rate": 0.0001999980217856333, - "loss": 46.0, - "step": 12443 - }, - { - "epoch": 2.004025926969685, - "grad_norm": 0.0027863120194524527, - "learning_rate": 0.0001999980214674016, - "loss": 46.0, - "step": 12444 - }, - { - "epoch": 2.004186964048472, - "grad_norm": 0.0015842863358557224, - "learning_rate": 0.0001999980211491443, - "loss": 46.0, - "step": 12445 - }, - { - "epoch": 2.0043480011272594, - "grad_norm": 0.004461794160306454, - "learning_rate": 0.0001999980208308614, - "loss": 46.0, - "step": 12446 - }, - { - "epoch": 2.004509038206047, - "grad_norm": 0.0010177140356972814, - "learning_rate": 0.0001999980205125529, - "loss": 46.0, - "step": 12447 - }, - { - "epoch": 2.0046700752848343, - "grad_norm": 0.0020138367544859648, - "learning_rate": 0.0001999980201942188, - "loss": 46.0, - "step": 12448 - }, - { - "epoch": 2.0048311123636218, - "grad_norm": 0.003289173124358058, - "learning_rate": 0.00019999801987585913, - "loss": 46.0, - "step": 12449 - }, - { - "epoch": 2.004992149442409, - "grad_norm": 0.0035305796191096306, - "learning_rate": 0.00019999801955747386, - "loss": 46.0, - "step": 12450 - }, - { - "epoch": 2.0051531865211967, - "grad_norm": 0.0022860136814415455, - "learning_rate": 0.000199998019239063, - "loss": 46.0, - "step": 12451 - }, - { - "epoch": 2.0053142235999837, - "grad_norm": 0.0015331333270296454, - "learning_rate": 0.00019999801892062652, - "loss": 46.0, - "step": 12452 - }, - { - "epoch": 2.005475260678771, - "grad_norm": 0.008465247228741646, - "learning_rate": 0.00019999801860216446, - "loss": 46.0, - "step": 12453 - }, - { - "epoch": 2.0056362977575586, - "grad_norm": 0.0015940681332722306, - "learning_rate": 0.00019999801828367682, - "loss": 46.0, - "step": 12454 - }, - { - "epoch": 2.005797334836346, - "grad_norm": 0.009646641090512276, - "learning_rate": 0.00019999801796516357, - "loss": 46.0, - "step": 12455 - }, - { - "epoch": 2.0059583719151335, - "grad_norm": 0.006479586940258741, - "learning_rate": 0.00019999801764662473, - "loss": 46.0, - "step": 12456 - }, - { - "epoch": 2.006119408993921, - "grad_norm": 0.013124020770192146, - "learning_rate": 0.00019999801732806032, - "loss": 46.0, - "step": 12457 - }, - { - "epoch": 2.0062804460727084, - "grad_norm": 0.001967004267498851, - "learning_rate": 0.00019999801700947028, - "loss": 46.0, - "step": 12458 - }, - { - "epoch": 2.006441483151496, - "grad_norm": 0.008753619156777859, - "learning_rate": 0.00019999801669085468, - "loss": 46.0, - "step": 12459 - }, - { - "epoch": 2.006602520230283, - "grad_norm": 0.002026032656431198, - "learning_rate": 0.00019999801637221346, - "loss": 46.0, - "step": 12460 - }, - { - "epoch": 2.0067635573090703, - "grad_norm": 0.0008684775093570352, - "learning_rate": 0.00019999801605354665, - "loss": 46.0, - "step": 12461 - }, - { - "epoch": 2.0069245943878578, - "grad_norm": 0.0017957454547286034, - "learning_rate": 0.00019999801573485423, - "loss": 46.0, - "step": 12462 - }, - { - "epoch": 2.007085631466645, - "grad_norm": 0.010439875535666943, - "learning_rate": 0.00019999801541613623, - "loss": 46.0, - "step": 12463 - }, - { - "epoch": 2.0072466685454327, - "grad_norm": 0.00128399976529181, - "learning_rate": 0.00019999801509739263, - "loss": 46.0, - "step": 12464 - }, - { - "epoch": 2.00740770562422, - "grad_norm": 0.0012941404711455107, - "learning_rate": 0.00019999801477862345, - "loss": 46.0, - "step": 12465 - }, - { - "epoch": 2.0075687427030076, - "grad_norm": 0.0016802349127829075, - "learning_rate": 0.00019999801445982866, - "loss": 46.0, - "step": 12466 - }, - { - "epoch": 2.0077297797817946, - "grad_norm": 0.003150808857753873, - "learning_rate": 0.00019999801414100833, - "loss": 46.0, - "step": 12467 - }, - { - "epoch": 2.007890816860582, - "grad_norm": 0.0138546796515584, - "learning_rate": 0.00019999801382216233, - "loss": 46.0, - "step": 12468 - }, - { - "epoch": 2.0080518539393695, - "grad_norm": 0.0014055476058274508, - "learning_rate": 0.00019999801350329078, - "loss": 46.0, - "step": 12469 - }, - { - "epoch": 2.008212891018157, - "grad_norm": 0.001947572105564177, - "learning_rate": 0.0001999980131843936, - "loss": 46.0, - "step": 12470 - }, - { - "epoch": 2.0083739280969444, - "grad_norm": 0.004537053406238556, - "learning_rate": 0.00019999801286547087, - "loss": 46.0, - "step": 12471 - }, - { - "epoch": 2.008534965175732, - "grad_norm": 0.0028735287487506866, - "learning_rate": 0.0001999980125465225, - "loss": 46.0, - "step": 12472 - }, - { - "epoch": 2.0086960022545193, - "grad_norm": 0.0015545415226370096, - "learning_rate": 0.00019999801222754857, - "loss": 46.0, - "step": 12473 - }, - { - "epoch": 2.0088570393333063, - "grad_norm": 0.004813038744032383, - "learning_rate": 0.00019999801190854902, - "loss": 46.0, - "step": 12474 - }, - { - "epoch": 2.0090180764120937, - "grad_norm": 0.0013712627114728093, - "learning_rate": 0.0001999980115895239, - "loss": 46.0, - "step": 12475 - }, - { - "epoch": 2.009179113490881, - "grad_norm": 0.004010814242064953, - "learning_rate": 0.00019999801127047317, - "loss": 46.0, - "step": 12476 - }, - { - "epoch": 2.0093401505696686, - "grad_norm": 0.0028936523012816906, - "learning_rate": 0.00019999801095139686, - "loss": 46.0, - "step": 12477 - }, - { - "epoch": 2.009501187648456, - "grad_norm": 0.004158404655754566, - "learning_rate": 0.00019999801063229496, - "loss": 46.0, - "step": 12478 - }, - { - "epoch": 2.0096622247272435, - "grad_norm": 0.001904657226987183, - "learning_rate": 0.00019999801031316743, - "loss": 46.0, - "step": 12479 - }, - { - "epoch": 2.009823261806031, - "grad_norm": 0.002171450527384877, - "learning_rate": 0.00019999800999401433, - "loss": 46.0, - "step": 12480 - }, - { - "epoch": 2.0099842988848184, - "grad_norm": 0.005121269263327122, - "learning_rate": 0.00019999800967483564, - "loss": 46.0, - "step": 12481 - }, - { - "epoch": 2.0101453359636055, - "grad_norm": 0.0019119092030450702, - "learning_rate": 0.00019999800935563132, - "loss": 46.0, - "step": 12482 - }, - { - "epoch": 2.010306373042393, - "grad_norm": 0.0029260199517011642, - "learning_rate": 0.00019999800903640143, - "loss": 46.0, - "step": 12483 - }, - { - "epoch": 2.0104674101211804, - "grad_norm": 0.0010152553441002965, - "learning_rate": 0.000199998008717146, - "loss": 46.0, - "step": 12484 - }, - { - "epoch": 2.010628447199968, - "grad_norm": 0.004634276498109102, - "learning_rate": 0.0001999980083978649, - "loss": 46.0, - "step": 12485 - }, - { - "epoch": 2.0107894842787553, - "grad_norm": 0.004788288380950689, - "learning_rate": 0.00019999800807855823, - "loss": 46.0, - "step": 12486 - }, - { - "epoch": 2.0109505213575427, - "grad_norm": 0.0010063032386824489, - "learning_rate": 0.00019999800775922597, - "loss": 46.0, - "step": 12487 - }, - { - "epoch": 2.01111155843633, - "grad_norm": 0.004236471839249134, - "learning_rate": 0.0001999980074398681, - "loss": 46.0, - "step": 12488 - }, - { - "epoch": 2.011272595515117, - "grad_norm": 0.0013123058015480638, - "learning_rate": 0.00019999800712048465, - "loss": 46.0, - "step": 12489 - }, - { - "epoch": 2.0114336325939046, - "grad_norm": 0.0034236281644552946, - "learning_rate": 0.0001999980068010756, - "loss": 46.0, - "step": 12490 - }, - { - "epoch": 2.011594669672692, - "grad_norm": 0.001787322573363781, - "learning_rate": 0.00019999800648164097, - "loss": 46.0, - "step": 12491 - }, - { - "epoch": 2.0117557067514795, - "grad_norm": 0.001953398110345006, - "learning_rate": 0.00019999800616218074, - "loss": 46.0, - "step": 12492 - }, - { - "epoch": 2.011916743830267, - "grad_norm": 0.0016378588043153286, - "learning_rate": 0.0001999980058426949, - "loss": 46.0, - "step": 12493 - }, - { - "epoch": 2.0120777809090544, - "grad_norm": 0.010391559451818466, - "learning_rate": 0.00019999800552318348, - "loss": 46.0, - "step": 12494 - }, - { - "epoch": 2.012238817987842, - "grad_norm": 0.0006751961773261428, - "learning_rate": 0.00019999800520364647, - "loss": 46.0, - "step": 12495 - }, - { - "epoch": 2.012399855066629, - "grad_norm": 0.0050230189226567745, - "learning_rate": 0.00019999800488408384, - "loss": 46.0, - "step": 12496 - }, - { - "epoch": 2.0125608921454163, - "grad_norm": 0.0012957988074049354, - "learning_rate": 0.00019999800456449565, - "loss": 46.0, - "step": 12497 - }, - { - "epoch": 2.012721929224204, - "grad_norm": 0.0011957393726333976, - "learning_rate": 0.00019999800424488185, - "loss": 46.0, - "step": 12498 - }, - { - "epoch": 2.0128829663029912, - "grad_norm": 0.00101442018058151, - "learning_rate": 0.00019999800392524243, - "loss": 46.0, - "step": 12499 - }, - { - "epoch": 2.0130440033817787, - "grad_norm": 0.002004617126658559, - "learning_rate": 0.00019999800360557746, - "loss": 46.0, - "step": 12500 - }, - { - "epoch": 2.013205040460566, - "grad_norm": 0.0009082549950107932, - "learning_rate": 0.00019999800328588687, - "loss": 46.0, - "step": 12501 - }, - { - "epoch": 2.0133660775393536, - "grad_norm": 0.0003384132869541645, - "learning_rate": 0.0001999980029661707, - "loss": 46.0, - "step": 12502 - }, - { - "epoch": 2.013527114618141, - "grad_norm": 0.0033907992765307426, - "learning_rate": 0.00019999800264642895, - "loss": 46.0, - "step": 12503 - }, - { - "epoch": 2.013688151696928, - "grad_norm": 0.003555042902007699, - "learning_rate": 0.00019999800232666157, - "loss": 46.0, - "step": 12504 - }, - { - "epoch": 2.0138491887757155, - "grad_norm": 0.002011653268709779, - "learning_rate": 0.00019999800200686858, - "loss": 46.0, - "step": 12505 - }, - { - "epoch": 2.014010225854503, - "grad_norm": 0.0020098022650927305, - "learning_rate": 0.00019999800168705005, - "loss": 46.0, - "step": 12506 - }, - { - "epoch": 2.0141712629332904, - "grad_norm": 0.002362739061936736, - "learning_rate": 0.0001999980013672059, - "loss": 46.0, - "step": 12507 - }, - { - "epoch": 2.014332300012078, - "grad_norm": 0.001455823890864849, - "learning_rate": 0.00019999800104733616, - "loss": 46.0, - "step": 12508 - }, - { - "epoch": 2.0144933370908653, - "grad_norm": 0.005797697696834803, - "learning_rate": 0.00019999800072744082, - "loss": 46.0, - "step": 12509 - }, - { - "epoch": 2.0146543741696528, - "grad_norm": 0.004650041926652193, - "learning_rate": 0.0001999980004075199, - "loss": 46.0, - "step": 12510 - }, - { - "epoch": 2.0148154112484398, - "grad_norm": 0.0028827732894569635, - "learning_rate": 0.00019999800008757335, - "loss": 46.0, - "step": 12511 - }, - { - "epoch": 2.014976448327227, - "grad_norm": 0.0007172313635237515, - "learning_rate": 0.00019999799976760124, - "loss": 46.0, - "step": 12512 - }, - { - "epoch": 2.0151374854060147, - "grad_norm": 0.0020117510575801134, - "learning_rate": 0.00019999799944760352, - "loss": 46.0, - "step": 12513 - }, - { - "epoch": 2.015298522484802, - "grad_norm": 0.004131100606173277, - "learning_rate": 0.0001999979991275802, - "loss": 46.0, - "step": 12514 - }, - { - "epoch": 2.0154595595635896, - "grad_norm": 0.0020162505097687244, - "learning_rate": 0.0001999979988075313, - "loss": 46.0, - "step": 12515 - }, - { - "epoch": 2.015620596642377, - "grad_norm": 0.0005785388639196754, - "learning_rate": 0.0001999979984874568, - "loss": 46.0, - "step": 12516 - }, - { - "epoch": 2.0157816337211645, - "grad_norm": 0.0015340599929913878, - "learning_rate": 0.0001999979981673567, - "loss": 46.0, - "step": 12517 - }, - { - "epoch": 2.0159426707999515, - "grad_norm": 0.009182674810290337, - "learning_rate": 0.00019999799784723102, - "loss": 46.0, - "step": 12518 - }, - { - "epoch": 2.016103707878739, - "grad_norm": 0.002607476431876421, - "learning_rate": 0.00019999799752707973, - "loss": 46.0, - "step": 12519 - }, - { - "epoch": 2.0162647449575264, - "grad_norm": 0.008819465525448322, - "learning_rate": 0.00019999799720690284, - "loss": 46.0, - "step": 12520 - }, - { - "epoch": 2.016425782036314, - "grad_norm": 0.0014921671245247126, - "learning_rate": 0.00019999799688670038, - "loss": 46.0, - "step": 12521 - }, - { - "epoch": 2.0165868191151013, - "grad_norm": 0.0022067397367209196, - "learning_rate": 0.00019999799656647232, - "loss": 46.0, - "step": 12522 - }, - { - "epoch": 2.0167478561938887, - "grad_norm": 0.004075291100889444, - "learning_rate": 0.00019999799624621868, - "loss": 46.0, - "step": 12523 - }, - { - "epoch": 2.016908893272676, - "grad_norm": 0.005843945313245058, - "learning_rate": 0.0001999979959259394, - "loss": 46.0, - "step": 12524 - }, - { - "epoch": 2.0170699303514636, - "grad_norm": 0.004273079801350832, - "learning_rate": 0.00019999799560563455, - "loss": 46.0, - "step": 12525 - }, - { - "epoch": 2.0172309674302507, - "grad_norm": 0.003784353844821453, - "learning_rate": 0.0001999979952853041, - "loss": 46.0, - "step": 12526 - }, - { - "epoch": 2.017392004509038, - "grad_norm": 0.004242415074259043, - "learning_rate": 0.00019999799496494807, - "loss": 46.0, - "step": 12527 - }, - { - "epoch": 2.0175530415878256, - "grad_norm": 0.0064863828010857105, - "learning_rate": 0.00019999799464456643, - "loss": 46.0, - "step": 12528 - }, - { - "epoch": 2.017714078666613, - "grad_norm": 0.002360024256631732, - "learning_rate": 0.0001999979943241592, - "loss": 46.0, - "step": 12529 - }, - { - "epoch": 2.0178751157454005, - "grad_norm": 0.01183509360998869, - "learning_rate": 0.00019999799400372638, - "loss": 46.0, - "step": 12530 - }, - { - "epoch": 2.018036152824188, - "grad_norm": 0.001177316065877676, - "learning_rate": 0.00019999799368326798, - "loss": 46.0, - "step": 12531 - }, - { - "epoch": 2.0181971899029754, - "grad_norm": 0.0018052023369818926, - "learning_rate": 0.00019999799336278397, - "loss": 46.0, - "step": 12532 - }, - { - "epoch": 2.0183582269817624, - "grad_norm": 0.0010589802404865623, - "learning_rate": 0.00019999799304227435, - "loss": 46.0, - "step": 12533 - }, - { - "epoch": 2.01851926406055, - "grad_norm": 0.0011090995976701379, - "learning_rate": 0.00019999799272173917, - "loss": 46.0, - "step": 12534 - }, - { - "epoch": 2.0186803011393373, - "grad_norm": 0.0019332461524754763, - "learning_rate": 0.00019999799240117837, - "loss": 46.0, - "step": 12535 - }, - { - "epoch": 2.0188413382181247, - "grad_norm": 0.00811974797397852, - "learning_rate": 0.00019999799208059198, - "loss": 46.0, - "step": 12536 - }, - { - "epoch": 2.019002375296912, - "grad_norm": 0.002995526883751154, - "learning_rate": 0.00019999799175997998, - "loss": 46.0, - "step": 12537 - }, - { - "epoch": 2.0191634123756996, - "grad_norm": 0.0013452901039272547, - "learning_rate": 0.00019999799143934242, - "loss": 46.0, - "step": 12538 - }, - { - "epoch": 2.019324449454487, - "grad_norm": 0.0016195857897400856, - "learning_rate": 0.00019999799111867925, - "loss": 46.0, - "step": 12539 - }, - { - "epoch": 2.019485486533274, - "grad_norm": 0.016419364139437675, - "learning_rate": 0.0001999979907979905, - "loss": 46.0, - "step": 12540 - }, - { - "epoch": 2.0196465236120615, - "grad_norm": 0.005423232913017273, - "learning_rate": 0.00019999799047727614, - "loss": 46.0, - "step": 12541 - }, - { - "epoch": 2.019807560690849, - "grad_norm": 0.003274044021964073, - "learning_rate": 0.00019999799015653618, - "loss": 46.0, - "step": 12542 - }, - { - "epoch": 2.0199685977696364, - "grad_norm": 0.002777092158794403, - "learning_rate": 0.00019999798983577063, - "loss": 46.0, - "step": 12543 - }, - { - "epoch": 2.020129634848424, - "grad_norm": 0.005474648904055357, - "learning_rate": 0.0001999979895149795, - "loss": 46.0, - "step": 12544 - }, - { - "epoch": 2.0202906719272113, - "grad_norm": 0.0010304292663931847, - "learning_rate": 0.00019999798919416274, - "loss": 46.0, - "step": 12545 - }, - { - "epoch": 2.020451709005999, - "grad_norm": 0.0009154936415143311, - "learning_rate": 0.00019999798887332043, - "loss": 46.0, - "step": 12546 - }, - { - "epoch": 2.020612746084786, - "grad_norm": 0.0007544083637185395, - "learning_rate": 0.0001999979885524525, - "loss": 46.0, - "step": 12547 - }, - { - "epoch": 2.0207737831635733, - "grad_norm": 0.00824920367449522, - "learning_rate": 0.00019999798823155897, - "loss": 46.0, - "step": 12548 - }, - { - "epoch": 2.0209348202423607, - "grad_norm": 0.0035660217981785536, - "learning_rate": 0.00019999798791063987, - "loss": 46.0, - "step": 12549 - }, - { - "epoch": 2.021095857321148, - "grad_norm": 0.003505911212414503, - "learning_rate": 0.00019999798758969518, - "loss": 46.0, - "step": 12550 - }, - { - "epoch": 2.0212568943999356, - "grad_norm": 0.0022898083552718163, - "learning_rate": 0.00019999798726872485, - "loss": 46.0, - "step": 12551 - }, - { - "epoch": 2.021417931478723, - "grad_norm": 0.0015310329617932439, - "learning_rate": 0.00019999798694772896, - "loss": 46.0, - "step": 12552 - }, - { - "epoch": 2.0215789685575105, - "grad_norm": 0.0007999847293831408, - "learning_rate": 0.0001999979866267075, - "loss": 46.0, - "step": 12553 - }, - { - "epoch": 2.021740005636298, - "grad_norm": 0.0013681076234206557, - "learning_rate": 0.00019999798630566037, - "loss": 46.0, - "step": 12554 - }, - { - "epoch": 2.021901042715085, - "grad_norm": 0.010000420734286308, - "learning_rate": 0.0001999979859845877, - "loss": 46.0, - "step": 12555 - }, - { - "epoch": 2.0220620797938724, - "grad_norm": 0.00205078162252903, - "learning_rate": 0.00019999798566348943, - "loss": 46.0, - "step": 12556 - }, - { - "epoch": 2.02222311687266, - "grad_norm": 0.0012896863045170903, - "learning_rate": 0.00019999798534236555, - "loss": 46.0, - "step": 12557 - }, - { - "epoch": 2.0223841539514473, - "grad_norm": 0.0004371644463390112, - "learning_rate": 0.00019999798502121609, - "loss": 46.0, - "step": 12558 - }, - { - "epoch": 2.022545191030235, - "grad_norm": 0.005417472217231989, - "learning_rate": 0.00019999798470004106, - "loss": 46.0, - "step": 12559 - }, - { - "epoch": 2.0227062281090222, - "grad_norm": 0.001667987322434783, - "learning_rate": 0.0001999979843788404, - "loss": 46.0, - "step": 12560 - }, - { - "epoch": 2.0228672651878097, - "grad_norm": 0.004480540752410889, - "learning_rate": 0.00019999798405761414, - "loss": 46.0, - "step": 12561 - }, - { - "epoch": 2.0230283022665967, - "grad_norm": 0.008831013925373554, - "learning_rate": 0.00019999798373636232, - "loss": 46.0, - "step": 12562 - }, - { - "epoch": 2.023189339345384, - "grad_norm": 0.0016567183192819357, - "learning_rate": 0.00019999798341508487, - "loss": 46.0, - "step": 12563 - }, - { - "epoch": 2.0233503764241716, - "grad_norm": 0.0003811770584434271, - "learning_rate": 0.00019999798309378183, - "loss": 46.0, - "step": 12564 - }, - { - "epoch": 2.023511413502959, - "grad_norm": 0.0006745964055880904, - "learning_rate": 0.00019999798277245322, - "loss": 46.0, - "step": 12565 - }, - { - "epoch": 2.0236724505817465, - "grad_norm": 0.0057113138027489185, - "learning_rate": 0.000199997982451099, - "loss": 46.0, - "step": 12566 - }, - { - "epoch": 2.023833487660534, - "grad_norm": 0.011550800874829292, - "learning_rate": 0.00019999798212971917, - "loss": 46.0, - "step": 12567 - }, - { - "epoch": 2.0239945247393214, - "grad_norm": 0.0019337967969477177, - "learning_rate": 0.00019999798180831378, - "loss": 46.0, - "step": 12568 - }, - { - "epoch": 2.0241555618181084, - "grad_norm": 0.0020467578433454037, - "learning_rate": 0.00019999798148688278, - "loss": 46.0, - "step": 12569 - }, - { - "epoch": 2.024316598896896, - "grad_norm": 0.0032627317123115063, - "learning_rate": 0.00019999798116542618, - "loss": 46.0, - "step": 12570 - }, - { - "epoch": 2.0244776359756833, - "grad_norm": 0.0009862061124294996, - "learning_rate": 0.000199997980843944, - "loss": 46.0, - "step": 12571 - }, - { - "epoch": 2.0246386730544708, - "grad_norm": 0.0033507738262414932, - "learning_rate": 0.0001999979805224362, - "loss": 46.0, - "step": 12572 - }, - { - "epoch": 2.024799710133258, - "grad_norm": 0.000880859384778887, - "learning_rate": 0.00019999798020090283, - "loss": 46.0, - "step": 12573 - }, - { - "epoch": 2.0249607472120457, - "grad_norm": 0.0016476516611874104, - "learning_rate": 0.0001999979798793439, - "loss": 46.0, - "step": 12574 - }, - { - "epoch": 2.025121784290833, - "grad_norm": 0.0018749985611066222, - "learning_rate": 0.0001999979795577593, - "loss": 46.0, - "step": 12575 - }, - { - "epoch": 2.0252828213696206, - "grad_norm": 0.0035091356839984655, - "learning_rate": 0.00019999797923614913, - "loss": 46.0, - "step": 12576 - }, - { - "epoch": 2.0254438584484076, - "grad_norm": 0.002460634335875511, - "learning_rate": 0.0001999979789145134, - "loss": 46.0, - "step": 12577 - }, - { - "epoch": 2.025604895527195, - "grad_norm": 0.001522272708825767, - "learning_rate": 0.00019999797859285203, - "loss": 46.0, - "step": 12578 - }, - { - "epoch": 2.0257659326059825, - "grad_norm": 0.0009102607145905495, - "learning_rate": 0.0001999979782711651, - "loss": 46.0, - "step": 12579 - }, - { - "epoch": 2.02592696968477, - "grad_norm": 0.004572173114866018, - "learning_rate": 0.00019999797794945255, - "loss": 46.0, - "step": 12580 - }, - { - "epoch": 2.0260880067635574, - "grad_norm": 0.0006295728962868452, - "learning_rate": 0.00019999797762771442, - "loss": 46.0, - "step": 12581 - }, - { - "epoch": 2.026249043842345, - "grad_norm": 0.00104395707603544, - "learning_rate": 0.0001999979773059507, - "loss": 46.0, - "step": 12582 - }, - { - "epoch": 2.0264100809211323, - "grad_norm": 0.005980762653052807, - "learning_rate": 0.0001999979769841614, - "loss": 46.0, - "step": 12583 - }, - { - "epoch": 2.0265711179999193, - "grad_norm": 0.001454058918170631, - "learning_rate": 0.00019999797666234645, - "loss": 46.0, - "step": 12584 - }, - { - "epoch": 2.0267321550787067, - "grad_norm": 0.004987990017980337, - "learning_rate": 0.00019999797634050594, - "loss": 46.0, - "step": 12585 - }, - { - "epoch": 2.026893192157494, - "grad_norm": 0.004416910465806723, - "learning_rate": 0.00019999797601863987, - "loss": 46.0, - "step": 12586 - }, - { - "epoch": 2.0270542292362816, - "grad_norm": 0.0011555826058611274, - "learning_rate": 0.00019999797569674813, - "loss": 46.0, - "step": 12587 - }, - { - "epoch": 2.027215266315069, - "grad_norm": 0.005862301215529442, - "learning_rate": 0.00019999797537483086, - "loss": 46.0, - "step": 12588 - }, - { - "epoch": 2.0273763033938565, - "grad_norm": 0.008833087980747223, - "learning_rate": 0.00019999797505288797, - "loss": 46.0, - "step": 12589 - }, - { - "epoch": 2.027537340472644, - "grad_norm": 0.0021030204370617867, - "learning_rate": 0.00019999797473091948, - "loss": 46.0, - "step": 12590 - }, - { - "epoch": 2.027698377551431, - "grad_norm": 0.0026271375827491283, - "learning_rate": 0.00019999797440892542, - "loss": 46.0, - "step": 12591 - }, - { - "epoch": 2.0278594146302185, - "grad_norm": 0.008646892383694649, - "learning_rate": 0.00019999797408690574, - "loss": 46.0, - "step": 12592 - }, - { - "epoch": 2.028020451709006, - "grad_norm": 0.003314882516860962, - "learning_rate": 0.00019999797376486046, - "loss": 46.0, - "step": 12593 - }, - { - "epoch": 2.0281814887877934, - "grad_norm": 0.0013615135103464127, - "learning_rate": 0.0001999979734427896, - "loss": 46.0, - "step": 12594 - }, - { - "epoch": 2.028342525866581, - "grad_norm": 0.0067373900674283504, - "learning_rate": 0.00019999797312069317, - "loss": 46.0, - "step": 12595 - }, - { - "epoch": 2.0285035629453683, - "grad_norm": 0.0036978526040911674, - "learning_rate": 0.0001999979727985711, - "loss": 46.0, - "step": 12596 - }, - { - "epoch": 2.0286646000241557, - "grad_norm": 0.0008455890347249806, - "learning_rate": 0.00019999797247642346, - "loss": 46.0, - "step": 12597 - }, - { - "epoch": 2.028825637102943, - "grad_norm": 0.0017260325839743018, - "learning_rate": 0.00019999797215425024, - "loss": 46.0, - "step": 12598 - }, - { - "epoch": 2.02898667418173, - "grad_norm": 0.005904557649046183, - "learning_rate": 0.0001999979718320514, - "loss": 46.0, - "step": 12599 - }, - { - "epoch": 2.0291477112605176, - "grad_norm": 0.001210798160172999, - "learning_rate": 0.00019999797150982698, - "loss": 46.0, - "step": 12600 - }, - { - "epoch": 2.029308748339305, - "grad_norm": 0.0010602562688291073, - "learning_rate": 0.00019999797118757696, - "loss": 46.0, - "step": 12601 - }, - { - "epoch": 2.0294697854180925, - "grad_norm": 0.001755236298777163, - "learning_rate": 0.00019999797086530134, - "loss": 46.0, - "step": 12602 - }, - { - "epoch": 2.02963082249688, - "grad_norm": 0.004284292459487915, - "learning_rate": 0.00019999797054300012, - "loss": 46.0, - "step": 12603 - }, - { - "epoch": 2.0297918595756674, - "grad_norm": 0.0039684101939201355, - "learning_rate": 0.00019999797022067335, - "loss": 46.0, - "step": 12604 - }, - { - "epoch": 2.029952896654455, - "grad_norm": 0.0030487023759633303, - "learning_rate": 0.00019999796989832093, - "loss": 46.0, - "step": 12605 - }, - { - "epoch": 2.030113933733242, - "grad_norm": 0.0030690382700413465, - "learning_rate": 0.00019999796957594293, - "loss": 46.0, - "step": 12606 - }, - { - "epoch": 2.0302749708120293, - "grad_norm": 0.0019898023456335068, - "learning_rate": 0.00019999796925353937, - "loss": 46.0, - "step": 12607 - }, - { - "epoch": 2.030436007890817, - "grad_norm": 0.0011768710101023316, - "learning_rate": 0.00019999796893111017, - "loss": 46.0, - "step": 12608 - }, - { - "epoch": 2.0305970449696042, - "grad_norm": 0.006985583808273077, - "learning_rate": 0.0001999979686086554, - "loss": 46.0, - "step": 12609 - }, - { - "epoch": 2.0307580820483917, - "grad_norm": 0.011857746168971062, - "learning_rate": 0.00019999796828617505, - "loss": 46.0, - "step": 12610 - }, - { - "epoch": 2.030919119127179, - "grad_norm": 0.001817281125113368, - "learning_rate": 0.0001999979679636691, - "loss": 46.0, - "step": 12611 - }, - { - "epoch": 2.0310801562059666, - "grad_norm": 0.0022048638202250004, - "learning_rate": 0.0001999979676411375, - "loss": 46.0, - "step": 12612 - }, - { - "epoch": 2.0312411932847536, - "grad_norm": 0.0007136227213777602, - "learning_rate": 0.00019999796731858037, - "loss": 46.0, - "step": 12613 - }, - { - "epoch": 2.031402230363541, - "grad_norm": 0.002254886319860816, - "learning_rate": 0.00019999796699599762, - "loss": 46.0, - "step": 12614 - }, - { - "epoch": 2.0315632674423285, - "grad_norm": 0.0008421302190981805, - "learning_rate": 0.00019999796667338927, - "loss": 46.0, - "step": 12615 - }, - { - "epoch": 2.031724304521116, - "grad_norm": 0.0008954409859143198, - "learning_rate": 0.00019999796635075537, - "loss": 46.0, - "step": 12616 - }, - { - "epoch": 2.0318853415999034, - "grad_norm": 0.0011271878611296415, - "learning_rate": 0.00019999796602809583, - "loss": 46.0, - "step": 12617 - }, - { - "epoch": 2.032046378678691, - "grad_norm": 0.0015551706310361624, - "learning_rate": 0.0001999979657054107, - "loss": 46.0, - "step": 12618 - }, - { - "epoch": 2.0322074157574783, - "grad_norm": 0.0010714060626924038, - "learning_rate": 0.00019999796538269998, - "loss": 46.0, - "step": 12619 - }, - { - "epoch": 2.0323684528362653, - "grad_norm": 0.0012057216372340918, - "learning_rate": 0.00019999796505996365, - "loss": 46.0, - "step": 12620 - }, - { - "epoch": 2.032529489915053, - "grad_norm": 0.002392561873421073, - "learning_rate": 0.00019999796473720176, - "loss": 46.0, - "step": 12621 - }, - { - "epoch": 2.0326905269938402, - "grad_norm": 0.003465986577793956, - "learning_rate": 0.00019999796441441426, - "loss": 46.0, - "step": 12622 - }, - { - "epoch": 2.0328515640726277, - "grad_norm": 0.0007431176491081715, - "learning_rate": 0.00019999796409160117, - "loss": 46.0, - "step": 12623 - }, - { - "epoch": 2.033012601151415, - "grad_norm": 0.003488802118226886, - "learning_rate": 0.00019999796376876249, - "loss": 46.0, - "step": 12624 - }, - { - "epoch": 2.0331736382302026, - "grad_norm": 0.001669726800173521, - "learning_rate": 0.0001999979634458982, - "loss": 46.0, - "step": 12625 - }, - { - "epoch": 2.03333467530899, - "grad_norm": 0.004264045041054487, - "learning_rate": 0.0001999979631230083, - "loss": 46.0, - "step": 12626 - }, - { - "epoch": 2.0334957123877775, - "grad_norm": 0.0014957647072151303, - "learning_rate": 0.00019999796280009284, - "loss": 46.0, - "step": 12627 - }, - { - "epoch": 2.0336567494665645, - "grad_norm": 0.003543445374816656, - "learning_rate": 0.00019999796247715176, - "loss": 46.0, - "step": 12628 - }, - { - "epoch": 2.033817786545352, - "grad_norm": 0.008305152878165245, - "learning_rate": 0.00019999796215418512, - "loss": 46.0, - "step": 12629 - }, - { - "epoch": 2.0339788236241394, - "grad_norm": 0.0015745005803182721, - "learning_rate": 0.00019999796183119286, - "loss": 46.0, - "step": 12630 - }, - { - "epoch": 2.034139860702927, - "grad_norm": 0.004378624726086855, - "learning_rate": 0.000199997961508175, - "loss": 46.0, - "step": 12631 - }, - { - "epoch": 2.0343008977817143, - "grad_norm": 0.0025321580469608307, - "learning_rate": 0.00019999796118513156, - "loss": 46.0, - "step": 12632 - }, - { - "epoch": 2.0344619348605018, - "grad_norm": 0.0020093375351279974, - "learning_rate": 0.00019999796086206254, - "loss": 46.0, - "step": 12633 - }, - { - "epoch": 2.034622971939289, - "grad_norm": 0.005774980410933495, - "learning_rate": 0.00019999796053896788, - "loss": 46.0, - "step": 12634 - }, - { - "epoch": 2.034784009018076, - "grad_norm": 0.0015911576338112354, - "learning_rate": 0.00019999796021584766, - "loss": 46.0, - "step": 12635 - }, - { - "epoch": 2.0349450460968637, - "grad_norm": 0.0012758868979290128, - "learning_rate": 0.00019999795989270185, - "loss": 46.0, - "step": 12636 - }, - { - "epoch": 2.035106083175651, - "grad_norm": 0.006038719322532415, - "learning_rate": 0.00019999795956953043, - "loss": 46.0, - "step": 12637 - }, - { - "epoch": 2.0352671202544386, - "grad_norm": 0.0008050496107898653, - "learning_rate": 0.00019999795924633342, - "loss": 46.0, - "step": 12638 - }, - { - "epoch": 2.035428157333226, - "grad_norm": 0.0037582528311759233, - "learning_rate": 0.00019999795892311083, - "loss": 46.0, - "step": 12639 - }, - { - "epoch": 2.0355891944120135, - "grad_norm": 0.006221001967787743, - "learning_rate": 0.00019999795859986262, - "loss": 46.0, - "step": 12640 - }, - { - "epoch": 2.035750231490801, - "grad_norm": 0.0018074963008984923, - "learning_rate": 0.00019999795827658882, - "loss": 46.0, - "step": 12641 - }, - { - "epoch": 2.0359112685695884, - "grad_norm": 0.0023985167499631643, - "learning_rate": 0.00019999795795328944, - "loss": 46.0, - "step": 12642 - }, - { - "epoch": 2.0360723056483754, - "grad_norm": 0.0016935777384787798, - "learning_rate": 0.00019999795762996444, - "loss": 46.0, - "step": 12643 - }, - { - "epoch": 2.036233342727163, - "grad_norm": 0.0029211658984422684, - "learning_rate": 0.00019999795730661388, - "loss": 46.0, - "step": 12644 - }, - { - "epoch": 2.0363943798059503, - "grad_norm": 0.0013775217812508345, - "learning_rate": 0.0001999979569832377, - "loss": 46.0, - "step": 12645 - }, - { - "epoch": 2.0365554168847377, - "grad_norm": 0.0031842817552387714, - "learning_rate": 0.00019999795665983592, - "loss": 46.0, - "step": 12646 - }, - { - "epoch": 2.036716453963525, - "grad_norm": 0.0036360002122819424, - "learning_rate": 0.0001999979563364086, - "loss": 46.0, - "step": 12647 - }, - { - "epoch": 2.0368774910423126, - "grad_norm": 0.007040873169898987, - "learning_rate": 0.0001999979560129556, - "loss": 46.0, - "step": 12648 - }, - { - "epoch": 2.0370385281211, - "grad_norm": 0.0015644961968064308, - "learning_rate": 0.00019999795568947706, - "loss": 46.0, - "step": 12649 - }, - { - "epoch": 2.037199565199887, - "grad_norm": 0.0035487052518874407, - "learning_rate": 0.00019999795536597293, - "loss": 46.0, - "step": 12650 - }, - { - "epoch": 2.0373606022786745, - "grad_norm": 0.0020636108238250017, - "learning_rate": 0.00019999795504244318, - "loss": 46.0, - "step": 12651 - }, - { - "epoch": 2.037521639357462, - "grad_norm": 0.0022487486712634563, - "learning_rate": 0.00019999795471888784, - "loss": 46.0, - "step": 12652 - }, - { - "epoch": 2.0376826764362495, - "grad_norm": 0.0012746353168040514, - "learning_rate": 0.00019999795439530694, - "loss": 46.0, - "step": 12653 - }, - { - "epoch": 2.037843713515037, - "grad_norm": 0.0033677066676318645, - "learning_rate": 0.0001999979540717004, - "loss": 46.0, - "step": 12654 - }, - { - "epoch": 2.0380047505938244, - "grad_norm": 0.0022001334000378847, - "learning_rate": 0.00019999795374806828, - "loss": 46.0, - "step": 12655 - }, - { - "epoch": 2.038165787672612, - "grad_norm": 0.0033290269784629345, - "learning_rate": 0.0001999979534244106, - "loss": 46.0, - "step": 12656 - }, - { - "epoch": 2.038326824751399, - "grad_norm": 0.0032610977068543434, - "learning_rate": 0.0001999979531007273, - "loss": 46.0, - "step": 12657 - }, - { - "epoch": 2.0384878618301863, - "grad_norm": 0.001511602196842432, - "learning_rate": 0.00019999795277701838, - "loss": 46.0, - "step": 12658 - }, - { - "epoch": 2.0386488989089737, - "grad_norm": 0.0030205200891941786, - "learning_rate": 0.0001999979524532839, - "loss": 46.0, - "step": 12659 - }, - { - "epoch": 2.038809935987761, - "grad_norm": 0.005918892100453377, - "learning_rate": 0.0001999979521295238, - "loss": 46.0, - "step": 12660 - }, - { - "epoch": 2.0389709730665486, - "grad_norm": 0.001982569694519043, - "learning_rate": 0.00019999795180573814, - "loss": 46.0, - "step": 12661 - }, - { - "epoch": 2.039132010145336, - "grad_norm": 0.0024182030465453863, - "learning_rate": 0.00019999795148192685, - "loss": 46.0, - "step": 12662 - }, - { - "epoch": 2.0392930472241235, - "grad_norm": 0.0017946631414815784, - "learning_rate": 0.00019999795115809, - "loss": 46.0, - "step": 12663 - }, - { - "epoch": 2.0394540843029105, - "grad_norm": 0.0018103251932188869, - "learning_rate": 0.00019999795083422753, - "loss": 46.0, - "step": 12664 - }, - { - "epoch": 2.039615121381698, - "grad_norm": 0.0017367289401590824, - "learning_rate": 0.00019999795051033945, - "loss": 46.0, - "step": 12665 - }, - { - "epoch": 2.0397761584604854, - "grad_norm": 0.004122640006244183, - "learning_rate": 0.0001999979501864258, - "loss": 46.0, - "step": 12666 - }, - { - "epoch": 2.039937195539273, - "grad_norm": 0.0018341062823310494, - "learning_rate": 0.00019999794986248656, - "loss": 46.0, - "step": 12667 - }, - { - "epoch": 2.0400982326180603, - "grad_norm": 0.0010154114570468664, - "learning_rate": 0.00019999794953852172, - "loss": 46.0, - "step": 12668 - }, - { - "epoch": 2.040259269696848, - "grad_norm": 0.004763561766594648, - "learning_rate": 0.00019999794921453127, - "loss": 46.0, - "step": 12669 - }, - { - "epoch": 2.0404203067756352, - "grad_norm": 0.0014819520292803645, - "learning_rate": 0.00019999794889051525, - "loss": 46.0, - "step": 12670 - }, - { - "epoch": 2.0405813438544227, - "grad_norm": 0.002452469663694501, - "learning_rate": 0.00019999794856647362, - "loss": 46.0, - "step": 12671 - }, - { - "epoch": 2.0407423809332097, - "grad_norm": 0.0009148407843895257, - "learning_rate": 0.0001999979482424064, - "loss": 46.0, - "step": 12672 - }, - { - "epoch": 2.040903418011997, - "grad_norm": 0.0014726783847436309, - "learning_rate": 0.0001999979479183136, - "loss": 46.0, - "step": 12673 - }, - { - "epoch": 2.0410644550907846, - "grad_norm": 0.0015294309705495834, - "learning_rate": 0.00019999794759419518, - "loss": 46.0, - "step": 12674 - }, - { - "epoch": 2.041225492169572, - "grad_norm": 0.0019074780866503716, - "learning_rate": 0.00019999794727005118, - "loss": 46.0, - "step": 12675 - }, - { - "epoch": 2.0413865292483595, - "grad_norm": 0.00789949856698513, - "learning_rate": 0.00019999794694588162, - "loss": 46.0, - "step": 12676 - }, - { - "epoch": 2.041547566327147, - "grad_norm": 0.0014568453188985586, - "learning_rate": 0.0001999979466216864, - "loss": 46.0, - "step": 12677 - }, - { - "epoch": 2.0417086034059344, - "grad_norm": 0.0010552306193858385, - "learning_rate": 0.00019999794629746562, - "loss": 46.0, - "step": 12678 - }, - { - "epoch": 2.0418696404847214, - "grad_norm": 0.0004922501975670457, - "learning_rate": 0.00019999794597321926, - "loss": 46.0, - "step": 12679 - }, - { - "epoch": 2.042030677563509, - "grad_norm": 0.0026808297261595726, - "learning_rate": 0.00019999794564894727, - "loss": 46.0, - "step": 12680 - }, - { - "epoch": 2.0421917146422963, - "grad_norm": 0.0010481438366696239, - "learning_rate": 0.0001999979453246497, - "loss": 46.0, - "step": 12681 - }, - { - "epoch": 2.0423527517210838, - "grad_norm": 0.014141246676445007, - "learning_rate": 0.00019999794500032657, - "loss": 46.0, - "step": 12682 - }, - { - "epoch": 2.042513788799871, - "grad_norm": 0.001846129773184657, - "learning_rate": 0.00019999794467597779, - "loss": 46.0, - "step": 12683 - }, - { - "epoch": 2.0426748258786587, - "grad_norm": 0.002073851181194186, - "learning_rate": 0.00019999794435160344, - "loss": 46.0, - "step": 12684 - }, - { - "epoch": 2.042835862957446, - "grad_norm": 0.0026352526620030403, - "learning_rate": 0.0001999979440272035, - "loss": 46.0, - "step": 12685 - }, - { - "epoch": 2.042996900036233, - "grad_norm": 0.003275652416050434, - "learning_rate": 0.00019999794370277797, - "loss": 46.0, - "step": 12686 - }, - { - "epoch": 2.0431579371150206, - "grad_norm": 0.00578341307118535, - "learning_rate": 0.00019999794337832683, - "loss": 46.0, - "step": 12687 - }, - { - "epoch": 2.043318974193808, - "grad_norm": 0.0017546183662489057, - "learning_rate": 0.00019999794305385012, - "loss": 46.0, - "step": 12688 - }, - { - "epoch": 2.0434800112725955, - "grad_norm": 0.003102260874584317, - "learning_rate": 0.00019999794272934778, - "loss": 46.0, - "step": 12689 - }, - { - "epoch": 2.043641048351383, - "grad_norm": 0.0030546931084245443, - "learning_rate": 0.00019999794240481986, - "loss": 46.0, - "step": 12690 - }, - { - "epoch": 2.0438020854301704, - "grad_norm": 0.006423891056329012, - "learning_rate": 0.00019999794208026635, - "loss": 46.0, - "step": 12691 - }, - { - "epoch": 2.043963122508958, - "grad_norm": 0.0012006739852949977, - "learning_rate": 0.00019999794175568726, - "loss": 46.0, - "step": 12692 - }, - { - "epoch": 2.0441241595877453, - "grad_norm": 0.0014467312721535563, - "learning_rate": 0.00019999794143108255, - "loss": 46.0, - "step": 12693 - }, - { - "epoch": 2.0442851966665323, - "grad_norm": 0.0004815906868316233, - "learning_rate": 0.00019999794110645225, - "loss": 46.0, - "step": 12694 - }, - { - "epoch": 2.0444462337453198, - "grad_norm": 0.002055687829852104, - "learning_rate": 0.00019999794078179637, - "loss": 46.0, - "step": 12695 - }, - { - "epoch": 2.044607270824107, - "grad_norm": 0.002334614284336567, - "learning_rate": 0.0001999979404571149, - "loss": 46.0, - "step": 12696 - }, - { - "epoch": 2.0447683079028947, - "grad_norm": 0.0013030642876401544, - "learning_rate": 0.0001999979401324078, - "loss": 46.0, - "step": 12697 - }, - { - "epoch": 2.044929344981682, - "grad_norm": 0.0007385505014099181, - "learning_rate": 0.00019999793980767514, - "loss": 46.0, - "step": 12698 - }, - { - "epoch": 2.0450903820604696, - "grad_norm": 0.0025091059505939484, - "learning_rate": 0.00019999793948291688, - "loss": 46.0, - "step": 12699 - }, - { - "epoch": 2.045251419139257, - "grad_norm": 0.000989076099358499, - "learning_rate": 0.000199997939158133, - "loss": 46.0, - "step": 12700 - }, - { - "epoch": 2.045412456218044, - "grad_norm": 0.004465559963136911, - "learning_rate": 0.00019999793883332357, - "loss": 46.0, - "step": 12701 - }, - { - "epoch": 2.0455734932968315, - "grad_norm": 0.0021706765983253717, - "learning_rate": 0.00019999793850848852, - "loss": 46.0, - "step": 12702 - }, - { - "epoch": 2.045734530375619, - "grad_norm": 0.002952922135591507, - "learning_rate": 0.00019999793818362786, - "loss": 46.0, - "step": 12703 - }, - { - "epoch": 2.0458955674544064, - "grad_norm": 0.002846744144335389, - "learning_rate": 0.00019999793785874164, - "loss": 46.0, - "step": 12704 - }, - { - "epoch": 2.046056604533194, - "grad_norm": 0.003411015262827277, - "learning_rate": 0.0001999979375338298, - "loss": 46.0, - "step": 12705 - }, - { - "epoch": 2.0462176416119813, - "grad_norm": 0.002308523515239358, - "learning_rate": 0.00019999793720889235, - "loss": 46.0, - "step": 12706 - }, - { - "epoch": 2.0463786786907687, - "grad_norm": 0.0029919177759438753, - "learning_rate": 0.00019999793688392937, - "loss": 46.0, - "step": 12707 - }, - { - "epoch": 2.0465397157695557, - "grad_norm": 0.002041302155703306, - "learning_rate": 0.00019999793655894074, - "loss": 46.0, - "step": 12708 - }, - { - "epoch": 2.046700752848343, - "grad_norm": 0.004689004737883806, - "learning_rate": 0.0001999979362339265, - "loss": 46.0, - "step": 12709 - }, - { - "epoch": 2.0468617899271306, - "grad_norm": 0.0023718990851193666, - "learning_rate": 0.00019999793590888673, - "loss": 46.0, - "step": 12710 - }, - { - "epoch": 2.047022827005918, - "grad_norm": 0.0010333546670153737, - "learning_rate": 0.00019999793558382132, - "loss": 46.0, - "step": 12711 - }, - { - "epoch": 2.0471838640847055, - "grad_norm": 0.0007368649821728468, - "learning_rate": 0.00019999793525873031, - "loss": 46.0, - "step": 12712 - }, - { - "epoch": 2.047344901163493, - "grad_norm": 0.003788518253713846, - "learning_rate": 0.00019999793493361375, - "loss": 46.0, - "step": 12713 - }, - { - "epoch": 2.0475059382422804, - "grad_norm": 0.0016053363215178251, - "learning_rate": 0.00019999793460847155, - "loss": 46.0, - "step": 12714 - }, - { - "epoch": 2.047666975321068, - "grad_norm": 0.003955433145165443, - "learning_rate": 0.0001999979342833038, - "loss": 46.0, - "step": 12715 - }, - { - "epoch": 2.047828012399855, - "grad_norm": 0.005060320254415274, - "learning_rate": 0.0001999979339581104, - "loss": 46.0, - "step": 12716 - }, - { - "epoch": 2.0479890494786424, - "grad_norm": 0.002011056523770094, - "learning_rate": 0.00019999793363289145, - "loss": 46.0, - "step": 12717 - }, - { - "epoch": 2.04815008655743, - "grad_norm": 0.001769879600033164, - "learning_rate": 0.0001999979333076469, - "loss": 46.0, - "step": 12718 - }, - { - "epoch": 2.0483111236362173, - "grad_norm": 0.003078615292906761, - "learning_rate": 0.00019999793298237676, - "loss": 46.0, - "step": 12719 - }, - { - "epoch": 2.0484721607150047, - "grad_norm": 0.0004540045338217169, - "learning_rate": 0.000199997932657081, - "loss": 46.0, - "step": 12720 - }, - { - "epoch": 2.048633197793792, - "grad_norm": 0.0016792655223980546, - "learning_rate": 0.00019999793233175966, - "loss": 46.0, - "step": 12721 - }, - { - "epoch": 2.0487942348725796, - "grad_norm": 0.0018158906605094671, - "learning_rate": 0.00019999793200641274, - "loss": 46.0, - "step": 12722 - }, - { - "epoch": 2.0489552719513666, - "grad_norm": 0.001656759181059897, - "learning_rate": 0.0001999979316810402, - "loss": 46.0, - "step": 12723 - }, - { - "epoch": 2.049116309030154, - "grad_norm": 0.0052232141606509686, - "learning_rate": 0.00019999793135564207, - "loss": 46.0, - "step": 12724 - }, - { - "epoch": 2.0492773461089415, - "grad_norm": 0.006812802981585264, - "learning_rate": 0.00019999793103021835, - "loss": 46.0, - "step": 12725 - }, - { - "epoch": 2.049438383187729, - "grad_norm": 0.0046859378926455975, - "learning_rate": 0.00019999793070476902, - "loss": 46.0, - "step": 12726 - }, - { - "epoch": 2.0495994202665164, - "grad_norm": 0.0004233905056025833, - "learning_rate": 0.00019999793037929413, - "loss": 46.0, - "step": 12727 - }, - { - "epoch": 2.049760457345304, - "grad_norm": 0.002785461489111185, - "learning_rate": 0.00019999793005379363, - "loss": 46.0, - "step": 12728 - }, - { - "epoch": 2.0499214944240913, - "grad_norm": 0.0018482727464288473, - "learning_rate": 0.0001999979297282675, - "loss": 46.0, - "step": 12729 - }, - { - "epoch": 2.0500825315028783, - "grad_norm": 0.0068340180441737175, - "learning_rate": 0.00019999792940271586, - "loss": 46.0, - "step": 12730 - }, - { - "epoch": 2.050243568581666, - "grad_norm": 0.00059436698211357, - "learning_rate": 0.00019999792907713854, - "loss": 46.0, - "step": 12731 - }, - { - "epoch": 2.0504046056604532, - "grad_norm": 0.003365225624293089, - "learning_rate": 0.00019999792875153566, - "loss": 46.0, - "step": 12732 - }, - { - "epoch": 2.0505656427392407, - "grad_norm": 0.002422794932499528, - "learning_rate": 0.00019999792842590722, - "loss": 46.0, - "step": 12733 - }, - { - "epoch": 2.050726679818028, - "grad_norm": 0.0009200755739584565, - "learning_rate": 0.00019999792810025313, - "loss": 46.0, - "step": 12734 - }, - { - "epoch": 2.0508877168968156, - "grad_norm": 0.004491645842790604, - "learning_rate": 0.00019999792777457347, - "loss": 46.0, - "step": 12735 - }, - { - "epoch": 2.051048753975603, - "grad_norm": 0.0031578242778778076, - "learning_rate": 0.00019999792744886824, - "loss": 46.0, - "step": 12736 - }, - { - "epoch": 2.05120979105439, - "grad_norm": 0.001434703473933041, - "learning_rate": 0.00019999792712313737, - "loss": 46.0, - "step": 12737 - }, - { - "epoch": 2.0513708281331775, - "grad_norm": 0.004946847911924124, - "learning_rate": 0.0001999979267973809, - "loss": 46.0, - "step": 12738 - }, - { - "epoch": 2.051531865211965, - "grad_norm": 0.002262961817905307, - "learning_rate": 0.0001999979264715989, - "loss": 46.0, - "step": 12739 - }, - { - "epoch": 2.0516929022907524, - "grad_norm": 0.001229085261002183, - "learning_rate": 0.00019999792614579126, - "loss": 46.0, - "step": 12740 - }, - { - "epoch": 2.05185393936954, - "grad_norm": 0.0016119007486850023, - "learning_rate": 0.000199997925819958, - "loss": 46.0, - "step": 12741 - }, - { - "epoch": 2.0520149764483273, - "grad_norm": 0.002097120974212885, - "learning_rate": 0.0001999979254940992, - "loss": 46.0, - "step": 12742 - }, - { - "epoch": 2.0521760135271148, - "grad_norm": 0.006228986196219921, - "learning_rate": 0.0001999979251682148, - "loss": 46.0, - "step": 12743 - }, - { - "epoch": 2.052337050605902, - "grad_norm": 0.0009796464582905173, - "learning_rate": 0.00019999792484230478, - "loss": 46.0, - "step": 12744 - }, - { - "epoch": 2.052498087684689, - "grad_norm": 0.0006819817936047912, - "learning_rate": 0.00019999792451636918, - "loss": 46.0, - "step": 12745 - }, - { - "epoch": 2.0526591247634767, - "grad_norm": 0.0014942156849429011, - "learning_rate": 0.00019999792419040798, - "loss": 46.0, - "step": 12746 - }, - { - "epoch": 2.052820161842264, - "grad_norm": 0.001344421529211104, - "learning_rate": 0.00019999792386442118, - "loss": 46.0, - "step": 12747 - }, - { - "epoch": 2.0529811989210516, - "grad_norm": 0.0029171700589358807, - "learning_rate": 0.0001999979235384088, - "loss": 46.0, - "step": 12748 - }, - { - "epoch": 2.053142235999839, - "grad_norm": 0.017736775800585747, - "learning_rate": 0.0001999979232123708, - "loss": 46.0, - "step": 12749 - }, - { - "epoch": 2.0533032730786265, - "grad_norm": 0.0020546342711895704, - "learning_rate": 0.00019999792288630724, - "loss": 46.0, - "step": 12750 - }, - { - "epoch": 2.053464310157414, - "grad_norm": 0.0008654060657136142, - "learning_rate": 0.00019999792256021807, - "loss": 46.0, - "step": 12751 - }, - { - "epoch": 2.053625347236201, - "grad_norm": 0.02104876935482025, - "learning_rate": 0.0001999979222341033, - "loss": 46.0, - "step": 12752 - }, - { - "epoch": 2.0537863843149884, - "grad_norm": 0.002866534749045968, - "learning_rate": 0.00019999792190796294, - "loss": 46.0, - "step": 12753 - }, - { - "epoch": 2.053947421393776, - "grad_norm": 0.0024026569444686174, - "learning_rate": 0.00019999792158179698, - "loss": 46.0, - "step": 12754 - }, - { - "epoch": 2.0541084584725633, - "grad_norm": 0.0010798060102388263, - "learning_rate": 0.00019999792125560543, - "loss": 46.0, - "step": 12755 - }, - { - "epoch": 2.0542694955513507, - "grad_norm": 0.0018468784401193261, - "learning_rate": 0.0001999979209293883, - "loss": 46.0, - "step": 12756 - }, - { - "epoch": 2.054430532630138, - "grad_norm": 0.00404136348515749, - "learning_rate": 0.00019999792060314554, - "loss": 46.0, - "step": 12757 - }, - { - "epoch": 2.0545915697089256, - "grad_norm": 0.001251041772775352, - "learning_rate": 0.00019999792027687723, - "loss": 46.0, - "step": 12758 - }, - { - "epoch": 2.0547526067877127, - "grad_norm": 0.0028351852670311928, - "learning_rate": 0.0001999979199505833, - "loss": 46.0, - "step": 12759 - }, - { - "epoch": 2.0549136438665, - "grad_norm": 0.002025420079007745, - "learning_rate": 0.00019999791962426377, - "loss": 46.0, - "step": 12760 - }, - { - "epoch": 2.0550746809452876, - "grad_norm": 0.002291368320584297, - "learning_rate": 0.00019999791929791868, - "loss": 46.0, - "step": 12761 - }, - { - "epoch": 2.055235718024075, - "grad_norm": 0.004026977811008692, - "learning_rate": 0.00019999791897154796, - "loss": 46.0, - "step": 12762 - }, - { - "epoch": 2.0553967551028625, - "grad_norm": 0.0031712024938315153, - "learning_rate": 0.00019999791864515164, - "loss": 46.0, - "step": 12763 - }, - { - "epoch": 2.05555779218165, - "grad_norm": 0.0044436086900532246, - "learning_rate": 0.00019999791831872975, - "loss": 46.0, - "step": 12764 - }, - { - "epoch": 2.0557188292604374, - "grad_norm": 0.008992526680231094, - "learning_rate": 0.00019999791799228228, - "loss": 46.0, - "step": 12765 - }, - { - "epoch": 2.055879866339225, - "grad_norm": 0.0018407624447718263, - "learning_rate": 0.0001999979176658092, - "loss": 46.0, - "step": 12766 - }, - { - "epoch": 2.056040903418012, - "grad_norm": 0.003313424065709114, - "learning_rate": 0.00019999791733931051, - "loss": 46.0, - "step": 12767 - }, - { - "epoch": 2.0562019404967993, - "grad_norm": 0.0008020804962143302, - "learning_rate": 0.00019999791701278622, - "loss": 46.0, - "step": 12768 - }, - { - "epoch": 2.0563629775755867, - "grad_norm": 0.007835103198885918, - "learning_rate": 0.00019999791668623635, - "loss": 46.0, - "step": 12769 - }, - { - "epoch": 2.056524014654374, - "grad_norm": 0.0018140114843845367, - "learning_rate": 0.00019999791635966088, - "loss": 46.0, - "step": 12770 - }, - { - "epoch": 2.0566850517331616, - "grad_norm": 0.0009696898050606251, - "learning_rate": 0.00019999791603305983, - "loss": 46.0, - "step": 12771 - }, - { - "epoch": 2.056846088811949, - "grad_norm": 0.0019198352238163352, - "learning_rate": 0.00019999791570643317, - "loss": 46.0, - "step": 12772 - }, - { - "epoch": 2.0570071258907365, - "grad_norm": 0.0011088484898209572, - "learning_rate": 0.00019999791537978094, - "loss": 46.0, - "step": 12773 - }, - { - "epoch": 2.0571681629695235, - "grad_norm": 0.0034949174150824547, - "learning_rate": 0.0001999979150531031, - "loss": 46.0, - "step": 12774 - }, - { - "epoch": 2.057329200048311, - "grad_norm": 0.0019022391643375158, - "learning_rate": 0.00019999791472639965, - "loss": 46.0, - "step": 12775 - }, - { - "epoch": 2.0574902371270984, - "grad_norm": 0.006527475547045469, - "learning_rate": 0.00019999791439967064, - "loss": 46.0, - "step": 12776 - }, - { - "epoch": 2.057651274205886, - "grad_norm": 0.0025081345811486244, - "learning_rate": 0.000199997914072916, - "loss": 46.0, - "step": 12777 - }, - { - "epoch": 2.0578123112846733, - "grad_norm": 0.001696562161669135, - "learning_rate": 0.00019999791374613577, - "loss": 46.0, - "step": 12778 - }, - { - "epoch": 2.057973348363461, - "grad_norm": 0.0009385391604155302, - "learning_rate": 0.00019999791341933, - "loss": 46.0, - "step": 12779 - }, - { - "epoch": 2.0581343854422482, - "grad_norm": 0.0030935273971408606, - "learning_rate": 0.00019999791309249855, - "loss": 46.0, - "step": 12780 - }, - { - "epoch": 2.0582954225210353, - "grad_norm": 0.001058962196111679, - "learning_rate": 0.00019999791276564154, - "loss": 46.0, - "step": 12781 - }, - { - "epoch": 2.0584564595998227, - "grad_norm": 0.0014233856927603483, - "learning_rate": 0.00019999791243875895, - "loss": 46.0, - "step": 12782 - }, - { - "epoch": 2.05861749667861, - "grad_norm": 0.003509201342239976, - "learning_rate": 0.00019999791211185077, - "loss": 46.0, - "step": 12783 - }, - { - "epoch": 2.0587785337573976, - "grad_norm": 0.004560582805424929, - "learning_rate": 0.00019999791178491698, - "loss": 46.0, - "step": 12784 - }, - { - "epoch": 2.058939570836185, - "grad_norm": 0.0026095740031450987, - "learning_rate": 0.0001999979114579576, - "loss": 46.0, - "step": 12785 - }, - { - "epoch": 2.0591006079149725, - "grad_norm": 0.0011534725781530142, - "learning_rate": 0.00019999791113097263, - "loss": 46.0, - "step": 12786 - }, - { - "epoch": 2.05926164499376, - "grad_norm": 0.0037338025867938995, - "learning_rate": 0.00019999791080396205, - "loss": 46.0, - "step": 12787 - }, - { - "epoch": 2.0594226820725474, - "grad_norm": 0.0014899610541760921, - "learning_rate": 0.0001999979104769259, - "loss": 46.0, - "step": 12788 - }, - { - "epoch": 2.0595837191513344, - "grad_norm": 0.002470711711794138, - "learning_rate": 0.00019999791014986416, - "loss": 46.0, - "step": 12789 - }, - { - "epoch": 2.059744756230122, - "grad_norm": 0.0021710586734116077, - "learning_rate": 0.00019999790982277679, - "loss": 46.0, - "step": 12790 - }, - { - "epoch": 2.0599057933089093, - "grad_norm": 0.01125306822359562, - "learning_rate": 0.00019999790949566386, - "loss": 46.0, - "step": 12791 - }, - { - "epoch": 2.060066830387697, - "grad_norm": 0.001020616153255105, - "learning_rate": 0.0001999979091685253, - "loss": 46.0, - "step": 12792 - }, - { - "epoch": 2.0602278674664842, - "grad_norm": 0.0013511452125385404, - "learning_rate": 0.00019999790884136118, - "loss": 46.0, - "step": 12793 - }, - { - "epoch": 2.0603889045452717, - "grad_norm": 0.0037048342637717724, - "learning_rate": 0.00019999790851417146, - "loss": 46.0, - "step": 12794 - }, - { - "epoch": 2.060549941624059, - "grad_norm": 0.001499238540418446, - "learning_rate": 0.00019999790818695613, - "loss": 46.0, - "step": 12795 - }, - { - "epoch": 2.060710978702846, - "grad_norm": 0.0019282250432297587, - "learning_rate": 0.0001999979078597152, - "loss": 46.0, - "step": 12796 - }, - { - "epoch": 2.0608720157816336, - "grad_norm": 0.0005515927914530039, - "learning_rate": 0.0001999979075324487, - "loss": 46.0, - "step": 12797 - }, - { - "epoch": 2.061033052860421, - "grad_norm": 0.002887873910367489, - "learning_rate": 0.0001999979072051566, - "loss": 46.0, - "step": 12798 - }, - { - "epoch": 2.0611940899392085, - "grad_norm": 0.008013742975890636, - "learning_rate": 0.0001999979068778389, - "loss": 46.0, - "step": 12799 - }, - { - "epoch": 2.061355127017996, - "grad_norm": 0.011362212710082531, - "learning_rate": 0.0001999979065504956, - "loss": 46.0, - "step": 12800 - }, - { - "epoch": 2.0615161640967834, - "grad_norm": 0.004647592082619667, - "learning_rate": 0.00019999790622312672, - "loss": 46.0, - "step": 12801 - }, - { - "epoch": 2.061677201175571, - "grad_norm": 0.0007746084011159837, - "learning_rate": 0.00019999790589573225, - "loss": 46.0, - "step": 12802 - }, - { - "epoch": 2.061838238254358, - "grad_norm": 0.002557308180257678, - "learning_rate": 0.00019999790556831217, - "loss": 46.0, - "step": 12803 - }, - { - "epoch": 2.0619992753331453, - "grad_norm": 0.0033991560339927673, - "learning_rate": 0.0001999979052408665, - "loss": 46.0, - "step": 12804 - }, - { - "epoch": 2.0621603124119328, - "grad_norm": 0.0017291069962084293, - "learning_rate": 0.00019999790491339524, - "loss": 46.0, - "step": 12805 - }, - { - "epoch": 2.06232134949072, - "grad_norm": 0.0011777442414313555, - "learning_rate": 0.00019999790458589836, - "loss": 46.0, - "step": 12806 - }, - { - "epoch": 2.0624823865695077, - "grad_norm": 0.0020870326552540064, - "learning_rate": 0.00019999790425837593, - "loss": 46.0, - "step": 12807 - }, - { - "epoch": 2.062643423648295, - "grad_norm": 0.0026128143072128296, - "learning_rate": 0.00019999790393082786, - "loss": 46.0, - "step": 12808 - }, - { - "epoch": 2.0628044607270826, - "grad_norm": 0.00748836062848568, - "learning_rate": 0.00019999790360325422, - "loss": 46.0, - "step": 12809 - }, - { - "epoch": 2.0629654978058696, - "grad_norm": 0.004799081943929195, - "learning_rate": 0.00019999790327565497, - "loss": 46.0, - "step": 12810 - }, - { - "epoch": 2.063126534884657, - "grad_norm": 0.003735225647687912, - "learning_rate": 0.00019999790294803016, - "loss": 46.0, - "step": 12811 - }, - { - "epoch": 2.0632875719634445, - "grad_norm": 0.0010754746617749333, - "learning_rate": 0.00019999790262037971, - "loss": 46.0, - "step": 12812 - }, - { - "epoch": 2.063448609042232, - "grad_norm": 0.006830889265984297, - "learning_rate": 0.0001999979022927037, - "loss": 46.0, - "step": 12813 - }, - { - "epoch": 2.0636096461210194, - "grad_norm": 0.005894643720239401, - "learning_rate": 0.0001999979019650021, - "loss": 46.0, - "step": 12814 - }, - { - "epoch": 2.063770683199807, - "grad_norm": 0.002159679774194956, - "learning_rate": 0.00019999790163727487, - "loss": 46.0, - "step": 12815 - }, - { - "epoch": 2.0639317202785943, - "grad_norm": 0.004094042349606752, - "learning_rate": 0.00019999790130952207, - "loss": 46.0, - "step": 12816 - }, - { - "epoch": 2.0640927573573817, - "grad_norm": 0.0006622025393880904, - "learning_rate": 0.00019999790098174368, - "loss": 46.0, - "step": 12817 - }, - { - "epoch": 2.0642537944361687, - "grad_norm": 0.0011210956145077944, - "learning_rate": 0.00019999790065393968, - "loss": 46.0, - "step": 12818 - }, - { - "epoch": 2.064414831514956, - "grad_norm": 0.0006348789320327342, - "learning_rate": 0.0001999979003261101, - "loss": 46.0, - "step": 12819 - }, - { - "epoch": 2.0645758685937436, - "grad_norm": 0.0014930064789950848, - "learning_rate": 0.00019999789999825492, - "loss": 46.0, - "step": 12820 - }, - { - "epoch": 2.064736905672531, - "grad_norm": 0.0006207611877471209, - "learning_rate": 0.00019999789967037413, - "loss": 46.0, - "step": 12821 - }, - { - "epoch": 2.0648979427513185, - "grad_norm": 0.0024403254501521587, - "learning_rate": 0.00019999789934246778, - "loss": 46.0, - "step": 12822 - }, - { - "epoch": 2.065058979830106, - "grad_norm": 0.0029341739136725664, - "learning_rate": 0.00019999789901453582, - "loss": 46.0, - "step": 12823 - }, - { - "epoch": 2.0652200169088935, - "grad_norm": 0.006413840688765049, - "learning_rate": 0.00019999789868657827, - "loss": 46.0, - "step": 12824 - }, - { - "epoch": 2.0653810539876805, - "grad_norm": 0.0017412956804037094, - "learning_rate": 0.0001999978983585951, - "loss": 46.0, - "step": 12825 - }, - { - "epoch": 2.065542091066468, - "grad_norm": 0.0032224729657173157, - "learning_rate": 0.00019999789803058638, - "loss": 46.0, - "step": 12826 - }, - { - "epoch": 2.0657031281452554, - "grad_norm": 0.000601353996898979, - "learning_rate": 0.000199997897702552, - "loss": 46.0, - "step": 12827 - }, - { - "epoch": 2.065864165224043, - "grad_norm": 0.0022773239761590958, - "learning_rate": 0.0001999978973744921, - "loss": 46.0, - "step": 12828 - }, - { - "epoch": 2.0660252023028303, - "grad_norm": 0.0016295919194817543, - "learning_rate": 0.00019999789704640657, - "loss": 46.0, - "step": 12829 - }, - { - "epoch": 2.0661862393816177, - "grad_norm": 0.008981181308627129, - "learning_rate": 0.00019999789671829542, - "loss": 46.0, - "step": 12830 - }, - { - "epoch": 2.066347276460405, - "grad_norm": 0.000781839422415942, - "learning_rate": 0.0001999978963901587, - "loss": 46.0, - "step": 12831 - }, - { - "epoch": 2.0665083135391926, - "grad_norm": 0.0004905819660052657, - "learning_rate": 0.0001999978960619964, - "loss": 46.0, - "step": 12832 - }, - { - "epoch": 2.0666693506179796, - "grad_norm": 0.004294846672564745, - "learning_rate": 0.00019999789573380849, - "loss": 46.0, - "step": 12833 - }, - { - "epoch": 2.066830387696767, - "grad_norm": 0.002000445267185569, - "learning_rate": 0.00019999789540559498, - "loss": 46.0, - "step": 12834 - }, - { - "epoch": 2.0669914247755545, - "grad_norm": 0.00406459579244256, - "learning_rate": 0.00019999789507735592, - "loss": 46.0, - "step": 12835 - }, - { - "epoch": 2.067152461854342, - "grad_norm": 0.0018631048733368516, - "learning_rate": 0.00019999789474909121, - "loss": 46.0, - "step": 12836 - }, - { - "epoch": 2.0673134989331294, - "grad_norm": 0.0061733657494187355, - "learning_rate": 0.00019999789442080092, - "loss": 46.0, - "step": 12837 - }, - { - "epoch": 2.067474536011917, - "grad_norm": 0.0037168734706938267, - "learning_rate": 0.00019999789409248507, - "loss": 46.0, - "step": 12838 - }, - { - "epoch": 2.0676355730907043, - "grad_norm": 0.0013421680778265, - "learning_rate": 0.00019999789376414358, - "loss": 46.0, - "step": 12839 - }, - { - "epoch": 2.0677966101694913, - "grad_norm": 0.0020350811537355185, - "learning_rate": 0.0001999978934357765, - "loss": 46.0, - "step": 12840 - }, - { - "epoch": 2.067957647248279, - "grad_norm": 0.0029729909729212523, - "learning_rate": 0.00019999789310738385, - "loss": 46.0, - "step": 12841 - }, - { - "epoch": 2.0681186843270662, - "grad_norm": 0.001266554114408791, - "learning_rate": 0.0001999978927789656, - "loss": 46.0, - "step": 12842 - }, - { - "epoch": 2.0682797214058537, - "grad_norm": 0.0020449126604944468, - "learning_rate": 0.00019999789245052173, - "loss": 46.0, - "step": 12843 - }, - { - "epoch": 2.068440758484641, - "grad_norm": 0.0028160871006548405, - "learning_rate": 0.0001999978921220523, - "loss": 46.0, - "step": 12844 - }, - { - "epoch": 2.0686017955634286, - "grad_norm": 0.0015975756105035543, - "learning_rate": 0.00019999789179355726, - "loss": 46.0, - "step": 12845 - }, - { - "epoch": 2.068762832642216, - "grad_norm": 0.0023738001473248005, - "learning_rate": 0.00019999789146503663, - "loss": 46.0, - "step": 12846 - }, - { - "epoch": 2.068923869721003, - "grad_norm": 0.0012726528802886605, - "learning_rate": 0.0001999978911364904, - "loss": 46.0, - "step": 12847 - }, - { - "epoch": 2.0690849067997905, - "grad_norm": 0.0018322101095691323, - "learning_rate": 0.00019999789080791858, - "loss": 46.0, - "step": 12848 - }, - { - "epoch": 2.069245943878578, - "grad_norm": 0.0034243674017488956, - "learning_rate": 0.00019999789047932116, - "loss": 46.0, - "step": 12849 - }, - { - "epoch": 2.0694069809573654, - "grad_norm": 0.0015597471501678228, - "learning_rate": 0.00019999789015069818, - "loss": 46.0, - "step": 12850 - }, - { - "epoch": 2.069568018036153, - "grad_norm": 0.002911241492256522, - "learning_rate": 0.00019999788982204956, - "loss": 46.0, - "step": 12851 - }, - { - "epoch": 2.0697290551149403, - "grad_norm": 0.0012249392457306385, - "learning_rate": 0.00019999788949337535, - "loss": 46.0, - "step": 12852 - }, - { - "epoch": 2.0698900921937278, - "grad_norm": 0.0021083063911646605, - "learning_rate": 0.00019999788916467558, - "loss": 46.0, - "step": 12853 - }, - { - "epoch": 2.070051129272515, - "grad_norm": 0.004616187885403633, - "learning_rate": 0.00019999788883595017, - "loss": 46.0, - "step": 12854 - }, - { - "epoch": 2.0702121663513022, - "grad_norm": 0.0022661376278847456, - "learning_rate": 0.00019999788850719917, - "loss": 46.0, - "step": 12855 - }, - { - "epoch": 2.0703732034300897, - "grad_norm": 0.007422221824526787, - "learning_rate": 0.00019999788817842262, - "loss": 46.0, - "step": 12856 - }, - { - "epoch": 2.070534240508877, - "grad_norm": 0.005455732811242342, - "learning_rate": 0.00019999788784962045, - "loss": 46.0, - "step": 12857 - }, - { - "epoch": 2.0706952775876646, - "grad_norm": 0.004719725344330072, - "learning_rate": 0.00019999788752079266, - "loss": 46.0, - "step": 12858 - }, - { - "epoch": 2.070856314666452, - "grad_norm": 0.0017437150236219168, - "learning_rate": 0.00019999788719193932, - "loss": 46.0, - "step": 12859 - }, - { - "epoch": 2.0710173517452395, - "grad_norm": 0.002943633357062936, - "learning_rate": 0.00019999788686306038, - "loss": 46.0, - "step": 12860 - }, - { - "epoch": 2.071178388824027, - "grad_norm": 0.007313722278922796, - "learning_rate": 0.00019999788653415584, - "loss": 46.0, - "step": 12861 - }, - { - "epoch": 2.071339425902814, - "grad_norm": 0.00593662541359663, - "learning_rate": 0.00019999788620522567, - "loss": 46.0, - "step": 12862 - }, - { - "epoch": 2.0715004629816014, - "grad_norm": 0.003066117875277996, - "learning_rate": 0.00019999788587626995, - "loss": 46.0, - "step": 12863 - }, - { - "epoch": 2.071661500060389, - "grad_norm": 0.003927651327103376, - "learning_rate": 0.00019999788554728862, - "loss": 46.0, - "step": 12864 - }, - { - "epoch": 2.0718225371391763, - "grad_norm": 0.0005517390090972185, - "learning_rate": 0.0001999978852182817, - "loss": 46.0, - "step": 12865 - }, - { - "epoch": 2.0719835742179638, - "grad_norm": 0.0039609442465007305, - "learning_rate": 0.00019999788488924919, - "loss": 46.0, - "step": 12866 - }, - { - "epoch": 2.072144611296751, - "grad_norm": 0.0011377392802387476, - "learning_rate": 0.00019999788456019106, - "loss": 46.0, - "step": 12867 - }, - { - "epoch": 2.0723056483755387, - "grad_norm": 0.0016544148093089461, - "learning_rate": 0.00019999788423110735, - "loss": 46.0, - "step": 12868 - }, - { - "epoch": 2.0724666854543257, - "grad_norm": 0.001906959223560989, - "learning_rate": 0.00019999788390199805, - "loss": 46.0, - "step": 12869 - }, - { - "epoch": 2.072627722533113, - "grad_norm": 0.0015103211626410484, - "learning_rate": 0.00019999788357286317, - "loss": 46.0, - "step": 12870 - }, - { - "epoch": 2.0727887596119006, - "grad_norm": 0.0020772633142769337, - "learning_rate": 0.00019999788324370267, - "loss": 46.0, - "step": 12871 - }, - { - "epoch": 2.072949796690688, - "grad_norm": 0.002363672247156501, - "learning_rate": 0.00019999788291451658, - "loss": 46.0, - "step": 12872 - }, - { - "epoch": 2.0731108337694755, - "grad_norm": 0.00032840410131029785, - "learning_rate": 0.0001999978825853049, - "loss": 46.0, - "step": 12873 - }, - { - "epoch": 2.073271870848263, - "grad_norm": 0.0035052697639912367, - "learning_rate": 0.00019999788225606762, - "loss": 46.0, - "step": 12874 - }, - { - "epoch": 2.0734329079270504, - "grad_norm": 0.003118172287940979, - "learning_rate": 0.00019999788192680474, - "loss": 46.0, - "step": 12875 - }, - { - "epoch": 2.0735939450058374, - "grad_norm": 0.000833704776596278, - "learning_rate": 0.00019999788159751628, - "loss": 46.0, - "step": 12876 - }, - { - "epoch": 2.073754982084625, - "grad_norm": 0.0006109802052378654, - "learning_rate": 0.00019999788126820223, - "loss": 46.0, - "step": 12877 - }, - { - "epoch": 2.0739160191634123, - "grad_norm": 0.010422511957585812, - "learning_rate": 0.0001999978809388626, - "loss": 46.0, - "step": 12878 - }, - { - "epoch": 2.0740770562421997, - "grad_norm": 0.0008158034179359674, - "learning_rate": 0.00019999788060949734, - "loss": 46.0, - "step": 12879 - }, - { - "epoch": 2.074238093320987, - "grad_norm": 0.0028415508568286896, - "learning_rate": 0.00019999788028010647, - "loss": 46.0, - "step": 12880 - }, - { - "epoch": 2.0743991303997746, - "grad_norm": 0.0031324869487434626, - "learning_rate": 0.00019999787995069005, - "loss": 46.0, - "step": 12881 - }, - { - "epoch": 2.074560167478562, - "grad_norm": 0.0007474896847270429, - "learning_rate": 0.00019999787962124804, - "loss": 46.0, - "step": 12882 - }, - { - "epoch": 2.0747212045573495, - "grad_norm": 0.006062533240765333, - "learning_rate": 0.0001999978792917804, - "loss": 46.0, - "step": 12883 - }, - { - "epoch": 2.0748822416361365, - "grad_norm": 0.004317732993513346, - "learning_rate": 0.00019999787896228722, - "loss": 46.0, - "step": 12884 - }, - { - "epoch": 2.075043278714924, - "grad_norm": 0.004423650912940502, - "learning_rate": 0.0001999978786327684, - "loss": 46.0, - "step": 12885 - }, - { - "epoch": 2.0752043157937115, - "grad_norm": 0.0064974562264978886, - "learning_rate": 0.00019999787830322395, - "loss": 46.0, - "step": 12886 - }, - { - "epoch": 2.075365352872499, - "grad_norm": 0.0017117992974817753, - "learning_rate": 0.000199997877973654, - "loss": 46.0, - "step": 12887 - }, - { - "epoch": 2.0755263899512864, - "grad_norm": 0.004673875868320465, - "learning_rate": 0.00019999787764405838, - "loss": 46.0, - "step": 12888 - }, - { - "epoch": 2.075687427030074, - "grad_norm": 0.010830063372850418, - "learning_rate": 0.0001999978773144372, - "loss": 46.0, - "step": 12889 - }, - { - "epoch": 2.0758484641088613, - "grad_norm": 0.00277833198197186, - "learning_rate": 0.00019999787698479041, - "loss": 46.0, - "step": 12890 - }, - { - "epoch": 2.0760095011876483, - "grad_norm": 0.002786587690934539, - "learning_rate": 0.00019999787665511804, - "loss": 46.0, - "step": 12891 - }, - { - "epoch": 2.0761705382664357, - "grad_norm": 0.0015934628900140524, - "learning_rate": 0.00019999787632542007, - "loss": 46.0, - "step": 12892 - }, - { - "epoch": 2.076331575345223, - "grad_norm": 0.005051181651651859, - "learning_rate": 0.00019999787599569652, - "loss": 46.0, - "step": 12893 - }, - { - "epoch": 2.0764926124240106, - "grad_norm": 0.0005999958957545459, - "learning_rate": 0.00019999787566594735, - "loss": 46.0, - "step": 12894 - }, - { - "epoch": 2.076653649502798, - "grad_norm": 0.0016861752374097705, - "learning_rate": 0.00019999787533617257, - "loss": 46.0, - "step": 12895 - }, - { - "epoch": 2.0768146865815855, - "grad_norm": 0.0026603303849697113, - "learning_rate": 0.00019999787500637223, - "loss": 46.0, - "step": 12896 - }, - { - "epoch": 2.076975723660373, - "grad_norm": 0.005328537430614233, - "learning_rate": 0.0001999978746765463, - "loss": 46.0, - "step": 12897 - }, - { - "epoch": 2.07713676073916, - "grad_norm": 0.012002729810774326, - "learning_rate": 0.00019999787434669475, - "loss": 46.0, - "step": 12898 - }, - { - "epoch": 2.0772977978179474, - "grad_norm": 0.012205596081912518, - "learning_rate": 0.00019999787401681762, - "loss": 46.0, - "step": 12899 - }, - { - "epoch": 2.077458834896735, - "grad_norm": 0.00956760160624981, - "learning_rate": 0.0001999978736869149, - "loss": 46.0, - "step": 12900 - }, - { - "epoch": 2.0776198719755223, - "grad_norm": 0.003218648489564657, - "learning_rate": 0.00019999787335698657, - "loss": 46.0, - "step": 12901 - }, - { - "epoch": 2.07778090905431, - "grad_norm": 0.0009184836526401341, - "learning_rate": 0.00019999787302703265, - "loss": 46.0, - "step": 12902 - }, - { - "epoch": 2.0779419461330972, - "grad_norm": 0.001078438712283969, - "learning_rate": 0.00019999787269705315, - "loss": 46.0, - "step": 12903 - }, - { - "epoch": 2.0781029832118847, - "grad_norm": 0.0052854218520224094, - "learning_rate": 0.00019999787236704805, - "loss": 46.0, - "step": 12904 - }, - { - "epoch": 2.078264020290672, - "grad_norm": 0.0005770517163909972, - "learning_rate": 0.00019999787203701735, - "loss": 46.0, - "step": 12905 - }, - { - "epoch": 2.078425057369459, - "grad_norm": 0.0018130955286324024, - "learning_rate": 0.00019999787170696105, - "loss": 46.0, - "step": 12906 - }, - { - "epoch": 2.0785860944482466, - "grad_norm": 0.0015770385507494211, - "learning_rate": 0.00019999787137687917, - "loss": 46.0, - "step": 12907 - }, - { - "epoch": 2.078747131527034, - "grad_norm": 0.007847209461033344, - "learning_rate": 0.00019999787104677167, - "loss": 46.0, - "step": 12908 - }, - { - "epoch": 2.0789081686058215, - "grad_norm": 0.008562360890209675, - "learning_rate": 0.0001999978707166386, - "loss": 46.0, - "step": 12909 - }, - { - "epoch": 2.079069205684609, - "grad_norm": 0.0010221872944384813, - "learning_rate": 0.00019999787038647995, - "loss": 46.0, - "step": 12910 - }, - { - "epoch": 2.0792302427633964, - "grad_norm": 0.000811954028904438, - "learning_rate": 0.00019999787005629566, - "loss": 46.0, - "step": 12911 - }, - { - "epoch": 2.079391279842184, - "grad_norm": 0.0024959570728242397, - "learning_rate": 0.00019999786972608582, - "loss": 46.0, - "step": 12912 - }, - { - "epoch": 2.079552316920971, - "grad_norm": 0.0021960786543786526, - "learning_rate": 0.00019999786939585036, - "loss": 46.0, - "step": 12913 - }, - { - "epoch": 2.0797133539997583, - "grad_norm": 0.004885167349129915, - "learning_rate": 0.0001999978690655893, - "loss": 46.0, - "step": 12914 - }, - { - "epoch": 2.0798743910785458, - "grad_norm": 0.004590335302054882, - "learning_rate": 0.00019999786873530265, - "loss": 46.0, - "step": 12915 - }, - { - "epoch": 2.080035428157333, - "grad_norm": 0.001211219234392047, - "learning_rate": 0.00019999786840499043, - "loss": 46.0, - "step": 12916 - }, - { - "epoch": 2.0801964652361207, - "grad_norm": 0.00057620630832389, - "learning_rate": 0.0001999978680746526, - "loss": 46.0, - "step": 12917 - }, - { - "epoch": 2.080357502314908, - "grad_norm": 0.0022125651594251394, - "learning_rate": 0.00019999786774428917, - "loss": 46.0, - "step": 12918 - }, - { - "epoch": 2.0805185393936956, - "grad_norm": 0.006694595795124769, - "learning_rate": 0.00019999786741390016, - "loss": 46.0, - "step": 12919 - }, - { - "epoch": 2.0806795764724826, - "grad_norm": 0.0012308426667004824, - "learning_rate": 0.00019999786708348557, - "loss": 46.0, - "step": 12920 - }, - { - "epoch": 2.08084061355127, - "grad_norm": 0.0026756240986287594, - "learning_rate": 0.00019999786675304536, - "loss": 46.0, - "step": 12921 - }, - { - "epoch": 2.0810016506300575, - "grad_norm": 0.003326231613755226, - "learning_rate": 0.00019999786642257953, - "loss": 46.0, - "step": 12922 - }, - { - "epoch": 2.081162687708845, - "grad_norm": 0.001794333104044199, - "learning_rate": 0.00019999786609208815, - "loss": 46.0, - "step": 12923 - }, - { - "epoch": 2.0813237247876324, - "grad_norm": 0.0016420054016634822, - "learning_rate": 0.00019999786576157115, - "loss": 46.0, - "step": 12924 - }, - { - "epoch": 2.08148476186642, - "grad_norm": 0.0035400076303631067, - "learning_rate": 0.00019999786543102856, - "loss": 46.0, - "step": 12925 - }, - { - "epoch": 2.0816457989452073, - "grad_norm": 0.006951572373509407, - "learning_rate": 0.0001999978651004604, - "loss": 46.0, - "step": 12926 - }, - { - "epoch": 2.0818068360239943, - "grad_norm": 0.005445762071758509, - "learning_rate": 0.00019999786476986663, - "loss": 46.0, - "step": 12927 - }, - { - "epoch": 2.0819678731027818, - "grad_norm": 0.005064631812274456, - "learning_rate": 0.00019999786443924725, - "loss": 46.0, - "step": 12928 - }, - { - "epoch": 2.082128910181569, - "grad_norm": 0.003887486644089222, - "learning_rate": 0.00019999786410860232, - "loss": 46.0, - "step": 12929 - }, - { - "epoch": 2.0822899472603567, - "grad_norm": 0.0003751896438188851, - "learning_rate": 0.00019999786377793174, - "loss": 46.0, - "step": 12930 - }, - { - "epoch": 2.082450984339144, - "grad_norm": 0.001582261174917221, - "learning_rate": 0.0001999978634472356, - "loss": 46.0, - "step": 12931 - }, - { - "epoch": 2.0826120214179316, - "grad_norm": 0.0010145421838387847, - "learning_rate": 0.00019999786311651385, - "loss": 46.0, - "step": 12932 - }, - { - "epoch": 2.082773058496719, - "grad_norm": 0.007230785675346851, - "learning_rate": 0.0001999978627857665, - "loss": 46.0, - "step": 12933 - }, - { - "epoch": 2.0829340955755065, - "grad_norm": 0.004808077588677406, - "learning_rate": 0.00019999786245499356, - "loss": 46.0, - "step": 12934 - }, - { - "epoch": 2.0830951326542935, - "grad_norm": 0.001964322756975889, - "learning_rate": 0.00019999786212419505, - "loss": 46.0, - "step": 12935 - }, - { - "epoch": 2.083256169733081, - "grad_norm": 0.005786800291389227, - "learning_rate": 0.00019999786179337092, - "loss": 46.0, - "step": 12936 - }, - { - "epoch": 2.0834172068118684, - "grad_norm": 0.006222918163985014, - "learning_rate": 0.0001999978614625212, - "loss": 46.0, - "step": 12937 - }, - { - "epoch": 2.083578243890656, - "grad_norm": 0.003976618405431509, - "learning_rate": 0.00019999786113164588, - "loss": 46.0, - "step": 12938 - }, - { - "epoch": 2.0837392809694433, - "grad_norm": 0.009521611966192722, - "learning_rate": 0.000199997860800745, - "loss": 46.0, - "step": 12939 - }, - { - "epoch": 2.0839003180482307, - "grad_norm": 0.005143940448760986, - "learning_rate": 0.0001999978604698185, - "loss": 46.0, - "step": 12940 - }, - { - "epoch": 2.084061355127018, - "grad_norm": 0.0019475278677418828, - "learning_rate": 0.0001999978601388664, - "loss": 46.0, - "step": 12941 - }, - { - "epoch": 2.084222392205805, - "grad_norm": 0.001794849755242467, - "learning_rate": 0.0001999978598078887, - "loss": 46.0, - "step": 12942 - }, - { - "epoch": 2.0843834292845926, - "grad_norm": 0.017196884378790855, - "learning_rate": 0.00019999785947688543, - "loss": 46.0, - "step": 12943 - }, - { - "epoch": 2.08454446636338, - "grad_norm": 0.0007669737678952515, - "learning_rate": 0.00019999785914585653, - "loss": 46.0, - "step": 12944 - }, - { - "epoch": 2.0847055034421675, - "grad_norm": 0.0021896001417189837, - "learning_rate": 0.00019999785881480206, - "loss": 46.0, - "step": 12945 - }, - { - "epoch": 2.084866540520955, - "grad_norm": 0.010119922459125519, - "learning_rate": 0.000199997858483722, - "loss": 46.0, - "step": 12946 - }, - { - "epoch": 2.0850275775997424, - "grad_norm": 0.004330337047576904, - "learning_rate": 0.00019999785815261634, - "loss": 46.0, - "step": 12947 - }, - { - "epoch": 2.08518861467853, - "grad_norm": 0.0010863668285310268, - "learning_rate": 0.00019999785782148509, - "loss": 46.0, - "step": 12948 - }, - { - "epoch": 2.0853496517573173, - "grad_norm": 0.0032476340420544147, - "learning_rate": 0.00019999785749032824, - "loss": 46.0, - "step": 12949 - }, - { - "epoch": 2.0855106888361044, - "grad_norm": 0.0007361643365584314, - "learning_rate": 0.00019999785715914582, - "loss": 46.0, - "step": 12950 - }, - { - "epoch": 2.085671725914892, - "grad_norm": 0.01038648933172226, - "learning_rate": 0.00019999785682793777, - "loss": 46.0, - "step": 12951 - }, - { - "epoch": 2.0858327629936793, - "grad_norm": 0.0009936820715665817, - "learning_rate": 0.00019999785649670412, - "loss": 46.0, - "step": 12952 - }, - { - "epoch": 2.0859938000724667, - "grad_norm": 0.0013045460218563676, - "learning_rate": 0.0001999978561654449, - "loss": 46.0, - "step": 12953 - }, - { - "epoch": 2.086154837151254, - "grad_norm": 0.002221314935013652, - "learning_rate": 0.0001999978558341601, - "loss": 46.0, - "step": 12954 - }, - { - "epoch": 2.0863158742300416, - "grad_norm": 0.0003671857703011483, - "learning_rate": 0.00019999785550284965, - "loss": 46.0, - "step": 12955 - }, - { - "epoch": 2.086476911308829, - "grad_norm": 0.0013719316339120269, - "learning_rate": 0.00019999785517151367, - "loss": 46.0, - "step": 12956 - }, - { - "epoch": 2.086637948387616, - "grad_norm": 0.008065132424235344, - "learning_rate": 0.00019999785484015208, - "loss": 46.0, - "step": 12957 - }, - { - "epoch": 2.0867989854664035, - "grad_norm": 0.006490033119916916, - "learning_rate": 0.00019999785450876487, - "loss": 46.0, - "step": 12958 - }, - { - "epoch": 2.086960022545191, - "grad_norm": 0.0018406733870506287, - "learning_rate": 0.00019999785417735208, - "loss": 46.0, - "step": 12959 - }, - { - "epoch": 2.0871210596239784, - "grad_norm": 0.001462517655454576, - "learning_rate": 0.0001999978538459137, - "loss": 46.0, - "step": 12960 - }, - { - "epoch": 2.087282096702766, - "grad_norm": 0.0026362589560449123, - "learning_rate": 0.0001999978535144497, - "loss": 46.0, - "step": 12961 - }, - { - "epoch": 2.0874431337815533, - "grad_norm": 0.0017384827369824052, - "learning_rate": 0.00019999785318296014, - "loss": 46.0, - "step": 12962 - }, - { - "epoch": 2.087604170860341, - "grad_norm": 0.0013901966158300638, - "learning_rate": 0.00019999785285144497, - "loss": 46.0, - "step": 12963 - }, - { - "epoch": 2.087765207939128, - "grad_norm": 0.0026942843105643988, - "learning_rate": 0.00019999785251990422, - "loss": 46.0, - "step": 12964 - }, - { - "epoch": 2.0879262450179152, - "grad_norm": 0.002685442566871643, - "learning_rate": 0.00019999785218833784, - "loss": 46.0, - "step": 12965 - }, - { - "epoch": 2.0880872820967027, - "grad_norm": 0.004451130982488394, - "learning_rate": 0.00019999785185674589, - "loss": 46.0, - "step": 12966 - }, - { - "epoch": 2.08824831917549, - "grad_norm": 0.0017370268469676375, - "learning_rate": 0.00019999785152512837, - "loss": 46.0, - "step": 12967 - }, - { - "epoch": 2.0884093562542776, - "grad_norm": 0.0009535567369312048, - "learning_rate": 0.0001999978511934852, - "loss": 46.0, - "step": 12968 - }, - { - "epoch": 2.088570393333065, - "grad_norm": 0.00570228835567832, - "learning_rate": 0.00019999785086181646, - "loss": 46.0, - "step": 12969 - }, - { - "epoch": 2.0887314304118525, - "grad_norm": 0.0011478904634714127, - "learning_rate": 0.00019999785053012215, - "loss": 46.0, - "step": 12970 - }, - { - "epoch": 2.0888924674906395, - "grad_norm": 0.00048434018390253186, - "learning_rate": 0.00019999785019840223, - "loss": 46.0, - "step": 12971 - }, - { - "epoch": 2.089053504569427, - "grad_norm": 0.011068599298596382, - "learning_rate": 0.0001999978498666567, - "loss": 46.0, - "step": 12972 - }, - { - "epoch": 2.0892145416482144, - "grad_norm": 0.0014579817652702332, - "learning_rate": 0.0001999978495348856, - "loss": 46.0, - "step": 12973 - }, - { - "epoch": 2.089375578727002, - "grad_norm": 0.0009975058492273092, - "learning_rate": 0.0001999978492030889, - "loss": 46.0, - "step": 12974 - }, - { - "epoch": 2.0895366158057893, - "grad_norm": 0.0036850841715931892, - "learning_rate": 0.0001999978488712666, - "loss": 46.0, - "step": 12975 - }, - { - "epoch": 2.0896976528845768, - "grad_norm": 0.005624666344374418, - "learning_rate": 0.00019999784853941868, - "loss": 46.0, - "step": 12976 - }, - { - "epoch": 2.089858689963364, - "grad_norm": 0.0011068080784752965, - "learning_rate": 0.0001999978482075452, - "loss": 46.0, - "step": 12977 - }, - { - "epoch": 2.0900197270421517, - "grad_norm": 0.0018457322148606181, - "learning_rate": 0.00019999784787564612, - "loss": 46.0, - "step": 12978 - }, - { - "epoch": 2.0901807641209387, - "grad_norm": 0.003644770011305809, - "learning_rate": 0.00019999784754372145, - "loss": 46.0, - "step": 12979 - }, - { - "epoch": 2.090341801199726, - "grad_norm": 0.0008228995138779283, - "learning_rate": 0.00019999784721177116, - "loss": 46.0, - "step": 12980 - }, - { - "epoch": 2.0905028382785136, - "grad_norm": 0.00238042208366096, - "learning_rate": 0.0001999978468797953, - "loss": 46.0, - "step": 12981 - }, - { - "epoch": 2.090663875357301, - "grad_norm": 0.002125946804881096, - "learning_rate": 0.00019999784654779385, - "loss": 46.0, - "step": 12982 - }, - { - "epoch": 2.0908249124360885, - "grad_norm": 0.0018968727672472596, - "learning_rate": 0.00019999784621576677, - "loss": 46.0, - "step": 12983 - }, - { - "epoch": 2.090985949514876, - "grad_norm": 0.001954280072823167, - "learning_rate": 0.00019999784588371414, - "loss": 46.0, - "step": 12984 - }, - { - "epoch": 2.0911469865936634, - "grad_norm": 0.0034703118726611137, - "learning_rate": 0.0001999978455516359, - "loss": 46.0, - "step": 12985 - }, - { - "epoch": 2.0913080236724504, - "grad_norm": 0.0006526318611577153, - "learning_rate": 0.00019999784521953205, - "loss": 46.0, - "step": 12986 - }, - { - "epoch": 2.091469060751238, - "grad_norm": 0.0031117235776036978, - "learning_rate": 0.0001999978448874026, - "loss": 46.0, - "step": 12987 - }, - { - "epoch": 2.0916300978300253, - "grad_norm": 0.006501418072730303, - "learning_rate": 0.00019999784455524758, - "loss": 46.0, - "step": 12988 - }, - { - "epoch": 2.0917911349088127, - "grad_norm": 0.004377106670290232, - "learning_rate": 0.00019999784422306699, - "loss": 46.0, - "step": 12989 - }, - { - "epoch": 2.0919521719876, - "grad_norm": 0.0044606709852814674, - "learning_rate": 0.00019999784389086077, - "loss": 46.0, - "step": 12990 - }, - { - "epoch": 2.0921132090663876, - "grad_norm": 0.003383436007425189, - "learning_rate": 0.00019999784355862894, - "loss": 46.0, - "step": 12991 - }, - { - "epoch": 2.092274246145175, - "grad_norm": 0.0005474574281834066, - "learning_rate": 0.00019999784322637156, - "loss": 46.0, - "step": 12992 - }, - { - "epoch": 2.092435283223962, - "grad_norm": 0.00288202497176826, - "learning_rate": 0.00019999784289408855, - "loss": 46.0, - "step": 12993 - }, - { - "epoch": 2.0925963203027496, - "grad_norm": 0.004795857239514589, - "learning_rate": 0.00019999784256177997, - "loss": 46.0, - "step": 12994 - }, - { - "epoch": 2.092757357381537, - "grad_norm": 0.0028374146204441786, - "learning_rate": 0.0001999978422294458, - "loss": 46.0, - "step": 12995 - }, - { - "epoch": 2.0929183944603245, - "grad_norm": 0.0005240382743068039, - "learning_rate": 0.000199997841897086, - "loss": 46.0, - "step": 12996 - }, - { - "epoch": 2.093079431539112, - "grad_norm": 0.00736638531088829, - "learning_rate": 0.0001999978415647006, - "loss": 46.0, - "step": 12997 - }, - { - "epoch": 2.0932404686178994, - "grad_norm": 0.0016489975387230515, - "learning_rate": 0.00019999784123228963, - "loss": 46.0, - "step": 12998 - }, - { - "epoch": 2.093401505696687, - "grad_norm": 0.0005634868866764009, - "learning_rate": 0.00019999784089985308, - "loss": 46.0, - "step": 12999 - }, - { - "epoch": 2.0935625427754743, - "grad_norm": 0.0010365262860432267, - "learning_rate": 0.00019999784056739094, - "loss": 46.0, - "step": 13000 - }, - { - "epoch": 2.0937235798542613, - "grad_norm": 0.0011834435863420367, - "learning_rate": 0.00019999784023490316, - "loss": 46.0, - "step": 13001 - }, - { - "epoch": 2.0938846169330487, - "grad_norm": 0.001617851434275508, - "learning_rate": 0.00019999783990238981, - "loss": 46.0, - "step": 13002 - }, - { - "epoch": 2.094045654011836, - "grad_norm": 0.0016286453464999795, - "learning_rate": 0.0001999978395698509, - "loss": 46.0, - "step": 13003 - }, - { - "epoch": 2.0942066910906236, - "grad_norm": 0.005443901289254427, - "learning_rate": 0.00019999783923728634, - "loss": 46.0, - "step": 13004 - }, - { - "epoch": 2.094367728169411, - "grad_norm": 0.0020517813973128796, - "learning_rate": 0.00019999783890469621, - "loss": 46.0, - "step": 13005 - }, - { - "epoch": 2.0945287652481985, - "grad_norm": 0.0007304801838472486, - "learning_rate": 0.00019999783857208047, - "loss": 46.0, - "step": 13006 - }, - { - "epoch": 2.094689802326986, - "grad_norm": 0.002236368600279093, - "learning_rate": 0.0001999978382394392, - "loss": 46.0, - "step": 13007 - }, - { - "epoch": 2.094850839405773, - "grad_norm": 0.002079067286103964, - "learning_rate": 0.00019999783790677228, - "loss": 46.0, - "step": 13008 - }, - { - "epoch": 2.0950118764845604, - "grad_norm": 0.01510265190154314, - "learning_rate": 0.00019999783757407974, - "loss": 46.0, - "step": 13009 - }, - { - "epoch": 2.095172913563348, - "grad_norm": 0.004322418011724949, - "learning_rate": 0.00019999783724136165, - "loss": 46.0, - "step": 13010 - }, - { - "epoch": 2.0953339506421353, - "grad_norm": 0.0032783453352749348, - "learning_rate": 0.00019999783690861794, - "loss": 46.0, - "step": 13011 - }, - { - "epoch": 2.095494987720923, - "grad_norm": 0.003865213831886649, - "learning_rate": 0.00019999783657584868, - "loss": 46.0, - "step": 13012 - }, - { - "epoch": 2.0956560247997102, - "grad_norm": 0.0034887182991951704, - "learning_rate": 0.00019999783624305377, - "loss": 46.0, - "step": 13013 - }, - { - "epoch": 2.0958170618784977, - "grad_norm": 0.016281094402074814, - "learning_rate": 0.0001999978359102333, - "loss": 46.0, - "step": 13014 - }, - { - "epoch": 2.0959780989572847, - "grad_norm": 0.0019259456312283874, - "learning_rate": 0.00019999783557738722, - "loss": 46.0, - "step": 13015 - }, - { - "epoch": 2.096139136036072, - "grad_norm": 0.004159122239798307, - "learning_rate": 0.00019999783524451555, - "loss": 46.0, - "step": 13016 - }, - { - "epoch": 2.0963001731148596, - "grad_norm": 0.001633423031307757, - "learning_rate": 0.0001999978349116183, - "loss": 46.0, - "step": 13017 - }, - { - "epoch": 2.096461210193647, - "grad_norm": 0.005753161385655403, - "learning_rate": 0.00019999783457869542, - "loss": 46.0, - "step": 13018 - }, - { - "epoch": 2.0966222472724345, - "grad_norm": 0.025512289255857468, - "learning_rate": 0.000199997834245747, - "loss": 46.0, - "step": 13019 - }, - { - "epoch": 2.096783284351222, - "grad_norm": 0.003740867367014289, - "learning_rate": 0.00019999783391277294, - "loss": 46.0, - "step": 13020 - }, - { - "epoch": 2.0969443214300094, - "grad_norm": 0.005378149449825287, - "learning_rate": 0.00019999783357977328, - "loss": 46.0, - "step": 13021 - }, - { - "epoch": 2.097105358508797, - "grad_norm": 0.0030334487091749907, - "learning_rate": 0.0001999978332467481, - "loss": 46.0, - "step": 13022 - }, - { - "epoch": 2.097266395587584, - "grad_norm": 0.00285663572140038, - "learning_rate": 0.00019999783291369726, - "loss": 46.0, - "step": 13023 - }, - { - "epoch": 2.0974274326663713, - "grad_norm": 0.004079996608197689, - "learning_rate": 0.0001999978325806208, - "loss": 46.0, - "step": 13024 - }, - { - "epoch": 2.097588469745159, - "grad_norm": 0.0036319908685982227, - "learning_rate": 0.0001999978322475188, - "loss": 46.0, - "step": 13025 - }, - { - "epoch": 2.0977495068239462, - "grad_norm": 0.002361035207286477, - "learning_rate": 0.00019999783191439118, - "loss": 46.0, - "step": 13026 - }, - { - "epoch": 2.0979105439027337, - "grad_norm": 0.003262534271925688, - "learning_rate": 0.000199997831581238, - "loss": 46.0, - "step": 13027 - }, - { - "epoch": 2.098071580981521, - "grad_norm": 0.0017374531598761678, - "learning_rate": 0.00019999783124805917, - "loss": 46.0, - "step": 13028 - }, - { - "epoch": 2.0982326180603086, - "grad_norm": 0.009705738164484501, - "learning_rate": 0.0001999978309148548, - "loss": 46.0, - "step": 13029 - }, - { - "epoch": 2.0983936551390956, - "grad_norm": 0.006310978904366493, - "learning_rate": 0.0001999978305816248, - "loss": 46.0, - "step": 13030 - }, - { - "epoch": 2.098554692217883, - "grad_norm": 0.007194725796580315, - "learning_rate": 0.00019999783024836923, - "loss": 46.0, - "step": 13031 - }, - { - "epoch": 2.0987157292966705, - "grad_norm": 0.0006201033247634768, - "learning_rate": 0.00019999782991508803, - "loss": 46.0, - "step": 13032 - }, - { - "epoch": 2.098876766375458, - "grad_norm": 0.003160185646265745, - "learning_rate": 0.00019999782958178127, - "loss": 46.0, - "step": 13033 - }, - { - "epoch": 2.0990378034542454, - "grad_norm": 0.003502171253785491, - "learning_rate": 0.00019999782924844892, - "loss": 46.0, - "step": 13034 - }, - { - "epoch": 2.099198840533033, - "grad_norm": 0.005906878039240837, - "learning_rate": 0.00019999782891509096, - "loss": 46.0, - "step": 13035 - }, - { - "epoch": 2.0993598776118203, - "grad_norm": 0.004086571279913187, - "learning_rate": 0.0001999978285817074, - "loss": 46.0, - "step": 13036 - }, - { - "epoch": 2.0995209146906073, - "grad_norm": 0.0012891882797703147, - "learning_rate": 0.00019999782824829825, - "loss": 46.0, - "step": 13037 - }, - { - "epoch": 2.0996819517693948, - "grad_norm": 0.0017189068021252751, - "learning_rate": 0.0001999978279148635, - "loss": 46.0, - "step": 13038 - }, - { - "epoch": 2.099842988848182, - "grad_norm": 0.0016231165500357747, - "learning_rate": 0.00019999782758140316, - "loss": 46.0, - "step": 13039 - }, - { - "epoch": 2.1000040259269697, - "grad_norm": 0.01039725448936224, - "learning_rate": 0.0001999978272479172, - "loss": 46.0, - "step": 13040 - }, - { - "epoch": 2.100165063005757, - "grad_norm": 0.0015360346296802163, - "learning_rate": 0.00019999782691440573, - "loss": 46.0, - "step": 13041 - }, - { - "epoch": 2.1003261000845446, - "grad_norm": 0.001290195039473474, - "learning_rate": 0.00019999782658086857, - "loss": 46.0, - "step": 13042 - }, - { - "epoch": 2.100487137163332, - "grad_norm": 0.0018424431327730417, - "learning_rate": 0.0001999978262473059, - "loss": 46.0, - "step": 13043 - }, - { - "epoch": 2.100648174242119, - "grad_norm": 0.0008400485967285931, - "learning_rate": 0.0001999978259137176, - "loss": 46.0, - "step": 13044 - }, - { - "epoch": 2.1008092113209065, - "grad_norm": 0.002809921745210886, - "learning_rate": 0.00019999782558010367, - "loss": 46.0, - "step": 13045 - }, - { - "epoch": 2.100970248399694, - "grad_norm": 0.0026175386738032103, - "learning_rate": 0.00019999782524646417, - "loss": 46.0, - "step": 13046 - }, - { - "epoch": 2.1011312854784814, - "grad_norm": 0.00609249621629715, - "learning_rate": 0.00019999782491279908, - "loss": 46.0, - "step": 13047 - }, - { - "epoch": 2.101292322557269, - "grad_norm": 0.0023897849023342133, - "learning_rate": 0.0001999978245791084, - "loss": 46.0, - "step": 13048 - }, - { - "epoch": 2.1014533596360563, - "grad_norm": 0.0007847766391932964, - "learning_rate": 0.00019999782424539212, - "loss": 46.0, - "step": 13049 - }, - { - "epoch": 2.1016143967148437, - "grad_norm": 0.004051079507917166, - "learning_rate": 0.00019999782391165024, - "loss": 46.0, - "step": 13050 - }, - { - "epoch": 2.101775433793631, - "grad_norm": 0.004867277108132839, - "learning_rate": 0.00019999782357788277, - "loss": 46.0, - "step": 13051 - }, - { - "epoch": 2.101936470872418, - "grad_norm": 0.0021208066027611494, - "learning_rate": 0.00019999782324408972, - "loss": 46.0, - "step": 13052 - }, - { - "epoch": 2.1020975079512056, - "grad_norm": 0.001158708124421537, - "learning_rate": 0.00019999782291027106, - "loss": 46.0, - "step": 13053 - }, - { - "epoch": 2.102258545029993, - "grad_norm": 0.00791366770863533, - "learning_rate": 0.00019999782257642678, - "loss": 46.0, - "step": 13054 - }, - { - "epoch": 2.1024195821087805, - "grad_norm": 0.00036076310789212584, - "learning_rate": 0.00019999782224255696, - "loss": 46.0, - "step": 13055 - }, - { - "epoch": 2.102580619187568, - "grad_norm": 0.003293624147772789, - "learning_rate": 0.0001999978219086615, - "loss": 46.0, - "step": 13056 - }, - { - "epoch": 2.1027416562663555, - "grad_norm": 0.0014254057314246893, - "learning_rate": 0.0001999978215747405, - "loss": 46.0, - "step": 13057 - }, - { - "epoch": 2.102902693345143, - "grad_norm": 0.0019138320349156857, - "learning_rate": 0.00019999782124079384, - "loss": 46.0, - "step": 13058 - }, - { - "epoch": 2.10306373042393, - "grad_norm": 0.001688587130047381, - "learning_rate": 0.00019999782090682162, - "loss": 46.0, - "step": 13059 - }, - { - "epoch": 2.1032247675027174, - "grad_norm": 0.001946649863384664, - "learning_rate": 0.00019999782057282382, - "loss": 46.0, - "step": 13060 - }, - { - "epoch": 2.103385804581505, - "grad_norm": 0.013654623180627823, - "learning_rate": 0.0001999978202388004, - "loss": 46.0, - "step": 13061 - }, - { - "epoch": 2.1035468416602923, - "grad_norm": 0.0007878972101025283, - "learning_rate": 0.0001999978199047514, - "loss": 46.0, - "step": 13062 - }, - { - "epoch": 2.1037078787390797, - "grad_norm": 0.0008884586859494448, - "learning_rate": 0.00019999781957067678, - "loss": 46.0, - "step": 13063 - }, - { - "epoch": 2.103868915817867, - "grad_norm": 0.0005142706213518977, - "learning_rate": 0.00019999781923657662, - "loss": 46.0, - "step": 13064 - }, - { - "epoch": 2.1040299528966546, - "grad_norm": 0.001775235403329134, - "learning_rate": 0.00019999781890245083, - "loss": 46.0, - "step": 13065 - }, - { - "epoch": 2.104190989975442, - "grad_norm": 0.0008990365313366055, - "learning_rate": 0.00019999781856829942, - "loss": 46.0, - "step": 13066 - }, - { - "epoch": 2.104352027054229, - "grad_norm": 0.0032157900277525187, - "learning_rate": 0.00019999781823412246, - "loss": 46.0, - "step": 13067 - }, - { - "epoch": 2.1045130641330165, - "grad_norm": 0.0034256966318935156, - "learning_rate": 0.00019999781789991987, - "loss": 46.0, - "step": 13068 - }, - { - "epoch": 2.104674101211804, - "grad_norm": 0.002097911899909377, - "learning_rate": 0.0001999978175656917, - "loss": 46.0, - "step": 13069 - }, - { - "epoch": 2.1048351382905914, - "grad_norm": 0.0034216258209198713, - "learning_rate": 0.00019999781723143795, - "loss": 46.0, - "step": 13070 - }, - { - "epoch": 2.104996175369379, - "grad_norm": 0.0008702049381099641, - "learning_rate": 0.0001999978168971586, - "loss": 46.0, - "step": 13071 - }, - { - "epoch": 2.1051572124481663, - "grad_norm": 0.003039182862266898, - "learning_rate": 0.00019999781656285365, - "loss": 46.0, - "step": 13072 - }, - { - "epoch": 2.105318249526954, - "grad_norm": 0.005514364689588547, - "learning_rate": 0.00019999781622852313, - "loss": 46.0, - "step": 13073 - }, - { - "epoch": 2.105479286605741, - "grad_norm": 0.0034514337312430143, - "learning_rate": 0.00019999781589416697, - "loss": 46.0, - "step": 13074 - }, - { - "epoch": 2.1056403236845282, - "grad_norm": 0.0012727950233966112, - "learning_rate": 0.00019999781555978522, - "loss": 46.0, - "step": 13075 - }, - { - "epoch": 2.1058013607633157, - "grad_norm": 0.002618958940729499, - "learning_rate": 0.00019999781522537792, - "loss": 46.0, - "step": 13076 - }, - { - "epoch": 2.105962397842103, - "grad_norm": 0.0012678103521466255, - "learning_rate": 0.00019999781489094497, - "loss": 46.0, - "step": 13077 - }, - { - "epoch": 2.1061234349208906, - "grad_norm": 0.0037736110389232635, - "learning_rate": 0.0001999978145564865, - "loss": 46.0, - "step": 13078 - }, - { - "epoch": 2.106284471999678, - "grad_norm": 0.003596524940803647, - "learning_rate": 0.00019999781422200236, - "loss": 46.0, - "step": 13079 - }, - { - "epoch": 2.1064455090784655, - "grad_norm": 0.0021964493207633495, - "learning_rate": 0.00019999781388749266, - "loss": 46.0, - "step": 13080 - }, - { - "epoch": 2.1066065461572525, - "grad_norm": 0.003467605682089925, - "learning_rate": 0.00019999781355295736, - "loss": 46.0, - "step": 13081 - }, - { - "epoch": 2.10676758323604, - "grad_norm": 0.005575900431722403, - "learning_rate": 0.00019999781321839647, - "loss": 46.0, - "step": 13082 - }, - { - "epoch": 2.1069286203148274, - "grad_norm": 0.002288502175360918, - "learning_rate": 0.00019999781288380998, - "loss": 46.0, - "step": 13083 - }, - { - "epoch": 2.107089657393615, - "grad_norm": 0.0015003661392256618, - "learning_rate": 0.00019999781254919792, - "loss": 46.0, - "step": 13084 - }, - { - "epoch": 2.1072506944724023, - "grad_norm": 0.0071339975111186504, - "learning_rate": 0.00019999781221456022, - "loss": 46.0, - "step": 13085 - }, - { - "epoch": 2.1074117315511898, - "grad_norm": 0.002094605704769492, - "learning_rate": 0.00019999781187989693, - "loss": 46.0, - "step": 13086 - }, - { - "epoch": 2.1075727686299772, - "grad_norm": 0.0029553426429629326, - "learning_rate": 0.00019999781154520809, - "loss": 46.0, - "step": 13087 - }, - { - "epoch": 2.1077338057087642, - "grad_norm": 0.0033231982961297035, - "learning_rate": 0.00019999781121049362, - "loss": 46.0, - "step": 13088 - }, - { - "epoch": 2.1078948427875517, - "grad_norm": 0.0011876259231939912, - "learning_rate": 0.00019999781087575358, - "loss": 46.0, - "step": 13089 - }, - { - "epoch": 2.108055879866339, - "grad_norm": 0.004289860837161541, - "learning_rate": 0.00019999781054098794, - "loss": 46.0, - "step": 13090 - }, - { - "epoch": 2.1082169169451266, - "grad_norm": 0.0029466752894222736, - "learning_rate": 0.0001999978102061967, - "loss": 46.0, - "step": 13091 - }, - { - "epoch": 2.108377954023914, - "grad_norm": 0.0024188857059925795, - "learning_rate": 0.00019999780987137985, - "loss": 46.0, - "step": 13092 - }, - { - "epoch": 2.1085389911027015, - "grad_norm": 0.004038097336888313, - "learning_rate": 0.00019999780953653743, - "loss": 46.0, - "step": 13093 - }, - { - "epoch": 2.108700028181489, - "grad_norm": 0.0012877986300736666, - "learning_rate": 0.0001999978092016694, - "loss": 46.0, - "step": 13094 - }, - { - "epoch": 2.1088610652602764, - "grad_norm": 0.00403616763651371, - "learning_rate": 0.0001999978088667758, - "loss": 46.0, - "step": 13095 - }, - { - "epoch": 2.1090221023390634, - "grad_norm": 0.006237053778022528, - "learning_rate": 0.00019999780853185655, - "loss": 46.0, - "step": 13096 - }, - { - "epoch": 2.109183139417851, - "grad_norm": 0.0037090268451720476, - "learning_rate": 0.00019999780819691178, - "loss": 46.0, - "step": 13097 - }, - { - "epoch": 2.1093441764966383, - "grad_norm": 0.002308735391125083, - "learning_rate": 0.00019999780786194137, - "loss": 46.0, - "step": 13098 - }, - { - "epoch": 2.1095052135754258, - "grad_norm": 0.0072404914535582066, - "learning_rate": 0.00019999780752694537, - "loss": 46.0, - "step": 13099 - }, - { - "epoch": 2.109666250654213, - "grad_norm": 0.0009635112946853042, - "learning_rate": 0.00019999780719192378, - "loss": 46.0, - "step": 13100 - }, - { - "epoch": 2.1098272877330007, - "grad_norm": 0.007864450104534626, - "learning_rate": 0.0001999978068568766, - "loss": 46.0, - "step": 13101 - }, - { - "epoch": 2.109988324811788, - "grad_norm": 0.002165738958865404, - "learning_rate": 0.0001999978065218038, - "loss": 46.0, - "step": 13102 - }, - { - "epoch": 2.110149361890575, - "grad_norm": 0.003186293877661228, - "learning_rate": 0.00019999780618670546, - "loss": 46.0, - "step": 13103 - }, - { - "epoch": 2.1103103989693626, - "grad_norm": 0.0014151227660477161, - "learning_rate": 0.00019999780585158147, - "loss": 46.0, - "step": 13104 - }, - { - "epoch": 2.11047143604815, - "grad_norm": 0.0019516373286023736, - "learning_rate": 0.00019999780551643192, - "loss": 46.0, - "step": 13105 - }, - { - "epoch": 2.1106324731269375, - "grad_norm": 0.00819750688970089, - "learning_rate": 0.00019999780518125678, - "loss": 46.0, - "step": 13106 - }, - { - "epoch": 2.110793510205725, - "grad_norm": 0.002146652899682522, - "learning_rate": 0.000199997804846056, - "loss": 46.0, - "step": 13107 - }, - { - "epoch": 2.1109545472845124, - "grad_norm": 0.003578031435608864, - "learning_rate": 0.0001999978045108297, - "loss": 46.0, - "step": 13108 - }, - { - "epoch": 2.1111155843633, - "grad_norm": 0.00167667877394706, - "learning_rate": 0.0001999978041755777, - "loss": 46.0, - "step": 13109 - }, - { - "epoch": 2.111276621442087, - "grad_norm": 0.002079801866784692, - "learning_rate": 0.0001999978038403002, - "loss": 46.0, - "step": 13110 - }, - { - "epoch": 2.1114376585208743, - "grad_norm": 0.0006874401587992907, - "learning_rate": 0.00019999780350499707, - "loss": 46.0, - "step": 13111 - }, - { - "epoch": 2.1115986955996617, - "grad_norm": 0.0007555847405456007, - "learning_rate": 0.00019999780316966838, - "loss": 46.0, - "step": 13112 - }, - { - "epoch": 2.111759732678449, - "grad_norm": 0.0034825955517590046, - "learning_rate": 0.00019999780283431405, - "loss": 46.0, - "step": 13113 - }, - { - "epoch": 2.1119207697572366, - "grad_norm": 0.0012161614140495658, - "learning_rate": 0.00019999780249893413, - "loss": 46.0, - "step": 13114 - }, - { - "epoch": 2.112081806836024, - "grad_norm": 0.0020795147866010666, - "learning_rate": 0.00019999780216352862, - "loss": 46.0, - "step": 13115 - }, - { - "epoch": 2.1122428439148115, - "grad_norm": 0.0027438029646873474, - "learning_rate": 0.0001999978018280975, - "loss": 46.0, - "step": 13116 - }, - { - "epoch": 2.1124038809935985, - "grad_norm": 0.001997697865590453, - "learning_rate": 0.00019999780149264083, - "loss": 46.0, - "step": 13117 - }, - { - "epoch": 2.112564918072386, - "grad_norm": 0.0015676217153668404, - "learning_rate": 0.00019999780115715853, - "loss": 46.0, - "step": 13118 - }, - { - "epoch": 2.1127259551511735, - "grad_norm": 0.001409698510542512, - "learning_rate": 0.00019999780082165068, - "loss": 46.0, - "step": 13119 - }, - { - "epoch": 2.112886992229961, - "grad_norm": 0.0010853136191144586, - "learning_rate": 0.0001999978004861172, - "loss": 46.0, - "step": 13120 - }, - { - "epoch": 2.1130480293087484, - "grad_norm": 0.0007200825493782759, - "learning_rate": 0.0001999978001505581, - "loss": 46.0, - "step": 13121 - }, - { - "epoch": 2.113209066387536, - "grad_norm": 0.0033002144191414118, - "learning_rate": 0.00019999779981497344, - "loss": 46.0, - "step": 13122 - }, - { - "epoch": 2.1133701034663233, - "grad_norm": 0.0008897663210518658, - "learning_rate": 0.00019999779947936318, - "loss": 46.0, - "step": 13123 - }, - { - "epoch": 2.1135311405451107, - "grad_norm": 0.00624080840498209, - "learning_rate": 0.00019999779914372734, - "loss": 46.0, - "step": 13124 - }, - { - "epoch": 2.1136921776238977, - "grad_norm": 0.0023192770313471556, - "learning_rate": 0.0001999977988080659, - "loss": 46.0, - "step": 13125 - }, - { - "epoch": 2.113853214702685, - "grad_norm": 0.0022517463658005, - "learning_rate": 0.00019999779847237886, - "loss": 46.0, - "step": 13126 - }, - { - "epoch": 2.1140142517814726, - "grad_norm": 0.0012670793803408742, - "learning_rate": 0.0001999977981366662, - "loss": 46.0, - "step": 13127 - }, - { - "epoch": 2.11417528886026, - "grad_norm": 0.0024502556771039963, - "learning_rate": 0.00019999779780092798, - "loss": 46.0, - "step": 13128 - }, - { - "epoch": 2.1143363259390475, - "grad_norm": 0.0022671790793538094, - "learning_rate": 0.00019999779746516415, - "loss": 46.0, - "step": 13129 - }, - { - "epoch": 2.114497363017835, - "grad_norm": 0.0013940547360107303, - "learning_rate": 0.00019999779712937473, - "loss": 46.0, - "step": 13130 - }, - { - "epoch": 2.1146584000966224, - "grad_norm": 0.0017259205924347043, - "learning_rate": 0.00019999779679355972, - "loss": 46.0, - "step": 13131 - }, - { - "epoch": 2.1148194371754094, - "grad_norm": 0.0031034525018185377, - "learning_rate": 0.00019999779645771913, - "loss": 46.0, - "step": 13132 - }, - { - "epoch": 2.114980474254197, - "grad_norm": 0.001265834434889257, - "learning_rate": 0.00019999779612185292, - "loss": 46.0, - "step": 13133 - }, - { - "epoch": 2.1151415113329843, - "grad_norm": 0.004677072167396545, - "learning_rate": 0.00019999779578596112, - "loss": 46.0, - "step": 13134 - }, - { - "epoch": 2.115302548411772, - "grad_norm": 0.004497617483139038, - "learning_rate": 0.00019999779545004374, - "loss": 46.0, - "step": 13135 - }, - { - "epoch": 2.1154635854905592, - "grad_norm": 0.003626951714977622, - "learning_rate": 0.00019999779511410072, - "loss": 46.0, - "step": 13136 - }, - { - "epoch": 2.1156246225693467, - "grad_norm": 0.0022104203235358, - "learning_rate": 0.00019999779477813216, - "loss": 46.0, - "step": 13137 - }, - { - "epoch": 2.115785659648134, - "grad_norm": 0.0009194849408231676, - "learning_rate": 0.00019999779444213799, - "loss": 46.0, - "step": 13138 - }, - { - "epoch": 2.1159466967269216, - "grad_norm": 0.008031724952161312, - "learning_rate": 0.00019999779410611823, - "loss": 46.0, - "step": 13139 - }, - { - "epoch": 2.1161077338057086, - "grad_norm": 0.003028605366125703, - "learning_rate": 0.00019999779377007285, - "loss": 46.0, - "step": 13140 - }, - { - "epoch": 2.116268770884496, - "grad_norm": 0.00220558256842196, - "learning_rate": 0.00019999779343400192, - "loss": 46.0, - "step": 13141 - }, - { - "epoch": 2.1164298079632835, - "grad_norm": 0.001776234246790409, - "learning_rate": 0.00019999779309790535, - "loss": 46.0, - "step": 13142 - }, - { - "epoch": 2.116590845042071, - "grad_norm": 0.0032892825547605753, - "learning_rate": 0.0001999977927617832, - "loss": 46.0, - "step": 13143 - }, - { - "epoch": 2.1167518821208584, - "grad_norm": 0.003959726542234421, - "learning_rate": 0.00019999779242563546, - "loss": 46.0, - "step": 13144 - }, - { - "epoch": 2.116912919199646, - "grad_norm": 0.006606176495552063, - "learning_rate": 0.00019999779208946215, - "loss": 46.0, - "step": 13145 - }, - { - "epoch": 2.1170739562784333, - "grad_norm": 0.0038326343055814505, - "learning_rate": 0.0001999977917532632, - "loss": 46.0, - "step": 13146 - }, - { - "epoch": 2.1172349933572203, - "grad_norm": 0.006243380717933178, - "learning_rate": 0.0001999977914170387, - "loss": 46.0, - "step": 13147 - }, - { - "epoch": 2.1173960304360078, - "grad_norm": 0.0035989389289170504, - "learning_rate": 0.00019999779108078857, - "loss": 46.0, - "step": 13148 - }, - { - "epoch": 2.117557067514795, - "grad_norm": 0.0010204442078247666, - "learning_rate": 0.00019999779074451285, - "loss": 46.0, - "step": 13149 - }, - { - "epoch": 2.1177181045935827, - "grad_norm": 0.0005883739213459194, - "learning_rate": 0.00019999779040821155, - "loss": 46.0, - "step": 13150 - }, - { - "epoch": 2.11787914167237, - "grad_norm": 0.005654613021761179, - "learning_rate": 0.00019999779007188467, - "loss": 46.0, - "step": 13151 - }, - { - "epoch": 2.1180401787511576, - "grad_norm": 0.0012487141648307443, - "learning_rate": 0.00019999778973553217, - "loss": 46.0, - "step": 13152 - }, - { - "epoch": 2.118201215829945, - "grad_norm": 0.0007987206918187439, - "learning_rate": 0.00019999778939915408, - "loss": 46.0, - "step": 13153 - }, - { - "epoch": 2.118362252908732, - "grad_norm": 0.007598782889544964, - "learning_rate": 0.0001999977890627504, - "loss": 46.0, - "step": 13154 - }, - { - "epoch": 2.1185232899875195, - "grad_norm": 0.0019899557810276747, - "learning_rate": 0.00019999778872632111, - "loss": 46.0, - "step": 13155 - }, - { - "epoch": 2.118684327066307, - "grad_norm": 0.00354297598823905, - "learning_rate": 0.00019999778838986624, - "loss": 46.0, - "step": 13156 - }, - { - "epoch": 2.1188453641450944, - "grad_norm": 0.001969203818589449, - "learning_rate": 0.00019999778805338577, - "loss": 46.0, - "step": 13157 - }, - { - "epoch": 2.119006401223882, - "grad_norm": 0.001812206581234932, - "learning_rate": 0.00019999778771687972, - "loss": 46.0, - "step": 13158 - }, - { - "epoch": 2.1191674383026693, - "grad_norm": 0.0026534204371273518, - "learning_rate": 0.00019999778738034806, - "loss": 46.0, - "step": 13159 - }, - { - "epoch": 2.1193284753814567, - "grad_norm": 0.002966444008052349, - "learning_rate": 0.0001999977870437908, - "loss": 46.0, - "step": 13160 - }, - { - "epoch": 2.1194895124602438, - "grad_norm": 0.0016665224684402347, - "learning_rate": 0.000199997786707208, - "loss": 46.0, - "step": 13161 - }, - { - "epoch": 2.119650549539031, - "grad_norm": 0.0010220034746453166, - "learning_rate": 0.00019999778637059954, - "loss": 46.0, - "step": 13162 - }, - { - "epoch": 2.1198115866178187, - "grad_norm": 0.003974624909460545, - "learning_rate": 0.0001999977860339655, - "loss": 46.0, - "step": 13163 - }, - { - "epoch": 2.119972623696606, - "grad_norm": 0.001191667397506535, - "learning_rate": 0.00019999778569730587, - "loss": 46.0, - "step": 13164 - }, - { - "epoch": 2.1201336607753936, - "grad_norm": 0.008775805123150349, - "learning_rate": 0.00019999778536062066, - "loss": 46.0, - "step": 13165 - }, - { - "epoch": 2.120294697854181, - "grad_norm": 0.004642212297767401, - "learning_rate": 0.00019999778502390985, - "loss": 46.0, - "step": 13166 - }, - { - "epoch": 2.1204557349329685, - "grad_norm": 0.003119399771094322, - "learning_rate": 0.00019999778468717346, - "loss": 46.0, - "step": 13167 - }, - { - "epoch": 2.120616772011756, - "grad_norm": 0.0015318223740905523, - "learning_rate": 0.00019999778435041146, - "loss": 46.0, - "step": 13168 - }, - { - "epoch": 2.120777809090543, - "grad_norm": 0.0025191716849803925, - "learning_rate": 0.00019999778401362384, - "loss": 46.0, - "step": 13169 - }, - { - "epoch": 2.1209388461693304, - "grad_norm": 0.0011357973562553525, - "learning_rate": 0.00019999778367681064, - "loss": 46.0, - "step": 13170 - }, - { - "epoch": 2.121099883248118, - "grad_norm": 0.003429251490160823, - "learning_rate": 0.00019999778333997184, - "loss": 46.0, - "step": 13171 - }, - { - "epoch": 2.1212609203269053, - "grad_norm": 0.004361074883490801, - "learning_rate": 0.0001999977830031075, - "loss": 46.0, - "step": 13172 - }, - { - "epoch": 2.1214219574056927, - "grad_norm": 0.0060005937702953815, - "learning_rate": 0.00019999778266621752, - "loss": 46.0, - "step": 13173 - }, - { - "epoch": 2.12158299448448, - "grad_norm": 0.015361596830189228, - "learning_rate": 0.00019999778232930194, - "loss": 46.0, - "step": 13174 - }, - { - "epoch": 2.1217440315632676, - "grad_norm": 0.003407333977520466, - "learning_rate": 0.00019999778199236077, - "loss": 46.0, - "step": 13175 - }, - { - "epoch": 2.1219050686420546, - "grad_norm": 0.002047986490651965, - "learning_rate": 0.00019999778165539402, - "loss": 46.0, - "step": 13176 - }, - { - "epoch": 2.122066105720842, - "grad_norm": 0.0006436305120587349, - "learning_rate": 0.00019999778131840168, - "loss": 46.0, - "step": 13177 - }, - { - "epoch": 2.1222271427996295, - "grad_norm": 0.0034564253874123096, - "learning_rate": 0.00019999778098138372, - "loss": 46.0, - "step": 13178 - }, - { - "epoch": 2.122388179878417, - "grad_norm": 0.0026176944375038147, - "learning_rate": 0.00019999778064434018, - "loss": 46.0, - "step": 13179 - }, - { - "epoch": 2.1225492169572044, - "grad_norm": 0.004664492327719927, - "learning_rate": 0.00019999778030727104, - "loss": 46.0, - "step": 13180 - }, - { - "epoch": 2.122710254035992, - "grad_norm": 0.014643349684774876, - "learning_rate": 0.00019999777997017633, - "loss": 46.0, - "step": 13181 - }, - { - "epoch": 2.1228712911147793, - "grad_norm": 0.001551997964270413, - "learning_rate": 0.00019999777963305602, - "loss": 46.0, - "step": 13182 - }, - { - "epoch": 2.1230323281935664, - "grad_norm": 0.003320495132356882, - "learning_rate": 0.00019999777929591007, - "loss": 46.0, - "step": 13183 - }, - { - "epoch": 2.123193365272354, - "grad_norm": 0.00405589584261179, - "learning_rate": 0.00019999777895873857, - "loss": 46.0, - "step": 13184 - }, - { - "epoch": 2.1233544023511413, - "grad_norm": 0.0006907575880177319, - "learning_rate": 0.00019999777862154144, - "loss": 46.0, - "step": 13185 - }, - { - "epoch": 2.1235154394299287, - "grad_norm": 0.002527642762288451, - "learning_rate": 0.00019999777828431876, - "loss": 46.0, - "step": 13186 - }, - { - "epoch": 2.123676476508716, - "grad_norm": 0.006069401744753122, - "learning_rate": 0.0001999977779470705, - "loss": 46.0, - "step": 13187 - }, - { - "epoch": 2.1238375135875036, - "grad_norm": 0.0019359297584742308, - "learning_rate": 0.00019999777760979658, - "loss": 46.0, - "step": 13188 - }, - { - "epoch": 2.123998550666291, - "grad_norm": 0.0017593411030247808, - "learning_rate": 0.00019999777727249711, - "loss": 46.0, - "step": 13189 - }, - { - "epoch": 2.1241595877450785, - "grad_norm": 0.004458352457731962, - "learning_rate": 0.000199997776935172, - "loss": 46.0, - "step": 13190 - }, - { - "epoch": 2.1243206248238655, - "grad_norm": 0.003931291401386261, - "learning_rate": 0.00019999777659782136, - "loss": 46.0, - "step": 13191 - }, - { - "epoch": 2.124481661902653, - "grad_norm": 0.004808352794498205, - "learning_rate": 0.0001999977762604451, - "loss": 46.0, - "step": 13192 - }, - { - "epoch": 2.1246426989814404, - "grad_norm": 0.0015759035013616085, - "learning_rate": 0.00019999777592304323, - "loss": 46.0, - "step": 13193 - }, - { - "epoch": 2.124803736060228, - "grad_norm": 0.0029267179779708385, - "learning_rate": 0.00019999777558561577, - "loss": 46.0, - "step": 13194 - }, - { - "epoch": 2.1249647731390153, - "grad_norm": 0.000670771871227771, - "learning_rate": 0.00019999777524816272, - "loss": 46.0, - "step": 13195 - }, - { - "epoch": 2.125125810217803, - "grad_norm": 0.0008510480402037501, - "learning_rate": 0.0001999977749106841, - "loss": 46.0, - "step": 13196 - }, - { - "epoch": 2.1252868472965902, - "grad_norm": 0.0035457415506243706, - "learning_rate": 0.00019999777457317986, - "loss": 46.0, - "step": 13197 - }, - { - "epoch": 2.1254478843753772, - "grad_norm": 0.002282337285578251, - "learning_rate": 0.00019999777423565003, - "loss": 46.0, - "step": 13198 - }, - { - "epoch": 2.1256089214541647, - "grad_norm": 0.0036603291518986225, - "learning_rate": 0.00019999777389809458, - "loss": 46.0, - "step": 13199 - }, - { - "epoch": 2.125769958532952, - "grad_norm": 0.003987677861005068, - "learning_rate": 0.00019999777356051357, - "loss": 46.0, - "step": 13200 - }, - { - "epoch": 2.1259309956117396, - "grad_norm": 0.007842597551643848, - "learning_rate": 0.00019999777322290697, - "loss": 46.0, - "step": 13201 - }, - { - "epoch": 2.126092032690527, - "grad_norm": 0.001646373770199716, - "learning_rate": 0.00019999777288527476, - "loss": 46.0, - "step": 13202 - }, - { - "epoch": 2.1262530697693145, - "grad_norm": 0.0014534646179527044, - "learning_rate": 0.00019999777254761693, - "loss": 46.0, - "step": 13203 - }, - { - "epoch": 2.126414106848102, - "grad_norm": 0.004115855786949396, - "learning_rate": 0.00019999777220993355, - "loss": 46.0, - "step": 13204 - }, - { - "epoch": 2.126575143926889, - "grad_norm": 0.001109655131585896, - "learning_rate": 0.00019999777187222457, - "loss": 46.0, - "step": 13205 - }, - { - "epoch": 2.1267361810056764, - "grad_norm": 0.0011397922644391656, - "learning_rate": 0.00019999777153448999, - "loss": 46.0, - "step": 13206 - }, - { - "epoch": 2.126897218084464, - "grad_norm": 0.0014358978951349854, - "learning_rate": 0.0001999977711967298, - "loss": 46.0, - "step": 13207 - }, - { - "epoch": 2.1270582551632513, - "grad_norm": 0.00215542851947248, - "learning_rate": 0.000199997770858944, - "loss": 46.0, - "step": 13208 - }, - { - "epoch": 2.1272192922420388, - "grad_norm": 0.0012454851530492306, - "learning_rate": 0.00019999777052113265, - "loss": 46.0, - "step": 13209 - }, - { - "epoch": 2.127380329320826, - "grad_norm": 0.006422761827707291, - "learning_rate": 0.00019999777018329568, - "loss": 46.0, - "step": 13210 - }, - { - "epoch": 2.1275413663996137, - "grad_norm": 0.0028827968053519726, - "learning_rate": 0.00019999776984543313, - "loss": 46.0, - "step": 13211 - }, - { - "epoch": 2.127702403478401, - "grad_norm": 0.0038349488750100136, - "learning_rate": 0.00019999776950754497, - "loss": 46.0, - "step": 13212 - }, - { - "epoch": 2.127863440557188, - "grad_norm": 0.004856688901782036, - "learning_rate": 0.00019999776916963122, - "loss": 46.0, - "step": 13213 - }, - { - "epoch": 2.1280244776359756, - "grad_norm": 0.0012035154504701495, - "learning_rate": 0.0001999977688316919, - "loss": 46.0, - "step": 13214 - }, - { - "epoch": 2.128185514714763, - "grad_norm": 0.000896472716704011, - "learning_rate": 0.00019999776849372695, - "loss": 46.0, - "step": 13215 - }, - { - "epoch": 2.1283465517935505, - "grad_norm": 0.003361812559887767, - "learning_rate": 0.00019999776815573644, - "loss": 46.0, - "step": 13216 - }, - { - "epoch": 2.128507588872338, - "grad_norm": 0.0012828216422349215, - "learning_rate": 0.0001999977678177203, - "loss": 46.0, - "step": 13217 - }, - { - "epoch": 2.1286686259511254, - "grad_norm": 0.0016373510006815195, - "learning_rate": 0.00019999776747967857, - "loss": 46.0, - "step": 13218 - }, - { - "epoch": 2.128829663029913, - "grad_norm": 0.0030524181202054024, - "learning_rate": 0.00019999776714161127, - "loss": 46.0, - "step": 13219 - }, - { - "epoch": 2.1289907001087, - "grad_norm": 0.0026899180375039577, - "learning_rate": 0.00019999776680351835, - "loss": 46.0, - "step": 13220 - }, - { - "epoch": 2.1291517371874873, - "grad_norm": 0.0019747407641261816, - "learning_rate": 0.00019999776646539985, - "loss": 46.0, - "step": 13221 - }, - { - "epoch": 2.1293127742662747, - "grad_norm": 0.0024062597658485174, - "learning_rate": 0.00019999776612725575, - "loss": 46.0, - "step": 13222 - }, - { - "epoch": 2.129473811345062, - "grad_norm": 0.001246427884325385, - "learning_rate": 0.00019999776578908608, - "loss": 46.0, - "step": 13223 - }, - { - "epoch": 2.1296348484238496, - "grad_norm": 0.0037448867224156857, - "learning_rate": 0.00019999776545089079, - "loss": 46.0, - "step": 13224 - }, - { - "epoch": 2.129795885502637, - "grad_norm": 0.002583857625722885, - "learning_rate": 0.00019999776511266993, - "loss": 46.0, - "step": 13225 - }, - { - "epoch": 2.1299569225814245, - "grad_norm": 0.0011997292749583721, - "learning_rate": 0.00019999776477442344, - "loss": 46.0, - "step": 13226 - }, - { - "epoch": 2.1301179596602116, - "grad_norm": 0.002700342796742916, - "learning_rate": 0.00019999776443615139, - "loss": 46.0, - "step": 13227 - }, - { - "epoch": 2.130278996738999, - "grad_norm": 0.0008182794554159045, - "learning_rate": 0.00019999776409785375, - "loss": 46.0, - "step": 13228 - }, - { - "epoch": 2.1304400338177865, - "grad_norm": 0.003988698124885559, - "learning_rate": 0.00019999776375953046, - "loss": 46.0, - "step": 13229 - }, - { - "epoch": 2.130601070896574, - "grad_norm": 0.002513501327484846, - "learning_rate": 0.00019999776342118162, - "loss": 46.0, - "step": 13230 - }, - { - "epoch": 2.1307621079753614, - "grad_norm": 0.004659769590944052, - "learning_rate": 0.00019999776308280716, - "loss": 46.0, - "step": 13231 - }, - { - "epoch": 2.130923145054149, - "grad_norm": 0.0014631019439548254, - "learning_rate": 0.00019999776274440715, - "loss": 46.0, - "step": 13232 - }, - { - "epoch": 2.1310841821329363, - "grad_norm": 0.0012769061140716076, - "learning_rate": 0.0001999977624059815, - "loss": 46.0, - "step": 13233 - }, - { - "epoch": 2.1312452192117233, - "grad_norm": 0.003122994676232338, - "learning_rate": 0.00019999776206753027, - "loss": 46.0, - "step": 13234 - }, - { - "epoch": 2.1314062562905107, - "grad_norm": 0.0034654391929507256, - "learning_rate": 0.00019999776172905347, - "loss": 46.0, - "step": 13235 - }, - { - "epoch": 2.131567293369298, - "grad_norm": 0.008263527415692806, - "learning_rate": 0.00019999776139055105, - "loss": 46.0, - "step": 13236 - }, - { - "epoch": 2.1317283304480856, - "grad_norm": 0.0006701046950183809, - "learning_rate": 0.00019999776105202304, - "loss": 46.0, - "step": 13237 - }, - { - "epoch": 2.131889367526873, - "grad_norm": 0.003449707990512252, - "learning_rate": 0.00019999776071346942, - "loss": 46.0, - "step": 13238 - }, - { - "epoch": 2.1320504046056605, - "grad_norm": 0.0008667586953379214, - "learning_rate": 0.00019999776037489024, - "loss": 46.0, - "step": 13239 - }, - { - "epoch": 2.132211441684448, - "grad_norm": 0.0030587271321564913, - "learning_rate": 0.00019999776003628544, - "loss": 46.0, - "step": 13240 - }, - { - "epoch": 2.1323724787632354, - "grad_norm": 0.0013762717135250568, - "learning_rate": 0.00019999775969765503, - "loss": 46.0, - "step": 13241 - }, - { - "epoch": 2.1325335158420224, - "grad_norm": 0.0029092037584632635, - "learning_rate": 0.00019999775935899906, - "loss": 46.0, - "step": 13242 - }, - { - "epoch": 2.13269455292081, - "grad_norm": 0.003711071563884616, - "learning_rate": 0.0001999977590203175, - "loss": 46.0, - "step": 13243 - }, - { - "epoch": 2.1328555899995973, - "grad_norm": 0.004583151079714298, - "learning_rate": 0.00019999775868161034, - "loss": 46.0, - "step": 13244 - }, - { - "epoch": 2.133016627078385, - "grad_norm": 0.0033804697450250387, - "learning_rate": 0.00019999775834287755, - "loss": 46.0, - "step": 13245 - }, - { - "epoch": 2.1331776641571722, - "grad_norm": 0.0007176435901783407, - "learning_rate": 0.0001999977580041192, - "loss": 46.0, - "step": 13246 - }, - { - "epoch": 2.1333387012359597, - "grad_norm": 0.008193590678274632, - "learning_rate": 0.00019999775766533527, - "loss": 46.0, - "step": 13247 - }, - { - "epoch": 2.133499738314747, - "grad_norm": 0.0021499425638467073, - "learning_rate": 0.0001999977573265257, - "loss": 46.0, - "step": 13248 - }, - { - "epoch": 2.133660775393534, - "grad_norm": 0.001611798768863082, - "learning_rate": 0.00019999775698769057, - "loss": 46.0, - "step": 13249 - }, - { - "epoch": 2.1338218124723216, - "grad_norm": 0.003761806059628725, - "learning_rate": 0.00019999775664882984, - "loss": 46.0, - "step": 13250 - }, - { - "epoch": 2.133982849551109, - "grad_norm": 0.0024567090440541506, - "learning_rate": 0.0001999977563099435, - "loss": 46.0, - "step": 13251 - }, - { - "epoch": 2.1341438866298965, - "grad_norm": 0.006173844914883375, - "learning_rate": 0.00019999775597103159, - "loss": 46.0, - "step": 13252 - }, - { - "epoch": 2.134304923708684, - "grad_norm": 0.0030623492784798145, - "learning_rate": 0.00019999775563209408, - "loss": 46.0, - "step": 13253 - }, - { - "epoch": 2.1344659607874714, - "grad_norm": 0.0032281617168337107, - "learning_rate": 0.00019999775529313095, - "loss": 46.0, - "step": 13254 - }, - { - "epoch": 2.134626997866259, - "grad_norm": 0.00297993840649724, - "learning_rate": 0.00019999775495414227, - "loss": 46.0, - "step": 13255 - }, - { - "epoch": 2.1347880349450463, - "grad_norm": 0.0069718980230391026, - "learning_rate": 0.00019999775461512794, - "loss": 46.0, - "step": 13256 - }, - { - "epoch": 2.1349490720238333, - "grad_norm": 0.0014099756954237819, - "learning_rate": 0.00019999775427608806, - "loss": 46.0, - "step": 13257 - }, - { - "epoch": 2.135110109102621, - "grad_norm": 0.0018751703901216388, - "learning_rate": 0.00019999775393702256, - "loss": 46.0, - "step": 13258 - }, - { - "epoch": 2.1352711461814082, - "grad_norm": 0.008302632719278336, - "learning_rate": 0.00019999775359793147, - "loss": 46.0, - "step": 13259 - }, - { - "epoch": 2.1354321832601957, - "grad_norm": 0.000918448029551655, - "learning_rate": 0.0001999977532588148, - "loss": 46.0, - "step": 13260 - }, - { - "epoch": 2.135593220338983, - "grad_norm": 0.0009239473729394376, - "learning_rate": 0.00019999775291967253, - "loss": 46.0, - "step": 13261 - }, - { - "epoch": 2.1357542574177706, - "grad_norm": 0.012068535201251507, - "learning_rate": 0.00019999775258050466, - "loss": 46.0, - "step": 13262 - }, - { - "epoch": 2.135915294496558, - "grad_norm": 0.003447233932092786, - "learning_rate": 0.0001999977522413112, - "loss": 46.0, - "step": 13263 - }, - { - "epoch": 2.136076331575345, - "grad_norm": 0.001390350516885519, - "learning_rate": 0.00019999775190209215, - "loss": 46.0, - "step": 13264 - }, - { - "epoch": 2.1362373686541325, - "grad_norm": 0.009289112873375416, - "learning_rate": 0.0001999977515628475, - "loss": 46.0, - "step": 13265 - }, - { - "epoch": 2.13639840573292, - "grad_norm": 0.0019515432650223374, - "learning_rate": 0.00019999775122357726, - "loss": 46.0, - "step": 13266 - }, - { - "epoch": 2.1365594428117074, - "grad_norm": 0.005900282878428698, - "learning_rate": 0.00019999775088428142, - "loss": 46.0, - "step": 13267 - }, - { - "epoch": 2.136720479890495, - "grad_norm": 0.0018689382122829556, - "learning_rate": 0.00019999775054496, - "loss": 46.0, - "step": 13268 - }, - { - "epoch": 2.1368815169692823, - "grad_norm": 0.0006710558081977069, - "learning_rate": 0.00019999775020561295, - "loss": 46.0, - "step": 13269 - }, - { - "epoch": 2.1370425540480698, - "grad_norm": 0.007788967806845903, - "learning_rate": 0.00019999774986624033, - "loss": 46.0, - "step": 13270 - }, - { - "epoch": 2.1372035911268568, - "grad_norm": 0.0011311451671645045, - "learning_rate": 0.0001999977495268421, - "loss": 46.0, - "step": 13271 - }, - { - "epoch": 2.137364628205644, - "grad_norm": 0.003137932624667883, - "learning_rate": 0.0001999977491874183, - "loss": 46.0, - "step": 13272 - }, - { - "epoch": 2.1375256652844317, - "grad_norm": 0.004380833823233843, - "learning_rate": 0.00019999774884796892, - "loss": 46.0, - "step": 13273 - }, - { - "epoch": 2.137686702363219, - "grad_norm": 0.008016890846192837, - "learning_rate": 0.00019999774850849392, - "loss": 46.0, - "step": 13274 - }, - { - "epoch": 2.1378477394420066, - "grad_norm": 0.0038034694734960794, - "learning_rate": 0.00019999774816899333, - "loss": 46.0, - "step": 13275 - }, - { - "epoch": 2.138008776520794, - "grad_norm": 0.003832681104540825, - "learning_rate": 0.00019999774782946715, - "loss": 46.0, - "step": 13276 - }, - { - "epoch": 2.1381698135995815, - "grad_norm": 0.0004931480507366359, - "learning_rate": 0.00019999774748991533, - "loss": 46.0, - "step": 13277 - }, - { - "epoch": 2.1383308506783685, - "grad_norm": 0.0018774536438286304, - "learning_rate": 0.00019999774715033798, - "loss": 46.0, - "step": 13278 - }, - { - "epoch": 2.138491887757156, - "grad_norm": 0.007036134134978056, - "learning_rate": 0.000199997746810735, - "loss": 46.0, - "step": 13279 - }, - { - "epoch": 2.1386529248359434, - "grad_norm": 0.0035310224629938602, - "learning_rate": 0.00019999774647110643, - "loss": 46.0, - "step": 13280 - }, - { - "epoch": 2.138813961914731, - "grad_norm": 0.005686099175363779, - "learning_rate": 0.00019999774613145227, - "loss": 46.0, - "step": 13281 - }, - { - "epoch": 2.1389749989935183, - "grad_norm": 0.01061179954558611, - "learning_rate": 0.00019999774579177254, - "loss": 46.0, - "step": 13282 - }, - { - "epoch": 2.1391360360723057, - "grad_norm": 0.005625057499855757, - "learning_rate": 0.00019999774545206717, - "loss": 46.0, - "step": 13283 - }, - { - "epoch": 2.139297073151093, - "grad_norm": 0.005064083728939295, - "learning_rate": 0.00019999774511233624, - "loss": 46.0, - "step": 13284 - }, - { - "epoch": 2.1394581102298806, - "grad_norm": 0.0027127470821142197, - "learning_rate": 0.0001999977447725797, - "loss": 46.0, - "step": 13285 - }, - { - "epoch": 2.1396191473086676, - "grad_norm": 0.0013896109303459525, - "learning_rate": 0.00019999774443279757, - "loss": 46.0, - "step": 13286 - }, - { - "epoch": 2.139780184387455, - "grad_norm": 0.001895049586892128, - "learning_rate": 0.00019999774409298985, - "loss": 46.0, - "step": 13287 - }, - { - "epoch": 2.1399412214662425, - "grad_norm": 0.0008827557321637869, - "learning_rate": 0.00019999774375315652, - "loss": 46.0, - "step": 13288 - }, - { - "epoch": 2.14010225854503, - "grad_norm": 0.0012584917712956667, - "learning_rate": 0.00019999774341329763, - "loss": 46.0, - "step": 13289 - }, - { - "epoch": 2.1402632956238175, - "grad_norm": 0.0017334287986159325, - "learning_rate": 0.0001999977430734131, - "loss": 46.0, - "step": 13290 - }, - { - "epoch": 2.140424332702605, - "grad_norm": 0.009314744733273983, - "learning_rate": 0.000199997742733503, - "loss": 46.0, - "step": 13291 - }, - { - "epoch": 2.1405853697813924, - "grad_norm": 0.0022184152621775866, - "learning_rate": 0.00019999774239356732, - "loss": 46.0, - "step": 13292 - }, - { - "epoch": 2.1407464068601794, - "grad_norm": 0.011970209889113903, - "learning_rate": 0.00019999774205360602, - "loss": 46.0, - "step": 13293 - }, - { - "epoch": 2.140907443938967, - "grad_norm": 0.0012183836661279202, - "learning_rate": 0.00019999774171361914, - "loss": 46.0, - "step": 13294 - }, - { - "epoch": 2.1410684810177543, - "grad_norm": 0.0015864853048697114, - "learning_rate": 0.00019999774137360667, - "loss": 46.0, - "step": 13295 - }, - { - "epoch": 2.1412295180965417, - "grad_norm": 0.0019399285083636642, - "learning_rate": 0.0001999977410335686, - "loss": 46.0, - "step": 13296 - }, - { - "epoch": 2.141390555175329, - "grad_norm": 0.0019598123617470264, - "learning_rate": 0.00019999774069350492, - "loss": 46.0, - "step": 13297 - }, - { - "epoch": 2.1415515922541166, - "grad_norm": 0.0034518218599259853, - "learning_rate": 0.00019999774035341566, - "loss": 46.0, - "step": 13298 - }, - { - "epoch": 2.141712629332904, - "grad_norm": 0.005137486383318901, - "learning_rate": 0.00019999774001330082, - "loss": 46.0, - "step": 13299 - }, - { - "epoch": 2.1418736664116915, - "grad_norm": 0.0024593959096819162, - "learning_rate": 0.00019999773967316036, - "loss": 46.0, - "step": 13300 - }, - { - "epoch": 2.1420347034904785, - "grad_norm": 0.002304528374224901, - "learning_rate": 0.00019999773933299434, - "loss": 46.0, - "step": 13301 - }, - { - "epoch": 2.142195740569266, - "grad_norm": 0.0008896937943063676, - "learning_rate": 0.00019999773899280268, - "loss": 46.0, - "step": 13302 - }, - { - "epoch": 2.1423567776480534, - "grad_norm": 0.0025536175817251205, - "learning_rate": 0.00019999773865258543, - "loss": 46.0, - "step": 13303 - }, - { - "epoch": 2.142517814726841, - "grad_norm": 0.0036763439420610666, - "learning_rate": 0.00019999773831234262, - "loss": 46.0, - "step": 13304 - }, - { - "epoch": 2.1426788518056283, - "grad_norm": 0.007506121415644884, - "learning_rate": 0.0001999977379720742, - "loss": 46.0, - "step": 13305 - }, - { - "epoch": 2.142839888884416, - "grad_norm": 0.0028328776825219393, - "learning_rate": 0.0001999977376317802, - "loss": 46.0, - "step": 13306 - }, - { - "epoch": 2.143000925963203, - "grad_norm": 0.0006790681509301066, - "learning_rate": 0.0001999977372914606, - "loss": 46.0, - "step": 13307 - }, - { - "epoch": 2.1431619630419902, - "grad_norm": 0.0014149787602946162, - "learning_rate": 0.00019999773695111536, - "loss": 46.0, - "step": 13308 - }, - { - "epoch": 2.1433230001207777, - "grad_norm": 0.0022059523034840822, - "learning_rate": 0.00019999773661074456, - "loss": 46.0, - "step": 13309 - }, - { - "epoch": 2.143484037199565, - "grad_norm": 0.001987648429349065, - "learning_rate": 0.0001999977362703482, - "loss": 46.0, - "step": 13310 - }, - { - "epoch": 2.1436450742783526, - "grad_norm": 0.00417117727920413, - "learning_rate": 0.0001999977359299262, - "loss": 46.0, - "step": 13311 - }, - { - "epoch": 2.14380611135714, - "grad_norm": 0.0023212896194308996, - "learning_rate": 0.00019999773558947861, - "loss": 46.0, - "step": 13312 - }, - { - "epoch": 2.1439671484359275, - "grad_norm": 0.0011835447512567043, - "learning_rate": 0.00019999773524900544, - "loss": 46.0, - "step": 13313 - }, - { - "epoch": 2.144128185514715, - "grad_norm": 0.001484884531237185, - "learning_rate": 0.00019999773490850665, - "loss": 46.0, - "step": 13314 - }, - { - "epoch": 2.144289222593502, - "grad_norm": 0.0009144832729361951, - "learning_rate": 0.0001999977345679823, - "loss": 46.0, - "step": 13315 - }, - { - "epoch": 2.1444502596722894, - "grad_norm": 0.002882937900722027, - "learning_rate": 0.00019999773422743234, - "loss": 46.0, - "step": 13316 - }, - { - "epoch": 2.144611296751077, - "grad_norm": 0.005968336947262287, - "learning_rate": 0.0001999977338868568, - "loss": 46.0, - "step": 13317 - }, - { - "epoch": 2.1447723338298643, - "grad_norm": 0.0036911708302795887, - "learning_rate": 0.00019999773354625563, - "loss": 46.0, - "step": 13318 - }, - { - "epoch": 2.1449333709086518, - "grad_norm": 0.0036230271216481924, - "learning_rate": 0.0001999977332056289, - "loss": 46.0, - "step": 13319 - }, - { - "epoch": 2.1450944079874392, - "grad_norm": 0.006205302197486162, - "learning_rate": 0.00019999773286497657, - "loss": 46.0, - "step": 13320 - }, - { - "epoch": 2.1452554450662267, - "grad_norm": 0.0023907211143523455, - "learning_rate": 0.00019999773252429864, - "loss": 46.0, - "step": 13321 - }, - { - "epoch": 2.1454164821450137, - "grad_norm": 0.0029476932249963284, - "learning_rate": 0.00019999773218359513, - "loss": 46.0, - "step": 13322 - }, - { - "epoch": 2.145577519223801, - "grad_norm": 0.006936867721378803, - "learning_rate": 0.00019999773184286597, - "loss": 46.0, - "step": 13323 - }, - { - "epoch": 2.1457385563025886, - "grad_norm": 0.008803711272776127, - "learning_rate": 0.00019999773150211126, - "loss": 46.0, - "step": 13324 - }, - { - "epoch": 2.145899593381376, - "grad_norm": 0.0010045906528830528, - "learning_rate": 0.000199997731161331, - "loss": 46.0, - "step": 13325 - }, - { - "epoch": 2.1460606304601635, - "grad_norm": 0.0005345331155695021, - "learning_rate": 0.00019999773082052507, - "loss": 46.0, - "step": 13326 - }, - { - "epoch": 2.146221667538951, - "grad_norm": 0.002014959929510951, - "learning_rate": 0.00019999773047969357, - "loss": 46.0, - "step": 13327 - }, - { - "epoch": 2.1463827046177384, - "grad_norm": 0.0044423057697713375, - "learning_rate": 0.00019999773013883648, - "loss": 46.0, - "step": 13328 - }, - { - "epoch": 2.146543741696526, - "grad_norm": 0.003911557141691446, - "learning_rate": 0.0001999977297979538, - "loss": 46.0, - "step": 13329 - }, - { - "epoch": 2.146704778775313, - "grad_norm": 0.0013346080668270588, - "learning_rate": 0.00019999772945704554, - "loss": 46.0, - "step": 13330 - }, - { - "epoch": 2.1468658158541003, - "grad_norm": 0.003525221487507224, - "learning_rate": 0.00019999772911611166, - "loss": 46.0, - "step": 13331 - }, - { - "epoch": 2.1470268529328878, - "grad_norm": 0.00832782220095396, - "learning_rate": 0.0001999977287751522, - "loss": 46.0, - "step": 13332 - }, - { - "epoch": 2.147187890011675, - "grad_norm": 0.0008926745504140854, - "learning_rate": 0.00019999772843416714, - "loss": 46.0, - "step": 13333 - }, - { - "epoch": 2.1473489270904627, - "grad_norm": 0.002892832038924098, - "learning_rate": 0.00019999772809315647, - "loss": 46.0, - "step": 13334 - }, - { - "epoch": 2.14750996416925, - "grad_norm": 0.003158863401040435, - "learning_rate": 0.00019999772775212022, - "loss": 46.0, - "step": 13335 - }, - { - "epoch": 2.1476710012480376, - "grad_norm": 0.0020852971356362104, - "learning_rate": 0.00019999772741105838, - "loss": 46.0, - "step": 13336 - }, - { - "epoch": 2.1478320383268246, - "grad_norm": 0.0033024419099092484, - "learning_rate": 0.00019999772706997092, - "loss": 46.0, - "step": 13337 - }, - { - "epoch": 2.147993075405612, - "grad_norm": 0.005663240794092417, - "learning_rate": 0.00019999772672885793, - "loss": 46.0, - "step": 13338 - }, - { - "epoch": 2.1481541124843995, - "grad_norm": 0.0026012801099568605, - "learning_rate": 0.0001999977263877193, - "loss": 46.0, - "step": 13339 - }, - { - "epoch": 2.148315149563187, - "grad_norm": 0.002319332677870989, - "learning_rate": 0.00019999772604655506, - "loss": 46.0, - "step": 13340 - }, - { - "epoch": 2.1484761866419744, - "grad_norm": 0.005728776566684246, - "learning_rate": 0.00019999772570536526, - "loss": 46.0, - "step": 13341 - }, - { - "epoch": 2.148637223720762, - "grad_norm": 0.0020345617085695267, - "learning_rate": 0.00019999772536414984, - "loss": 46.0, - "step": 13342 - }, - { - "epoch": 2.1487982607995493, - "grad_norm": 0.0026169035118073225, - "learning_rate": 0.00019999772502290883, - "loss": 46.0, - "step": 13343 - }, - { - "epoch": 2.1489592978783363, - "grad_norm": 0.003275137161836028, - "learning_rate": 0.00019999772468164224, - "loss": 46.0, - "step": 13344 - }, - { - "epoch": 2.1491203349571237, - "grad_norm": 0.004151905421167612, - "learning_rate": 0.00019999772434035003, - "loss": 46.0, - "step": 13345 - }, - { - "epoch": 2.149281372035911, - "grad_norm": 0.006651798263192177, - "learning_rate": 0.00019999772399903226, - "loss": 46.0, - "step": 13346 - }, - { - "epoch": 2.1494424091146986, - "grad_norm": 0.003913423046469688, - "learning_rate": 0.00019999772365768888, - "loss": 46.0, - "step": 13347 - }, - { - "epoch": 2.149603446193486, - "grad_norm": 0.0009208098636008799, - "learning_rate": 0.0001999977233163199, - "loss": 46.0, - "step": 13348 - }, - { - "epoch": 2.1497644832722735, - "grad_norm": 0.003083003219217062, - "learning_rate": 0.00019999772297492533, - "loss": 46.0, - "step": 13349 - }, - { - "epoch": 2.149925520351061, - "grad_norm": 0.0050685168243944645, - "learning_rate": 0.00019999772263350516, - "loss": 46.0, - "step": 13350 - }, - { - "epoch": 2.150086557429848, - "grad_norm": 0.014121463522315025, - "learning_rate": 0.0001999977222920594, - "loss": 46.0, - "step": 13351 - }, - { - "epoch": 2.1502475945086355, - "grad_norm": 0.0011735630687326193, - "learning_rate": 0.00019999772195058806, - "loss": 46.0, - "step": 13352 - }, - { - "epoch": 2.150408631587423, - "grad_norm": 0.005555521231144667, - "learning_rate": 0.0001999977216090911, - "loss": 46.0, - "step": 13353 - }, - { - "epoch": 2.1505696686662104, - "grad_norm": 0.001185104250907898, - "learning_rate": 0.00019999772126756858, - "loss": 46.0, - "step": 13354 - }, - { - "epoch": 2.150730705744998, - "grad_norm": 0.0030374207999557257, - "learning_rate": 0.00019999772092602042, - "loss": 46.0, - "step": 13355 - }, - { - "epoch": 2.1508917428237853, - "grad_norm": 0.002993746427819133, - "learning_rate": 0.0001999977205844467, - "loss": 46.0, - "step": 13356 - }, - { - "epoch": 2.1510527799025727, - "grad_norm": 0.0034053742419928312, - "learning_rate": 0.00019999772024284737, - "loss": 46.0, - "step": 13357 - }, - { - "epoch": 2.15121381698136, - "grad_norm": 0.003543669590726495, - "learning_rate": 0.00019999771990122245, - "loss": 46.0, - "step": 13358 - }, - { - "epoch": 2.151374854060147, - "grad_norm": 0.0017898465739563107, - "learning_rate": 0.00019999771955957196, - "loss": 46.0, - "step": 13359 - }, - { - "epoch": 2.1515358911389346, - "grad_norm": 0.0029814443551003933, - "learning_rate": 0.00019999771921789584, - "loss": 46.0, - "step": 13360 - }, - { - "epoch": 2.151696928217722, - "grad_norm": 0.0021095196716487408, - "learning_rate": 0.00019999771887619413, - "loss": 46.0, - "step": 13361 - }, - { - "epoch": 2.1518579652965095, - "grad_norm": 0.0015045434702187777, - "learning_rate": 0.00019999771853446686, - "loss": 46.0, - "step": 13362 - }, - { - "epoch": 2.152019002375297, - "grad_norm": 0.00288623059168458, - "learning_rate": 0.00019999771819271395, - "loss": 46.0, - "step": 13363 - }, - { - "epoch": 2.1521800394540844, - "grad_norm": 0.0014310912229120731, - "learning_rate": 0.00019999771785093548, - "loss": 46.0, - "step": 13364 - }, - { - "epoch": 2.152341076532872, - "grad_norm": 0.0014442679239436984, - "learning_rate": 0.0001999977175091314, - "loss": 46.0, - "step": 13365 - }, - { - "epoch": 2.152502113611659, - "grad_norm": 0.004888609517365694, - "learning_rate": 0.00019999771716730172, - "loss": 46.0, - "step": 13366 - }, - { - "epoch": 2.1526631506904463, - "grad_norm": 0.005491163115948439, - "learning_rate": 0.00019999771682544646, - "loss": 46.0, - "step": 13367 - }, - { - "epoch": 2.152824187769234, - "grad_norm": 0.0019303741864860058, - "learning_rate": 0.00019999771648356558, - "loss": 46.0, - "step": 13368 - }, - { - "epoch": 2.1529852248480212, - "grad_norm": 0.015973171219229698, - "learning_rate": 0.00019999771614165912, - "loss": 46.0, - "step": 13369 - }, - { - "epoch": 2.1531462619268087, - "grad_norm": 0.003926645498722792, - "learning_rate": 0.00019999771579972707, - "loss": 46.0, - "step": 13370 - }, - { - "epoch": 2.153307299005596, - "grad_norm": 0.0033199922181665897, - "learning_rate": 0.00019999771545776943, - "loss": 46.0, - "step": 13371 - }, - { - "epoch": 2.1534683360843836, - "grad_norm": 0.013160881586372852, - "learning_rate": 0.0001999977151157862, - "loss": 46.0, - "step": 13372 - }, - { - "epoch": 2.153629373163171, - "grad_norm": 0.0035904760006815195, - "learning_rate": 0.00019999771477377735, - "loss": 46.0, - "step": 13373 - }, - { - "epoch": 2.153790410241958, - "grad_norm": 0.007611855398863554, - "learning_rate": 0.00019999771443174295, - "loss": 46.0, - "step": 13374 - }, - { - "epoch": 2.1539514473207455, - "grad_norm": 0.004493820481002331, - "learning_rate": 0.00019999771408968294, - "loss": 46.0, - "step": 13375 - }, - { - "epoch": 2.154112484399533, - "grad_norm": 0.0066480860114097595, - "learning_rate": 0.0001999977137475973, - "loss": 46.0, - "step": 13376 - }, - { - "epoch": 2.1542735214783204, - "grad_norm": 0.0009913266403600574, - "learning_rate": 0.0001999977134054861, - "loss": 46.0, - "step": 13377 - }, - { - "epoch": 2.154434558557108, - "grad_norm": 0.0014127322938293219, - "learning_rate": 0.0001999977130633493, - "loss": 46.0, - "step": 13378 - }, - { - "epoch": 2.1545955956358953, - "grad_norm": 0.001301866490393877, - "learning_rate": 0.0001999977127211869, - "loss": 46.0, - "step": 13379 - }, - { - "epoch": 2.1547566327146828, - "grad_norm": 0.008188326843082905, - "learning_rate": 0.00019999771237899888, - "loss": 46.0, - "step": 13380 - }, - { - "epoch": 2.1549176697934698, - "grad_norm": 0.0006416022079065442, - "learning_rate": 0.0001999977120367853, - "loss": 46.0, - "step": 13381 - }, - { - "epoch": 2.155078706872257, - "grad_norm": 0.000580929743591696, - "learning_rate": 0.00019999771169454614, - "loss": 46.0, - "step": 13382 - }, - { - "epoch": 2.1552397439510447, - "grad_norm": 0.0005832845345139503, - "learning_rate": 0.00019999771135228135, - "loss": 46.0, - "step": 13383 - }, - { - "epoch": 2.155400781029832, - "grad_norm": 0.0024417205713689327, - "learning_rate": 0.000199997711009991, - "loss": 46.0, - "step": 13384 - }, - { - "epoch": 2.1555618181086196, - "grad_norm": 0.008255396038293839, - "learning_rate": 0.00019999771066767503, - "loss": 46.0, - "step": 13385 - }, - { - "epoch": 2.155722855187407, - "grad_norm": 0.0010429261019453406, - "learning_rate": 0.00019999771032533348, - "loss": 46.0, - "step": 13386 - }, - { - "epoch": 2.1558838922661945, - "grad_norm": 0.005211732350289822, - "learning_rate": 0.00019999770998296631, - "loss": 46.0, - "step": 13387 - }, - { - "epoch": 2.1560449293449815, - "grad_norm": 0.0020214738324284554, - "learning_rate": 0.0001999977096405736, - "loss": 46.0, - "step": 13388 - }, - { - "epoch": 2.156205966423769, - "grad_norm": 0.001470905146561563, - "learning_rate": 0.00019999770929815522, - "loss": 46.0, - "step": 13389 - }, - { - "epoch": 2.1563670035025564, - "grad_norm": 0.001472577452659607, - "learning_rate": 0.0001999977089557113, - "loss": 46.0, - "step": 13390 - }, - { - "epoch": 2.156528040581344, - "grad_norm": 0.005086067132651806, - "learning_rate": 0.00019999770861324175, - "loss": 46.0, - "step": 13391 - }, - { - "epoch": 2.1566890776601313, - "grad_norm": 0.0018468507332727313, - "learning_rate": 0.00019999770827074665, - "loss": 46.0, - "step": 13392 - }, - { - "epoch": 2.1568501147389187, - "grad_norm": 0.002536970656365156, - "learning_rate": 0.00019999770792822593, - "loss": 46.0, - "step": 13393 - }, - { - "epoch": 2.157011151817706, - "grad_norm": 0.006078988313674927, - "learning_rate": 0.0001999977075856796, - "loss": 46.0, - "step": 13394 - }, - { - "epoch": 2.157172188896493, - "grad_norm": 0.0013384041376411915, - "learning_rate": 0.0001999977072431077, - "loss": 46.0, - "step": 13395 - }, - { - "epoch": 2.1573332259752807, - "grad_norm": 0.0009293656330555677, - "learning_rate": 0.0001999977069005102, - "loss": 46.0, - "step": 13396 - }, - { - "epoch": 2.157494263054068, - "grad_norm": 0.0025627962313592434, - "learning_rate": 0.0001999977065578871, - "loss": 46.0, - "step": 13397 - }, - { - "epoch": 2.1576553001328556, - "grad_norm": 0.005264953710138798, - "learning_rate": 0.0001999977062152384, - "loss": 46.0, - "step": 13398 - }, - { - "epoch": 2.157816337211643, - "grad_norm": 0.0017437953501939774, - "learning_rate": 0.0001999977058725641, - "loss": 46.0, - "step": 13399 - }, - { - "epoch": 2.1579773742904305, - "grad_norm": 0.0018247002735733986, - "learning_rate": 0.00019999770552986426, - "loss": 46.0, - "step": 13400 - }, - { - "epoch": 2.158138411369218, - "grad_norm": 0.0029213817324489355, - "learning_rate": 0.0001999977051871388, - "loss": 46.0, - "step": 13401 - }, - { - "epoch": 2.1582994484480054, - "grad_norm": 0.002560011576861143, - "learning_rate": 0.0001999977048443877, - "loss": 46.0, - "step": 13402 - }, - { - "epoch": 2.1584604855267924, - "grad_norm": 0.0017181907314807177, - "learning_rate": 0.00019999770450161104, - "loss": 46.0, - "step": 13403 - }, - { - "epoch": 2.15862152260558, - "grad_norm": 0.0032093869522213936, - "learning_rate": 0.0001999977041588088, - "loss": 46.0, - "step": 13404 - }, - { - "epoch": 2.1587825596843673, - "grad_norm": 0.0010084158275276423, - "learning_rate": 0.00019999770381598096, - "loss": 46.0, - "step": 13405 - }, - { - "epoch": 2.1589435967631547, - "grad_norm": 0.00312529387883842, - "learning_rate": 0.0001999977034731275, - "loss": 46.0, - "step": 13406 - }, - { - "epoch": 2.159104633841942, - "grad_norm": 0.0033588879741728306, - "learning_rate": 0.00019999770313024846, - "loss": 46.0, - "step": 13407 - }, - { - "epoch": 2.1592656709207296, - "grad_norm": 0.00045731355203315616, - "learning_rate": 0.00019999770278734383, - "loss": 46.0, - "step": 13408 - }, - { - "epoch": 2.159426707999517, - "grad_norm": 0.0008029004675336182, - "learning_rate": 0.0001999977024444136, - "loss": 46.0, - "step": 13409 - }, - { - "epoch": 2.159587745078304, - "grad_norm": 0.0038672753144055605, - "learning_rate": 0.00019999770210145777, - "loss": 46.0, - "step": 13410 - }, - { - "epoch": 2.1597487821570915, - "grad_norm": 0.0004022162174805999, - "learning_rate": 0.00019999770175847638, - "loss": 46.0, - "step": 13411 - }, - { - "epoch": 2.159909819235879, - "grad_norm": 0.0070533487014472485, - "learning_rate": 0.00019999770141546937, - "loss": 46.0, - "step": 13412 - }, - { - "epoch": 2.1600708563146664, - "grad_norm": 0.000979344709776342, - "learning_rate": 0.00019999770107243675, - "loss": 46.0, - "step": 13413 - }, - { - "epoch": 2.160231893393454, - "grad_norm": 0.0010132818715646863, - "learning_rate": 0.00019999770072937856, - "loss": 46.0, - "step": 13414 - }, - { - "epoch": 2.1603929304722413, - "grad_norm": 0.005230887793004513, - "learning_rate": 0.00019999770038629477, - "loss": 46.0, - "step": 13415 - }, - { - "epoch": 2.160553967551029, - "grad_norm": 0.0013651868794113398, - "learning_rate": 0.00019999770004318536, - "loss": 46.0, - "step": 13416 - }, - { - "epoch": 2.1607150046298162, - "grad_norm": 0.001582533004693687, - "learning_rate": 0.0001999976997000504, - "loss": 46.0, - "step": 13417 - }, - { - "epoch": 2.1608760417086033, - "grad_norm": 0.004795411601662636, - "learning_rate": 0.0001999976993568898, - "loss": 46.0, - "step": 13418 - }, - { - "epoch": 2.1610370787873907, - "grad_norm": 0.0022396952845156193, - "learning_rate": 0.00019999769901370365, - "loss": 46.0, - "step": 13419 - }, - { - "epoch": 2.161198115866178, - "grad_norm": 0.0018817392410710454, - "learning_rate": 0.0001999976986704919, - "loss": 46.0, - "step": 13420 - }, - { - "epoch": 2.1613591529449656, - "grad_norm": 0.0008986614411696792, - "learning_rate": 0.00019999769832725455, - "loss": 46.0, - "step": 13421 - }, - { - "epoch": 2.161520190023753, - "grad_norm": 0.0010536988265812397, - "learning_rate": 0.00019999769798399158, - "loss": 46.0, - "step": 13422 - }, - { - "epoch": 2.1616812271025405, - "grad_norm": 0.003245594911277294, - "learning_rate": 0.000199997697640703, - "loss": 46.0, - "step": 13423 - }, - { - "epoch": 2.1618422641813275, - "grad_norm": 0.0024706011172384024, - "learning_rate": 0.00019999769729738887, - "loss": 46.0, - "step": 13424 - }, - { - "epoch": 2.162003301260115, - "grad_norm": 0.0007533066673204303, - "learning_rate": 0.00019999769695404915, - "loss": 46.0, - "step": 13425 - }, - { - "epoch": 2.1621643383389024, - "grad_norm": 0.004144506994634867, - "learning_rate": 0.00019999769661068384, - "loss": 46.0, - "step": 13426 - }, - { - "epoch": 2.16232537541769, - "grad_norm": 0.00324800331145525, - "learning_rate": 0.0001999976962672929, - "loss": 46.0, - "step": 13427 - }, - { - "epoch": 2.1624864124964773, - "grad_norm": 0.013405922800302505, - "learning_rate": 0.00019999769592387638, - "loss": 46.0, - "step": 13428 - }, - { - "epoch": 2.162647449575265, - "grad_norm": 0.0038236540276557207, - "learning_rate": 0.00019999769558043425, - "loss": 46.0, - "step": 13429 - }, - { - "epoch": 2.1628084866540522, - "grad_norm": 0.006123604718595743, - "learning_rate": 0.00019999769523696656, - "loss": 46.0, - "step": 13430 - }, - { - "epoch": 2.1629695237328397, - "grad_norm": 0.0027882412541657686, - "learning_rate": 0.00019999769489347326, - "loss": 46.0, - "step": 13431 - }, - { - "epoch": 2.1631305608116267, - "grad_norm": 0.0015664227539673448, - "learning_rate": 0.00019999769454995438, - "loss": 46.0, - "step": 13432 - }, - { - "epoch": 2.163291597890414, - "grad_norm": 0.010436464101076126, - "learning_rate": 0.00019999769420640988, - "loss": 46.0, - "step": 13433 - }, - { - "epoch": 2.1634526349692016, - "grad_norm": 0.0018199981423094869, - "learning_rate": 0.0001999976938628398, - "loss": 46.0, - "step": 13434 - }, - { - "epoch": 2.163613672047989, - "grad_norm": 0.00535723427310586, - "learning_rate": 0.0001999976935192441, - "loss": 46.0, - "step": 13435 - }, - { - "epoch": 2.1637747091267765, - "grad_norm": 0.0023336452431976795, - "learning_rate": 0.00019999769317562285, - "loss": 46.0, - "step": 13436 - }, - { - "epoch": 2.163935746205564, - "grad_norm": 0.0016138700302690268, - "learning_rate": 0.00019999769283197597, - "loss": 46.0, - "step": 13437 - }, - { - "epoch": 2.1640967832843514, - "grad_norm": 0.0008041071705520153, - "learning_rate": 0.00019999769248830353, - "loss": 46.0, - "step": 13438 - }, - { - "epoch": 2.1642578203631384, - "grad_norm": 0.001124918577261269, - "learning_rate": 0.00019999769214460545, - "loss": 46.0, - "step": 13439 - }, - { - "epoch": 2.164418857441926, - "grad_norm": 0.003743311855942011, - "learning_rate": 0.00019999769180088181, - "loss": 46.0, - "step": 13440 - }, - { - "epoch": 2.1645798945207133, - "grad_norm": 0.0021203504875302315, - "learning_rate": 0.00019999769145713256, - "loss": 46.0, - "step": 13441 - }, - { - "epoch": 2.1647409315995008, - "grad_norm": 0.0012205262901261449, - "learning_rate": 0.00019999769111335772, - "loss": 46.0, - "step": 13442 - }, - { - "epoch": 2.164901968678288, - "grad_norm": 0.011249815113842487, - "learning_rate": 0.0001999976907695573, - "loss": 46.0, - "step": 13443 - }, - { - "epoch": 2.1650630057570757, - "grad_norm": 0.002064640400931239, - "learning_rate": 0.00019999769042573128, - "loss": 46.0, - "step": 13444 - }, - { - "epoch": 2.165224042835863, - "grad_norm": 0.004764914512634277, - "learning_rate": 0.00019999769008187965, - "loss": 46.0, - "step": 13445 - }, - { - "epoch": 2.1653850799146506, - "grad_norm": 0.0018297492060810328, - "learning_rate": 0.00019999768973800243, - "loss": 46.0, - "step": 13446 - }, - { - "epoch": 2.1655461169934376, - "grad_norm": 0.004330933094024658, - "learning_rate": 0.00019999768939409963, - "loss": 46.0, - "step": 13447 - }, - { - "epoch": 2.165707154072225, - "grad_norm": 0.002149335341528058, - "learning_rate": 0.0001999976890501712, - "loss": 46.0, - "step": 13448 - }, - { - "epoch": 2.1658681911510125, - "grad_norm": 0.0019518863409757614, - "learning_rate": 0.00019999768870621723, - "loss": 46.0, - "step": 13449 - }, - { - "epoch": 2.1660292282298, - "grad_norm": 0.001130544114857912, - "learning_rate": 0.00019999768836223764, - "loss": 46.0, - "step": 13450 - }, - { - "epoch": 2.1661902653085874, - "grad_norm": 0.0018277255585417151, - "learning_rate": 0.00019999768801823246, - "loss": 46.0, - "step": 13451 - }, - { - "epoch": 2.166351302387375, - "grad_norm": 0.006490216590464115, - "learning_rate": 0.00019999768767420167, - "loss": 46.0, - "step": 13452 - }, - { - "epoch": 2.1665123394661623, - "grad_norm": 0.0028237171936780214, - "learning_rate": 0.0001999976873301453, - "loss": 46.0, - "step": 13453 - }, - { - "epoch": 2.1666733765449493, - "grad_norm": 0.0005406445707194507, - "learning_rate": 0.00019999768698606332, - "loss": 46.0, - "step": 13454 - }, - { - "epoch": 2.1668344136237367, - "grad_norm": 0.00112696154974401, - "learning_rate": 0.00019999768664195577, - "loss": 46.0, - "step": 13455 - }, - { - "epoch": 2.166995450702524, - "grad_norm": 0.003206856083124876, - "learning_rate": 0.0001999976862978226, - "loss": 46.0, - "step": 13456 - }, - { - "epoch": 2.1671564877813116, - "grad_norm": 0.0022405830677598715, - "learning_rate": 0.00019999768595366387, - "loss": 46.0, - "step": 13457 - }, - { - "epoch": 2.167317524860099, - "grad_norm": 0.0033505712635815144, - "learning_rate": 0.0001999976856094795, - "loss": 46.0, - "step": 13458 - }, - { - "epoch": 2.1674785619388865, - "grad_norm": 0.0035899514332413673, - "learning_rate": 0.00019999768526526957, - "loss": 46.0, - "step": 13459 - }, - { - "epoch": 2.167639599017674, - "grad_norm": 0.00091147655621171, - "learning_rate": 0.00019999768492103402, - "loss": 46.0, - "step": 13460 - }, - { - "epoch": 2.167800636096461, - "grad_norm": 0.0022993178572505713, - "learning_rate": 0.00019999768457677292, - "loss": 46.0, - "step": 13461 - }, - { - "epoch": 2.1679616731752485, - "grad_norm": 0.001613577944226563, - "learning_rate": 0.00019999768423248617, - "loss": 46.0, - "step": 13462 - }, - { - "epoch": 2.168122710254036, - "grad_norm": 0.0008811029838398099, - "learning_rate": 0.00019999768388817386, - "loss": 46.0, - "step": 13463 - }, - { - "epoch": 2.1682837473328234, - "grad_norm": 0.0015586374793201685, - "learning_rate": 0.00019999768354383594, - "loss": 46.0, - "step": 13464 - }, - { - "epoch": 2.168444784411611, - "grad_norm": 0.003854429814964533, - "learning_rate": 0.00019999768319947246, - "loss": 46.0, - "step": 13465 - }, - { - "epoch": 2.1686058214903983, - "grad_norm": 0.00105179357342422, - "learning_rate": 0.00019999768285508334, - "loss": 46.0, - "step": 13466 - }, - { - "epoch": 2.1687668585691857, - "grad_norm": 0.002585239941254258, - "learning_rate": 0.00019999768251066863, - "loss": 46.0, - "step": 13467 - }, - { - "epoch": 2.1689278956479727, - "grad_norm": 0.0019705623853951693, - "learning_rate": 0.00019999768216622833, - "loss": 46.0, - "step": 13468 - }, - { - "epoch": 2.16908893272676, - "grad_norm": 0.0028651857282966375, - "learning_rate": 0.00019999768182176245, - "loss": 46.0, - "step": 13469 - }, - { - "epoch": 2.1692499698055476, - "grad_norm": 0.003413299098610878, - "learning_rate": 0.000199997681477271, - "loss": 46.0, - "step": 13470 - }, - { - "epoch": 2.169411006884335, - "grad_norm": 0.001498452853411436, - "learning_rate": 0.00019999768113275392, - "loss": 46.0, - "step": 13471 - }, - { - "epoch": 2.1695720439631225, - "grad_norm": 0.004488243255764246, - "learning_rate": 0.00019999768078821125, - "loss": 46.0, - "step": 13472 - }, - { - "epoch": 2.16973308104191, - "grad_norm": 0.005841054953634739, - "learning_rate": 0.000199997680443643, - "loss": 46.0, - "step": 13473 - }, - { - "epoch": 2.1698941181206974, - "grad_norm": 0.0042350334115326405, - "learning_rate": 0.00019999768009904917, - "loss": 46.0, - "step": 13474 - }, - { - "epoch": 2.170055155199485, - "grad_norm": 0.007741840090602636, - "learning_rate": 0.00019999767975442968, - "loss": 46.0, - "step": 13475 - }, - { - "epoch": 2.170216192278272, - "grad_norm": 0.008840867318212986, - "learning_rate": 0.00019999767940978463, - "loss": 46.0, - "step": 13476 - }, - { - "epoch": 2.1703772293570593, - "grad_norm": 0.00532876281067729, - "learning_rate": 0.000199997679065114, - "loss": 46.0, - "step": 13477 - }, - { - "epoch": 2.170538266435847, - "grad_norm": 0.004787585698068142, - "learning_rate": 0.00019999767872041778, - "loss": 46.0, - "step": 13478 - }, - { - "epoch": 2.1706993035146342, - "grad_norm": 0.004419929347932339, - "learning_rate": 0.00019999767837569597, - "loss": 46.0, - "step": 13479 - }, - { - "epoch": 2.1708603405934217, - "grad_norm": 0.003897394286468625, - "learning_rate": 0.00019999767803094854, - "loss": 46.0, - "step": 13480 - }, - { - "epoch": 2.171021377672209, - "grad_norm": 0.0010128198191523552, - "learning_rate": 0.00019999767768617553, - "loss": 46.0, - "step": 13481 - }, - { - "epoch": 2.1711824147509966, - "grad_norm": 0.0014552520588040352, - "learning_rate": 0.0001999976773413769, - "loss": 46.0, - "step": 13482 - }, - { - "epoch": 2.1713434518297836, - "grad_norm": 0.0016089060809463263, - "learning_rate": 0.00019999767699655272, - "loss": 46.0, - "step": 13483 - }, - { - "epoch": 2.171504488908571, - "grad_norm": 0.005142230540513992, - "learning_rate": 0.0001999976766517029, - "loss": 46.0, - "step": 13484 - }, - { - "epoch": 2.1716655259873585, - "grad_norm": 0.005287645384669304, - "learning_rate": 0.0001999976763068275, - "loss": 46.0, - "step": 13485 - }, - { - "epoch": 2.171826563066146, - "grad_norm": 0.006501209922134876, - "learning_rate": 0.00019999767596192653, - "loss": 46.0, - "step": 13486 - }, - { - "epoch": 2.1719876001449334, - "grad_norm": 0.006598499603569508, - "learning_rate": 0.00019999767561699994, - "loss": 46.0, - "step": 13487 - }, - { - "epoch": 2.172148637223721, - "grad_norm": 0.0012409052578732371, - "learning_rate": 0.00019999767527204777, - "loss": 46.0, - "step": 13488 - }, - { - "epoch": 2.1723096743025083, - "grad_norm": 0.0018534971168264747, - "learning_rate": 0.00019999767492707, - "loss": 46.0, - "step": 13489 - }, - { - "epoch": 2.1724707113812958, - "grad_norm": 0.002854087157174945, - "learning_rate": 0.00019999767458206663, - "loss": 46.0, - "step": 13490 - }, - { - "epoch": 2.172631748460083, - "grad_norm": 0.008386479690670967, - "learning_rate": 0.0001999976742370377, - "loss": 46.0, - "step": 13491 - }, - { - "epoch": 2.1727927855388702, - "grad_norm": 0.0009295197669416666, - "learning_rate": 0.00019999767389198314, - "loss": 46.0, - "step": 13492 - }, - { - "epoch": 2.1729538226176577, - "grad_norm": 0.0010711585637181997, - "learning_rate": 0.000199997673546903, - "loss": 46.0, - "step": 13493 - }, - { - "epoch": 2.173114859696445, - "grad_norm": 0.007884289138019085, - "learning_rate": 0.00019999767320179722, - "loss": 46.0, - "step": 13494 - }, - { - "epoch": 2.1732758967752326, - "grad_norm": 0.0021540382876992226, - "learning_rate": 0.0001999976728566659, - "loss": 46.0, - "step": 13495 - }, - { - "epoch": 2.17343693385402, - "grad_norm": 0.004816845059394836, - "learning_rate": 0.00019999767251150898, - "loss": 46.0, - "step": 13496 - }, - { - "epoch": 2.173597970932807, - "grad_norm": 0.002338269492611289, - "learning_rate": 0.00019999767216632647, - "loss": 46.0, - "step": 13497 - }, - { - "epoch": 2.1737590080115945, - "grad_norm": 0.0019003247143700719, - "learning_rate": 0.00019999767182111834, - "loss": 46.0, - "step": 13498 - }, - { - "epoch": 2.173920045090382, - "grad_norm": 0.005153437610715628, - "learning_rate": 0.00019999767147588462, - "loss": 46.0, - "step": 13499 - }, - { - "epoch": 2.1740810821691694, - "grad_norm": 0.011673007160425186, - "learning_rate": 0.00019999767113062532, - "loss": 46.0, - "step": 13500 - }, - { - "epoch": 2.174242119247957, - "grad_norm": 0.0049699158407747746, - "learning_rate": 0.00019999767078534043, - "loss": 46.0, - "step": 13501 - }, - { - "epoch": 2.1744031563267443, - "grad_norm": 0.0030734918545931578, - "learning_rate": 0.00019999767044002993, - "loss": 46.0, - "step": 13502 - }, - { - "epoch": 2.1745641934055318, - "grad_norm": 0.0014631218509748578, - "learning_rate": 0.00019999767009469386, - "loss": 46.0, - "step": 13503 - }, - { - "epoch": 2.174725230484319, - "grad_norm": 0.0022403006441891193, - "learning_rate": 0.00019999766974933216, - "loss": 46.0, - "step": 13504 - }, - { - "epoch": 2.174886267563106, - "grad_norm": 0.0016182791441679, - "learning_rate": 0.0001999976694039449, - "loss": 46.0, - "step": 13505 - }, - { - "epoch": 2.1750473046418937, - "grad_norm": 0.002961906138807535, - "learning_rate": 0.000199997669058532, - "loss": 46.0, - "step": 13506 - }, - { - "epoch": 2.175208341720681, - "grad_norm": 0.0019697381649166346, - "learning_rate": 0.00019999766871309357, - "loss": 46.0, - "step": 13507 - }, - { - "epoch": 2.1753693787994686, - "grad_norm": 0.0013614243362098932, - "learning_rate": 0.0001999976683676295, - "loss": 46.0, - "step": 13508 - }, - { - "epoch": 2.175530415878256, - "grad_norm": 0.006952998694032431, - "learning_rate": 0.00019999766802213982, - "loss": 46.0, - "step": 13509 - }, - { - "epoch": 2.1756914529570435, - "grad_norm": 0.002162138931453228, - "learning_rate": 0.0001999976676766246, - "loss": 46.0, - "step": 13510 - }, - { - "epoch": 2.175852490035831, - "grad_norm": 0.0007870601839385927, - "learning_rate": 0.00019999766733108375, - "loss": 46.0, - "step": 13511 - }, - { - "epoch": 2.176013527114618, - "grad_norm": 0.001855554641224444, - "learning_rate": 0.0001999976669855173, - "loss": 46.0, - "step": 13512 - }, - { - "epoch": 2.1761745641934054, - "grad_norm": 0.004577834624797106, - "learning_rate": 0.00019999766663992527, - "loss": 46.0, - "step": 13513 - }, - { - "epoch": 2.176335601272193, - "grad_norm": 0.0012973316479474306, - "learning_rate": 0.00019999766629430764, - "loss": 46.0, - "step": 13514 - }, - { - "epoch": 2.1764966383509803, - "grad_norm": 0.002179705537855625, - "learning_rate": 0.00019999766594866442, - "loss": 46.0, - "step": 13515 - }, - { - "epoch": 2.1766576754297677, - "grad_norm": 0.0010821259347721934, - "learning_rate": 0.0001999976656029956, - "loss": 46.0, - "step": 13516 - }, - { - "epoch": 2.176818712508555, - "grad_norm": 0.002393249422311783, - "learning_rate": 0.0001999976652573012, - "loss": 46.0, - "step": 13517 - }, - { - "epoch": 2.1769797495873426, - "grad_norm": 0.005909727420657873, - "learning_rate": 0.00019999766491158121, - "loss": 46.0, - "step": 13518 - }, - { - "epoch": 2.17714078666613, - "grad_norm": 0.004526886157691479, - "learning_rate": 0.0001999976645658356, - "loss": 46.0, - "step": 13519 - }, - { - "epoch": 2.177301823744917, - "grad_norm": 0.007912407629191875, - "learning_rate": 0.0001999976642200644, - "loss": 46.0, - "step": 13520 - }, - { - "epoch": 2.1774628608237045, - "grad_norm": 0.006462841760367155, - "learning_rate": 0.00019999766387426764, - "loss": 46.0, - "step": 13521 - }, - { - "epoch": 2.177623897902492, - "grad_norm": 0.0014400375075638294, - "learning_rate": 0.00019999766352844526, - "loss": 46.0, - "step": 13522 - }, - { - "epoch": 2.1777849349812795, - "grad_norm": 0.0027416078373789787, - "learning_rate": 0.00019999766318259726, - "loss": 46.0, - "step": 13523 - }, - { - "epoch": 2.177945972060067, - "grad_norm": 0.0034235159400850534, - "learning_rate": 0.0001999976628367237, - "loss": 46.0, - "step": 13524 - }, - { - "epoch": 2.1781070091388544, - "grad_norm": 0.0032882671803236008, - "learning_rate": 0.00019999766249082453, - "loss": 46.0, - "step": 13525 - }, - { - "epoch": 2.178268046217642, - "grad_norm": 0.006466683000326157, - "learning_rate": 0.0001999976621448998, - "loss": 46.0, - "step": 13526 - }, - { - "epoch": 2.178429083296429, - "grad_norm": 0.0036410633474588394, - "learning_rate": 0.00019999766179894943, - "loss": 46.0, - "step": 13527 - }, - { - "epoch": 2.1785901203752163, - "grad_norm": 0.002103250240907073, - "learning_rate": 0.0001999976614529735, - "loss": 46.0, - "step": 13528 - }, - { - "epoch": 2.1787511574540037, - "grad_norm": 0.001565914019010961, - "learning_rate": 0.00019999766110697194, - "loss": 46.0, - "step": 13529 - }, - { - "epoch": 2.178912194532791, - "grad_norm": 0.0023175831884145737, - "learning_rate": 0.00019999766076094478, - "loss": 46.0, - "step": 13530 - }, - { - "epoch": 2.1790732316115786, - "grad_norm": 0.0011863374384120107, - "learning_rate": 0.0001999976604148921, - "loss": 46.0, - "step": 13531 - }, - { - "epoch": 2.179234268690366, - "grad_norm": 0.0015350491739809513, - "learning_rate": 0.00019999766006881378, - "loss": 46.0, - "step": 13532 - }, - { - "epoch": 2.1793953057691535, - "grad_norm": 0.0012241523945704103, - "learning_rate": 0.00019999765972270983, - "loss": 46.0, - "step": 13533 - }, - { - "epoch": 2.1795563428479405, - "grad_norm": 0.0008566062897443771, - "learning_rate": 0.00019999765937658032, - "loss": 46.0, - "step": 13534 - }, - { - "epoch": 2.179717379926728, - "grad_norm": 0.003918860107660294, - "learning_rate": 0.00019999765903042522, - "loss": 46.0, - "step": 13535 - }, - { - "epoch": 2.1798784170055154, - "grad_norm": 0.005466017406433821, - "learning_rate": 0.0001999976586842445, - "loss": 46.0, - "step": 13536 - }, - { - "epoch": 2.180039454084303, - "grad_norm": 0.0029734678100794554, - "learning_rate": 0.00019999765833803823, - "loss": 46.0, - "step": 13537 - }, - { - "epoch": 2.1802004911630903, - "grad_norm": 0.0028642357792705297, - "learning_rate": 0.00019999765799180635, - "loss": 46.0, - "step": 13538 - }, - { - "epoch": 2.180361528241878, - "grad_norm": 0.006614120211452246, - "learning_rate": 0.00019999765764554885, - "loss": 46.0, - "step": 13539 - }, - { - "epoch": 2.1805225653206652, - "grad_norm": 0.0020899188239127398, - "learning_rate": 0.0001999976572992658, - "loss": 46.0, - "step": 13540 - }, - { - "epoch": 2.1806836023994522, - "grad_norm": 0.0011575022945180535, - "learning_rate": 0.0001999976569529571, - "loss": 46.0, - "step": 13541 - }, - { - "epoch": 2.1808446394782397, - "grad_norm": 0.004462036769837141, - "learning_rate": 0.00019999765660662285, - "loss": 46.0, - "step": 13542 - }, - { - "epoch": 2.181005676557027, - "grad_norm": 0.0042463792487978935, - "learning_rate": 0.000199997656260263, - "loss": 46.0, - "step": 13543 - }, - { - "epoch": 2.1811667136358146, - "grad_norm": 0.0012500300072133541, - "learning_rate": 0.00019999765591387754, - "loss": 46.0, - "step": 13544 - }, - { - "epoch": 2.181327750714602, - "grad_norm": 0.0015369309112429619, - "learning_rate": 0.00019999765556746649, - "loss": 46.0, - "step": 13545 - }, - { - "epoch": 2.1814887877933895, - "grad_norm": 0.002138678450137377, - "learning_rate": 0.00019999765522102985, - "loss": 46.0, - "step": 13546 - }, - { - "epoch": 2.181649824872177, - "grad_norm": 0.0021681070793420076, - "learning_rate": 0.0001999976548745676, - "loss": 46.0, - "step": 13547 - }, - { - "epoch": 2.1818108619509644, - "grad_norm": 0.0012066204799339175, - "learning_rate": 0.00019999765452807978, - "loss": 46.0, - "step": 13548 - }, - { - "epoch": 2.1819718990297514, - "grad_norm": 0.00296192429959774, - "learning_rate": 0.00019999765418156636, - "loss": 46.0, - "step": 13549 - }, - { - "epoch": 2.182132936108539, - "grad_norm": 0.002858041552826762, - "learning_rate": 0.00019999765383502734, - "loss": 46.0, - "step": 13550 - }, - { - "epoch": 2.1822939731873263, - "grad_norm": 0.0004258046974427998, - "learning_rate": 0.00019999765348846271, - "loss": 46.0, - "step": 13551 - }, - { - "epoch": 2.1824550102661138, - "grad_norm": 0.006359814200550318, - "learning_rate": 0.0001999976531418725, - "loss": 46.0, - "step": 13552 - }, - { - "epoch": 2.1826160473449012, - "grad_norm": 0.0016410560347139835, - "learning_rate": 0.0001999976527952567, - "loss": 46.0, - "step": 13553 - }, - { - "epoch": 2.1827770844236887, - "grad_norm": 0.006681407801806927, - "learning_rate": 0.0001999976524486153, - "loss": 46.0, - "step": 13554 - }, - { - "epoch": 2.182938121502476, - "grad_norm": 0.0008605445618741214, - "learning_rate": 0.00019999765210194833, - "loss": 46.0, - "step": 13555 - }, - { - "epoch": 2.183099158581263, - "grad_norm": 0.00569927878677845, - "learning_rate": 0.00019999765175525574, - "loss": 46.0, - "step": 13556 - }, - { - "epoch": 2.1832601956600506, - "grad_norm": 0.005860517732799053, - "learning_rate": 0.00019999765140853753, - "loss": 46.0, - "step": 13557 - }, - { - "epoch": 2.183421232738838, - "grad_norm": 0.0004395033756736666, - "learning_rate": 0.00019999765106179377, - "loss": 46.0, - "step": 13558 - }, - { - "epoch": 2.1835822698176255, - "grad_norm": 0.000844571040943265, - "learning_rate": 0.00019999765071502442, - "loss": 46.0, - "step": 13559 - }, - { - "epoch": 2.183743306896413, - "grad_norm": 0.004070487339049578, - "learning_rate": 0.00019999765036822945, - "loss": 46.0, - "step": 13560 - }, - { - "epoch": 2.1839043439752004, - "grad_norm": 0.009051503613591194, - "learning_rate": 0.00019999765002140892, - "loss": 46.0, - "step": 13561 - }, - { - "epoch": 2.184065381053988, - "grad_norm": 0.001517138327471912, - "learning_rate": 0.00019999764967456275, - "loss": 46.0, - "step": 13562 - }, - { - "epoch": 2.1842264181327753, - "grad_norm": 0.0025216175708919764, - "learning_rate": 0.000199997649327691, - "loss": 46.0, - "step": 13563 - }, - { - "epoch": 2.1843874552115623, - "grad_norm": 0.0007327153580263257, - "learning_rate": 0.00019999764898079368, - "loss": 46.0, - "step": 13564 - }, - { - "epoch": 2.1845484922903498, - "grad_norm": 0.0018117144936695695, - "learning_rate": 0.00019999764863387073, - "loss": 46.0, - "step": 13565 - }, - { - "epoch": 2.184709529369137, - "grad_norm": 0.0005709845572710037, - "learning_rate": 0.0001999976482869222, - "loss": 46.0, - "step": 13566 - }, - { - "epoch": 2.1848705664479247, - "grad_norm": 0.0023238679859787226, - "learning_rate": 0.00019999764793994808, - "loss": 46.0, - "step": 13567 - }, - { - "epoch": 2.185031603526712, - "grad_norm": 0.002491126535460353, - "learning_rate": 0.00019999764759294836, - "loss": 46.0, - "step": 13568 - }, - { - "epoch": 2.1851926406054996, - "grad_norm": 0.0031552943401038647, - "learning_rate": 0.00019999764724592305, - "loss": 46.0, - "step": 13569 - }, - { - "epoch": 2.185353677684287, - "grad_norm": 0.00076824682764709, - "learning_rate": 0.00019999764689887216, - "loss": 46.0, - "step": 13570 - }, - { - "epoch": 2.185514714763074, - "grad_norm": 0.0005623137694783509, - "learning_rate": 0.00019999764655179565, - "loss": 46.0, - "step": 13571 - }, - { - "epoch": 2.1856757518418615, - "grad_norm": 0.0022073371801525354, - "learning_rate": 0.00019999764620469356, - "loss": 46.0, - "step": 13572 - }, - { - "epoch": 2.185836788920649, - "grad_norm": 0.0006765663274563849, - "learning_rate": 0.00019999764585756585, - "loss": 46.0, - "step": 13573 - }, - { - "epoch": 2.1859978259994364, - "grad_norm": 0.0013320172438398004, - "learning_rate": 0.00019999764551041258, - "loss": 46.0, - "step": 13574 - }, - { - "epoch": 2.186158863078224, - "grad_norm": 0.0021768000442534685, - "learning_rate": 0.00019999764516323373, - "loss": 46.0, - "step": 13575 - }, - { - "epoch": 2.1863199001570113, - "grad_norm": 0.0003684274561237544, - "learning_rate": 0.00019999764481602923, - "loss": 46.0, - "step": 13576 - }, - { - "epoch": 2.1864809372357987, - "grad_norm": 0.001275241025723517, - "learning_rate": 0.00019999764446879917, - "loss": 46.0, - "step": 13577 - }, - { - "epoch": 2.1866419743145857, - "grad_norm": 0.0036178433801978827, - "learning_rate": 0.00019999764412154353, - "loss": 46.0, - "step": 13578 - }, - { - "epoch": 2.186803011393373, - "grad_norm": 0.002656420459970832, - "learning_rate": 0.00019999764377426224, - "loss": 46.0, - "step": 13579 - }, - { - "epoch": 2.1869640484721606, - "grad_norm": 0.009571787901222706, - "learning_rate": 0.00019999764342695542, - "loss": 46.0, - "step": 13580 - }, - { - "epoch": 2.187125085550948, - "grad_norm": 0.001973456237465143, - "learning_rate": 0.00019999764307962296, - "loss": 46.0, - "step": 13581 - }, - { - "epoch": 2.1872861226297355, - "grad_norm": 0.005300706718116999, - "learning_rate": 0.00019999764273226494, - "loss": 46.0, - "step": 13582 - }, - { - "epoch": 2.187447159708523, - "grad_norm": 0.0007981974049471319, - "learning_rate": 0.0001999976423848813, - "loss": 46.0, - "step": 13583 - }, - { - "epoch": 2.1876081967873104, - "grad_norm": 0.0029203679878264666, - "learning_rate": 0.00019999764203747206, - "loss": 46.0, - "step": 13584 - }, - { - "epoch": 2.1877692338660975, - "grad_norm": 0.0031833008397370577, - "learning_rate": 0.00019999764169003728, - "loss": 46.0, - "step": 13585 - }, - { - "epoch": 2.187930270944885, - "grad_norm": 0.0007549699512310326, - "learning_rate": 0.00019999764134257685, - "loss": 46.0, - "step": 13586 - }, - { - "epoch": 2.1880913080236724, - "grad_norm": 0.001534285955131054, - "learning_rate": 0.00019999764099509084, - "loss": 46.0, - "step": 13587 - }, - { - "epoch": 2.18825234510246, - "grad_norm": 0.002749880775809288, - "learning_rate": 0.00019999764064757925, - "loss": 46.0, - "step": 13588 - }, - { - "epoch": 2.1884133821812473, - "grad_norm": 0.004389625508338213, - "learning_rate": 0.00019999764030004204, - "loss": 46.0, - "step": 13589 - }, - { - "epoch": 2.1885744192600347, - "grad_norm": 0.0018506953492760658, - "learning_rate": 0.00019999763995247926, - "loss": 46.0, - "step": 13590 - }, - { - "epoch": 2.188735456338822, - "grad_norm": 0.0014044672716408968, - "learning_rate": 0.00019999763960489085, - "loss": 46.0, - "step": 13591 - }, - { - "epoch": 2.1888964934176096, - "grad_norm": 0.0017050118185579777, - "learning_rate": 0.00019999763925727688, - "loss": 46.0, - "step": 13592 - }, - { - "epoch": 2.1890575304963966, - "grad_norm": 0.0045059421099722385, - "learning_rate": 0.00019999763890963732, - "loss": 46.0, - "step": 13593 - }, - { - "epoch": 2.189218567575184, - "grad_norm": 0.0016245106235146523, - "learning_rate": 0.00019999763856197214, - "loss": 46.0, - "step": 13594 - }, - { - "epoch": 2.1893796046539715, - "grad_norm": 0.0026045513805001974, - "learning_rate": 0.00019999763821428138, - "loss": 46.0, - "step": 13595 - }, - { - "epoch": 2.189540641732759, - "grad_norm": 0.0014010741142556071, - "learning_rate": 0.00019999763786656503, - "loss": 46.0, - "step": 13596 - }, - { - "epoch": 2.1897016788115464, - "grad_norm": 0.0023435086477547884, - "learning_rate": 0.00019999763751882307, - "loss": 46.0, - "step": 13597 - }, - { - "epoch": 2.189862715890334, - "grad_norm": 0.0005747080431319773, - "learning_rate": 0.00019999763717105554, - "loss": 46.0, - "step": 13598 - }, - { - "epoch": 2.1900237529691213, - "grad_norm": 0.0029229726642370224, - "learning_rate": 0.0001999976368232624, - "loss": 46.0, - "step": 13599 - }, - { - "epoch": 2.1901847900479083, - "grad_norm": 0.002026109956204891, - "learning_rate": 0.00019999763647544368, - "loss": 46.0, - "step": 13600 - }, - { - "epoch": 2.190345827126696, - "grad_norm": 0.008457713760435581, - "learning_rate": 0.00019999763612759934, - "loss": 46.0, - "step": 13601 - }, - { - "epoch": 2.1905068642054832, - "grad_norm": 0.0016848701052367687, - "learning_rate": 0.00019999763577972944, - "loss": 46.0, - "step": 13602 - }, - { - "epoch": 2.1906679012842707, - "grad_norm": 0.0011079522082582116, - "learning_rate": 0.0001999976354318339, - "loss": 46.0, - "step": 13603 - }, - { - "epoch": 2.190828938363058, - "grad_norm": 0.00237249955534935, - "learning_rate": 0.00019999763508391278, - "loss": 46.0, - "step": 13604 - }, - { - "epoch": 2.1909899754418456, - "grad_norm": 0.003441508859395981, - "learning_rate": 0.0001999976347359661, - "loss": 46.0, - "step": 13605 - }, - { - "epoch": 2.191151012520633, - "grad_norm": 0.004961010534316301, - "learning_rate": 0.00019999763438799379, - "loss": 46.0, - "step": 13606 - }, - { - "epoch": 2.1913120495994205, - "grad_norm": 0.0010814645793288946, - "learning_rate": 0.0001999976340399959, - "loss": 46.0, - "step": 13607 - }, - { - "epoch": 2.1914730866782075, - "grad_norm": 0.010299154557287693, - "learning_rate": 0.0001999976336919724, - "loss": 46.0, - "step": 13608 - }, - { - "epoch": 2.191634123756995, - "grad_norm": 0.002826516982167959, - "learning_rate": 0.00019999763334392333, - "loss": 46.0, - "step": 13609 - }, - { - "epoch": 2.1917951608357824, - "grad_norm": 0.008937855251133442, - "learning_rate": 0.00019999763299584865, - "loss": 46.0, - "step": 13610 - }, - { - "epoch": 2.19195619791457, - "grad_norm": 0.011181384325027466, - "learning_rate": 0.00019999763264774841, - "loss": 46.0, - "step": 13611 - }, - { - "epoch": 2.1921172349933573, - "grad_norm": 0.0022061506751924753, - "learning_rate": 0.0001999976322996225, - "loss": 46.0, - "step": 13612 - }, - { - "epoch": 2.1922782720721448, - "grad_norm": 0.0011596990516409278, - "learning_rate": 0.00019999763195147104, - "loss": 46.0, - "step": 13613 - }, - { - "epoch": 2.1924393091509318, - "grad_norm": 0.0029557885136455297, - "learning_rate": 0.00019999763160329401, - "loss": 46.0, - "step": 13614 - }, - { - "epoch": 2.192600346229719, - "grad_norm": 0.0026413993909955025, - "learning_rate": 0.00019999763125509137, - "loss": 46.0, - "step": 13615 - }, - { - "epoch": 2.1927613833085067, - "grad_norm": 0.0010780281154438853, - "learning_rate": 0.00019999763090686312, - "loss": 46.0, - "step": 13616 - }, - { - "epoch": 2.192922420387294, - "grad_norm": 0.003673151833936572, - "learning_rate": 0.00019999763055860928, - "loss": 46.0, - "step": 13617 - }, - { - "epoch": 2.1930834574660816, - "grad_norm": 0.0017527603777125478, - "learning_rate": 0.00019999763021032987, - "loss": 46.0, - "step": 13618 - }, - { - "epoch": 2.193244494544869, - "grad_norm": 0.000748249760363251, - "learning_rate": 0.00019999762986202483, - "loss": 46.0, - "step": 13619 - }, - { - "epoch": 2.1934055316236565, - "grad_norm": 0.0035505208652466536, - "learning_rate": 0.00019999762951369423, - "loss": 46.0, - "step": 13620 - }, - { - "epoch": 2.193566568702444, - "grad_norm": 0.01069399993866682, - "learning_rate": 0.000199997629165338, - "loss": 46.0, - "step": 13621 - }, - { - "epoch": 2.193727605781231, - "grad_norm": 0.00884457677602768, - "learning_rate": 0.00019999762881695618, - "loss": 46.0, - "step": 13622 - }, - { - "epoch": 2.1938886428600184, - "grad_norm": 0.009702994488179684, - "learning_rate": 0.00019999762846854878, - "loss": 46.0, - "step": 13623 - }, - { - "epoch": 2.194049679938806, - "grad_norm": 0.002473380183801055, - "learning_rate": 0.0001999976281201158, - "loss": 46.0, - "step": 13624 - }, - { - "epoch": 2.1942107170175933, - "grad_norm": 0.0032200992573052645, - "learning_rate": 0.00019999762777165724, - "loss": 46.0, - "step": 13625 - }, - { - "epoch": 2.1943717540963807, - "grad_norm": 0.0026594691444188356, - "learning_rate": 0.00019999762742317303, - "loss": 46.0, - "step": 13626 - }, - { - "epoch": 2.194532791175168, - "grad_norm": 0.003416158026084304, - "learning_rate": 0.00019999762707466326, - "loss": 46.0, - "step": 13627 - }, - { - "epoch": 2.1946938282539556, - "grad_norm": 0.008864456787705421, - "learning_rate": 0.00019999762672612788, - "loss": 46.0, - "step": 13628 - }, - { - "epoch": 2.1948548653327427, - "grad_norm": 0.001293074106797576, - "learning_rate": 0.0001999976263775669, - "loss": 46.0, - "step": 13629 - }, - { - "epoch": 2.19501590241153, - "grad_norm": 0.0007040550117380917, - "learning_rate": 0.00019999762602898035, - "loss": 46.0, - "step": 13630 - }, - { - "epoch": 2.1951769394903176, - "grad_norm": 0.007560614496469498, - "learning_rate": 0.00019999762568036818, - "loss": 46.0, - "step": 13631 - }, - { - "epoch": 2.195337976569105, - "grad_norm": 0.004180553834885359, - "learning_rate": 0.00019999762533173042, - "loss": 46.0, - "step": 13632 - }, - { - "epoch": 2.1954990136478925, - "grad_norm": 0.0016429858515039086, - "learning_rate": 0.0001999976249830671, - "loss": 46.0, - "step": 13633 - }, - { - "epoch": 2.19566005072668, - "grad_norm": 0.004915562923997641, - "learning_rate": 0.00019999762463437817, - "loss": 46.0, - "step": 13634 - }, - { - "epoch": 2.1958210878054674, - "grad_norm": 0.0035727759823203087, - "learning_rate": 0.00019999762428566365, - "loss": 46.0, - "step": 13635 - }, - { - "epoch": 2.195982124884255, - "grad_norm": 0.002737649716436863, - "learning_rate": 0.0001999976239369235, - "loss": 46.0, - "step": 13636 - }, - { - "epoch": 2.196143161963042, - "grad_norm": 0.0015738006914034486, - "learning_rate": 0.0001999976235881578, - "loss": 46.0, - "step": 13637 - }, - { - "epoch": 2.1963041990418293, - "grad_norm": 0.008683986030519009, - "learning_rate": 0.00019999762323936646, - "loss": 46.0, - "step": 13638 - }, - { - "epoch": 2.1964652361206167, - "grad_norm": 0.0032735627610236406, - "learning_rate": 0.00019999762289054957, - "loss": 46.0, - "step": 13639 - }, - { - "epoch": 2.196626273199404, - "grad_norm": 0.003026894759386778, - "learning_rate": 0.00019999762254170706, - "loss": 46.0, - "step": 13640 - }, - { - "epoch": 2.1967873102781916, - "grad_norm": 0.000801481248345226, - "learning_rate": 0.00019999762219283896, - "loss": 46.0, - "step": 13641 - }, - { - "epoch": 2.196948347356979, - "grad_norm": 0.0017323765205219388, - "learning_rate": 0.00019999762184394525, - "loss": 46.0, - "step": 13642 - }, - { - "epoch": 2.1971093844357665, - "grad_norm": 0.001625302480533719, - "learning_rate": 0.00019999762149502598, - "loss": 46.0, - "step": 13643 - }, - { - "epoch": 2.1972704215145535, - "grad_norm": 0.0010175653733313084, - "learning_rate": 0.0001999976211460811, - "loss": 46.0, - "step": 13644 - }, - { - "epoch": 2.197431458593341, - "grad_norm": 0.002600245177745819, - "learning_rate": 0.00019999762079711065, - "loss": 46.0, - "step": 13645 - }, - { - "epoch": 2.1975924956721284, - "grad_norm": 0.0017785308882594109, - "learning_rate": 0.00019999762044811456, - "loss": 46.0, - "step": 13646 - }, - { - "epoch": 2.197753532750916, - "grad_norm": 0.0011726110242307186, - "learning_rate": 0.0001999976200990929, - "loss": 46.0, - "step": 13647 - }, - { - "epoch": 2.1979145698297033, - "grad_norm": 0.0029361031483858824, - "learning_rate": 0.00019999761975004562, - "loss": 46.0, - "step": 13648 - }, - { - "epoch": 2.198075606908491, - "grad_norm": 0.0029444019310176373, - "learning_rate": 0.0001999976194009728, - "loss": 46.0, - "step": 13649 - }, - { - "epoch": 2.1982366439872782, - "grad_norm": 0.001622056239284575, - "learning_rate": 0.00019999761905187434, - "loss": 46.0, - "step": 13650 - }, - { - "epoch": 2.1983976810660653, - "grad_norm": 0.001456732745282352, - "learning_rate": 0.00019999761870275032, - "loss": 46.0, - "step": 13651 - }, - { - "epoch": 2.1985587181448527, - "grad_norm": 0.008921913802623749, - "learning_rate": 0.00019999761835360068, - "loss": 46.0, - "step": 13652 - }, - { - "epoch": 2.19871975522364, - "grad_norm": 0.0012266503181308508, - "learning_rate": 0.00019999761800442546, - "loss": 46.0, - "step": 13653 - }, - { - "epoch": 2.1988807923024276, - "grad_norm": 0.0022826273925602436, - "learning_rate": 0.0001999976176552246, - "loss": 46.0, - "step": 13654 - }, - { - "epoch": 2.199041829381215, - "grad_norm": 0.0012355137150734663, - "learning_rate": 0.0001999976173059982, - "loss": 46.0, - "step": 13655 - }, - { - "epoch": 2.1992028664600025, - "grad_norm": 0.0031980867497622967, - "learning_rate": 0.00019999761695674618, - "loss": 46.0, - "step": 13656 - }, - { - "epoch": 2.19936390353879, - "grad_norm": 0.0019669425673782825, - "learning_rate": 0.00019999761660746858, - "loss": 46.0, - "step": 13657 - }, - { - "epoch": 2.199524940617577, - "grad_norm": 0.0008439529337920249, - "learning_rate": 0.00019999761625816536, - "loss": 46.0, - "step": 13658 - }, - { - "epoch": 2.1996859776963644, - "grad_norm": 0.0023680536542087793, - "learning_rate": 0.0001999976159088366, - "loss": 46.0, - "step": 13659 - }, - { - "epoch": 2.199847014775152, - "grad_norm": 0.0019078385084867477, - "learning_rate": 0.0001999976155594822, - "loss": 46.0, - "step": 13660 - }, - { - "epoch": 2.2000080518539393, - "grad_norm": 0.0012144406791776419, - "learning_rate": 0.00019999761521010223, - "loss": 46.0, - "step": 13661 - }, - { - "epoch": 2.200169088932727, - "grad_norm": 0.0021109990775585175, - "learning_rate": 0.00019999761486069663, - "loss": 46.0, - "step": 13662 - }, - { - "epoch": 2.2003301260115142, - "grad_norm": 0.008120409213006496, - "learning_rate": 0.00019999761451126546, - "loss": 46.0, - "step": 13663 - }, - { - "epoch": 2.2004911630903017, - "grad_norm": 0.0020527790766209364, - "learning_rate": 0.0001999976141618087, - "loss": 46.0, - "step": 13664 - }, - { - "epoch": 2.200652200169089, - "grad_norm": 0.0007342428434640169, - "learning_rate": 0.00019999761381232634, - "loss": 46.0, - "step": 13665 - }, - { - "epoch": 2.200813237247876, - "grad_norm": 0.006230971310287714, - "learning_rate": 0.0001999976134628184, - "loss": 46.0, - "step": 13666 - }, - { - "epoch": 2.2009742743266636, - "grad_norm": 0.0014894644264131784, - "learning_rate": 0.00019999761311328485, - "loss": 46.0, - "step": 13667 - }, - { - "epoch": 2.201135311405451, - "grad_norm": 0.010603736154735088, - "learning_rate": 0.0001999976127637257, - "loss": 46.0, - "step": 13668 - }, - { - "epoch": 2.2012963484842385, - "grad_norm": 0.0042411889880895615, - "learning_rate": 0.00019999761241414095, - "loss": 46.0, - "step": 13669 - }, - { - "epoch": 2.201457385563026, - "grad_norm": 0.008183501660823822, - "learning_rate": 0.00019999761206453064, - "loss": 46.0, - "step": 13670 - }, - { - "epoch": 2.2016184226418134, - "grad_norm": 0.004855779930949211, - "learning_rate": 0.0001999976117148947, - "loss": 46.0, - "step": 13671 - }, - { - "epoch": 2.201779459720601, - "grad_norm": 0.0013469979166984558, - "learning_rate": 0.0001999976113652332, - "loss": 46.0, - "step": 13672 - }, - { - "epoch": 2.201940496799388, - "grad_norm": 0.0020328834652900696, - "learning_rate": 0.00019999761101554606, - "loss": 46.0, - "step": 13673 - }, - { - "epoch": 2.2021015338781753, - "grad_norm": 0.005405766889452934, - "learning_rate": 0.00019999761066583335, - "loss": 46.0, - "step": 13674 - }, - { - "epoch": 2.2022625709569628, - "grad_norm": 0.004016388673335314, - "learning_rate": 0.00019999761031609507, - "loss": 46.0, - "step": 13675 - }, - { - "epoch": 2.20242360803575, - "grad_norm": 0.0033229391556233168, - "learning_rate": 0.00019999760996633118, - "loss": 46.0, - "step": 13676 - }, - { - "epoch": 2.2025846451145377, - "grad_norm": 0.0011385190300643444, - "learning_rate": 0.00019999760961654167, - "loss": 46.0, - "step": 13677 - }, - { - "epoch": 2.202745682193325, - "grad_norm": 0.004269816912710667, - "learning_rate": 0.0001999976092667266, - "loss": 46.0, - "step": 13678 - }, - { - "epoch": 2.2029067192721126, - "grad_norm": 0.0019309286726638675, - "learning_rate": 0.0001999976089168859, - "loss": 46.0, - "step": 13679 - }, - { - "epoch": 2.2030677563509, - "grad_norm": 0.0038594515062868595, - "learning_rate": 0.00019999760856701963, - "loss": 46.0, - "step": 13680 - }, - { - "epoch": 2.203228793429687, - "grad_norm": 0.002060219179838896, - "learning_rate": 0.00019999760821712777, - "loss": 46.0, - "step": 13681 - }, - { - "epoch": 2.2033898305084745, - "grad_norm": 0.0009372822241857648, - "learning_rate": 0.00019999760786721028, - "loss": 46.0, - "step": 13682 - }, - { - "epoch": 2.203550867587262, - "grad_norm": 0.006065111141651869, - "learning_rate": 0.00019999760751726725, - "loss": 46.0, - "step": 13683 - }, - { - "epoch": 2.2037119046660494, - "grad_norm": 0.002058411715552211, - "learning_rate": 0.0001999976071672986, - "loss": 46.0, - "step": 13684 - }, - { - "epoch": 2.203872941744837, - "grad_norm": 0.00735425716266036, - "learning_rate": 0.00019999760681730435, - "loss": 46.0, - "step": 13685 - }, - { - "epoch": 2.2040339788236243, - "grad_norm": 0.0031345027964562178, - "learning_rate": 0.0001999976064672845, - "loss": 46.0, - "step": 13686 - }, - { - "epoch": 2.2041950159024117, - "grad_norm": 0.0033000244293361902, - "learning_rate": 0.00019999760611723907, - "loss": 46.0, - "step": 13687 - }, - { - "epoch": 2.2043560529811987, - "grad_norm": 0.0011922132689505816, - "learning_rate": 0.00019999760576716805, - "loss": 46.0, - "step": 13688 - }, - { - "epoch": 2.204517090059986, - "grad_norm": 0.007995802909135818, - "learning_rate": 0.00019999760541707145, - "loss": 46.0, - "step": 13689 - }, - { - "epoch": 2.2046781271387736, - "grad_norm": 0.001073703053407371, - "learning_rate": 0.0001999976050669492, - "loss": 46.0, - "step": 13690 - }, - { - "epoch": 2.204839164217561, - "grad_norm": 0.0011595672694966197, - "learning_rate": 0.0001999976047168014, - "loss": 46.0, - "step": 13691 - }, - { - "epoch": 2.2050002012963485, - "grad_norm": 0.006388626527041197, - "learning_rate": 0.000199997604366628, - "loss": 46.0, - "step": 13692 - }, - { - "epoch": 2.205161238375136, - "grad_norm": 0.0010825737845152617, - "learning_rate": 0.00019999760401642899, - "loss": 46.0, - "step": 13693 - }, - { - "epoch": 2.2053222754539235, - "grad_norm": 0.003770017297938466, - "learning_rate": 0.0001999976036662044, - "loss": 46.0, - "step": 13694 - }, - { - "epoch": 2.2054833125327105, - "grad_norm": 0.007412966340780258, - "learning_rate": 0.0001999976033159542, - "loss": 46.0, - "step": 13695 - }, - { - "epoch": 2.205644349611498, - "grad_norm": 0.0010894156293943524, - "learning_rate": 0.00019999760296567843, - "loss": 46.0, - "step": 13696 - }, - { - "epoch": 2.2058053866902854, - "grad_norm": 0.0007538596983067691, - "learning_rate": 0.00019999760261537705, - "loss": 46.0, - "step": 13697 - }, - { - "epoch": 2.205966423769073, - "grad_norm": 0.0003239456273149699, - "learning_rate": 0.00019999760226505008, - "loss": 46.0, - "step": 13698 - }, - { - "epoch": 2.2061274608478603, - "grad_norm": 0.0018409915501251817, - "learning_rate": 0.00019999760191469754, - "loss": 46.0, - "step": 13699 - }, - { - "epoch": 2.2062884979266477, - "grad_norm": 0.003168245777487755, - "learning_rate": 0.00019999760156431937, - "loss": 46.0, - "step": 13700 - }, - { - "epoch": 2.206449535005435, - "grad_norm": 0.002283796202391386, - "learning_rate": 0.0001999976012139156, - "loss": 46.0, - "step": 13701 - }, - { - "epoch": 2.206610572084222, - "grad_norm": 0.0011646875645965338, - "learning_rate": 0.00019999760086348626, - "loss": 46.0, - "step": 13702 - }, - { - "epoch": 2.2067716091630096, - "grad_norm": 0.009998772293329239, - "learning_rate": 0.00019999760051303133, - "loss": 46.0, - "step": 13703 - }, - { - "epoch": 2.206932646241797, - "grad_norm": 0.002271430566906929, - "learning_rate": 0.0001999976001625508, - "loss": 46.0, - "step": 13704 - }, - { - "epoch": 2.2070936833205845, - "grad_norm": 0.007020572666078806, - "learning_rate": 0.00019999759981204464, - "loss": 46.0, - "step": 13705 - }, - { - "epoch": 2.207254720399372, - "grad_norm": 0.003235981334000826, - "learning_rate": 0.00019999759946151294, - "loss": 46.0, - "step": 13706 - }, - { - "epoch": 2.2074157574781594, - "grad_norm": 0.002830745652318001, - "learning_rate": 0.0001999975991109556, - "loss": 46.0, - "step": 13707 - }, - { - "epoch": 2.207576794556947, - "grad_norm": 0.0016132837627083063, - "learning_rate": 0.0001999975987603727, - "loss": 46.0, - "step": 13708 - }, - { - "epoch": 2.2077378316357343, - "grad_norm": 0.0038657637778669596, - "learning_rate": 0.0001999975984097642, - "loss": 46.0, - "step": 13709 - }, - { - "epoch": 2.2078988687145213, - "grad_norm": 0.0012947127688676119, - "learning_rate": 0.0001999975980591301, - "loss": 46.0, - "step": 13710 - }, - { - "epoch": 2.208059905793309, - "grad_norm": 0.0009376327507197857, - "learning_rate": 0.0001999975977084704, - "loss": 46.0, - "step": 13711 - }, - { - "epoch": 2.2082209428720962, - "grad_norm": 0.0003879389551002532, - "learning_rate": 0.0001999975973577851, - "loss": 46.0, - "step": 13712 - }, - { - "epoch": 2.2083819799508837, - "grad_norm": 0.004862251225858927, - "learning_rate": 0.00019999759700707422, - "loss": 46.0, - "step": 13713 - }, - { - "epoch": 2.208543017029671, - "grad_norm": 0.006597419735044241, - "learning_rate": 0.00019999759665633774, - "loss": 46.0, - "step": 13714 - }, - { - "epoch": 2.2087040541084586, - "grad_norm": 0.00931819062680006, - "learning_rate": 0.00019999759630557565, - "loss": 46.0, - "step": 13715 - }, - { - "epoch": 2.208865091187246, - "grad_norm": 0.0049352184869349, - "learning_rate": 0.000199997595954788, - "loss": 46.0, - "step": 13716 - }, - { - "epoch": 2.209026128266033, - "grad_norm": 0.0030654005240648985, - "learning_rate": 0.00019999759560397474, - "loss": 46.0, - "step": 13717 - }, - { - "epoch": 2.2091871653448205, - "grad_norm": 0.008191955275833607, - "learning_rate": 0.0001999975952531359, - "loss": 46.0, - "step": 13718 - }, - { - "epoch": 2.209348202423608, - "grad_norm": 0.0031054536812007427, - "learning_rate": 0.00019999759490227142, - "loss": 46.0, - "step": 13719 - }, - { - "epoch": 2.2095092395023954, - "grad_norm": 0.005711531266570091, - "learning_rate": 0.0001999975945513814, - "loss": 46.0, - "step": 13720 - }, - { - "epoch": 2.209670276581183, - "grad_norm": 0.002103405073285103, - "learning_rate": 0.00019999759420046576, - "loss": 46.0, - "step": 13721 - }, - { - "epoch": 2.2098313136599703, - "grad_norm": 0.0025062395725399256, - "learning_rate": 0.00019999759384952453, - "loss": 46.0, - "step": 13722 - }, - { - "epoch": 2.2099923507387578, - "grad_norm": 0.0048634884878993034, - "learning_rate": 0.00019999759349855772, - "loss": 46.0, - "step": 13723 - }, - { - "epoch": 2.2101533878175452, - "grad_norm": 0.0032078830990940332, - "learning_rate": 0.0001999975931475653, - "loss": 46.0, - "step": 13724 - }, - { - "epoch": 2.2103144248963322, - "grad_norm": 0.003515372984111309, - "learning_rate": 0.00019999759279654727, - "loss": 46.0, - "step": 13725 - }, - { - "epoch": 2.2104754619751197, - "grad_norm": 0.003074072999879718, - "learning_rate": 0.00019999759244550367, - "loss": 46.0, - "step": 13726 - }, - { - "epoch": 2.210636499053907, - "grad_norm": 0.002210519975051284, - "learning_rate": 0.00019999759209443445, - "loss": 46.0, - "step": 13727 - }, - { - "epoch": 2.2107975361326946, - "grad_norm": 0.005355529952794313, - "learning_rate": 0.00019999759174333965, - "loss": 46.0, - "step": 13728 - }, - { - "epoch": 2.210958573211482, - "grad_norm": 0.0009055686532519758, - "learning_rate": 0.00019999759139221928, - "loss": 46.0, - "step": 13729 - }, - { - "epoch": 2.2111196102902695, - "grad_norm": 0.008600863628089428, - "learning_rate": 0.00019999759104107328, - "loss": 46.0, - "step": 13730 - }, - { - "epoch": 2.2112806473690565, - "grad_norm": 0.0024756542406976223, - "learning_rate": 0.00019999759068990168, - "loss": 46.0, - "step": 13731 - }, - { - "epoch": 2.211441684447844, - "grad_norm": 0.0011623684549704194, - "learning_rate": 0.00019999759033870453, - "loss": 46.0, - "step": 13732 - }, - { - "epoch": 2.2116027215266314, - "grad_norm": 0.003117821179330349, - "learning_rate": 0.00019999758998748174, - "loss": 46.0, - "step": 13733 - }, - { - "epoch": 2.211763758605419, - "grad_norm": 0.006619364954531193, - "learning_rate": 0.00019999758963623338, - "loss": 46.0, - "step": 13734 - }, - { - "epoch": 2.2119247956842063, - "grad_norm": 0.004035099409520626, - "learning_rate": 0.00019999758928495944, - "loss": 46.0, - "step": 13735 - }, - { - "epoch": 2.2120858327629938, - "grad_norm": 0.004001006484031677, - "learning_rate": 0.00019999758893365989, - "loss": 46.0, - "step": 13736 - }, - { - "epoch": 2.212246869841781, - "grad_norm": 0.0008899013046175241, - "learning_rate": 0.00019999758858233472, - "loss": 46.0, - "step": 13737 - }, - { - "epoch": 2.2124079069205687, - "grad_norm": 0.00424217851832509, - "learning_rate": 0.000199997588230984, - "loss": 46.0, - "step": 13738 - }, - { - "epoch": 2.2125689439993557, - "grad_norm": 0.0032466743141412735, - "learning_rate": 0.00019999758787960764, - "loss": 46.0, - "step": 13739 - }, - { - "epoch": 2.212729981078143, - "grad_norm": 0.0024136712308973074, - "learning_rate": 0.0001999975875282057, - "loss": 46.0, - "step": 13740 - }, - { - "epoch": 2.2128910181569306, - "grad_norm": 0.005446125287562609, - "learning_rate": 0.00019999758717677822, - "loss": 46.0, - "step": 13741 - }, - { - "epoch": 2.213052055235718, - "grad_norm": 0.0022979495115578175, - "learning_rate": 0.00019999758682532508, - "loss": 46.0, - "step": 13742 - }, - { - "epoch": 2.2132130923145055, - "grad_norm": 0.0006464969483204186, - "learning_rate": 0.00019999758647384636, - "loss": 46.0, - "step": 13743 - }, - { - "epoch": 2.213374129393293, - "grad_norm": 0.00036187004297971725, - "learning_rate": 0.00019999758612234208, - "loss": 46.0, - "step": 13744 - }, - { - "epoch": 2.2135351664720804, - "grad_norm": 0.0017232020618394017, - "learning_rate": 0.00019999758577081216, - "loss": 46.0, - "step": 13745 - }, - { - "epoch": 2.2136962035508674, - "grad_norm": 0.0010562578681856394, - "learning_rate": 0.00019999758541925665, - "loss": 46.0, - "step": 13746 - }, - { - "epoch": 2.213857240629655, - "grad_norm": 0.0016176251228898764, - "learning_rate": 0.00019999758506767558, - "loss": 46.0, - "step": 13747 - }, - { - "epoch": 2.2140182777084423, - "grad_norm": 0.008211284875869751, - "learning_rate": 0.0001999975847160689, - "loss": 46.0, - "step": 13748 - }, - { - "epoch": 2.2141793147872297, - "grad_norm": 0.0013719237176701427, - "learning_rate": 0.00019999758436443663, - "loss": 46.0, - "step": 13749 - }, - { - "epoch": 2.214340351866017, - "grad_norm": 0.0012339329114183784, - "learning_rate": 0.00019999758401277875, - "loss": 46.0, - "step": 13750 - }, - { - "epoch": 2.2145013889448046, - "grad_norm": 0.0028864461928606033, - "learning_rate": 0.0001999975836610953, - "loss": 46.0, - "step": 13751 - }, - { - "epoch": 2.214662426023592, - "grad_norm": 0.0033955059479922056, - "learning_rate": 0.00019999758330938622, - "loss": 46.0, - "step": 13752 - }, - { - "epoch": 2.2148234631023795, - "grad_norm": 0.0017166660400107503, - "learning_rate": 0.00019999758295765157, - "loss": 46.0, - "step": 13753 - }, - { - "epoch": 2.2149845001811665, - "grad_norm": 0.0011917110532522202, - "learning_rate": 0.00019999758260589134, - "loss": 46.0, - "step": 13754 - }, - { - "epoch": 2.215145537259954, - "grad_norm": 0.0034384748432785273, - "learning_rate": 0.0001999975822541055, - "loss": 46.0, - "step": 13755 - }, - { - "epoch": 2.2153065743387415, - "grad_norm": 0.003914912696927786, - "learning_rate": 0.00019999758190229405, - "loss": 46.0, - "step": 13756 - }, - { - "epoch": 2.215467611417529, - "grad_norm": 0.0019371293019503355, - "learning_rate": 0.00019999758155045703, - "loss": 46.0, - "step": 13757 - }, - { - "epoch": 2.2156286484963164, - "grad_norm": 0.0005503292195498943, - "learning_rate": 0.0001999975811985944, - "loss": 46.0, - "step": 13758 - }, - { - "epoch": 2.215789685575104, - "grad_norm": 0.005088392645120621, - "learning_rate": 0.00019999758084670617, - "loss": 46.0, - "step": 13759 - }, - { - "epoch": 2.2159507226538913, - "grad_norm": 0.011506959795951843, - "learning_rate": 0.00019999758049479236, - "loss": 46.0, - "step": 13760 - }, - { - "epoch": 2.2161117597326783, - "grad_norm": 0.001494453172199428, - "learning_rate": 0.00019999758014285294, - "loss": 46.0, - "step": 13761 - }, - { - "epoch": 2.2162727968114657, - "grad_norm": 0.0008600427536293864, - "learning_rate": 0.00019999757979088795, - "loss": 46.0, - "step": 13762 - }, - { - "epoch": 2.216433833890253, - "grad_norm": 0.0018290492007508874, - "learning_rate": 0.00019999757943889735, - "loss": 46.0, - "step": 13763 - }, - { - "epoch": 2.2165948709690406, - "grad_norm": 0.004759151488542557, - "learning_rate": 0.00019999757908688114, - "loss": 46.0, - "step": 13764 - }, - { - "epoch": 2.216755908047828, - "grad_norm": 0.007206445559859276, - "learning_rate": 0.00019999757873483936, - "loss": 46.0, - "step": 13765 - }, - { - "epoch": 2.2169169451266155, - "grad_norm": 0.001072881743311882, - "learning_rate": 0.000199997578382772, - "loss": 46.0, - "step": 13766 - }, - { - "epoch": 2.217077982205403, - "grad_norm": 0.002913131844252348, - "learning_rate": 0.00019999757803067903, - "loss": 46.0, - "step": 13767 - }, - { - "epoch": 2.21723901928419, - "grad_norm": 0.0028124507516622543, - "learning_rate": 0.00019999757767856044, - "loss": 46.0, - "step": 13768 - }, - { - "epoch": 2.2174000563629774, - "grad_norm": 0.0023997551761567593, - "learning_rate": 0.00019999757732641632, - "loss": 46.0, - "step": 13769 - }, - { - "epoch": 2.217561093441765, - "grad_norm": 0.004580811131745577, - "learning_rate": 0.00019999757697424655, - "loss": 46.0, - "step": 13770 - }, - { - "epoch": 2.2177221305205523, - "grad_norm": 0.003870446467772126, - "learning_rate": 0.00019999757662205118, - "loss": 46.0, - "step": 13771 - }, - { - "epoch": 2.21788316759934, - "grad_norm": 0.005958774592727423, - "learning_rate": 0.00019999757626983024, - "loss": 46.0, - "step": 13772 - }, - { - "epoch": 2.2180442046781272, - "grad_norm": 0.0019876330625265837, - "learning_rate": 0.0001999975759175837, - "loss": 46.0, - "step": 13773 - }, - { - "epoch": 2.2182052417569147, - "grad_norm": 0.0012807289604097605, - "learning_rate": 0.00019999757556531157, - "loss": 46.0, - "step": 13774 - }, - { - "epoch": 2.2183662788357017, - "grad_norm": 0.010696006007492542, - "learning_rate": 0.00019999757521301382, - "loss": 46.0, - "step": 13775 - }, - { - "epoch": 2.218527315914489, - "grad_norm": 0.001628201687708497, - "learning_rate": 0.0001999975748606905, - "loss": 46.0, - "step": 13776 - }, - { - "epoch": 2.2186883529932766, - "grad_norm": 0.009343422017991543, - "learning_rate": 0.0001999975745083416, - "loss": 46.0, - "step": 13777 - }, - { - "epoch": 2.218849390072064, - "grad_norm": 0.002921203849837184, - "learning_rate": 0.0001999975741559671, - "loss": 46.0, - "step": 13778 - }, - { - "epoch": 2.2190104271508515, - "grad_norm": 0.005728908348828554, - "learning_rate": 0.000199997573803567, - "loss": 46.0, - "step": 13779 - }, - { - "epoch": 2.219171464229639, - "grad_norm": 0.00424401368945837, - "learning_rate": 0.00019999757345114127, - "loss": 46.0, - "step": 13780 - }, - { - "epoch": 2.2193325013084264, - "grad_norm": 0.001296310918405652, - "learning_rate": 0.00019999757309869, - "loss": 46.0, - "step": 13781 - }, - { - "epoch": 2.219493538387214, - "grad_norm": 0.005168564151972532, - "learning_rate": 0.0001999975727462131, - "loss": 46.0, - "step": 13782 - }, - { - "epoch": 2.219654575466001, - "grad_norm": 0.0007294094539247453, - "learning_rate": 0.00019999757239371065, - "loss": 46.0, - "step": 13783 - }, - { - "epoch": 2.2198156125447883, - "grad_norm": 0.010267596691846848, - "learning_rate": 0.00019999757204118253, - "loss": 46.0, - "step": 13784 - }, - { - "epoch": 2.2199766496235758, - "grad_norm": 0.002379964804276824, - "learning_rate": 0.00019999757168862888, - "loss": 46.0, - "step": 13785 - }, - { - "epoch": 2.2201376867023632, - "grad_norm": 0.0021109574008733034, - "learning_rate": 0.00019999757133604961, - "loss": 46.0, - "step": 13786 - }, - { - "epoch": 2.2202987237811507, - "grad_norm": 0.0028596543706953526, - "learning_rate": 0.00019999757098344476, - "loss": 46.0, - "step": 13787 - }, - { - "epoch": 2.220459760859938, - "grad_norm": 0.001897316542454064, - "learning_rate": 0.0001999975706308143, - "loss": 46.0, - "step": 13788 - }, - { - "epoch": 2.2206207979387256, - "grad_norm": 0.002938944846391678, - "learning_rate": 0.00019999757027815824, - "loss": 46.0, - "step": 13789 - }, - { - "epoch": 2.2207818350175126, - "grad_norm": 0.0007160701206885278, - "learning_rate": 0.00019999756992547662, - "loss": 46.0, - "step": 13790 - }, - { - "epoch": 2.2209428720963, - "grad_norm": 0.0088806739076972, - "learning_rate": 0.0001999975695727694, - "loss": 46.0, - "step": 13791 - }, - { - "epoch": 2.2211039091750875, - "grad_norm": 0.005651654209941626, - "learning_rate": 0.00019999756922003657, - "loss": 46.0, - "step": 13792 - }, - { - "epoch": 2.221264946253875, - "grad_norm": 0.0036610146053135395, - "learning_rate": 0.00019999756886727812, - "loss": 46.0, - "step": 13793 - }, - { - "epoch": 2.2214259833326624, - "grad_norm": 0.0004612719058059156, - "learning_rate": 0.0001999975685144941, - "loss": 46.0, - "step": 13794 - }, - { - "epoch": 2.22158702041145, - "grad_norm": 0.004077673889696598, - "learning_rate": 0.0001999975681616845, - "loss": 46.0, - "step": 13795 - }, - { - "epoch": 2.2217480574902373, - "grad_norm": 0.004129249136894941, - "learning_rate": 0.00019999756780884927, - "loss": 46.0, - "step": 13796 - }, - { - "epoch": 2.2219090945690247, - "grad_norm": 0.0007824993226677179, - "learning_rate": 0.0001999975674559885, - "loss": 46.0, - "step": 13797 - }, - { - "epoch": 2.2220701316478118, - "grad_norm": 0.009709149599075317, - "learning_rate": 0.0001999975671031021, - "loss": 46.0, - "step": 13798 - }, - { - "epoch": 2.222231168726599, - "grad_norm": 0.0019398038275539875, - "learning_rate": 0.0001999975667501901, - "loss": 46.0, - "step": 13799 - }, - { - "epoch": 2.2223922058053867, - "grad_norm": 0.0011810872238129377, - "learning_rate": 0.00019999756639725252, - "loss": 46.0, - "step": 13800 - }, - { - "epoch": 2.222553242884174, - "grad_norm": 0.009734337218105793, - "learning_rate": 0.00019999756604428934, - "loss": 46.0, - "step": 13801 - }, - { - "epoch": 2.2227142799629616, - "grad_norm": 0.004123416729271412, - "learning_rate": 0.00019999756569130057, - "loss": 46.0, - "step": 13802 - }, - { - "epoch": 2.222875317041749, - "grad_norm": 0.0038641542196273804, - "learning_rate": 0.00019999756533828618, - "loss": 46.0, - "step": 13803 - }, - { - "epoch": 2.223036354120536, - "grad_norm": 0.005792050156742334, - "learning_rate": 0.00019999756498524624, - "loss": 46.0, - "step": 13804 - }, - { - "epoch": 2.2231973911993235, - "grad_norm": 0.002649559173732996, - "learning_rate": 0.00019999756463218068, - "loss": 46.0, - "step": 13805 - }, - { - "epoch": 2.223358428278111, - "grad_norm": 0.002285517519339919, - "learning_rate": 0.00019999756427908954, - "loss": 46.0, - "step": 13806 - }, - { - "epoch": 2.2235194653568984, - "grad_norm": 0.004329164978116751, - "learning_rate": 0.0001999975639259728, - "loss": 46.0, - "step": 13807 - }, - { - "epoch": 2.223680502435686, - "grad_norm": 0.006754350382834673, - "learning_rate": 0.00019999756357283046, - "loss": 46.0, - "step": 13808 - }, - { - "epoch": 2.2238415395144733, - "grad_norm": 0.00586355896666646, - "learning_rate": 0.00019999756321966252, - "loss": 46.0, - "step": 13809 - }, - { - "epoch": 2.2240025765932607, - "grad_norm": 0.0006915184203535318, - "learning_rate": 0.000199997562866469, - "loss": 46.0, - "step": 13810 - }, - { - "epoch": 2.224163613672048, - "grad_norm": 0.0025954199954867363, - "learning_rate": 0.0001999975625132499, - "loss": 46.0, - "step": 13811 - }, - { - "epoch": 2.224324650750835, - "grad_norm": 0.004700150340795517, - "learning_rate": 0.00019999756216000517, - "loss": 46.0, - "step": 13812 - }, - { - "epoch": 2.2244856878296226, - "grad_norm": 0.002075010910630226, - "learning_rate": 0.00019999756180673486, - "loss": 46.0, - "step": 13813 - }, - { - "epoch": 2.22464672490841, - "grad_norm": 0.001411769655533135, - "learning_rate": 0.00019999756145343896, - "loss": 46.0, - "step": 13814 - }, - { - "epoch": 2.2248077619871975, - "grad_norm": 0.0015281918458640575, - "learning_rate": 0.00019999756110011745, - "loss": 46.0, - "step": 13815 - }, - { - "epoch": 2.224968799065985, - "grad_norm": 0.0006174591835588217, - "learning_rate": 0.00019999756074677035, - "loss": 46.0, - "step": 13816 - }, - { - "epoch": 2.2251298361447724, - "grad_norm": 0.011503061279654503, - "learning_rate": 0.00019999756039339766, - "loss": 46.0, - "step": 13817 - }, - { - "epoch": 2.22529087322356, - "grad_norm": 0.0024040464777499437, - "learning_rate": 0.0001999975600399994, - "loss": 46.0, - "step": 13818 - }, - { - "epoch": 2.225451910302347, - "grad_norm": 0.006287943106144667, - "learning_rate": 0.00019999755968657553, - "loss": 46.0, - "step": 13819 - }, - { - "epoch": 2.2256129473811344, - "grad_norm": 0.001936339191161096, - "learning_rate": 0.00019999755933312606, - "loss": 46.0, - "step": 13820 - }, - { - "epoch": 2.225773984459922, - "grad_norm": 0.0013389616506174207, - "learning_rate": 0.000199997558979651, - "loss": 46.0, - "step": 13821 - }, - { - "epoch": 2.2259350215387093, - "grad_norm": 0.000814276107121259, - "learning_rate": 0.00019999755862615032, - "loss": 46.0, - "step": 13822 - }, - { - "epoch": 2.2260960586174967, - "grad_norm": 0.0010869288817048073, - "learning_rate": 0.00019999755827262408, - "loss": 46.0, - "step": 13823 - }, - { - "epoch": 2.226257095696284, - "grad_norm": 0.0013230699114501476, - "learning_rate": 0.00019999755791907223, - "loss": 46.0, - "step": 13824 - }, - { - "epoch": 2.2264181327750716, - "grad_norm": 0.014168528839945793, - "learning_rate": 0.0001999975575654948, - "loss": 46.0, - "step": 13825 - }, - { - "epoch": 2.226579169853859, - "grad_norm": 0.007537615951150656, - "learning_rate": 0.00019999755721189177, - "loss": 46.0, - "step": 13826 - }, - { - "epoch": 2.226740206932646, - "grad_norm": 0.012373830191791058, - "learning_rate": 0.00019999755685826313, - "loss": 46.0, - "step": 13827 - }, - { - "epoch": 2.2269012440114335, - "grad_norm": 0.00253257411532104, - "learning_rate": 0.00019999755650460893, - "loss": 46.0, - "step": 13828 - }, - { - "epoch": 2.227062281090221, - "grad_norm": 0.0018066565971821547, - "learning_rate": 0.00019999755615092906, - "loss": 46.0, - "step": 13829 - }, - { - "epoch": 2.2272233181690084, - "grad_norm": 0.000954370538238436, - "learning_rate": 0.0001999975557972237, - "loss": 46.0, - "step": 13830 - }, - { - "epoch": 2.227384355247796, - "grad_norm": 0.007515036500990391, - "learning_rate": 0.00019999755544349268, - "loss": 46.0, - "step": 13831 - }, - { - "epoch": 2.2275453923265833, - "grad_norm": 0.0008438150398433208, - "learning_rate": 0.00019999755508973607, - "loss": 46.0, - "step": 13832 - }, - { - "epoch": 2.227706429405371, - "grad_norm": 0.0064139775931835175, - "learning_rate": 0.00019999755473595389, - "loss": 46.0, - "step": 13833 - }, - { - "epoch": 2.227867466484158, - "grad_norm": 0.006094703450798988, - "learning_rate": 0.00019999755438214608, - "loss": 46.0, - "step": 13834 - }, - { - "epoch": 2.2280285035629452, - "grad_norm": 0.0032856641337275505, - "learning_rate": 0.00019999755402831272, - "loss": 46.0, - "step": 13835 - }, - { - "epoch": 2.2281895406417327, - "grad_norm": 0.003365366719663143, - "learning_rate": 0.00019999755367445374, - "loss": 46.0, - "step": 13836 - }, - { - "epoch": 2.22835057772052, - "grad_norm": 0.0020414208993315697, - "learning_rate": 0.00019999755332056918, - "loss": 46.0, - "step": 13837 - }, - { - "epoch": 2.2285116147993076, - "grad_norm": 0.0013070415006950498, - "learning_rate": 0.000199997552966659, - "loss": 46.0, - "step": 13838 - }, - { - "epoch": 2.228672651878095, - "grad_norm": 0.0014498542295768857, - "learning_rate": 0.00019999755261272323, - "loss": 46.0, - "step": 13839 - }, - { - "epoch": 2.2288336889568825, - "grad_norm": 0.004660328384488821, - "learning_rate": 0.00019999755225876188, - "loss": 46.0, - "step": 13840 - }, - { - "epoch": 2.22899472603567, - "grad_norm": 0.002556582447141409, - "learning_rate": 0.00019999755190477494, - "loss": 46.0, - "step": 13841 - }, - { - "epoch": 2.229155763114457, - "grad_norm": 0.0025003976188600063, - "learning_rate": 0.00019999755155076238, - "loss": 46.0, - "step": 13842 - }, - { - "epoch": 2.2293168001932444, - "grad_norm": 0.005243140272796154, - "learning_rate": 0.00019999755119672427, - "loss": 46.0, - "step": 13843 - }, - { - "epoch": 2.229477837272032, - "grad_norm": 0.002213727915659547, - "learning_rate": 0.00019999755084266054, - "loss": 46.0, - "step": 13844 - }, - { - "epoch": 2.2296388743508193, - "grad_norm": 0.0006454632384702563, - "learning_rate": 0.0001999975504885712, - "loss": 46.0, - "step": 13845 - }, - { - "epoch": 2.2297999114296068, - "grad_norm": 0.0009064690093509853, - "learning_rate": 0.0001999975501344563, - "loss": 46.0, - "step": 13846 - }, - { - "epoch": 2.229960948508394, - "grad_norm": 0.0014630145160481334, - "learning_rate": 0.00019999754978031577, - "loss": 46.0, - "step": 13847 - }, - { - "epoch": 2.230121985587181, - "grad_norm": 0.003800722537562251, - "learning_rate": 0.00019999754942614967, - "loss": 46.0, - "step": 13848 - }, - { - "epoch": 2.2302830226659687, - "grad_norm": 0.001235337695106864, - "learning_rate": 0.00019999754907195795, - "loss": 46.0, - "step": 13849 - }, - { - "epoch": 2.230444059744756, - "grad_norm": 0.0037756022065877914, - "learning_rate": 0.00019999754871774067, - "loss": 46.0, - "step": 13850 - }, - { - "epoch": 2.2306050968235436, - "grad_norm": 0.002481248462572694, - "learning_rate": 0.00019999754836349777, - "loss": 46.0, - "step": 13851 - }, - { - "epoch": 2.230766133902331, - "grad_norm": 0.004169543739408255, - "learning_rate": 0.00019999754800922927, - "loss": 46.0, - "step": 13852 - }, - { - "epoch": 2.2309271709811185, - "grad_norm": 0.0013243324356153607, - "learning_rate": 0.00019999754765493523, - "loss": 46.0, - "step": 13853 - }, - { - "epoch": 2.231088208059906, - "grad_norm": 0.0020127661991864443, - "learning_rate": 0.00019999754730061552, - "loss": 46.0, - "step": 13854 - }, - { - "epoch": 2.2312492451386934, - "grad_norm": 0.0019206737633794546, - "learning_rate": 0.00019999754694627027, - "loss": 46.0, - "step": 13855 - }, - { - "epoch": 2.2314102822174804, - "grad_norm": 0.0027539818547666073, - "learning_rate": 0.00019999754659189942, - "loss": 46.0, - "step": 13856 - }, - { - "epoch": 2.231571319296268, - "grad_norm": 0.010542550124228, - "learning_rate": 0.00019999754623750295, - "loss": 46.0, - "step": 13857 - }, - { - "epoch": 2.2317323563750553, - "grad_norm": 0.0016768169589340687, - "learning_rate": 0.00019999754588308091, - "loss": 46.0, - "step": 13858 - }, - { - "epoch": 2.2318933934538427, - "grad_norm": 0.0043511539697647095, - "learning_rate": 0.00019999754552863327, - "loss": 46.0, - "step": 13859 - }, - { - "epoch": 2.23205443053263, - "grad_norm": 0.000767124816775322, - "learning_rate": 0.00019999754517416004, - "loss": 46.0, - "step": 13860 - }, - { - "epoch": 2.2322154676114176, - "grad_norm": 0.006702875252813101, - "learning_rate": 0.0001999975448196612, - "loss": 46.0, - "step": 13861 - }, - { - "epoch": 2.232376504690205, - "grad_norm": 0.0034737042151391506, - "learning_rate": 0.00019999754446513676, - "loss": 46.0, - "step": 13862 - }, - { - "epoch": 2.232537541768992, - "grad_norm": 0.001093236729502678, - "learning_rate": 0.00019999754411058676, - "loss": 46.0, - "step": 13863 - }, - { - "epoch": 2.2326985788477796, - "grad_norm": 0.008258485235273838, - "learning_rate": 0.00019999754375601115, - "loss": 46.0, - "step": 13864 - }, - { - "epoch": 2.232859615926567, - "grad_norm": 0.00031749665504321456, - "learning_rate": 0.0001999975434014099, - "loss": 46.0, - "step": 13865 - }, - { - "epoch": 2.2330206530053545, - "grad_norm": 0.0024656273890286684, - "learning_rate": 0.00019999754304678312, - "loss": 46.0, - "step": 13866 - }, - { - "epoch": 2.233181690084142, - "grad_norm": 0.0011689583770930767, - "learning_rate": 0.00019999754269213073, - "loss": 46.0, - "step": 13867 - }, - { - "epoch": 2.2333427271629294, - "grad_norm": 0.005402153357863426, - "learning_rate": 0.00019999754233745271, - "loss": 46.0, - "step": 13868 - }, - { - "epoch": 2.233503764241717, - "grad_norm": 0.0008777216426096857, - "learning_rate": 0.00019999754198274914, - "loss": 46.0, - "step": 13869 - }, - { - "epoch": 2.2336648013205043, - "grad_norm": 0.0011804127134382725, - "learning_rate": 0.00019999754162801996, - "loss": 46.0, - "step": 13870 - }, - { - "epoch": 2.2338258383992913, - "grad_norm": 0.0028473285492509604, - "learning_rate": 0.00019999754127326518, - "loss": 46.0, - "step": 13871 - }, - { - "epoch": 2.2339868754780787, - "grad_norm": 0.0008439497323706746, - "learning_rate": 0.0001999975409184848, - "loss": 46.0, - "step": 13872 - }, - { - "epoch": 2.234147912556866, - "grad_norm": 0.002870025811716914, - "learning_rate": 0.00019999754056367882, - "loss": 46.0, - "step": 13873 - }, - { - "epoch": 2.2343089496356536, - "grad_norm": 0.00207132613286376, - "learning_rate": 0.0001999975402088473, - "loss": 46.0, - "step": 13874 - }, - { - "epoch": 2.234469986714441, - "grad_norm": 0.0046815224923193455, - "learning_rate": 0.00019999753985399011, - "loss": 46.0, - "step": 13875 - }, - { - "epoch": 2.2346310237932285, - "grad_norm": 0.002843681490048766, - "learning_rate": 0.00019999753949910738, - "loss": 46.0, - "step": 13876 - }, - { - "epoch": 2.234792060872016, - "grad_norm": 0.001463265623897314, - "learning_rate": 0.00019999753914419906, - "loss": 46.0, - "step": 13877 - }, - { - "epoch": 2.234953097950803, - "grad_norm": 0.008958492428064346, - "learning_rate": 0.0001999975387892651, - "loss": 46.0, - "step": 13878 - }, - { - "epoch": 2.2351141350295904, - "grad_norm": 0.00046426264452748, - "learning_rate": 0.00019999753843430557, - "loss": 46.0, - "step": 13879 - }, - { - "epoch": 2.235275172108378, - "grad_norm": 0.0009298831573687494, - "learning_rate": 0.00019999753807932046, - "loss": 46.0, - "step": 13880 - }, - { - "epoch": 2.2354362091871653, - "grad_norm": 0.0017463818658143282, - "learning_rate": 0.00019999753772430973, - "loss": 46.0, - "step": 13881 - }, - { - "epoch": 2.235597246265953, - "grad_norm": 0.0019432863919064403, - "learning_rate": 0.00019999753736927342, - "loss": 46.0, - "step": 13882 - }, - { - "epoch": 2.2357582833447402, - "grad_norm": 0.0020308897364884615, - "learning_rate": 0.00019999753701421154, - "loss": 46.0, - "step": 13883 - }, - { - "epoch": 2.2359193204235277, - "grad_norm": 0.008548577316105366, - "learning_rate": 0.00019999753665912403, - "loss": 46.0, - "step": 13884 - }, - { - "epoch": 2.2360803575023147, - "grad_norm": 0.0007212921627797186, - "learning_rate": 0.00019999753630401093, - "loss": 46.0, - "step": 13885 - }, - { - "epoch": 2.236241394581102, - "grad_norm": 0.0028578033670783043, - "learning_rate": 0.00019999753594887224, - "loss": 46.0, - "step": 13886 - }, - { - "epoch": 2.2364024316598896, - "grad_norm": 0.004118448123335838, - "learning_rate": 0.00019999753559370796, - "loss": 46.0, - "step": 13887 - }, - { - "epoch": 2.236563468738677, - "grad_norm": 0.0014290903927758336, - "learning_rate": 0.00019999753523851807, - "loss": 46.0, - "step": 13888 - }, - { - "epoch": 2.2367245058174645, - "grad_norm": 0.005826373118907213, - "learning_rate": 0.00019999753488330262, - "loss": 46.0, - "step": 13889 - }, - { - "epoch": 2.236885542896252, - "grad_norm": 0.004704639315605164, - "learning_rate": 0.00019999753452806153, - "loss": 46.0, - "step": 13890 - }, - { - "epoch": 2.2370465799750394, - "grad_norm": 0.0041178148239851, - "learning_rate": 0.00019999753417279488, - "loss": 46.0, - "step": 13891 - }, - { - "epoch": 2.2372076170538264, - "grad_norm": 0.008068711496889591, - "learning_rate": 0.00019999753381750264, - "loss": 46.0, - "step": 13892 - }, - { - "epoch": 2.237368654132614, - "grad_norm": 0.001038464717566967, - "learning_rate": 0.00019999753346218476, - "loss": 46.0, - "step": 13893 - }, - { - "epoch": 2.2375296912114013, - "grad_norm": 0.0023721521720290184, - "learning_rate": 0.00019999753310684132, - "loss": 46.0, - "step": 13894 - }, - { - "epoch": 2.237690728290189, - "grad_norm": 0.004016389138996601, - "learning_rate": 0.0001999975327514723, - "loss": 46.0, - "step": 13895 - }, - { - "epoch": 2.2378517653689762, - "grad_norm": 0.0006583682843483984, - "learning_rate": 0.00019999753239607765, - "loss": 46.0, - "step": 13896 - }, - { - "epoch": 2.2380128024477637, - "grad_norm": 0.001595151494257152, - "learning_rate": 0.00019999753204065742, - "loss": 46.0, - "step": 13897 - }, - { - "epoch": 2.238173839526551, - "grad_norm": 0.0033382533583790064, - "learning_rate": 0.00019999753168521163, - "loss": 46.0, - "step": 13898 - }, - { - "epoch": 2.2383348766053386, - "grad_norm": 0.003177135717123747, - "learning_rate": 0.0001999975313297402, - "loss": 46.0, - "step": 13899 - }, - { - "epoch": 2.2384959136841256, - "grad_norm": 0.003992767538875341, - "learning_rate": 0.00019999753097424321, - "loss": 46.0, - "step": 13900 - }, - { - "epoch": 2.238656950762913, - "grad_norm": 0.0021767315920442343, - "learning_rate": 0.0001999975306187206, - "loss": 46.0, - "step": 13901 - }, - { - "epoch": 2.2388179878417005, - "grad_norm": 0.00391074363142252, - "learning_rate": 0.00019999753026317242, - "loss": 46.0, - "step": 13902 - }, - { - "epoch": 2.238979024920488, - "grad_norm": 0.004004878457635641, - "learning_rate": 0.0001999975299075986, - "loss": 46.0, - "step": 13903 - }, - { - "epoch": 2.2391400619992754, - "grad_norm": 0.002291835378855467, - "learning_rate": 0.00019999752955199922, - "loss": 46.0, - "step": 13904 - }, - { - "epoch": 2.239301099078063, - "grad_norm": 0.0015953409019857645, - "learning_rate": 0.00019999752919637427, - "loss": 46.0, - "step": 13905 - }, - { - "epoch": 2.2394621361568503, - "grad_norm": 0.004095470067113638, - "learning_rate": 0.0001999975288407237, - "loss": 46.0, - "step": 13906 - }, - { - "epoch": 2.2396231732356373, - "grad_norm": 0.0016459259204566479, - "learning_rate": 0.00019999752848504752, - "loss": 46.0, - "step": 13907 - }, - { - "epoch": 2.2397842103144248, - "grad_norm": 0.0006697041681036353, - "learning_rate": 0.00019999752812934575, - "loss": 46.0, - "step": 13908 - }, - { - "epoch": 2.239945247393212, - "grad_norm": 0.0038877055048942566, - "learning_rate": 0.0001999975277736184, - "loss": 46.0, - "step": 13909 - }, - { - "epoch": 2.2401062844719997, - "grad_norm": 0.0023673148825764656, - "learning_rate": 0.00019999752741786545, - "loss": 46.0, - "step": 13910 - }, - { - "epoch": 2.240267321550787, - "grad_norm": 0.0030799394007772207, - "learning_rate": 0.0001999975270620869, - "loss": 46.0, - "step": 13911 - }, - { - "epoch": 2.2404283586295746, - "grad_norm": 0.00277404161170125, - "learning_rate": 0.00019999752670628274, - "loss": 46.0, - "step": 13912 - }, - { - "epoch": 2.240589395708362, - "grad_norm": 0.008009926415979862, - "learning_rate": 0.00019999752635045304, - "loss": 46.0, - "step": 13913 - }, - { - "epoch": 2.2407504327871495, - "grad_norm": 0.004241722635924816, - "learning_rate": 0.0001999975259945977, - "loss": 46.0, - "step": 13914 - }, - { - "epoch": 2.2409114698659365, - "grad_norm": 0.001049460144713521, - "learning_rate": 0.00019999752563871676, - "loss": 46.0, - "step": 13915 - }, - { - "epoch": 2.241072506944724, - "grad_norm": 0.0017421930097043514, - "learning_rate": 0.00019999752528281027, - "loss": 46.0, - "step": 13916 - }, - { - "epoch": 2.2412335440235114, - "grad_norm": 0.0014313283609226346, - "learning_rate": 0.00019999752492687816, - "loss": 46.0, - "step": 13917 - }, - { - "epoch": 2.241394581102299, - "grad_norm": 0.0008132740622386336, - "learning_rate": 0.00019999752457092044, - "loss": 46.0, - "step": 13918 - }, - { - "epoch": 2.2415556181810863, - "grad_norm": 0.007433802355080843, - "learning_rate": 0.00019999752421493713, - "loss": 46.0, - "step": 13919 - }, - { - "epoch": 2.2417166552598737, - "grad_norm": 0.0022407250944525003, - "learning_rate": 0.00019999752385892826, - "loss": 46.0, - "step": 13920 - }, - { - "epoch": 2.2418776923386607, - "grad_norm": 0.004558785352855921, - "learning_rate": 0.00019999752350289377, - "loss": 46.0, - "step": 13921 - }, - { - "epoch": 2.242038729417448, - "grad_norm": 0.001668662647716701, - "learning_rate": 0.00019999752314683368, - "loss": 46.0, - "step": 13922 - }, - { - "epoch": 2.2421997664962356, - "grad_norm": 0.0007237204699777067, - "learning_rate": 0.00019999752279074802, - "loss": 46.0, - "step": 13923 - }, - { - "epoch": 2.242360803575023, - "grad_norm": 0.00036969332722947, - "learning_rate": 0.00019999752243463675, - "loss": 46.0, - "step": 13924 - }, - { - "epoch": 2.2425218406538105, - "grad_norm": 0.0026159719564020634, - "learning_rate": 0.0001999975220784999, - "loss": 46.0, - "step": 13925 - }, - { - "epoch": 2.242682877732598, - "grad_norm": 0.008634045720100403, - "learning_rate": 0.00019999752172233741, - "loss": 46.0, - "step": 13926 - }, - { - "epoch": 2.2428439148113855, - "grad_norm": 0.007141964975744486, - "learning_rate": 0.00019999752136614935, - "loss": 46.0, - "step": 13927 - }, - { - "epoch": 2.243004951890173, - "grad_norm": 0.00138210563454777, - "learning_rate": 0.0001999975210099357, - "loss": 46.0, - "step": 13928 - }, - { - "epoch": 2.24316598896896, - "grad_norm": 0.0020694248378276825, - "learning_rate": 0.00019999752065369647, - "loss": 46.0, - "step": 13929 - }, - { - "epoch": 2.2433270260477474, - "grad_norm": 0.005013769958168268, - "learning_rate": 0.00019999752029743167, - "loss": 46.0, - "step": 13930 - }, - { - "epoch": 2.243488063126535, - "grad_norm": 0.006472241133451462, - "learning_rate": 0.0001999975199411412, - "loss": 46.0, - "step": 13931 - }, - { - "epoch": 2.2436491002053223, - "grad_norm": 0.0019514893647283316, - "learning_rate": 0.0001999975195848252, - "loss": 46.0, - "step": 13932 - }, - { - "epoch": 2.2438101372841097, - "grad_norm": 0.0032446354161947966, - "learning_rate": 0.00019999751922848358, - "loss": 46.0, - "step": 13933 - }, - { - "epoch": 2.243971174362897, - "grad_norm": 0.0012381155975162983, - "learning_rate": 0.00019999751887211638, - "loss": 46.0, - "step": 13934 - }, - { - "epoch": 2.2441322114416846, - "grad_norm": 0.0012549014063552022, - "learning_rate": 0.00019999751851572354, - "loss": 46.0, - "step": 13935 - }, - { - "epoch": 2.2442932485204716, - "grad_norm": 0.0014573488151654601, - "learning_rate": 0.00019999751815930514, - "loss": 46.0, - "step": 13936 - }, - { - "epoch": 2.244454285599259, - "grad_norm": 0.005086123943328857, - "learning_rate": 0.00019999751780286115, - "loss": 46.0, - "step": 13937 - }, - { - "epoch": 2.2446153226780465, - "grad_norm": 0.0057265725918114185, - "learning_rate": 0.00019999751744639158, - "loss": 46.0, - "step": 13938 - }, - { - "epoch": 2.244776359756834, - "grad_norm": 0.002571030519902706, - "learning_rate": 0.0001999975170898964, - "loss": 46.0, - "step": 13939 - }, - { - "epoch": 2.2449373968356214, - "grad_norm": 0.00338509283028543, - "learning_rate": 0.00019999751673337561, - "loss": 46.0, - "step": 13940 - }, - { - "epoch": 2.245098433914409, - "grad_norm": 0.0008936020894907415, - "learning_rate": 0.00019999751637682925, - "loss": 46.0, - "step": 13941 - }, - { - "epoch": 2.2452594709931963, - "grad_norm": 0.002808912890031934, - "learning_rate": 0.00019999751602025728, - "loss": 46.0, - "step": 13942 - }, - { - "epoch": 2.245420508071984, - "grad_norm": 0.00510428985580802, - "learning_rate": 0.0001999975156636597, - "loss": 46.0, - "step": 13943 - }, - { - "epoch": 2.245581545150771, - "grad_norm": 0.00565666938200593, - "learning_rate": 0.00019999751530703656, - "loss": 46.0, - "step": 13944 - }, - { - "epoch": 2.2457425822295582, - "grad_norm": 0.0033489130437374115, - "learning_rate": 0.00019999751495038782, - "loss": 46.0, - "step": 13945 - }, - { - "epoch": 2.2459036193083457, - "grad_norm": 0.0011091098422184587, - "learning_rate": 0.00019999751459371344, - "loss": 46.0, - "step": 13946 - }, - { - "epoch": 2.246064656387133, - "grad_norm": 0.0026009439025074244, - "learning_rate": 0.00019999751423701353, - "loss": 46.0, - "step": 13947 - }, - { - "epoch": 2.2462256934659206, - "grad_norm": 0.005592728033661842, - "learning_rate": 0.000199997513880288, - "loss": 46.0, - "step": 13948 - }, - { - "epoch": 2.246386730544708, - "grad_norm": 0.0036806659772992134, - "learning_rate": 0.0001999975135235369, - "loss": 46.0, - "step": 13949 - }, - { - "epoch": 2.2465477676234955, - "grad_norm": 0.0007729671779088676, - "learning_rate": 0.00019999751316676013, - "loss": 46.0, - "step": 13950 - }, - { - "epoch": 2.2467088047022825, - "grad_norm": 0.010362662374973297, - "learning_rate": 0.00019999751280995784, - "loss": 46.0, - "step": 13951 - }, - { - "epoch": 2.24686984178107, - "grad_norm": 0.0007634959765709937, - "learning_rate": 0.00019999751245312991, - "loss": 46.0, - "step": 13952 - }, - { - "epoch": 2.2470308788598574, - "grad_norm": 0.0015676041366532445, - "learning_rate": 0.00019999751209627645, - "loss": 46.0, - "step": 13953 - }, - { - "epoch": 2.247191915938645, - "grad_norm": 0.007791996467858553, - "learning_rate": 0.00019999751173939735, - "loss": 46.0, - "step": 13954 - }, - { - "epoch": 2.2473529530174323, - "grad_norm": 0.003171280026435852, - "learning_rate": 0.00019999751138249266, - "loss": 46.0, - "step": 13955 - }, - { - "epoch": 2.2475139900962198, - "grad_norm": 0.0008149652276188135, - "learning_rate": 0.00019999751102556235, - "loss": 46.0, - "step": 13956 - }, - { - "epoch": 2.2476750271750072, - "grad_norm": 0.0016248066676780581, - "learning_rate": 0.00019999751066860648, - "loss": 46.0, - "step": 13957 - }, - { - "epoch": 2.2478360642537942, - "grad_norm": 0.0009147682576440275, - "learning_rate": 0.000199997510311625, - "loss": 46.0, - "step": 13958 - }, - { - "epoch": 2.2479971013325817, - "grad_norm": 0.004151404835283756, - "learning_rate": 0.00019999750995461794, - "loss": 46.0, - "step": 13959 - }, - { - "epoch": 2.248158138411369, - "grad_norm": 0.0007496776524931192, - "learning_rate": 0.00019999750959758528, - "loss": 46.0, - "step": 13960 - }, - { - "epoch": 2.2483191754901566, - "grad_norm": 0.002037890488281846, - "learning_rate": 0.00019999750924052701, - "loss": 46.0, - "step": 13961 - }, - { - "epoch": 2.248480212568944, - "grad_norm": 0.0019364447798579931, - "learning_rate": 0.00019999750888344316, - "loss": 46.0, - "step": 13962 - }, - { - "epoch": 2.2486412496477315, - "grad_norm": 0.005926021374762058, - "learning_rate": 0.00019999750852633372, - "loss": 46.0, - "step": 13963 - }, - { - "epoch": 2.248802286726519, - "grad_norm": 0.0032404763624072075, - "learning_rate": 0.00019999750816919869, - "loss": 46.0, - "step": 13964 - }, - { - "epoch": 2.248963323805306, - "grad_norm": 0.009493254125118256, - "learning_rate": 0.00019999750781203804, - "loss": 46.0, - "step": 13965 - }, - { - "epoch": 2.2491243608840934, - "grad_norm": 0.004816398024559021, - "learning_rate": 0.0001999975074548518, - "loss": 46.0, - "step": 13966 - }, - { - "epoch": 2.249285397962881, - "grad_norm": 0.001781512750312686, - "learning_rate": 0.00019999750709764, - "loss": 46.0, - "step": 13967 - }, - { - "epoch": 2.2494464350416683, - "grad_norm": 0.005384115967899561, - "learning_rate": 0.00019999750674040259, - "loss": 46.0, - "step": 13968 - }, - { - "epoch": 2.2496074721204558, - "grad_norm": 0.0015986048383638263, - "learning_rate": 0.0001999975063831396, - "loss": 46.0, - "step": 13969 - }, - { - "epoch": 2.249768509199243, - "grad_norm": 0.0026110601611435413, - "learning_rate": 0.00019999750602585096, - "loss": 46.0, - "step": 13970 - }, - { - "epoch": 2.2499295462780307, - "grad_norm": 0.01227173488587141, - "learning_rate": 0.00019999750566853676, - "loss": 46.0, - "step": 13971 - }, - { - "epoch": 2.250090583356818, - "grad_norm": 0.0045133428648114204, - "learning_rate": 0.00019999750531119695, - "loss": 46.0, - "step": 13972 - }, - { - "epoch": 2.250251620435605, - "grad_norm": 0.0007369336090050638, - "learning_rate": 0.00019999750495383159, - "loss": 46.0, - "step": 13973 - }, - { - "epoch": 2.2504126575143926, - "grad_norm": 0.0014093368081375957, - "learning_rate": 0.00019999750459644063, - "loss": 46.0, - "step": 13974 - }, - { - "epoch": 2.25057369459318, - "grad_norm": 0.0006778715178370476, - "learning_rate": 0.00019999750423902403, - "loss": 46.0, - "step": 13975 - }, - { - "epoch": 2.2507347316719675, - "grad_norm": 0.003913197200745344, - "learning_rate": 0.00019999750388158188, - "loss": 46.0, - "step": 13976 - }, - { - "epoch": 2.250895768750755, - "grad_norm": 0.0008797182817943394, - "learning_rate": 0.0001999975035241141, - "loss": 46.0, - "step": 13977 - }, - { - "epoch": 2.2510568058295424, - "grad_norm": 0.0010605764109641314, - "learning_rate": 0.00019999750316662077, - "loss": 46.0, - "step": 13978 - }, - { - "epoch": 2.25121784290833, - "grad_norm": 0.0032169094774872065, - "learning_rate": 0.0001999975028091018, - "loss": 46.0, - "step": 13979 - }, - { - "epoch": 2.251378879987117, - "grad_norm": 0.0009241517982445657, - "learning_rate": 0.00019999750245155724, - "loss": 46.0, - "step": 13980 - }, - { - "epoch": 2.2515399170659043, - "grad_norm": 0.0006561768823303282, - "learning_rate": 0.0001999975020939871, - "loss": 46.0, - "step": 13981 - }, - { - "epoch": 2.2517009541446917, - "grad_norm": 0.0007065307581797242, - "learning_rate": 0.00019999750173639136, - "loss": 46.0, - "step": 13982 - }, - { - "epoch": 2.251861991223479, - "grad_norm": 0.0031118488404899836, - "learning_rate": 0.00019999750137877003, - "loss": 46.0, - "step": 13983 - }, - { - "epoch": 2.2520230283022666, - "grad_norm": 0.0019888062961399555, - "learning_rate": 0.00019999750102112312, - "loss": 46.0, - "step": 13984 - }, - { - "epoch": 2.252184065381054, - "grad_norm": 0.0024370860774070024, - "learning_rate": 0.0001999975006634506, - "loss": 46.0, - "step": 13985 - }, - { - "epoch": 2.2523451024598415, - "grad_norm": 0.008042311295866966, - "learning_rate": 0.0001999975003057525, - "loss": 46.0, - "step": 13986 - }, - { - "epoch": 2.252506139538629, - "grad_norm": 0.006295335944741964, - "learning_rate": 0.0001999974999480288, - "loss": 46.0, - "step": 13987 - }, - { - "epoch": 2.252667176617416, - "grad_norm": 0.0051359799690544605, - "learning_rate": 0.00019999749959027948, - "loss": 46.0, - "step": 13988 - }, - { - "epoch": 2.2528282136962035, - "grad_norm": 0.007540623191744089, - "learning_rate": 0.00019999749923250458, - "loss": 46.0, - "step": 13989 - }, - { - "epoch": 2.252989250774991, - "grad_norm": 0.004492628388106823, - "learning_rate": 0.0001999974988747041, - "loss": 46.0, - "step": 13990 - }, - { - "epoch": 2.2531502878537784, - "grad_norm": 0.0016589396400377154, - "learning_rate": 0.00019999749851687802, - "loss": 46.0, - "step": 13991 - }, - { - "epoch": 2.253311324932566, - "grad_norm": 0.004892733413726091, - "learning_rate": 0.00019999749815902633, - "loss": 46.0, - "step": 13992 - }, - { - "epoch": 2.2534723620113533, - "grad_norm": 0.005860128439962864, - "learning_rate": 0.00019999749780114906, - "loss": 46.0, - "step": 13993 - }, - { - "epoch": 2.2536333990901403, - "grad_norm": 0.003079865127801895, - "learning_rate": 0.0001999974974432462, - "loss": 46.0, - "step": 13994 - }, - { - "epoch": 2.2537944361689277, - "grad_norm": 0.007694889325648546, - "learning_rate": 0.00019999749708531775, - "loss": 46.0, - "step": 13995 - }, - { - "epoch": 2.253955473247715, - "grad_norm": 0.005706564988940954, - "learning_rate": 0.00019999749672736368, - "loss": 46.0, - "step": 13996 - }, - { - "epoch": 2.2541165103265026, - "grad_norm": 0.0013106977567076683, - "learning_rate": 0.00019999749636938403, - "loss": 46.0, - "step": 13997 - }, - { - "epoch": 2.25427754740529, - "grad_norm": 0.005500475410372019, - "learning_rate": 0.0001999974960113788, - "loss": 46.0, - "step": 13998 - }, - { - "epoch": 2.2544385844840775, - "grad_norm": 0.0030034456867724657, - "learning_rate": 0.00019999749565334794, - "loss": 46.0, - "step": 13999 - }, - { - "epoch": 2.254599621562865, - "grad_norm": 0.0043139080516994, - "learning_rate": 0.00019999749529529153, - "loss": 46.0, - "step": 14000 - }, - { - "epoch": 2.2547606586416524, - "grad_norm": 0.004762556403875351, - "learning_rate": 0.0001999974949372095, - "loss": 46.0, - "step": 14001 - }, - { - "epoch": 2.25492169572044, - "grad_norm": 0.002331068739295006, - "learning_rate": 0.0001999974945791019, - "loss": 46.0, - "step": 14002 - }, - { - "epoch": 2.255082732799227, - "grad_norm": 0.0008562738075852394, - "learning_rate": 0.00019999749422096866, - "loss": 46.0, - "step": 14003 - }, - { - "epoch": 2.2552437698780143, - "grad_norm": 0.003933022264391184, - "learning_rate": 0.00019999749386280985, - "loss": 46.0, - "step": 14004 - }, - { - "epoch": 2.255404806956802, - "grad_norm": 0.0024339770898222923, - "learning_rate": 0.00019999749350462547, - "loss": 46.0, - "step": 14005 - }, - { - "epoch": 2.2555658440355892, - "grad_norm": 0.003784996923059225, - "learning_rate": 0.00019999749314641548, - "loss": 46.0, - "step": 14006 - }, - { - "epoch": 2.2557268811143767, - "grad_norm": 0.001622296986170113, - "learning_rate": 0.00019999749278817985, - "loss": 46.0, - "step": 14007 - }, - { - "epoch": 2.255887918193164, - "grad_norm": 0.010320202447474003, - "learning_rate": 0.00019999749242991866, - "loss": 46.0, - "step": 14008 - }, - { - "epoch": 2.256048955271951, - "grad_norm": 0.0008709838148206472, - "learning_rate": 0.00019999749207163188, - "loss": 46.0, - "step": 14009 - }, - { - "epoch": 2.2562099923507386, - "grad_norm": 0.0019778672140091658, - "learning_rate": 0.00019999749171331952, - "loss": 46.0, - "step": 14010 - }, - { - "epoch": 2.256371029429526, - "grad_norm": 0.004650791175663471, - "learning_rate": 0.00019999749135498157, - "loss": 46.0, - "step": 14011 - }, - { - "epoch": 2.2565320665083135, - "grad_norm": 0.0013059972552582622, - "learning_rate": 0.000199997490996618, - "loss": 46.0, - "step": 14012 - }, - { - "epoch": 2.256693103587101, - "grad_norm": 0.0027204507496207952, - "learning_rate": 0.00019999749063822885, - "loss": 46.0, - "step": 14013 - }, - { - "epoch": 2.2568541406658884, - "grad_norm": 0.004203621298074722, - "learning_rate": 0.0001999974902798141, - "loss": 46.0, - "step": 14014 - }, - { - "epoch": 2.257015177744676, - "grad_norm": 0.005804077722132206, - "learning_rate": 0.00019999748992137375, - "loss": 46.0, - "step": 14015 - }, - { - "epoch": 2.2571762148234633, - "grad_norm": 0.0010115739423781633, - "learning_rate": 0.0001999974895629078, - "loss": 46.0, - "step": 14016 - }, - { - "epoch": 2.2573372519022503, - "grad_norm": 0.0033738361671566963, - "learning_rate": 0.00019999748920441628, - "loss": 46.0, - "step": 14017 - }, - { - "epoch": 2.2574982889810378, - "grad_norm": 0.0015907844062894583, - "learning_rate": 0.00019999748884589916, - "loss": 46.0, - "step": 14018 - }, - { - "epoch": 2.2576593260598252, - "grad_norm": 0.0015422507422044873, - "learning_rate": 0.00019999748848735643, - "loss": 46.0, - "step": 14019 - }, - { - "epoch": 2.2578203631386127, - "grad_norm": 0.0022961099166423082, - "learning_rate": 0.0001999974881287881, - "loss": 46.0, - "step": 14020 - }, - { - "epoch": 2.2579814002174, - "grad_norm": 0.0034369323402643204, - "learning_rate": 0.0001999974877701942, - "loss": 46.0, - "step": 14021 - }, - { - "epoch": 2.2581424372961876, - "grad_norm": 0.0065235658548772335, - "learning_rate": 0.0001999974874115747, - "loss": 46.0, - "step": 14022 - }, - { - "epoch": 2.258303474374975, - "grad_norm": 0.0041592237539589405, - "learning_rate": 0.0001999974870529296, - "loss": 46.0, - "step": 14023 - }, - { - "epoch": 2.258464511453762, - "grad_norm": 0.002702559344470501, - "learning_rate": 0.0001999974866942589, - "loss": 46.0, - "step": 14024 - }, - { - "epoch": 2.2586255485325495, - "grad_norm": 0.003903142875060439, - "learning_rate": 0.0001999974863355626, - "loss": 46.0, - "step": 14025 - }, - { - "epoch": 2.258786585611337, - "grad_norm": 0.0040299296379089355, - "learning_rate": 0.00019999748597684074, - "loss": 46.0, - "step": 14026 - }, - { - "epoch": 2.2589476226901244, - "grad_norm": 0.0017029212322086096, - "learning_rate": 0.00019999748561809325, - "loss": 46.0, - "step": 14027 - }, - { - "epoch": 2.259108659768912, - "grad_norm": 0.00410170154646039, - "learning_rate": 0.00019999748525932018, - "loss": 46.0, - "step": 14028 - }, - { - "epoch": 2.2592696968476993, - "grad_norm": 0.013370919041335583, - "learning_rate": 0.00019999748490052153, - "loss": 46.0, - "step": 14029 - }, - { - "epoch": 2.2594307339264867, - "grad_norm": 0.0016519188648089767, - "learning_rate": 0.00019999748454169726, - "loss": 46.0, - "step": 14030 - }, - { - "epoch": 2.259591771005274, - "grad_norm": 0.005586672108620405, - "learning_rate": 0.0001999974841828474, - "loss": 46.0, - "step": 14031 - }, - { - "epoch": 2.259752808084061, - "grad_norm": 0.0018956048879772425, - "learning_rate": 0.00019999748382397198, - "loss": 46.0, - "step": 14032 - }, - { - "epoch": 2.2599138451628487, - "grad_norm": 0.0038738581351935863, - "learning_rate": 0.00019999748346507095, - "loss": 46.0, - "step": 14033 - }, - { - "epoch": 2.260074882241636, - "grad_norm": 0.001372222090139985, - "learning_rate": 0.00019999748310614427, - "loss": 46.0, - "step": 14034 - }, - { - "epoch": 2.2602359193204236, - "grad_norm": 0.00560833141207695, - "learning_rate": 0.0001999974827471921, - "loss": 46.0, - "step": 14035 - }, - { - "epoch": 2.260396956399211, - "grad_norm": 0.001876492751762271, - "learning_rate": 0.00019999748238821424, - "loss": 46.0, - "step": 14036 - }, - { - "epoch": 2.2605579934779985, - "grad_norm": 0.004113512579351664, - "learning_rate": 0.00019999748202921083, - "loss": 46.0, - "step": 14037 - }, - { - "epoch": 2.2607190305567855, - "grad_norm": 0.002034259494394064, - "learning_rate": 0.0001999974816701818, - "loss": 46.0, - "step": 14038 - }, - { - "epoch": 2.260880067635573, - "grad_norm": 0.0014255945570766926, - "learning_rate": 0.0001999974813111272, - "loss": 46.0, - "step": 14039 - }, - { - "epoch": 2.2610411047143604, - "grad_norm": 0.008380401879549026, - "learning_rate": 0.00019999748095204703, - "loss": 46.0, - "step": 14040 - }, - { - "epoch": 2.261202141793148, - "grad_norm": 0.001174133736640215, - "learning_rate": 0.00019999748059294122, - "loss": 46.0, - "step": 14041 - }, - { - "epoch": 2.2613631788719353, - "grad_norm": 0.0016927948454394937, - "learning_rate": 0.00019999748023380982, - "loss": 46.0, - "step": 14042 - }, - { - "epoch": 2.2615242159507227, - "grad_norm": 0.008112607523798943, - "learning_rate": 0.00019999747987465286, - "loss": 46.0, - "step": 14043 - }, - { - "epoch": 2.26168525302951, - "grad_norm": 0.0027933986857533455, - "learning_rate": 0.00019999747951547026, - "loss": 46.0, - "step": 14044 - }, - { - "epoch": 2.2618462901082976, - "grad_norm": 0.005972488783299923, - "learning_rate": 0.00019999747915626207, - "loss": 46.0, - "step": 14045 - }, - { - "epoch": 2.2620073271870846, - "grad_norm": 0.0016154962358996272, - "learning_rate": 0.00019999747879702833, - "loss": 46.0, - "step": 14046 - }, - { - "epoch": 2.262168364265872, - "grad_norm": 0.000661564408801496, - "learning_rate": 0.00019999747843776897, - "loss": 46.0, - "step": 14047 - }, - { - "epoch": 2.2623294013446595, - "grad_norm": 0.0013685725862160325, - "learning_rate": 0.00019999747807848402, - "loss": 46.0, - "step": 14048 - }, - { - "epoch": 2.262490438423447, - "grad_norm": 0.0008987082983367145, - "learning_rate": 0.00019999747771917345, - "loss": 46.0, - "step": 14049 - }, - { - "epoch": 2.2626514755022344, - "grad_norm": 0.002367316512390971, - "learning_rate": 0.00019999747735983733, - "loss": 46.0, - "step": 14050 - }, - { - "epoch": 2.262812512581022, - "grad_norm": 0.0014323643408715725, - "learning_rate": 0.0001999974770004756, - "loss": 46.0, - "step": 14051 - }, - { - "epoch": 2.2629735496598093, - "grad_norm": 0.0011208319338038564, - "learning_rate": 0.00019999747664108824, - "loss": 46.0, - "step": 14052 - }, - { - "epoch": 2.2631345867385964, - "grad_norm": 0.0026527922600507736, - "learning_rate": 0.00019999747628167533, - "loss": 46.0, - "step": 14053 - }, - { - "epoch": 2.263295623817384, - "grad_norm": 0.0012756938813254237, - "learning_rate": 0.0001999974759222368, - "loss": 46.0, - "step": 14054 - }, - { - "epoch": 2.2634566608961713, - "grad_norm": 0.002167481230571866, - "learning_rate": 0.00019999747556277272, - "loss": 46.0, - "step": 14055 - }, - { - "epoch": 2.2636176979749587, - "grad_norm": 0.0022501309867948294, - "learning_rate": 0.000199997475203283, - "loss": 46.0, - "step": 14056 - }, - { - "epoch": 2.263778735053746, - "grad_norm": 0.0005933298380114138, - "learning_rate": 0.00019999747484376768, - "loss": 46.0, - "step": 14057 - }, - { - "epoch": 2.2639397721325336, - "grad_norm": 0.0018701039953157306, - "learning_rate": 0.0001999974744842268, - "loss": 46.0, - "step": 14058 - }, - { - "epoch": 2.264100809211321, - "grad_norm": 0.0044218674302101135, - "learning_rate": 0.0001999974741246603, - "loss": 46.0, - "step": 14059 - }, - { - "epoch": 2.2642618462901085, - "grad_norm": 0.009935093112289906, - "learning_rate": 0.0001999974737650682, - "loss": 46.0, - "step": 14060 - }, - { - "epoch": 2.2644228833688955, - "grad_norm": 0.004034010693430901, - "learning_rate": 0.00019999747340545052, - "loss": 46.0, - "step": 14061 - }, - { - "epoch": 2.264583920447683, - "grad_norm": 0.003645299468189478, - "learning_rate": 0.00019999747304580727, - "loss": 46.0, - "step": 14062 - }, - { - "epoch": 2.2647449575264704, - "grad_norm": 0.0011655972339212894, - "learning_rate": 0.0001999974726861384, - "loss": 46.0, - "step": 14063 - }, - { - "epoch": 2.264905994605258, - "grad_norm": 0.0012588875833898783, - "learning_rate": 0.00019999747232644395, - "loss": 46.0, - "step": 14064 - }, - { - "epoch": 2.2650670316840453, - "grad_norm": 0.002597309648990631, - "learning_rate": 0.00019999747196672386, - "loss": 46.0, - "step": 14065 - }, - { - "epoch": 2.265228068762833, - "grad_norm": 0.0021933913230895996, - "learning_rate": 0.0001999974716069782, - "loss": 46.0, - "step": 14066 - }, - { - "epoch": 2.26538910584162, - "grad_norm": 0.007438479457050562, - "learning_rate": 0.000199997471247207, - "loss": 46.0, - "step": 14067 - }, - { - "epoch": 2.2655501429204072, - "grad_norm": 0.004280315712094307, - "learning_rate": 0.00019999747088741014, - "loss": 46.0, - "step": 14068 - }, - { - "epoch": 2.2657111799991947, - "grad_norm": 0.0029569321777671576, - "learning_rate": 0.0001999974705275877, - "loss": 46.0, - "step": 14069 - }, - { - "epoch": 2.265872217077982, - "grad_norm": 0.006991604343056679, - "learning_rate": 0.0001999974701677397, - "loss": 46.0, - "step": 14070 - }, - { - "epoch": 2.2660332541567696, - "grad_norm": 0.0019413722911849618, - "learning_rate": 0.00019999746980786608, - "loss": 46.0, - "step": 14071 - }, - { - "epoch": 2.266194291235557, - "grad_norm": 0.0034805142786353827, - "learning_rate": 0.00019999746944796684, - "loss": 46.0, - "step": 14072 - }, - { - "epoch": 2.2663553283143445, - "grad_norm": 0.0010933381272479892, - "learning_rate": 0.00019999746908804205, - "loss": 46.0, - "step": 14073 - }, - { - "epoch": 2.266516365393132, - "grad_norm": 0.006724651902914047, - "learning_rate": 0.00019999746872809162, - "loss": 46.0, - "step": 14074 - }, - { - "epoch": 2.2666774024719194, - "grad_norm": 0.0058417245745658875, - "learning_rate": 0.00019999746836811563, - "loss": 46.0, - "step": 14075 - }, - { - "epoch": 2.2668384395507064, - "grad_norm": 0.001392334932461381, - "learning_rate": 0.00019999746800811402, - "loss": 46.0, - "step": 14076 - }, - { - "epoch": 2.266999476629494, - "grad_norm": 0.0026276155840605497, - "learning_rate": 0.00019999746764808686, - "loss": 46.0, - "step": 14077 - }, - { - "epoch": 2.2671605137082813, - "grad_norm": 0.0057683903723955154, - "learning_rate": 0.00019999746728803408, - "loss": 46.0, - "step": 14078 - }, - { - "epoch": 2.2673215507870688, - "grad_norm": 0.0021580192260444164, - "learning_rate": 0.0001999974669279557, - "loss": 46.0, - "step": 14079 - }, - { - "epoch": 2.267482587865856, - "grad_norm": 0.005027227569371462, - "learning_rate": 0.00019999746656785173, - "loss": 46.0, - "step": 14080 - }, - { - "epoch": 2.2676436249446437, - "grad_norm": 0.004958228208124638, - "learning_rate": 0.00019999746620772218, - "loss": 46.0, - "step": 14081 - }, - { - "epoch": 2.2678046620234307, - "grad_norm": 0.0020200826693326235, - "learning_rate": 0.000199997465847567, - "loss": 46.0, - "step": 14082 - }, - { - "epoch": 2.267965699102218, - "grad_norm": 0.004566613584756851, - "learning_rate": 0.00019999746548738628, - "loss": 46.0, - "step": 14083 - }, - { - "epoch": 2.2681267361810056, - "grad_norm": 0.003017387818545103, - "learning_rate": 0.00019999746512717993, - "loss": 46.0, - "step": 14084 - }, - { - "epoch": 2.268287773259793, - "grad_norm": 0.0005770005518570542, - "learning_rate": 0.00019999746476694798, - "loss": 46.0, - "step": 14085 - }, - { - "epoch": 2.2684488103385805, - "grad_norm": 0.009023269638419151, - "learning_rate": 0.00019999746440669042, - "loss": 46.0, - "step": 14086 - }, - { - "epoch": 2.268609847417368, - "grad_norm": 0.0013379602460190654, - "learning_rate": 0.0001999974640464073, - "loss": 46.0, - "step": 14087 - }, - { - "epoch": 2.2687708844961554, - "grad_norm": 0.001787109998986125, - "learning_rate": 0.00019999746368609857, - "loss": 46.0, - "step": 14088 - }, - { - "epoch": 2.268931921574943, - "grad_norm": 0.0006844739546068013, - "learning_rate": 0.00019999746332576427, - "loss": 46.0, - "step": 14089 - }, - { - "epoch": 2.26909295865373, - "grad_norm": 0.0012267547426745296, - "learning_rate": 0.00019999746296540437, - "loss": 46.0, - "step": 14090 - }, - { - "epoch": 2.2692539957325173, - "grad_norm": 0.0031221327371895313, - "learning_rate": 0.00019999746260501887, - "loss": 46.0, - "step": 14091 - }, - { - "epoch": 2.2694150328113047, - "grad_norm": 0.0006667615962214768, - "learning_rate": 0.00019999746224460773, - "loss": 46.0, - "step": 14092 - }, - { - "epoch": 2.269576069890092, - "grad_norm": 0.0006900188745930791, - "learning_rate": 0.00019999746188417106, - "loss": 46.0, - "step": 14093 - }, - { - "epoch": 2.2697371069688796, - "grad_norm": 0.00814956147223711, - "learning_rate": 0.00019999746152370878, - "loss": 46.0, - "step": 14094 - }, - { - "epoch": 2.269898144047667, - "grad_norm": 0.0011044967686757445, - "learning_rate": 0.00019999746116322088, - "loss": 46.0, - "step": 14095 - }, - { - "epoch": 2.2700591811264546, - "grad_norm": 0.0026550323236733675, - "learning_rate": 0.0001999974608027074, - "loss": 46.0, - "step": 14096 - }, - { - "epoch": 2.2702202182052416, - "grad_norm": 0.0033718652557581663, - "learning_rate": 0.00019999746044216835, - "loss": 46.0, - "step": 14097 - }, - { - "epoch": 2.270381255284029, - "grad_norm": 0.008355478756129742, - "learning_rate": 0.00019999746008160366, - "loss": 46.0, - "step": 14098 - }, - { - "epoch": 2.2705422923628165, - "grad_norm": 0.005124479066580534, - "learning_rate": 0.00019999745972101342, - "loss": 46.0, - "step": 14099 - }, - { - "epoch": 2.270703329441604, - "grad_norm": 0.0027128334622830153, - "learning_rate": 0.00019999745936039758, - "loss": 46.0, - "step": 14100 - }, - { - "epoch": 2.2708643665203914, - "grad_norm": 0.001338453497737646, - "learning_rate": 0.0001999974589997561, - "loss": 46.0, - "step": 14101 - }, - { - "epoch": 2.271025403599179, - "grad_norm": 0.0014710178365930915, - "learning_rate": 0.00019999745863908907, - "loss": 46.0, - "step": 14102 - }, - { - "epoch": 2.2711864406779663, - "grad_norm": 0.0020965919829905033, - "learning_rate": 0.00019999745827839642, - "loss": 46.0, - "step": 14103 - }, - { - "epoch": 2.2713474777567537, - "grad_norm": 0.00815815944224596, - "learning_rate": 0.0001999974579176782, - "loss": 46.0, - "step": 14104 - }, - { - "epoch": 2.2715085148355407, - "grad_norm": 0.001573242712765932, - "learning_rate": 0.0001999974575569344, - "loss": 46.0, - "step": 14105 - }, - { - "epoch": 2.271669551914328, - "grad_norm": 0.002166392281651497, - "learning_rate": 0.00019999745719616495, - "loss": 46.0, - "step": 14106 - }, - { - "epoch": 2.2718305889931156, - "grad_norm": 0.004180111922323704, - "learning_rate": 0.00019999745683536995, - "loss": 46.0, - "step": 14107 - }, - { - "epoch": 2.271991626071903, - "grad_norm": 0.0008303820504806936, - "learning_rate": 0.00019999745647454934, - "loss": 46.0, - "step": 14108 - }, - { - "epoch": 2.2721526631506905, - "grad_norm": 0.00083970429841429, - "learning_rate": 0.00019999745611370317, - "loss": 46.0, - "step": 14109 - }, - { - "epoch": 2.272313700229478, - "grad_norm": 0.0021275547333061695, - "learning_rate": 0.00019999745575283132, - "loss": 46.0, - "step": 14110 - }, - { - "epoch": 2.272474737308265, - "grad_norm": 0.0011850871378555894, - "learning_rate": 0.00019999745539193395, - "loss": 46.0, - "step": 14111 - }, - { - "epoch": 2.2726357743870524, - "grad_norm": 0.0021773711778223515, - "learning_rate": 0.00019999745503101096, - "loss": 46.0, - "step": 14112 - }, - { - "epoch": 2.27279681146584, - "grad_norm": 0.005824453197419643, - "learning_rate": 0.00019999745467006239, - "loss": 46.0, - "step": 14113 - }, - { - "epoch": 2.2729578485446273, - "grad_norm": 0.001174977165646851, - "learning_rate": 0.0001999974543090882, - "loss": 46.0, - "step": 14114 - }, - { - "epoch": 2.273118885623415, - "grad_norm": 0.0017567890463396907, - "learning_rate": 0.00019999745394808845, - "loss": 46.0, - "step": 14115 - }, - { - "epoch": 2.2732799227022022, - "grad_norm": 0.002168092178180814, - "learning_rate": 0.0001999974535870631, - "loss": 46.0, - "step": 14116 - }, - { - "epoch": 2.2734409597809897, - "grad_norm": 0.001511600916273892, - "learning_rate": 0.00019999745322601213, - "loss": 46.0, - "step": 14117 - }, - { - "epoch": 2.273601996859777, - "grad_norm": 0.005391054321080446, - "learning_rate": 0.0001999974528649356, - "loss": 46.0, - "step": 14118 - }, - { - "epoch": 2.2737630339385646, - "grad_norm": 0.0030814053025096655, - "learning_rate": 0.00019999745250383344, - "loss": 46.0, - "step": 14119 - }, - { - "epoch": 2.2739240710173516, - "grad_norm": 0.008579681627452374, - "learning_rate": 0.0001999974521427057, - "loss": 46.0, - "step": 14120 - }, - { - "epoch": 2.274085108096139, - "grad_norm": 0.0012616177555173635, - "learning_rate": 0.00019999745178155237, - "loss": 46.0, - "step": 14121 - }, - { - "epoch": 2.2742461451749265, - "grad_norm": 0.0008829036960378289, - "learning_rate": 0.00019999745142037343, - "loss": 46.0, - "step": 14122 - }, - { - "epoch": 2.274407182253714, - "grad_norm": 0.010570581071078777, - "learning_rate": 0.00019999745105916893, - "loss": 46.0, - "step": 14123 - }, - { - "epoch": 2.2745682193325014, - "grad_norm": 0.0018389852484688163, - "learning_rate": 0.0001999974506979388, - "loss": 46.0, - "step": 14124 - }, - { - "epoch": 2.274729256411289, - "grad_norm": 0.0007157388608902693, - "learning_rate": 0.00019999745033668308, - "loss": 46.0, - "step": 14125 - }, - { - "epoch": 2.274890293490076, - "grad_norm": 0.0015591455157846212, - "learning_rate": 0.0001999974499754018, - "loss": 46.0, - "step": 14126 - }, - { - "epoch": 2.2750513305688633, - "grad_norm": 0.0007260916172526777, - "learning_rate": 0.00019999744961409491, - "loss": 46.0, - "step": 14127 - }, - { - "epoch": 2.275212367647651, - "grad_norm": 0.0007884514052420855, - "learning_rate": 0.0001999974492527624, - "loss": 46.0, - "step": 14128 - }, - { - "epoch": 2.2753734047264382, - "grad_norm": 0.0014991557691246271, - "learning_rate": 0.00019999744889140432, - "loss": 46.0, - "step": 14129 - }, - { - "epoch": 2.2755344418052257, - "grad_norm": 0.0013217049418017268, - "learning_rate": 0.00019999744853002062, - "loss": 46.0, - "step": 14130 - }, - { - "epoch": 2.275695478884013, - "grad_norm": 0.0012469340581446886, - "learning_rate": 0.00019999744816861137, - "loss": 46.0, - "step": 14131 - }, - { - "epoch": 2.2758565159628006, - "grad_norm": 0.00056753761600703, - "learning_rate": 0.00019999744780717648, - "loss": 46.0, - "step": 14132 - }, - { - "epoch": 2.276017553041588, - "grad_norm": 0.0014820970827713609, - "learning_rate": 0.00019999744744571602, - "loss": 46.0, - "step": 14133 - }, - { - "epoch": 2.276178590120375, - "grad_norm": 0.0015191090060397983, - "learning_rate": 0.00019999744708422998, - "loss": 46.0, - "step": 14134 - }, - { - "epoch": 2.2763396271991625, - "grad_norm": 0.004156601149588823, - "learning_rate": 0.00019999744672271832, - "loss": 46.0, - "step": 14135 - }, - { - "epoch": 2.27650066427795, - "grad_norm": 0.0034528549294918776, - "learning_rate": 0.00019999744636118108, - "loss": 46.0, - "step": 14136 - }, - { - "epoch": 2.2766617013567374, - "grad_norm": 0.004388425964862108, - "learning_rate": 0.00019999744599961825, - "loss": 46.0, - "step": 14137 - }, - { - "epoch": 2.276822738435525, - "grad_norm": 0.008133472874760628, - "learning_rate": 0.0001999974456380298, - "loss": 46.0, - "step": 14138 - }, - { - "epoch": 2.2769837755143123, - "grad_norm": 0.010087854228913784, - "learning_rate": 0.00019999744527641577, - "loss": 46.0, - "step": 14139 - }, - { - "epoch": 2.2771448125930998, - "grad_norm": 0.003923302982002497, - "learning_rate": 0.00019999744491477615, - "loss": 46.0, - "step": 14140 - }, - { - "epoch": 2.2773058496718868, - "grad_norm": 0.0033097814302891493, - "learning_rate": 0.00019999744455311095, - "loss": 46.0, - "step": 14141 - }, - { - "epoch": 2.277466886750674, - "grad_norm": 0.006234543398022652, - "learning_rate": 0.00019999744419142013, - "loss": 46.0, - "step": 14142 - }, - { - "epoch": 2.2776279238294617, - "grad_norm": 0.0005176379927434027, - "learning_rate": 0.00019999744382970375, - "loss": 46.0, - "step": 14143 - }, - { - "epoch": 2.277788960908249, - "grad_norm": 0.0010656907688826323, - "learning_rate": 0.00019999744346796172, - "loss": 46.0, - "step": 14144 - }, - { - "epoch": 2.2779499979870366, - "grad_norm": 0.00569300027564168, - "learning_rate": 0.00019999744310619411, - "loss": 46.0, - "step": 14145 - }, - { - "epoch": 2.278111035065824, - "grad_norm": 0.0027295351028442383, - "learning_rate": 0.00019999744274440095, - "loss": 46.0, - "step": 14146 - }, - { - "epoch": 2.2782720721446115, - "grad_norm": 0.0013617512304335833, - "learning_rate": 0.00019999744238258216, - "loss": 46.0, - "step": 14147 - }, - { - "epoch": 2.278433109223399, - "grad_norm": 0.0008217511349357665, - "learning_rate": 0.0001999974420207378, - "loss": 46.0, - "step": 14148 - }, - { - "epoch": 2.278594146302186, - "grad_norm": 0.005439859349280596, - "learning_rate": 0.00019999744165886783, - "loss": 46.0, - "step": 14149 - }, - { - "epoch": 2.2787551833809734, - "grad_norm": 0.0035284413024783134, - "learning_rate": 0.00019999744129697223, - "loss": 46.0, - "step": 14150 - }, - { - "epoch": 2.278916220459761, - "grad_norm": 0.0019542109221220016, - "learning_rate": 0.00019999744093505107, - "loss": 46.0, - "step": 14151 - }, - { - "epoch": 2.2790772575385483, - "grad_norm": 0.0008412288152612746, - "learning_rate": 0.00019999744057310436, - "loss": 46.0, - "step": 14152 - }, - { - "epoch": 2.2792382946173357, - "grad_norm": 0.0017408864805474877, - "learning_rate": 0.00019999744021113197, - "loss": 46.0, - "step": 14153 - }, - { - "epoch": 2.279399331696123, - "grad_norm": 0.007856925949454308, - "learning_rate": 0.00019999743984913405, - "loss": 46.0, - "step": 14154 - }, - { - "epoch": 2.27956036877491, - "grad_norm": 0.002513745566830039, - "learning_rate": 0.0001999974394871105, - "loss": 46.0, - "step": 14155 - }, - { - "epoch": 2.2797214058536976, - "grad_norm": 0.0028763373848050833, - "learning_rate": 0.0001999974391250614, - "loss": 46.0, - "step": 14156 - }, - { - "epoch": 2.279882442932485, - "grad_norm": 0.0010043465299531817, - "learning_rate": 0.00019999743876298665, - "loss": 46.0, - "step": 14157 - }, - { - "epoch": 2.2800434800112725, - "grad_norm": 0.0013878229074180126, - "learning_rate": 0.00019999743840088633, - "loss": 46.0, - "step": 14158 - }, - { - "epoch": 2.28020451709006, - "grad_norm": 0.0012308438308537006, - "learning_rate": 0.00019999743803876042, - "loss": 46.0, - "step": 14159 - }, - { - "epoch": 2.2803655541688475, - "grad_norm": 0.0009700900409370661, - "learning_rate": 0.00019999743767660892, - "loss": 46.0, - "step": 14160 - }, - { - "epoch": 2.280526591247635, - "grad_norm": 0.007130511105060577, - "learning_rate": 0.0001999974373144318, - "loss": 46.0, - "step": 14161 - }, - { - "epoch": 2.2806876283264224, - "grad_norm": 0.0023319628089666367, - "learning_rate": 0.00019999743695222914, - "loss": 46.0, - "step": 14162 - }, - { - "epoch": 2.2808486654052094, - "grad_norm": 0.0016403687186539173, - "learning_rate": 0.00019999743659000085, - "loss": 46.0, - "step": 14163 - }, - { - "epoch": 2.281009702483997, - "grad_norm": 0.001171128824353218, - "learning_rate": 0.00019999743622774695, - "loss": 46.0, - "step": 14164 - }, - { - "epoch": 2.2811707395627843, - "grad_norm": 0.002000638982281089, - "learning_rate": 0.0001999974358654675, - "loss": 46.0, - "step": 14165 - }, - { - "epoch": 2.2813317766415717, - "grad_norm": 0.0017989149782806635, - "learning_rate": 0.0001999974355031624, - "loss": 46.0, - "step": 14166 - }, - { - "epoch": 2.281492813720359, - "grad_norm": 0.003598693758249283, - "learning_rate": 0.00019999743514083175, - "loss": 46.0, - "step": 14167 - }, - { - "epoch": 2.2816538507991466, - "grad_norm": 0.004784971009939909, - "learning_rate": 0.00019999743477847547, - "loss": 46.0, - "step": 14168 - }, - { - "epoch": 2.281814887877934, - "grad_norm": 0.0012154612923040986, - "learning_rate": 0.0001999974344160936, - "loss": 46.0, - "step": 14169 - }, - { - "epoch": 2.281975924956721, - "grad_norm": 0.005106545519083738, - "learning_rate": 0.00019999743405368619, - "loss": 46.0, - "step": 14170 - }, - { - "epoch": 2.2821369620355085, - "grad_norm": 0.0068846410140395164, - "learning_rate": 0.00019999743369125312, - "loss": 46.0, - "step": 14171 - }, - { - "epoch": 2.282297999114296, - "grad_norm": 0.001367128104902804, - "learning_rate": 0.0001999974333287945, - "loss": 46.0, - "step": 14172 - }, - { - "epoch": 2.2824590361930834, - "grad_norm": 0.0030000554397702217, - "learning_rate": 0.00019999743296631025, - "loss": 46.0, - "step": 14173 - }, - { - "epoch": 2.282620073271871, - "grad_norm": 0.0023538365494459867, - "learning_rate": 0.00019999743260380043, - "loss": 46.0, - "step": 14174 - }, - { - "epoch": 2.2827811103506583, - "grad_norm": 0.0004020679334644228, - "learning_rate": 0.00019999743224126504, - "loss": 46.0, - "step": 14175 - }, - { - "epoch": 2.282942147429446, - "grad_norm": 0.0010587330907583237, - "learning_rate": 0.000199997431878704, - "loss": 46.0, - "step": 14176 - }, - { - "epoch": 2.2831031845082332, - "grad_norm": 0.0009267361019738019, - "learning_rate": 0.0001999974315161174, - "loss": 46.0, - "step": 14177 - }, - { - "epoch": 2.2832642215870202, - "grad_norm": 0.002004109090194106, - "learning_rate": 0.0001999974311535052, - "loss": 46.0, - "step": 14178 - }, - { - "epoch": 2.2834252586658077, - "grad_norm": 0.007367661688476801, - "learning_rate": 0.0001999974307908674, - "loss": 46.0, - "step": 14179 - }, - { - "epoch": 2.283586295744595, - "grad_norm": 0.0013515958562493324, - "learning_rate": 0.000199997430428204, - "loss": 46.0, - "step": 14180 - }, - { - "epoch": 2.2837473328233826, - "grad_norm": 0.0038866482209414244, - "learning_rate": 0.000199997430065515, - "loss": 46.0, - "step": 14181 - }, - { - "epoch": 2.28390836990217, - "grad_norm": 0.002496507717296481, - "learning_rate": 0.00019999742970280043, - "loss": 46.0, - "step": 14182 - }, - { - "epoch": 2.2840694069809575, - "grad_norm": 0.006878688465803862, - "learning_rate": 0.00019999742934006027, - "loss": 46.0, - "step": 14183 - }, - { - "epoch": 2.2842304440597445, - "grad_norm": 0.003757967846468091, - "learning_rate": 0.0001999974289772945, - "loss": 46.0, - "step": 14184 - }, - { - "epoch": 2.284391481138532, - "grad_norm": 0.001205588225275278, - "learning_rate": 0.00019999742861450315, - "loss": 46.0, - "step": 14185 - }, - { - "epoch": 2.2845525182173194, - "grad_norm": 0.0037523917853832245, - "learning_rate": 0.0001999974282516862, - "loss": 46.0, - "step": 14186 - }, - { - "epoch": 2.284713555296107, - "grad_norm": 0.008161641657352448, - "learning_rate": 0.00019999742788884362, - "loss": 46.0, - "step": 14187 - }, - { - "epoch": 2.2848745923748943, - "grad_norm": 0.003397412830963731, - "learning_rate": 0.0001999974275259755, - "loss": 46.0, - "step": 14188 - }, - { - "epoch": 2.2850356294536818, - "grad_norm": 0.006318398751318455, - "learning_rate": 0.00019999742716308175, - "loss": 46.0, - "step": 14189 - }, - { - "epoch": 2.2851966665324692, - "grad_norm": 0.0005041666445322335, - "learning_rate": 0.00019999742680016242, - "loss": 46.0, - "step": 14190 - }, - { - "epoch": 2.2853577036112567, - "grad_norm": 0.0012668590061366558, - "learning_rate": 0.00019999742643721748, - "loss": 46.0, - "step": 14191 - }, - { - "epoch": 2.285518740690044, - "grad_norm": 0.0006019673892296851, - "learning_rate": 0.00019999742607424697, - "loss": 46.0, - "step": 14192 - }, - { - "epoch": 2.285679777768831, - "grad_norm": 0.006719973403960466, - "learning_rate": 0.00019999742571125085, - "loss": 46.0, - "step": 14193 - }, - { - "epoch": 2.2858408148476186, - "grad_norm": 0.00344136287458241, - "learning_rate": 0.00019999742534822915, - "loss": 46.0, - "step": 14194 - }, - { - "epoch": 2.286001851926406, - "grad_norm": 0.0031660243403166533, - "learning_rate": 0.00019999742498518183, - "loss": 46.0, - "step": 14195 - }, - { - "epoch": 2.2861628890051935, - "grad_norm": 0.005834201816469431, - "learning_rate": 0.00019999742462210892, - "loss": 46.0, - "step": 14196 - }, - { - "epoch": 2.286323926083981, - "grad_norm": 0.0006354416836984456, - "learning_rate": 0.00019999742425901045, - "loss": 46.0, - "step": 14197 - }, - { - "epoch": 2.2864849631627684, - "grad_norm": 0.003480158746242523, - "learning_rate": 0.00019999742389588637, - "loss": 46.0, - "step": 14198 - }, - { - "epoch": 2.2866460002415554, - "grad_norm": 0.003493017051368952, - "learning_rate": 0.0001999974235327367, - "loss": 46.0, - "step": 14199 - }, - { - "epoch": 2.286807037320343, - "grad_norm": 0.0016648083692416549, - "learning_rate": 0.00019999742316956142, - "loss": 46.0, - "step": 14200 - }, - { - "epoch": 2.2869680743991303, - "grad_norm": 0.0014448920264840126, - "learning_rate": 0.00019999742280636052, - "loss": 46.0, - "step": 14201 - }, - { - "epoch": 2.2871291114779178, - "grad_norm": 0.0028257097583264112, - "learning_rate": 0.00019999742244313407, - "loss": 46.0, - "step": 14202 - }, - { - "epoch": 2.287290148556705, - "grad_norm": 0.0020331856794655323, - "learning_rate": 0.00019999742207988202, - "loss": 46.0, - "step": 14203 - }, - { - "epoch": 2.2874511856354927, - "grad_norm": 0.003894432447850704, - "learning_rate": 0.0001999974217166044, - "loss": 46.0, - "step": 14204 - }, - { - "epoch": 2.28761222271428, - "grad_norm": 0.004158115014433861, - "learning_rate": 0.00019999742135330112, - "loss": 46.0, - "step": 14205 - }, - { - "epoch": 2.2877732597930676, - "grad_norm": 0.005285014398396015, - "learning_rate": 0.00019999742098997228, - "loss": 46.0, - "step": 14206 - }, - { - "epoch": 2.2879342968718546, - "grad_norm": 0.002789412159472704, - "learning_rate": 0.00019999742062661784, - "loss": 46.0, - "step": 14207 - }, - { - "epoch": 2.288095333950642, - "grad_norm": 0.0015902383020147681, - "learning_rate": 0.00019999742026323783, - "loss": 46.0, - "step": 14208 - }, - { - "epoch": 2.2882563710294295, - "grad_norm": 0.0014236696297302842, - "learning_rate": 0.00019999741989983218, - "loss": 46.0, - "step": 14209 - }, - { - "epoch": 2.288417408108217, - "grad_norm": 0.003085595788434148, - "learning_rate": 0.000199997419536401, - "loss": 46.0, - "step": 14210 - }, - { - "epoch": 2.2885784451870044, - "grad_norm": 0.0011553569929674268, - "learning_rate": 0.00019999741917294417, - "loss": 46.0, - "step": 14211 - }, - { - "epoch": 2.288739482265792, - "grad_norm": 0.002870664931833744, - "learning_rate": 0.00019999741880946176, - "loss": 46.0, - "step": 14212 - }, - { - "epoch": 2.2889005193445793, - "grad_norm": 0.0032424384262412786, - "learning_rate": 0.00019999741844595377, - "loss": 46.0, - "step": 14213 - }, - { - "epoch": 2.2890615564233663, - "grad_norm": 0.0015928858192637563, - "learning_rate": 0.00019999741808242015, - "loss": 46.0, - "step": 14214 - }, - { - "epoch": 2.2892225935021537, - "grad_norm": 0.0037057893350720406, - "learning_rate": 0.00019999741771886098, - "loss": 46.0, - "step": 14215 - }, - { - "epoch": 2.289383630580941, - "grad_norm": 0.0011992177460342646, - "learning_rate": 0.0001999974173552762, - "loss": 46.0, - "step": 14216 - }, - { - "epoch": 2.2895446676597286, - "grad_norm": 0.0025299210101366043, - "learning_rate": 0.0001999974169916658, - "loss": 46.0, - "step": 14217 - }, - { - "epoch": 2.289705704738516, - "grad_norm": 0.0021669615525752306, - "learning_rate": 0.00019999741662802984, - "loss": 46.0, - "step": 14218 - }, - { - "epoch": 2.2898667418173035, - "grad_norm": 0.0013629866298288107, - "learning_rate": 0.0001999974162643683, - "loss": 46.0, - "step": 14219 - }, - { - "epoch": 2.290027778896091, - "grad_norm": 0.000873226614203304, - "learning_rate": 0.00019999741590068113, - "loss": 46.0, - "step": 14220 - }, - { - "epoch": 2.2901888159748784, - "grad_norm": 0.012677308171987534, - "learning_rate": 0.00019999741553696838, - "loss": 46.0, - "step": 14221 - }, - { - "epoch": 2.2903498530536655, - "grad_norm": 0.0020746940281242132, - "learning_rate": 0.00019999741517323004, - "loss": 46.0, - "step": 14222 - }, - { - "epoch": 2.290510890132453, - "grad_norm": 0.0023188902996480465, - "learning_rate": 0.0001999974148094661, - "loss": 46.0, - "step": 14223 - }, - { - "epoch": 2.2906719272112404, - "grad_norm": 0.0024838189128786325, - "learning_rate": 0.00019999741444567655, - "loss": 46.0, - "step": 14224 - }, - { - "epoch": 2.290832964290028, - "grad_norm": 0.004235392436385155, - "learning_rate": 0.0001999974140818614, - "loss": 46.0, - "step": 14225 - }, - { - "epoch": 2.2909940013688153, - "grad_norm": 0.0020327134989202023, - "learning_rate": 0.0001999974137180207, - "loss": 46.0, - "step": 14226 - }, - { - "epoch": 2.2911550384476027, - "grad_norm": 0.004046470392495394, - "learning_rate": 0.00019999741335415436, - "loss": 46.0, - "step": 14227 - }, - { - "epoch": 2.2913160755263897, - "grad_norm": 0.000421986827859655, - "learning_rate": 0.00019999741299026247, - "loss": 46.0, - "step": 14228 - }, - { - "epoch": 2.291477112605177, - "grad_norm": 0.004184163175523281, - "learning_rate": 0.00019999741262634495, - "loss": 46.0, - "step": 14229 - }, - { - "epoch": 2.2916381496839646, - "grad_norm": 0.0027920235879719257, - "learning_rate": 0.00019999741226240186, - "loss": 46.0, - "step": 14230 - }, - { - "epoch": 2.291799186762752, - "grad_norm": 0.004182478412985802, - "learning_rate": 0.00019999741189843318, - "loss": 46.0, - "step": 14231 - }, - { - "epoch": 2.2919602238415395, - "grad_norm": 0.0017277334118261933, - "learning_rate": 0.00019999741153443887, - "loss": 46.0, - "step": 14232 - }, - { - "epoch": 2.292121260920327, - "grad_norm": 0.0011476981453597546, - "learning_rate": 0.000199997411170419, - "loss": 46.0, - "step": 14233 - }, - { - "epoch": 2.2922822979991144, - "grad_norm": 0.002320182742550969, - "learning_rate": 0.0001999974108063735, - "loss": 46.0, - "step": 14234 - }, - { - "epoch": 2.292443335077902, - "grad_norm": 0.006419925019145012, - "learning_rate": 0.00019999741044230245, - "loss": 46.0, - "step": 14235 - }, - { - "epoch": 2.292604372156689, - "grad_norm": 0.004862943664193153, - "learning_rate": 0.00019999741007820578, - "loss": 46.0, - "step": 14236 - }, - { - "epoch": 2.2927654092354763, - "grad_norm": 0.005383777432143688, - "learning_rate": 0.0001999974097140835, - "loss": 46.0, - "step": 14237 - }, - { - "epoch": 2.292926446314264, - "grad_norm": 0.0025240981485694647, - "learning_rate": 0.00019999740934993564, - "loss": 46.0, - "step": 14238 - }, - { - "epoch": 2.2930874833930512, - "grad_norm": 0.0017017071368172765, - "learning_rate": 0.0001999974089857622, - "loss": 46.0, - "step": 14239 - }, - { - "epoch": 2.2932485204718387, - "grad_norm": 0.01041108462959528, - "learning_rate": 0.00019999740862156314, - "loss": 46.0, - "step": 14240 - }, - { - "epoch": 2.293409557550626, - "grad_norm": 0.010626653209328651, - "learning_rate": 0.00019999740825733852, - "loss": 46.0, - "step": 14241 - }, - { - "epoch": 2.2935705946294136, - "grad_norm": 0.0021872264333069324, - "learning_rate": 0.00019999740789308827, - "loss": 46.0, - "step": 14242 - }, - { - "epoch": 2.2937316317082006, - "grad_norm": 0.0034235031343996525, - "learning_rate": 0.00019999740752881247, - "loss": 46.0, - "step": 14243 - }, - { - "epoch": 2.293892668786988, - "grad_norm": 0.0019473993452265859, - "learning_rate": 0.00019999740716451103, - "loss": 46.0, - "step": 14244 - }, - { - "epoch": 2.2940537058657755, - "grad_norm": 0.0050170328468084335, - "learning_rate": 0.00019999740680018402, - "loss": 46.0, - "step": 14245 - }, - { - "epoch": 2.294214742944563, - "grad_norm": 0.0022957122419029474, - "learning_rate": 0.00019999740643583143, - "loss": 46.0, - "step": 14246 - }, - { - "epoch": 2.2943757800233504, - "grad_norm": 0.0026920344680547714, - "learning_rate": 0.0001999974060714532, - "loss": 46.0, - "step": 14247 - }, - { - "epoch": 2.294536817102138, - "grad_norm": 0.0025211991742253304, - "learning_rate": 0.0001999974057070494, - "loss": 46.0, - "step": 14248 - }, - { - "epoch": 2.2946978541809253, - "grad_norm": 0.004697122145444155, - "learning_rate": 0.00019999740534262003, - "loss": 46.0, - "step": 14249 - }, - { - "epoch": 2.2948588912597128, - "grad_norm": 0.0004291724180802703, - "learning_rate": 0.00019999740497816503, - "loss": 46.0, - "step": 14250 - }, - { - "epoch": 2.2950199283384998, - "grad_norm": 0.0021702591329813004, - "learning_rate": 0.00019999740461368445, - "loss": 46.0, - "step": 14251 - }, - { - "epoch": 2.2951809654172872, - "grad_norm": 0.004615240730345249, - "learning_rate": 0.0001999974042491783, - "loss": 46.0, - "step": 14252 - }, - { - "epoch": 2.2953420024960747, - "grad_norm": 0.011581462807953358, - "learning_rate": 0.00019999740388464653, - "loss": 46.0, - "step": 14253 - }, - { - "epoch": 2.295503039574862, - "grad_norm": 0.0035150221083313227, - "learning_rate": 0.00019999740352008916, - "loss": 46.0, - "step": 14254 - }, - { - "epoch": 2.2956640766536496, - "grad_norm": 0.005080073140561581, - "learning_rate": 0.0001999974031555062, - "loss": 46.0, - "step": 14255 - }, - { - "epoch": 2.295825113732437, - "grad_norm": 0.001563195139169693, - "learning_rate": 0.00019999740279089766, - "loss": 46.0, - "step": 14256 - }, - { - "epoch": 2.295986150811224, - "grad_norm": 0.004271410871297121, - "learning_rate": 0.0001999974024262635, - "loss": 46.0, - "step": 14257 - }, - { - "epoch": 2.2961471878900115, - "grad_norm": 0.00366918439976871, - "learning_rate": 0.00019999740206160378, - "loss": 46.0, - "step": 14258 - }, - { - "epoch": 2.296308224968799, - "grad_norm": 0.0017850485164672136, - "learning_rate": 0.00019999740169691842, - "loss": 46.0, - "step": 14259 - }, - { - "epoch": 2.2964692620475864, - "grad_norm": 0.0018911348888650537, - "learning_rate": 0.0001999974013322075, - "loss": 46.0, - "step": 14260 - }, - { - "epoch": 2.296630299126374, - "grad_norm": 0.00279340916313231, - "learning_rate": 0.000199997400967471, - "loss": 46.0, - "step": 14261 - }, - { - "epoch": 2.2967913362051613, - "grad_norm": 0.006460262928158045, - "learning_rate": 0.00019999740060270887, - "loss": 46.0, - "step": 14262 - }, - { - "epoch": 2.2969523732839487, - "grad_norm": 0.0020022308453917503, - "learning_rate": 0.00019999740023792116, - "loss": 46.0, - "step": 14263 - }, - { - "epoch": 2.297113410362736, - "grad_norm": 0.0008887032745406032, - "learning_rate": 0.00019999739987310787, - "loss": 46.0, - "step": 14264 - }, - { - "epoch": 2.2972744474415236, - "grad_norm": 0.0008106484892778099, - "learning_rate": 0.00019999739950826896, - "loss": 46.0, - "step": 14265 - }, - { - "epoch": 2.2974354845203107, - "grad_norm": 0.0006976912845857441, - "learning_rate": 0.00019999739914340446, - "loss": 46.0, - "step": 14266 - }, - { - "epoch": 2.297596521599098, - "grad_norm": 0.0012451495276764035, - "learning_rate": 0.00019999739877851438, - "loss": 46.0, - "step": 14267 - }, - { - "epoch": 2.2977575586778856, - "grad_norm": 0.002070675604045391, - "learning_rate": 0.0001999973984135987, - "loss": 46.0, - "step": 14268 - }, - { - "epoch": 2.297918595756673, - "grad_norm": 0.000902310770470649, - "learning_rate": 0.00019999739804865742, - "loss": 46.0, - "step": 14269 - }, - { - "epoch": 2.2980796328354605, - "grad_norm": 0.001746731111779809, - "learning_rate": 0.00019999739768369055, - "loss": 46.0, - "step": 14270 - }, - { - "epoch": 2.298240669914248, - "grad_norm": 0.002341133076697588, - "learning_rate": 0.0001999973973186981, - "loss": 46.0, - "step": 14271 - }, - { - "epoch": 2.298401706993035, - "grad_norm": 0.008440453559160233, - "learning_rate": 0.00019999739695368004, - "loss": 46.0, - "step": 14272 - }, - { - "epoch": 2.2985627440718224, - "grad_norm": 0.0009725481504574418, - "learning_rate": 0.00019999739658863638, - "loss": 46.0, - "step": 14273 - }, - { - "epoch": 2.29872378115061, - "grad_norm": 0.002638152800500393, - "learning_rate": 0.00019999739622356716, - "loss": 46.0, - "step": 14274 - }, - { - "epoch": 2.2988848182293973, - "grad_norm": 0.0019563420210033655, - "learning_rate": 0.0001999973958584723, - "loss": 46.0, - "step": 14275 - }, - { - "epoch": 2.2990458553081847, - "grad_norm": 0.004565945826470852, - "learning_rate": 0.00019999739549335187, - "loss": 46.0, - "step": 14276 - }, - { - "epoch": 2.299206892386972, - "grad_norm": 0.0025682856794446707, - "learning_rate": 0.00019999739512820583, - "loss": 46.0, - "step": 14277 - }, - { - "epoch": 2.2993679294657596, - "grad_norm": 0.0042879958637058735, - "learning_rate": 0.0001999973947630342, - "loss": 46.0, - "step": 14278 - }, - { - "epoch": 2.299528966544547, - "grad_norm": 0.0011807095725089312, - "learning_rate": 0.000199997394397837, - "loss": 46.0, - "step": 14279 - }, - { - "epoch": 2.299690003623334, - "grad_norm": 0.0043216426856815815, - "learning_rate": 0.00019999739403261417, - "loss": 46.0, - "step": 14280 - }, - { - "epoch": 2.2998510407021215, - "grad_norm": 0.0039243851788342, - "learning_rate": 0.00019999739366736576, - "loss": 46.0, - "step": 14281 - }, - { - "epoch": 2.300012077780909, - "grad_norm": 0.01166997104883194, - "learning_rate": 0.00019999739330209178, - "loss": 46.0, - "step": 14282 - }, - { - "epoch": 2.3001731148596964, - "grad_norm": 0.0018461483996361494, - "learning_rate": 0.00019999739293679217, - "loss": 46.0, - "step": 14283 - }, - { - "epoch": 2.300334151938484, - "grad_norm": 0.0016292390646412969, - "learning_rate": 0.000199997392571467, - "loss": 46.0, - "step": 14284 - }, - { - "epoch": 2.3004951890172713, - "grad_norm": 0.00411981251090765, - "learning_rate": 0.0001999973922061162, - "loss": 46.0, - "step": 14285 - }, - { - "epoch": 2.300656226096059, - "grad_norm": 0.00183815136551857, - "learning_rate": 0.00019999739184073986, - "loss": 46.0, - "step": 14286 - }, - { - "epoch": 2.300817263174846, - "grad_norm": 0.0005163150490261614, - "learning_rate": 0.00019999739147533787, - "loss": 46.0, - "step": 14287 - }, - { - "epoch": 2.3009783002536333, - "grad_norm": 0.0014224780024960637, - "learning_rate": 0.0001999973911099103, - "loss": 46.0, - "step": 14288 - }, - { - "epoch": 2.3011393373324207, - "grad_norm": 0.002101413905620575, - "learning_rate": 0.00019999739074445713, - "loss": 46.0, - "step": 14289 - }, - { - "epoch": 2.301300374411208, - "grad_norm": 0.004940839949995279, - "learning_rate": 0.00019999739037897837, - "loss": 46.0, - "step": 14290 - }, - { - "epoch": 2.3014614114899956, - "grad_norm": 0.008614122867584229, - "learning_rate": 0.00019999739001347403, - "loss": 46.0, - "step": 14291 - }, - { - "epoch": 2.301622448568783, - "grad_norm": 0.0016401347238570452, - "learning_rate": 0.00019999738964794408, - "loss": 46.0, - "step": 14292 - }, - { - "epoch": 2.3017834856475705, - "grad_norm": 0.0012262946693226695, - "learning_rate": 0.00019999738928238857, - "loss": 46.0, - "step": 14293 - }, - { - "epoch": 2.301944522726358, - "grad_norm": 0.0008248728699982166, - "learning_rate": 0.0001999973889168074, - "loss": 46.0, - "step": 14294 - }, - { - "epoch": 2.302105559805145, - "grad_norm": 0.009803717024624348, - "learning_rate": 0.00019999738855120072, - "loss": 46.0, - "step": 14295 - }, - { - "epoch": 2.3022665968839324, - "grad_norm": 0.0015913811512291431, - "learning_rate": 0.0001999973881855684, - "loss": 46.0, - "step": 14296 - }, - { - "epoch": 2.30242763396272, - "grad_norm": 0.0033310051076114178, - "learning_rate": 0.00019999738781991045, - "loss": 46.0, - "step": 14297 - }, - { - "epoch": 2.3025886710415073, - "grad_norm": 0.0027968536596745253, - "learning_rate": 0.00019999738745422695, - "loss": 46.0, - "step": 14298 - }, - { - "epoch": 2.302749708120295, - "grad_norm": 0.0011340482160449028, - "learning_rate": 0.00019999738708851786, - "loss": 46.0, - "step": 14299 - }, - { - "epoch": 2.3029107451990822, - "grad_norm": 0.0032437255140393972, - "learning_rate": 0.00019999738672278315, - "loss": 46.0, - "step": 14300 - }, - { - "epoch": 2.3030717822778692, - "grad_norm": 0.001587683567777276, - "learning_rate": 0.0001999973863570229, - "loss": 46.0, - "step": 14301 - }, - { - "epoch": 2.3032328193566567, - "grad_norm": 0.003964809235185385, - "learning_rate": 0.000199997385991237, - "loss": 46.0, - "step": 14302 - }, - { - "epoch": 2.303393856435444, - "grad_norm": 0.0011232803808525205, - "learning_rate": 0.00019999738562542552, - "loss": 46.0, - "step": 14303 - }, - { - "epoch": 2.3035548935142316, - "grad_norm": 0.005081545561552048, - "learning_rate": 0.00019999738525958844, - "loss": 46.0, - "step": 14304 - }, - { - "epoch": 2.303715930593019, - "grad_norm": 0.003260744037106633, - "learning_rate": 0.00019999738489372577, - "loss": 46.0, - "step": 14305 - }, - { - "epoch": 2.3038769676718065, - "grad_norm": 0.004290606826543808, - "learning_rate": 0.0001999973845278375, - "loss": 46.0, - "step": 14306 - }, - { - "epoch": 2.304038004750594, - "grad_norm": 0.002330171875655651, - "learning_rate": 0.00019999738416192367, - "loss": 46.0, - "step": 14307 - }, - { - "epoch": 2.3041990418293814, - "grad_norm": 0.002846022602170706, - "learning_rate": 0.00019999738379598421, - "loss": 46.0, - "step": 14308 - }, - { - "epoch": 2.304360078908169, - "grad_norm": 0.002025551162660122, - "learning_rate": 0.00019999738343001917, - "loss": 46.0, - "step": 14309 - }, - { - "epoch": 2.304521115986956, - "grad_norm": 0.0012415667297318578, - "learning_rate": 0.00019999738306402854, - "loss": 46.0, - "step": 14310 - }, - { - "epoch": 2.3046821530657433, - "grad_norm": 0.0018311068415641785, - "learning_rate": 0.0001999973826980123, - "loss": 46.0, - "step": 14311 - }, - { - "epoch": 2.3048431901445308, - "grad_norm": 0.002649351954460144, - "learning_rate": 0.00019999738233197046, - "loss": 46.0, - "step": 14312 - }, - { - "epoch": 2.305004227223318, - "grad_norm": 0.0074116941541433334, - "learning_rate": 0.00019999738196590304, - "loss": 46.0, - "step": 14313 - }, - { - "epoch": 2.3051652643021057, - "grad_norm": 0.0034662620164453983, - "learning_rate": 0.00019999738159981006, - "loss": 46.0, - "step": 14314 - }, - { - "epoch": 2.305326301380893, - "grad_norm": 0.0014256008435040712, - "learning_rate": 0.00019999738123369141, - "loss": 46.0, - "step": 14315 - }, - { - "epoch": 2.30548733845968, - "grad_norm": 0.005318333860486746, - "learning_rate": 0.00019999738086754723, - "loss": 46.0, - "step": 14316 - }, - { - "epoch": 2.3056483755384676, - "grad_norm": 0.0006057993159629405, - "learning_rate": 0.0001999973805013774, - "loss": 46.0, - "step": 14317 - }, - { - "epoch": 2.305809412617255, - "grad_norm": 0.003133472753688693, - "learning_rate": 0.00019999738013518203, - "loss": 46.0, - "step": 14318 - }, - { - "epoch": 2.3059704496960425, - "grad_norm": 0.001851937035098672, - "learning_rate": 0.00019999737976896106, - "loss": 46.0, - "step": 14319 - }, - { - "epoch": 2.30613148677483, - "grad_norm": 0.003944995813071728, - "learning_rate": 0.00019999737940271448, - "loss": 46.0, - "step": 14320 - }, - { - "epoch": 2.3062925238536174, - "grad_norm": 0.0003884835459757596, - "learning_rate": 0.0001999973790364423, - "loss": 46.0, - "step": 14321 - }, - { - "epoch": 2.306453560932405, - "grad_norm": 0.004032814409583807, - "learning_rate": 0.00019999737867014452, - "loss": 46.0, - "step": 14322 - }, - { - "epoch": 2.3066145980111923, - "grad_norm": 0.0020193925593048334, - "learning_rate": 0.00019999737830382117, - "loss": 46.0, - "step": 14323 - }, - { - "epoch": 2.3067756350899793, - "grad_norm": 0.002641970757395029, - "learning_rate": 0.0001999973779374722, - "loss": 46.0, - "step": 14324 - }, - { - "epoch": 2.3069366721687667, - "grad_norm": 0.0020113217178732157, - "learning_rate": 0.00019999737757109767, - "loss": 46.0, - "step": 14325 - }, - { - "epoch": 2.307097709247554, - "grad_norm": 0.002422425663098693, - "learning_rate": 0.0001999973772046975, - "loss": 46.0, - "step": 14326 - }, - { - "epoch": 2.3072587463263416, - "grad_norm": 0.0014323890209197998, - "learning_rate": 0.00019999737683827179, - "loss": 46.0, - "step": 14327 - }, - { - "epoch": 2.307419783405129, - "grad_norm": 0.0009370298939757049, - "learning_rate": 0.00019999737647182045, - "loss": 46.0, - "step": 14328 - }, - { - "epoch": 2.3075808204839166, - "grad_norm": 0.006395894102752209, - "learning_rate": 0.00019999737610534353, - "loss": 46.0, - "step": 14329 - }, - { - "epoch": 2.307741857562704, - "grad_norm": 0.0007703409646637738, - "learning_rate": 0.00019999737573884102, - "loss": 46.0, - "step": 14330 - }, - { - "epoch": 2.307902894641491, - "grad_norm": 0.0033213100396096706, - "learning_rate": 0.00019999737537231287, - "loss": 46.0, - "step": 14331 - }, - { - "epoch": 2.3080639317202785, - "grad_norm": 0.0036874287761747837, - "learning_rate": 0.00019999737500575916, - "loss": 46.0, - "step": 14332 - }, - { - "epoch": 2.308224968799066, - "grad_norm": 0.0031320613343268633, - "learning_rate": 0.00019999737463917986, - "loss": 46.0, - "step": 14333 - }, - { - "epoch": 2.3083860058778534, - "grad_norm": 0.003985895775258541, - "learning_rate": 0.00019999737427257497, - "loss": 46.0, - "step": 14334 - }, - { - "epoch": 2.308547042956641, - "grad_norm": 0.003459648694843054, - "learning_rate": 0.00019999737390594447, - "loss": 46.0, - "step": 14335 - }, - { - "epoch": 2.3087080800354283, - "grad_norm": 0.0037921795155853033, - "learning_rate": 0.00019999737353928838, - "loss": 46.0, - "step": 14336 - }, - { - "epoch": 2.3088691171142157, - "grad_norm": 0.0005934066139161587, - "learning_rate": 0.0001999973731726067, - "loss": 46.0, - "step": 14337 - }, - { - "epoch": 2.309030154193003, - "grad_norm": 0.0029144128784537315, - "learning_rate": 0.00019999737280589942, - "loss": 46.0, - "step": 14338 - }, - { - "epoch": 2.30919119127179, - "grad_norm": 0.003398422384634614, - "learning_rate": 0.00019999737243916657, - "loss": 46.0, - "step": 14339 - }, - { - "epoch": 2.3093522283505776, - "grad_norm": 0.0014305945951491594, - "learning_rate": 0.00019999737207240808, - "loss": 46.0, - "step": 14340 - }, - { - "epoch": 2.309513265429365, - "grad_norm": 0.0023654219694435596, - "learning_rate": 0.00019999737170562406, - "loss": 46.0, - "step": 14341 - }, - { - "epoch": 2.3096743025081525, - "grad_norm": 0.0015124857891350985, - "learning_rate": 0.0001999973713388144, - "loss": 46.0, - "step": 14342 - }, - { - "epoch": 2.30983533958694, - "grad_norm": 0.0005553358932957053, - "learning_rate": 0.00019999737097197915, - "loss": 46.0, - "step": 14343 - }, - { - "epoch": 2.3099963766657274, - "grad_norm": 0.004616947844624519, - "learning_rate": 0.0001999973706051183, - "loss": 46.0, - "step": 14344 - }, - { - "epoch": 2.3101574137445144, - "grad_norm": 0.0018007889157161117, - "learning_rate": 0.00019999737023823188, - "loss": 46.0, - "step": 14345 - }, - { - "epoch": 2.310318450823302, - "grad_norm": 0.0023752308916300535, - "learning_rate": 0.00019999736987131984, - "loss": 46.0, - "step": 14346 - }, - { - "epoch": 2.3104794879020893, - "grad_norm": 0.0038026210386306047, - "learning_rate": 0.0001999973695043822, - "loss": 46.0, - "step": 14347 - }, - { - "epoch": 2.310640524980877, - "grad_norm": 0.004556956235319376, - "learning_rate": 0.000199997369137419, - "loss": 46.0, - "step": 14348 - }, - { - "epoch": 2.3108015620596642, - "grad_norm": 0.010329912416636944, - "learning_rate": 0.00019999736877043017, - "loss": 46.0, - "step": 14349 - }, - { - "epoch": 2.3109625991384517, - "grad_norm": 0.0013289510970935225, - "learning_rate": 0.00019999736840341578, - "loss": 46.0, - "step": 14350 - }, - { - "epoch": 2.311123636217239, - "grad_norm": 0.0009634209563955665, - "learning_rate": 0.00019999736803637578, - "loss": 46.0, - "step": 14351 - }, - { - "epoch": 2.3112846732960266, - "grad_norm": 0.004501132760196924, - "learning_rate": 0.0001999973676693102, - "loss": 46.0, - "step": 14352 - }, - { - "epoch": 2.3114457103748136, - "grad_norm": 0.006060115061700344, - "learning_rate": 0.000199997367302219, - "loss": 46.0, - "step": 14353 - }, - { - "epoch": 2.311606747453601, - "grad_norm": 0.00291638495400548, - "learning_rate": 0.00019999736693510222, - "loss": 46.0, - "step": 14354 - }, - { - "epoch": 2.3117677845323885, - "grad_norm": 0.0017816550098359585, - "learning_rate": 0.00019999736656795984, - "loss": 46.0, - "step": 14355 - }, - { - "epoch": 2.311928821611176, - "grad_norm": 0.003616332309320569, - "learning_rate": 0.00019999736620079187, - "loss": 46.0, - "step": 14356 - }, - { - "epoch": 2.3120898586899634, - "grad_norm": 0.001433882280252874, - "learning_rate": 0.00019999736583359832, - "loss": 46.0, - "step": 14357 - }, - { - "epoch": 2.312250895768751, - "grad_norm": 0.0012286021374166012, - "learning_rate": 0.00019999736546637915, - "loss": 46.0, - "step": 14358 - }, - { - "epoch": 2.3124119328475383, - "grad_norm": 0.0017214068211615086, - "learning_rate": 0.00019999736509913437, - "loss": 46.0, - "step": 14359 - }, - { - "epoch": 2.3125729699263253, - "grad_norm": 0.00138735375367105, - "learning_rate": 0.00019999736473186403, - "loss": 46.0, - "step": 14360 - }, - { - "epoch": 2.312734007005113, - "grad_norm": 0.006167803891003132, - "learning_rate": 0.0001999973643645681, - "loss": 46.0, - "step": 14361 - }, - { - "epoch": 2.3128950440839002, - "grad_norm": 0.001048176665790379, - "learning_rate": 0.00019999736399724659, - "loss": 46.0, - "step": 14362 - }, - { - "epoch": 2.3130560811626877, - "grad_norm": 0.0004285548347979784, - "learning_rate": 0.00019999736362989946, - "loss": 46.0, - "step": 14363 - }, - { - "epoch": 2.313217118241475, - "grad_norm": 0.0031015214044600725, - "learning_rate": 0.0001999973632625267, - "loss": 46.0, - "step": 14364 - }, - { - "epoch": 2.3133781553202626, - "grad_norm": 0.005603897850960493, - "learning_rate": 0.0001999973628951284, - "loss": 46.0, - "step": 14365 - }, - { - "epoch": 2.31353919239905, - "grad_norm": 0.0017382488586008549, - "learning_rate": 0.0001999973625277045, - "loss": 46.0, - "step": 14366 - }, - { - "epoch": 2.3137002294778375, - "grad_norm": 0.001150375115685165, - "learning_rate": 0.00019999736216025498, - "loss": 46.0, - "step": 14367 - }, - { - "epoch": 2.3138612665566245, - "grad_norm": 0.00736632663756609, - "learning_rate": 0.0001999973617927799, - "loss": 46.0, - "step": 14368 - }, - { - "epoch": 2.314022303635412, - "grad_norm": 0.0058825197629630566, - "learning_rate": 0.00019999736142527918, - "loss": 46.0, - "step": 14369 - }, - { - "epoch": 2.3141833407141994, - "grad_norm": 0.00473377900198102, - "learning_rate": 0.00019999736105775289, - "loss": 46.0, - "step": 14370 - }, - { - "epoch": 2.314344377792987, - "grad_norm": 0.0009924237383529544, - "learning_rate": 0.000199997360690201, - "loss": 46.0, - "step": 14371 - }, - { - "epoch": 2.3145054148717743, - "grad_norm": 0.0013253850629553199, - "learning_rate": 0.00019999736032262356, - "loss": 46.0, - "step": 14372 - }, - { - "epoch": 2.3146664519505618, - "grad_norm": 0.002478858921676874, - "learning_rate": 0.00019999735995502045, - "loss": 46.0, - "step": 14373 - }, - { - "epoch": 2.3148274890293488, - "grad_norm": 0.009007279761135578, - "learning_rate": 0.0001999973595873918, - "loss": 46.0, - "step": 14374 - }, - { - "epoch": 2.314988526108136, - "grad_norm": 0.0030016270466148853, - "learning_rate": 0.00019999735921973755, - "loss": 46.0, - "step": 14375 - }, - { - "epoch": 2.3151495631869237, - "grad_norm": 0.003416825085878372, - "learning_rate": 0.00019999735885205768, - "loss": 46.0, - "step": 14376 - }, - { - "epoch": 2.315310600265711, - "grad_norm": 0.0080626355484128, - "learning_rate": 0.00019999735848435225, - "loss": 46.0, - "step": 14377 - }, - { - "epoch": 2.3154716373444986, - "grad_norm": 0.003937988542020321, - "learning_rate": 0.0001999973581166212, - "loss": 46.0, - "step": 14378 - }, - { - "epoch": 2.315632674423286, - "grad_norm": 0.006850492209196091, - "learning_rate": 0.00019999735774886457, - "loss": 46.0, - "step": 14379 - }, - { - "epoch": 2.3157937115020735, - "grad_norm": 0.0020145776215940714, - "learning_rate": 0.00019999735738108232, - "loss": 46.0, - "step": 14380 - }, - { - "epoch": 2.315954748580861, - "grad_norm": 0.0019912244752049446, - "learning_rate": 0.00019999735701327448, - "loss": 46.0, - "step": 14381 - }, - { - "epoch": 2.3161157856596484, - "grad_norm": 0.002376051153987646, - "learning_rate": 0.0001999973566454411, - "loss": 46.0, - "step": 14382 - }, - { - "epoch": 2.3162768227384354, - "grad_norm": 0.0009518018341623247, - "learning_rate": 0.00019999735627758205, - "loss": 46.0, - "step": 14383 - }, - { - "epoch": 2.316437859817223, - "grad_norm": 0.007644172292202711, - "learning_rate": 0.00019999735590969746, - "loss": 46.0, - "step": 14384 - }, - { - "epoch": 2.3165988968960103, - "grad_norm": 0.00538233108818531, - "learning_rate": 0.00019999735554178724, - "loss": 46.0, - "step": 14385 - }, - { - "epoch": 2.3167599339747977, - "grad_norm": 0.005443088710308075, - "learning_rate": 0.00019999735517385145, - "loss": 46.0, - "step": 14386 - }, - { - "epoch": 2.316920971053585, - "grad_norm": 0.0018549426458775997, - "learning_rate": 0.0001999973548058901, - "loss": 46.0, - "step": 14387 - }, - { - "epoch": 2.3170820081323726, - "grad_norm": 0.0059515186585485935, - "learning_rate": 0.0001999973544379031, - "loss": 46.0, - "step": 14388 - }, - { - "epoch": 2.3172430452111596, - "grad_norm": 0.0036792298778891563, - "learning_rate": 0.0001999973540698905, - "loss": 46.0, - "step": 14389 - }, - { - "epoch": 2.317404082289947, - "grad_norm": 0.0018364182906225324, - "learning_rate": 0.00019999735370185233, - "loss": 46.0, - "step": 14390 - }, - { - "epoch": 2.3175651193687345, - "grad_norm": 0.0043747033923864365, - "learning_rate": 0.00019999735333378857, - "loss": 46.0, - "step": 14391 - }, - { - "epoch": 2.317726156447522, - "grad_norm": 0.0015284968540072441, - "learning_rate": 0.0001999973529656992, - "loss": 46.0, - "step": 14392 - }, - { - "epoch": 2.3178871935263095, - "grad_norm": 0.0009432507795281708, - "learning_rate": 0.00019999735259758423, - "loss": 46.0, - "step": 14393 - }, - { - "epoch": 2.318048230605097, - "grad_norm": 0.002124601975083351, - "learning_rate": 0.00019999735222944368, - "loss": 46.0, - "step": 14394 - }, - { - "epoch": 2.3182092676838844, - "grad_norm": 0.004489487037062645, - "learning_rate": 0.00019999735186127754, - "loss": 46.0, - "step": 14395 - }, - { - "epoch": 2.318370304762672, - "grad_norm": 0.006924944929778576, - "learning_rate": 0.00019999735149308582, - "loss": 46.0, - "step": 14396 - }, - { - "epoch": 2.318531341841459, - "grad_norm": 0.0012586968950927258, - "learning_rate": 0.00019999735112486848, - "loss": 46.0, - "step": 14397 - }, - { - "epoch": 2.3186923789202463, - "grad_norm": 0.0008771170396357775, - "learning_rate": 0.00019999735075662556, - "loss": 46.0, - "step": 14398 - }, - { - "epoch": 2.3188534159990337, - "grad_norm": 0.0014779956545680761, - "learning_rate": 0.00019999735038835702, - "loss": 46.0, - "step": 14399 - }, - { - "epoch": 2.319014453077821, - "grad_norm": 0.0020623544696718454, - "learning_rate": 0.00019999735002006292, - "loss": 46.0, - "step": 14400 - }, - { - "epoch": 2.3191754901566086, - "grad_norm": 0.0032336018048226833, - "learning_rate": 0.0001999973496517432, - "loss": 46.0, - "step": 14401 - }, - { - "epoch": 2.319336527235396, - "grad_norm": 0.005756643135100603, - "learning_rate": 0.0001999973492833979, - "loss": 46.0, - "step": 14402 - }, - { - "epoch": 2.3194975643141835, - "grad_norm": 0.0010476246243342757, - "learning_rate": 0.000199997348915027, - "loss": 46.0, - "step": 14403 - }, - { - "epoch": 2.3196586013929705, - "grad_norm": 0.006958004552870989, - "learning_rate": 0.0001999973485466305, - "loss": 46.0, - "step": 14404 - }, - { - "epoch": 2.319819638471758, - "grad_norm": 0.0006009237840771675, - "learning_rate": 0.00019999734817820842, - "loss": 46.0, - "step": 14405 - }, - { - "epoch": 2.3199806755505454, - "grad_norm": 0.0033234297297894955, - "learning_rate": 0.00019999734780976077, - "loss": 46.0, - "step": 14406 - }, - { - "epoch": 2.320141712629333, - "grad_norm": 0.004243780858814716, - "learning_rate": 0.00019999734744128745, - "loss": 46.0, - "step": 14407 - }, - { - "epoch": 2.3203027497081203, - "grad_norm": 0.0007655392400920391, - "learning_rate": 0.0001999973470727886, - "loss": 46.0, - "step": 14408 - }, - { - "epoch": 2.320463786786908, - "grad_norm": 0.005025248508900404, - "learning_rate": 0.00019999734670426414, - "loss": 46.0, - "step": 14409 - }, - { - "epoch": 2.3206248238656952, - "grad_norm": 0.0018490883521735668, - "learning_rate": 0.00019999734633571408, - "loss": 46.0, - "step": 14410 - }, - { - "epoch": 2.3207858609444827, - "grad_norm": 0.0029667378403246403, - "learning_rate": 0.00019999734596713844, - "loss": 46.0, - "step": 14411 - }, - { - "epoch": 2.3209468980232697, - "grad_norm": 0.0011171108344569802, - "learning_rate": 0.0001999973455985372, - "loss": 46.0, - "step": 14412 - }, - { - "epoch": 2.321107935102057, - "grad_norm": 0.0028639500960707664, - "learning_rate": 0.00019999734522991035, - "loss": 46.0, - "step": 14413 - }, - { - "epoch": 2.3212689721808446, - "grad_norm": 0.004016688093543053, - "learning_rate": 0.00019999734486125792, - "loss": 46.0, - "step": 14414 - }, - { - "epoch": 2.321430009259632, - "grad_norm": 0.002466451143845916, - "learning_rate": 0.00019999734449257988, - "loss": 46.0, - "step": 14415 - }, - { - "epoch": 2.3215910463384195, - "grad_norm": 0.0021750994492322206, - "learning_rate": 0.00019999734412387628, - "loss": 46.0, - "step": 14416 - }, - { - "epoch": 2.321752083417207, - "grad_norm": 0.0008831341983750463, - "learning_rate": 0.00019999734375514706, - "loss": 46.0, - "step": 14417 - }, - { - "epoch": 2.321913120495994, - "grad_norm": 0.006661385763436556, - "learning_rate": 0.00019999734338639225, - "loss": 46.0, - "step": 14418 - }, - { - "epoch": 2.3220741575747814, - "grad_norm": 0.005850530695170164, - "learning_rate": 0.00019999734301761184, - "loss": 46.0, - "step": 14419 - }, - { - "epoch": 2.322235194653569, - "grad_norm": 0.0030837017111480236, - "learning_rate": 0.00019999734264880586, - "loss": 46.0, - "step": 14420 - }, - { - "epoch": 2.3223962317323563, - "grad_norm": 0.008410700596868992, - "learning_rate": 0.00019999734227997424, - "loss": 46.0, - "step": 14421 - }, - { - "epoch": 2.3225572688111438, - "grad_norm": 0.004040918778628111, - "learning_rate": 0.00019999734191111706, - "loss": 46.0, - "step": 14422 - }, - { - "epoch": 2.3227183058899312, - "grad_norm": 0.004590666387230158, - "learning_rate": 0.00019999734154223426, - "loss": 46.0, - "step": 14423 - }, - { - "epoch": 2.3228793429687187, - "grad_norm": 0.010090491734445095, - "learning_rate": 0.00019999734117332588, - "loss": 46.0, - "step": 14424 - }, - { - "epoch": 2.323040380047506, - "grad_norm": 0.0024465101305395365, - "learning_rate": 0.00019999734080439194, - "loss": 46.0, - "step": 14425 - }, - { - "epoch": 2.3232014171262936, - "grad_norm": 0.0039410837925970554, - "learning_rate": 0.00019999734043543236, - "loss": 46.0, - "step": 14426 - }, - { - "epoch": 2.3233624542050806, - "grad_norm": 0.013590541668236256, - "learning_rate": 0.00019999734006644721, - "loss": 46.0, - "step": 14427 - }, - { - "epoch": 2.323523491283868, - "grad_norm": 0.006381265819072723, - "learning_rate": 0.00019999733969743646, - "loss": 46.0, - "step": 14428 - }, - { - "epoch": 2.3236845283626555, - "grad_norm": 0.002500042086467147, - "learning_rate": 0.0001999973393284001, - "loss": 46.0, - "step": 14429 - }, - { - "epoch": 2.323845565441443, - "grad_norm": 0.001969830598682165, - "learning_rate": 0.00019999733895933815, - "loss": 46.0, - "step": 14430 - }, - { - "epoch": 2.3240066025202304, - "grad_norm": 0.0016278802650049329, - "learning_rate": 0.00019999733859025063, - "loss": 46.0, - "step": 14431 - }, - { - "epoch": 2.324167639599018, - "grad_norm": 0.00324880238622427, - "learning_rate": 0.00019999733822113747, - "loss": 46.0, - "step": 14432 - }, - { - "epoch": 2.324328676677805, - "grad_norm": 0.0015159628819674253, - "learning_rate": 0.00019999733785199875, - "loss": 46.0, - "step": 14433 - }, - { - "epoch": 2.3244897137565923, - "grad_norm": 0.0071920352056622505, - "learning_rate": 0.00019999733748283447, - "loss": 46.0, - "step": 14434 - }, - { - "epoch": 2.3246507508353798, - "grad_norm": 0.0006405320600606501, - "learning_rate": 0.00019999733711364455, - "loss": 46.0, - "step": 14435 - }, - { - "epoch": 2.324811787914167, - "grad_norm": 0.001085036899894476, - "learning_rate": 0.00019999733674442904, - "loss": 46.0, - "step": 14436 - }, - { - "epoch": 2.3249728249929547, - "grad_norm": 0.005076251458376646, - "learning_rate": 0.00019999733637518795, - "loss": 46.0, - "step": 14437 - }, - { - "epoch": 2.325133862071742, - "grad_norm": 0.0016791843809187412, - "learning_rate": 0.00019999733600592126, - "loss": 46.0, - "step": 14438 - }, - { - "epoch": 2.3252948991505296, - "grad_norm": 0.0012755368370562792, - "learning_rate": 0.00019999733563662897, - "loss": 46.0, - "step": 14439 - }, - { - "epoch": 2.325455936229317, - "grad_norm": 0.001654770807363093, - "learning_rate": 0.00019999733526731108, - "loss": 46.0, - "step": 14440 - }, - { - "epoch": 2.325616973308104, - "grad_norm": 0.002825975650921464, - "learning_rate": 0.0001999973348979676, - "loss": 46.0, - "step": 14441 - }, - { - "epoch": 2.3257780103868915, - "grad_norm": 0.0038898123893886805, - "learning_rate": 0.00019999733452859855, - "loss": 46.0, - "step": 14442 - }, - { - "epoch": 2.325939047465679, - "grad_norm": 0.0050439839251339436, - "learning_rate": 0.00019999733415920385, - "loss": 46.0, - "step": 14443 - }, - { - "epoch": 2.3261000845444664, - "grad_norm": 0.0011504930444061756, - "learning_rate": 0.0001999973337897836, - "loss": 46.0, - "step": 14444 - }, - { - "epoch": 2.326261121623254, - "grad_norm": 0.0024441902060061693, - "learning_rate": 0.00019999733342033774, - "loss": 46.0, - "step": 14445 - }, - { - "epoch": 2.3264221587020413, - "grad_norm": 0.0015909976791590452, - "learning_rate": 0.0001999973330508663, - "loss": 46.0, - "step": 14446 - }, - { - "epoch": 2.3265831957808287, - "grad_norm": 0.005187246948480606, - "learning_rate": 0.00019999733268136926, - "loss": 46.0, - "step": 14447 - }, - { - "epoch": 2.3267442328596157, - "grad_norm": 0.0075108944438397884, - "learning_rate": 0.00019999733231184662, - "loss": 46.0, - "step": 14448 - }, - { - "epoch": 2.326905269938403, - "grad_norm": 0.000731126288883388, - "learning_rate": 0.0001999973319422984, - "loss": 46.0, - "step": 14449 - }, - { - "epoch": 2.3270663070171906, - "grad_norm": 0.0032302439212799072, - "learning_rate": 0.0001999973315727246, - "loss": 46.0, - "step": 14450 - }, - { - "epoch": 2.327227344095978, - "grad_norm": 0.0005128925549797714, - "learning_rate": 0.00019999733120312514, - "loss": 46.0, - "step": 14451 - }, - { - "epoch": 2.3273883811747655, - "grad_norm": 0.003352082334458828, - "learning_rate": 0.00019999733083350015, - "loss": 46.0, - "step": 14452 - }, - { - "epoch": 2.327549418253553, - "grad_norm": 0.001001317403279245, - "learning_rate": 0.00019999733046384953, - "loss": 46.0, - "step": 14453 - }, - { - "epoch": 2.3277104553323404, - "grad_norm": 0.004955696407705545, - "learning_rate": 0.0001999973300941733, - "loss": 46.0, - "step": 14454 - }, - { - "epoch": 2.327871492411128, - "grad_norm": 0.001593914465047419, - "learning_rate": 0.00019999732972447154, - "loss": 46.0, - "step": 14455 - }, - { - "epoch": 2.328032529489915, - "grad_norm": 0.001249672845005989, - "learning_rate": 0.00019999732935474412, - "loss": 46.0, - "step": 14456 - }, - { - "epoch": 2.3281935665687024, - "grad_norm": 0.0013835192658007145, - "learning_rate": 0.00019999732898499115, - "loss": 46.0, - "step": 14457 - }, - { - "epoch": 2.32835460364749, - "grad_norm": 0.0015039057470858097, - "learning_rate": 0.00019999732861521259, - "loss": 46.0, - "step": 14458 - }, - { - "epoch": 2.3285156407262773, - "grad_norm": 0.0008849624427966774, - "learning_rate": 0.0001999973282454084, - "loss": 46.0, - "step": 14459 - }, - { - "epoch": 2.3286766778050647, - "grad_norm": 0.001315623288974166, - "learning_rate": 0.00019999732787557865, - "loss": 46.0, - "step": 14460 - }, - { - "epoch": 2.328837714883852, - "grad_norm": 0.0036567416973412037, - "learning_rate": 0.0001999973275057233, - "loss": 46.0, - "step": 14461 - }, - { - "epoch": 2.328998751962639, - "grad_norm": 0.003333244239911437, - "learning_rate": 0.0001999973271358423, - "loss": 46.0, - "step": 14462 - }, - { - "epoch": 2.3291597890414266, - "grad_norm": 0.0010374191915616393, - "learning_rate": 0.00019999732676593578, - "loss": 46.0, - "step": 14463 - }, - { - "epoch": 2.329320826120214, - "grad_norm": 0.004147120285779238, - "learning_rate": 0.00019999732639600364, - "loss": 46.0, - "step": 14464 - }, - { - "epoch": 2.3294818631990015, - "grad_norm": 0.004270139615982771, - "learning_rate": 0.0001999973260260459, - "loss": 46.0, - "step": 14465 - }, - { - "epoch": 2.329642900277789, - "grad_norm": 0.003310170490294695, - "learning_rate": 0.00019999732565606257, - "loss": 46.0, - "step": 14466 - }, - { - "epoch": 2.3298039373565764, - "grad_norm": 0.0004039326449856162, - "learning_rate": 0.00019999732528605364, - "loss": 46.0, - "step": 14467 - }, - { - "epoch": 2.329964974435364, - "grad_norm": 0.006733214017003775, - "learning_rate": 0.00019999732491601913, - "loss": 46.0, - "step": 14468 - }, - { - "epoch": 2.3301260115141513, - "grad_norm": 0.004155580885708332, - "learning_rate": 0.000199997324545959, - "loss": 46.0, - "step": 14469 - }, - { - "epoch": 2.3302870485929383, - "grad_norm": 0.011322176083922386, - "learning_rate": 0.00019999732417587328, - "loss": 46.0, - "step": 14470 - }, - { - "epoch": 2.330448085671726, - "grad_norm": 0.0024532803799957037, - "learning_rate": 0.000199997323805762, - "loss": 46.0, - "step": 14471 - }, - { - "epoch": 2.3306091227505132, - "grad_norm": 0.0013457033783197403, - "learning_rate": 0.00019999732343562511, - "loss": 46.0, - "step": 14472 - }, - { - "epoch": 2.3307701598293007, - "grad_norm": 0.003234673058614135, - "learning_rate": 0.0001999973230654626, - "loss": 46.0, - "step": 14473 - }, - { - "epoch": 2.330931196908088, - "grad_norm": 0.008821835741400719, - "learning_rate": 0.00019999732269527454, - "loss": 46.0, - "step": 14474 - }, - { - "epoch": 2.3310922339868756, - "grad_norm": 0.004558505956083536, - "learning_rate": 0.00019999732232506086, - "loss": 46.0, - "step": 14475 - }, - { - "epoch": 2.331253271065663, - "grad_norm": 0.004502071533352137, - "learning_rate": 0.0001999973219548216, - "loss": 46.0, - "step": 14476 - }, - { - "epoch": 2.33141430814445, - "grad_norm": 0.0018074504332616925, - "learning_rate": 0.00019999732158455672, - "loss": 46.0, - "step": 14477 - }, - { - "epoch": 2.3315753452232375, - "grad_norm": 0.004936224315315485, - "learning_rate": 0.00019999732121426625, - "loss": 46.0, - "step": 14478 - }, - { - "epoch": 2.331736382302025, - "grad_norm": 0.0023984541185200214, - "learning_rate": 0.0001999973208439502, - "loss": 46.0, - "step": 14479 - }, - { - "epoch": 2.3318974193808124, - "grad_norm": 0.002477796282619238, - "learning_rate": 0.00019999732047360852, - "loss": 46.0, - "step": 14480 - }, - { - "epoch": 2.3320584564596, - "grad_norm": 0.000671895919367671, - "learning_rate": 0.0001999973201032413, - "loss": 46.0, - "step": 14481 - }, - { - "epoch": 2.3322194935383873, - "grad_norm": 0.011366534046828747, - "learning_rate": 0.00019999731973284845, - "loss": 46.0, - "step": 14482 - }, - { - "epoch": 2.3323805306171748, - "grad_norm": 0.005910404492169619, - "learning_rate": 0.00019999731936243002, - "loss": 46.0, - "step": 14483 - }, - { - "epoch": 2.332541567695962, - "grad_norm": 0.0013881241902709007, - "learning_rate": 0.00019999731899198603, - "loss": 46.0, - "step": 14484 - }, - { - "epoch": 2.3327026047747492, - "grad_norm": 0.004842846654355526, - "learning_rate": 0.0001999973186215164, - "loss": 46.0, - "step": 14485 - }, - { - "epoch": 2.3328636418535367, - "grad_norm": 0.0016857337905094028, - "learning_rate": 0.00019999731825102117, - "loss": 46.0, - "step": 14486 - }, - { - "epoch": 2.333024678932324, - "grad_norm": 0.003317498601973057, - "learning_rate": 0.00019999731788050037, - "loss": 46.0, - "step": 14487 - }, - { - "epoch": 2.3331857160111116, - "grad_norm": 0.0031158719211816788, - "learning_rate": 0.00019999731750995397, - "loss": 46.0, - "step": 14488 - }, - { - "epoch": 2.333346753089899, - "grad_norm": 0.0009414818487130105, - "learning_rate": 0.00019999731713938194, - "loss": 46.0, - "step": 14489 - }, - { - "epoch": 2.3335077901686865, - "grad_norm": 0.0009883511811494827, - "learning_rate": 0.00019999731676878434, - "loss": 46.0, - "step": 14490 - }, - { - "epoch": 2.3336688272474735, - "grad_norm": 0.005314912647008896, - "learning_rate": 0.00019999731639816116, - "loss": 46.0, - "step": 14491 - }, - { - "epoch": 2.333829864326261, - "grad_norm": 0.0005769595736637712, - "learning_rate": 0.0001999973160275124, - "loss": 46.0, - "step": 14492 - }, - { - "epoch": 2.3339909014050484, - "grad_norm": 0.0016682537971064448, - "learning_rate": 0.00019999731565683803, - "loss": 46.0, - "step": 14493 - }, - { - "epoch": 2.334151938483836, - "grad_norm": 0.0011705616489052773, - "learning_rate": 0.00019999731528613806, - "loss": 46.0, - "step": 14494 - }, - { - "epoch": 2.3343129755626233, - "grad_norm": 0.0011205258779227734, - "learning_rate": 0.0001999973149154125, - "loss": 46.0, - "step": 14495 - }, - { - "epoch": 2.3344740126414107, - "grad_norm": 0.0009598342003300786, - "learning_rate": 0.00019999731454466133, - "loss": 46.0, - "step": 14496 - }, - { - "epoch": 2.334635049720198, - "grad_norm": 0.004641356877982616, - "learning_rate": 0.0001999973141738846, - "loss": 46.0, - "step": 14497 - }, - { - "epoch": 2.3347960867989856, - "grad_norm": 0.005388359073549509, - "learning_rate": 0.00019999731380308222, - "loss": 46.0, - "step": 14498 - }, - { - "epoch": 2.334957123877773, - "grad_norm": 0.0012352181365713477, - "learning_rate": 0.0001999973134322543, - "loss": 46.0, - "step": 14499 - }, - { - "epoch": 2.33511816095656, - "grad_norm": 0.003375965403392911, - "learning_rate": 0.00019999731306140074, - "loss": 46.0, - "step": 14500 - }, - { - "epoch": 2.3352791980353476, - "grad_norm": 0.008423916064202785, - "learning_rate": 0.00019999731269052163, - "loss": 46.0, - "step": 14501 - }, - { - "epoch": 2.335440235114135, - "grad_norm": 0.005135141778737307, - "learning_rate": 0.0001999973123196169, - "loss": 46.0, - "step": 14502 - }, - { - "epoch": 2.3356012721929225, - "grad_norm": 0.0012736058561131358, - "learning_rate": 0.0001999973119486866, - "loss": 46.0, - "step": 14503 - }, - { - "epoch": 2.33576230927171, - "grad_norm": 0.004501240327954292, - "learning_rate": 0.00019999731157773067, - "loss": 46.0, - "step": 14504 - }, - { - "epoch": 2.3359233463504974, - "grad_norm": 0.0011809362331405282, - "learning_rate": 0.0001999973112067492, - "loss": 46.0, - "step": 14505 - }, - { - "epoch": 2.3360843834292844, - "grad_norm": 0.005849318578839302, - "learning_rate": 0.00019999731083574206, - "loss": 46.0, - "step": 14506 - }, - { - "epoch": 2.336245420508072, - "grad_norm": 0.007024803198873997, - "learning_rate": 0.0001999973104647094, - "loss": 46.0, - "step": 14507 - }, - { - "epoch": 2.3364064575868593, - "grad_norm": 0.0015148492529988289, - "learning_rate": 0.0001999973100936511, - "loss": 46.0, - "step": 14508 - }, - { - "epoch": 2.3365674946656467, - "grad_norm": 0.0012120538158342242, - "learning_rate": 0.00019999730972256722, - "loss": 46.0, - "step": 14509 - }, - { - "epoch": 2.336728531744434, - "grad_norm": 0.0033014232758432627, - "learning_rate": 0.00019999730935145774, - "loss": 46.0, - "step": 14510 - }, - { - "epoch": 2.3368895688232216, - "grad_norm": 0.006597817875444889, - "learning_rate": 0.00019999730898032268, - "loss": 46.0, - "step": 14511 - }, - { - "epoch": 2.337050605902009, - "grad_norm": 0.0015597755555063486, - "learning_rate": 0.000199997308609162, - "loss": 46.0, - "step": 14512 - }, - { - "epoch": 2.3372116429807965, - "grad_norm": 0.005910459440201521, - "learning_rate": 0.00019999730823797577, - "loss": 46.0, - "step": 14513 - }, - { - "epoch": 2.3373726800595835, - "grad_norm": 0.006780663505196571, - "learning_rate": 0.0001999973078667639, - "loss": 46.0, - "step": 14514 - }, - { - "epoch": 2.337533717138371, - "grad_norm": 0.004193446133285761, - "learning_rate": 0.00019999730749552645, - "loss": 46.0, - "step": 14515 - }, - { - "epoch": 2.3376947542171584, - "grad_norm": 0.0010009112302213907, - "learning_rate": 0.0001999973071242634, - "loss": 46.0, - "step": 14516 - }, - { - "epoch": 2.337855791295946, - "grad_norm": 0.003624504432082176, - "learning_rate": 0.0001999973067529748, - "loss": 46.0, - "step": 14517 - }, - { - "epoch": 2.3380168283747333, - "grad_norm": 0.004376953933387995, - "learning_rate": 0.00019999730638166056, - "loss": 46.0, - "step": 14518 - }, - { - "epoch": 2.338177865453521, - "grad_norm": 0.005358573514968157, - "learning_rate": 0.00019999730601032072, - "loss": 46.0, - "step": 14519 - }, - { - "epoch": 2.3383389025323082, - "grad_norm": 0.006792285013943911, - "learning_rate": 0.00019999730563895532, - "loss": 46.0, - "step": 14520 - }, - { - "epoch": 2.3384999396110953, - "grad_norm": 0.004689349792897701, - "learning_rate": 0.00019999730526756428, - "loss": 46.0, - "step": 14521 - }, - { - "epoch": 2.3386609766898827, - "grad_norm": 0.003819323144853115, - "learning_rate": 0.0001999973048961477, - "loss": 46.0, - "step": 14522 - }, - { - "epoch": 2.33882201376867, - "grad_norm": 0.004058188758790493, - "learning_rate": 0.0001999973045247055, - "loss": 46.0, - "step": 14523 - }, - { - "epoch": 2.3389830508474576, - "grad_norm": 0.001914954511448741, - "learning_rate": 0.0001999973041532377, - "loss": 46.0, - "step": 14524 - }, - { - "epoch": 2.339144087926245, - "grad_norm": 0.005341460462659597, - "learning_rate": 0.00019999730378174432, - "loss": 46.0, - "step": 14525 - }, - { - "epoch": 2.3393051250050325, - "grad_norm": 0.0023206814657896757, - "learning_rate": 0.00019999730341022535, - "loss": 46.0, - "step": 14526 - }, - { - "epoch": 2.33946616208382, - "grad_norm": 0.0015874041710048914, - "learning_rate": 0.00019999730303868075, - "loss": 46.0, - "step": 14527 - }, - { - "epoch": 2.3396271991626074, - "grad_norm": 0.002369649475440383, - "learning_rate": 0.00019999730266711057, - "loss": 46.0, - "step": 14528 - }, - { - "epoch": 2.3397882362413944, - "grad_norm": 0.001215968863107264, - "learning_rate": 0.00019999730229551484, - "loss": 46.0, - "step": 14529 - }, - { - "epoch": 2.339949273320182, - "grad_norm": 0.005948281846940517, - "learning_rate": 0.00019999730192389346, - "loss": 46.0, - "step": 14530 - }, - { - "epoch": 2.3401103103989693, - "grad_norm": 0.0033018679823726416, - "learning_rate": 0.0001999973015522465, - "loss": 46.0, - "step": 14531 - }, - { - "epoch": 2.340271347477757, - "grad_norm": 0.0013913020957261324, - "learning_rate": 0.00019999730118057396, - "loss": 46.0, - "step": 14532 - }, - { - "epoch": 2.3404323845565442, - "grad_norm": 0.013141454197466373, - "learning_rate": 0.00019999730080887582, - "loss": 46.0, - "step": 14533 - }, - { - "epoch": 2.3405934216353317, - "grad_norm": 0.010674758814275265, - "learning_rate": 0.0001999973004371521, - "loss": 46.0, - "step": 14534 - }, - { - "epoch": 2.3407544587141187, - "grad_norm": 0.0015034181997179985, - "learning_rate": 0.00019999730006540274, - "loss": 46.0, - "step": 14535 - }, - { - "epoch": 2.340915495792906, - "grad_norm": 0.0012631191639229655, - "learning_rate": 0.00019999729969362781, - "loss": 46.0, - "step": 14536 - }, - { - "epoch": 2.3410765328716936, - "grad_norm": 0.002949714893475175, - "learning_rate": 0.00019999729932182732, - "loss": 46.0, - "step": 14537 - }, - { - "epoch": 2.341237569950481, - "grad_norm": 0.00282492209225893, - "learning_rate": 0.0001999972989500012, - "loss": 46.0, - "step": 14538 - }, - { - "epoch": 2.3413986070292685, - "grad_norm": 0.0036208292003721, - "learning_rate": 0.00019999729857814947, - "loss": 46.0, - "step": 14539 - }, - { - "epoch": 2.341559644108056, - "grad_norm": 0.003570919157937169, - "learning_rate": 0.0001999972982062722, - "loss": 46.0, - "step": 14540 - }, - { - "epoch": 2.3417206811868434, - "grad_norm": 0.0020160884596407413, - "learning_rate": 0.0001999972978343693, - "loss": 46.0, - "step": 14541 - }, - { - "epoch": 2.341881718265631, - "grad_norm": 0.004158018156886101, - "learning_rate": 0.0001999972974624408, - "loss": 46.0, - "step": 14542 - }, - { - "epoch": 2.342042755344418, - "grad_norm": 0.005943570286035538, - "learning_rate": 0.00019999729709048672, - "loss": 46.0, - "step": 14543 - }, - { - "epoch": 2.3422037924232053, - "grad_norm": 0.0007683020667172968, - "learning_rate": 0.00019999729671850704, - "loss": 46.0, - "step": 14544 - }, - { - "epoch": 2.3423648295019928, - "grad_norm": 0.010151375085115433, - "learning_rate": 0.00019999729634650177, - "loss": 46.0, - "step": 14545 - }, - { - "epoch": 2.34252586658078, - "grad_norm": 0.0011483295820653439, - "learning_rate": 0.0001999972959744709, - "loss": 46.0, - "step": 14546 - }, - { - "epoch": 2.3426869036595677, - "grad_norm": 0.0015884640160948038, - "learning_rate": 0.00019999729560241444, - "loss": 46.0, - "step": 14547 - }, - { - "epoch": 2.342847940738355, - "grad_norm": 0.0023586833849549294, - "learning_rate": 0.00019999729523033239, - "loss": 46.0, - "step": 14548 - }, - { - "epoch": 2.3430089778171426, - "grad_norm": 0.002251328667625785, - "learning_rate": 0.00019999729485822474, - "loss": 46.0, - "step": 14549 - }, - { - "epoch": 2.3431700148959296, - "grad_norm": 0.002140614204108715, - "learning_rate": 0.0001999972944860915, - "loss": 46.0, - "step": 14550 - }, - { - "epoch": 2.343331051974717, - "grad_norm": 0.0035234992392361164, - "learning_rate": 0.00019999729411393266, - "loss": 46.0, - "step": 14551 - }, - { - "epoch": 2.3434920890535045, - "grad_norm": 0.0005595273105427623, - "learning_rate": 0.00019999729374174825, - "loss": 46.0, - "step": 14552 - }, - { - "epoch": 2.343653126132292, - "grad_norm": 0.0020130749326199293, - "learning_rate": 0.0001999972933695382, - "loss": 46.0, - "step": 14553 - }, - { - "epoch": 2.3438141632110794, - "grad_norm": 0.004177992232143879, - "learning_rate": 0.0001999972929973026, - "loss": 46.0, - "step": 14554 - }, - { - "epoch": 2.343975200289867, - "grad_norm": 0.0009268194553442299, - "learning_rate": 0.00019999729262504138, - "loss": 46.0, - "step": 14555 - }, - { - "epoch": 2.3441362373686543, - "grad_norm": 0.004611593205481768, - "learning_rate": 0.00019999729225275457, - "loss": 46.0, - "step": 14556 - }, - { - "epoch": 2.3442972744474417, - "grad_norm": 0.0008703362545929849, - "learning_rate": 0.00019999729188044217, - "loss": 46.0, - "step": 14557 - }, - { - "epoch": 2.3444583115262287, - "grad_norm": 0.00476370844990015, - "learning_rate": 0.00019999729150810418, - "loss": 46.0, - "step": 14558 - }, - { - "epoch": 2.344619348605016, - "grad_norm": 0.0023048759903758764, - "learning_rate": 0.00019999729113574056, - "loss": 46.0, - "step": 14559 - }, - { - "epoch": 2.3447803856838036, - "grad_norm": 0.005057384259998798, - "learning_rate": 0.00019999729076335137, - "loss": 46.0, - "step": 14560 - }, - { - "epoch": 2.344941422762591, - "grad_norm": 0.007750918157398701, - "learning_rate": 0.00019999729039093663, - "loss": 46.0, - "step": 14561 - }, - { - "epoch": 2.3451024598413786, - "grad_norm": 0.0019412693800404668, - "learning_rate": 0.00019999729001849624, - "loss": 46.0, - "step": 14562 - }, - { - "epoch": 2.345263496920166, - "grad_norm": 0.0013990812003612518, - "learning_rate": 0.00019999728964603027, - "loss": 46.0, - "step": 14563 - }, - { - "epoch": 2.345424533998953, - "grad_norm": 0.003175352932885289, - "learning_rate": 0.0001999972892735387, - "loss": 46.0, - "step": 14564 - }, - { - "epoch": 2.3455855710777405, - "grad_norm": 0.0075635043904185295, - "learning_rate": 0.00019999728890102156, - "loss": 46.0, - "step": 14565 - }, - { - "epoch": 2.345746608156528, - "grad_norm": 0.008029410615563393, - "learning_rate": 0.00019999728852847882, - "loss": 46.0, - "step": 14566 - }, - { - "epoch": 2.3459076452353154, - "grad_norm": 0.0016418209997937083, - "learning_rate": 0.00019999728815591044, - "loss": 46.0, - "step": 14567 - }, - { - "epoch": 2.346068682314103, - "grad_norm": 0.003333429340273142, - "learning_rate": 0.00019999728778331654, - "loss": 46.0, - "step": 14568 - }, - { - "epoch": 2.3462297193928903, - "grad_norm": 0.004290881101042032, - "learning_rate": 0.000199997287410697, - "loss": 46.0, - "step": 14569 - }, - { - "epoch": 2.3463907564716777, - "grad_norm": 0.0023911602329462767, - "learning_rate": 0.00019999728703805187, - "loss": 46.0, - "step": 14570 - }, - { - "epoch": 2.346551793550465, - "grad_norm": 0.007665710058063269, - "learning_rate": 0.00019999728666538115, - "loss": 46.0, - "step": 14571 - }, - { - "epoch": 2.3467128306292526, - "grad_norm": 0.0028941675554960966, - "learning_rate": 0.00019999728629268483, - "loss": 46.0, - "step": 14572 - }, - { - "epoch": 2.3468738677080396, - "grad_norm": 0.0006980894249863923, - "learning_rate": 0.00019999728591996293, - "loss": 46.0, - "step": 14573 - }, - { - "epoch": 2.347034904786827, - "grad_norm": 0.0026659141294658184, - "learning_rate": 0.00019999728554721542, - "loss": 46.0, - "step": 14574 - }, - { - "epoch": 2.3471959418656145, - "grad_norm": 0.006842970848083496, - "learning_rate": 0.00019999728517444232, - "loss": 46.0, - "step": 14575 - }, - { - "epoch": 2.347356978944402, - "grad_norm": 0.007536585908383131, - "learning_rate": 0.00019999728480164363, - "loss": 46.0, - "step": 14576 - }, - { - "epoch": 2.3475180160231894, - "grad_norm": 0.0008965461165644228, - "learning_rate": 0.00019999728442881935, - "loss": 46.0, - "step": 14577 - }, - { - "epoch": 2.347679053101977, - "grad_norm": 0.003250017063692212, - "learning_rate": 0.00019999728405596946, - "loss": 46.0, - "step": 14578 - }, - { - "epoch": 2.347840090180764, - "grad_norm": 0.00532780634239316, - "learning_rate": 0.00019999728368309399, - "loss": 46.0, - "step": 14579 - }, - { - "epoch": 2.3480011272595513, - "grad_norm": 0.0025089976843446493, - "learning_rate": 0.00019999728331019295, - "loss": 46.0, - "step": 14580 - }, - { - "epoch": 2.348162164338339, - "grad_norm": 0.0033368461299687624, - "learning_rate": 0.00019999728293726627, - "loss": 46.0, - "step": 14581 - }, - { - "epoch": 2.3483232014171262, - "grad_norm": 0.000839138578157872, - "learning_rate": 0.00019999728256431403, - "loss": 46.0, - "step": 14582 - }, - { - "epoch": 2.3484842384959137, - "grad_norm": 0.001168245100416243, - "learning_rate": 0.00019999728219133615, - "loss": 46.0, - "step": 14583 - }, - { - "epoch": 2.348645275574701, - "grad_norm": 0.003142967354506254, - "learning_rate": 0.0001999972818183327, - "loss": 46.0, - "step": 14584 - }, - { - "epoch": 2.3488063126534886, - "grad_norm": 0.005092882085591555, - "learning_rate": 0.00019999728144530368, - "loss": 46.0, - "step": 14585 - }, - { - "epoch": 2.348967349732276, - "grad_norm": 0.010230480693280697, - "learning_rate": 0.00019999728107224904, - "loss": 46.0, - "step": 14586 - }, - { - "epoch": 2.349128386811063, - "grad_norm": 0.0034412737004458904, - "learning_rate": 0.00019999728069916884, - "loss": 46.0, - "step": 14587 - }, - { - "epoch": 2.3492894238898505, - "grad_norm": 0.012240048497915268, - "learning_rate": 0.000199997280326063, - "loss": 46.0, - "step": 14588 - }, - { - "epoch": 2.349450460968638, - "grad_norm": 0.009447953663766384, - "learning_rate": 0.0001999972799529316, - "loss": 46.0, - "step": 14589 - }, - { - "epoch": 2.3496114980474254, - "grad_norm": 0.0072211576625704765, - "learning_rate": 0.00019999727957977458, - "loss": 46.0, - "step": 14590 - }, - { - "epoch": 2.349772535126213, - "grad_norm": 0.0038550281897187233, - "learning_rate": 0.00019999727920659197, - "loss": 46.0, - "step": 14591 - }, - { - "epoch": 2.3499335722050003, - "grad_norm": 0.005188861396163702, - "learning_rate": 0.00019999727883338378, - "loss": 46.0, - "step": 14592 - }, - { - "epoch": 2.3500946092837878, - "grad_norm": 0.0036283619701862335, - "learning_rate": 0.00019999727846014997, - "loss": 46.0, - "step": 14593 - }, - { - "epoch": 2.350255646362575, - "grad_norm": 0.0006508153164759278, - "learning_rate": 0.00019999727808689058, - "loss": 46.0, - "step": 14594 - }, - { - "epoch": 2.3504166834413622, - "grad_norm": 0.009944497607648373, - "learning_rate": 0.0001999972777136056, - "loss": 46.0, - "step": 14595 - }, - { - "epoch": 2.3505777205201497, - "grad_norm": 0.009078366681933403, - "learning_rate": 0.00019999727734029503, - "loss": 46.0, - "step": 14596 - }, - { - "epoch": 2.350738757598937, - "grad_norm": 0.0038890123832970858, - "learning_rate": 0.00019999727696695888, - "loss": 46.0, - "step": 14597 - }, - { - "epoch": 2.3508997946777246, - "grad_norm": 0.0009394356166012585, - "learning_rate": 0.0001999972765935971, - "loss": 46.0, - "step": 14598 - }, - { - "epoch": 2.351060831756512, - "grad_norm": 0.002508430043235421, - "learning_rate": 0.00019999727622020975, - "loss": 46.0, - "step": 14599 - }, - { - "epoch": 2.3512218688352995, - "grad_norm": 0.010747738182544708, - "learning_rate": 0.00019999727584679678, - "loss": 46.0, - "step": 14600 - }, - { - "epoch": 2.351382905914087, - "grad_norm": 0.0017628934001550078, - "learning_rate": 0.00019999727547335825, - "loss": 46.0, - "step": 14601 - }, - { - "epoch": 2.351543942992874, - "grad_norm": 0.0013982991222292185, - "learning_rate": 0.0001999972750998941, - "loss": 46.0, - "step": 14602 - }, - { - "epoch": 2.3517049800716614, - "grad_norm": 0.006652480456978083, - "learning_rate": 0.00019999727472640438, - "loss": 46.0, - "step": 14603 - }, - { - "epoch": 2.351866017150449, - "grad_norm": 0.010210984386503696, - "learning_rate": 0.00019999727435288906, - "loss": 46.0, - "step": 14604 - }, - { - "epoch": 2.3520270542292363, - "grad_norm": 0.002986464649438858, - "learning_rate": 0.00019999727397934812, - "loss": 46.0, - "step": 14605 - }, - { - "epoch": 2.3521880913080238, - "grad_norm": 0.004176314454525709, - "learning_rate": 0.00019999727360578163, - "loss": 46.0, - "step": 14606 - }, - { - "epoch": 2.352349128386811, - "grad_norm": 0.006032838951796293, - "learning_rate": 0.0001999972732321895, - "loss": 46.0, - "step": 14607 - }, - { - "epoch": 2.352510165465598, - "grad_norm": 0.002583032939583063, - "learning_rate": 0.0001999972728585718, - "loss": 46.0, - "step": 14608 - }, - { - "epoch": 2.3526712025443857, - "grad_norm": 0.0016644189599901438, - "learning_rate": 0.0001999972724849285, - "loss": 46.0, - "step": 14609 - }, - { - "epoch": 2.352832239623173, - "grad_norm": 0.0020572547800838947, - "learning_rate": 0.0001999972721112596, - "loss": 46.0, - "step": 14610 - }, - { - "epoch": 2.3529932767019606, - "grad_norm": 0.005446291994303465, - "learning_rate": 0.0001999972717375651, - "loss": 46.0, - "step": 14611 - }, - { - "epoch": 2.353154313780748, - "grad_norm": 0.003693035803735256, - "learning_rate": 0.00019999727136384504, - "loss": 46.0, - "step": 14612 - }, - { - "epoch": 2.3533153508595355, - "grad_norm": 0.0006589462282136083, - "learning_rate": 0.00019999727099009938, - "loss": 46.0, - "step": 14613 - }, - { - "epoch": 2.353476387938323, - "grad_norm": 0.0004005154187325388, - "learning_rate": 0.00019999727061632808, - "loss": 46.0, - "step": 14614 - }, - { - "epoch": 2.3536374250171104, - "grad_norm": 0.004574046935886145, - "learning_rate": 0.00019999727024253122, - "loss": 46.0, - "step": 14615 - }, - { - "epoch": 2.353798462095898, - "grad_norm": 0.003939672838896513, - "learning_rate": 0.00019999726986870878, - "loss": 46.0, - "step": 14616 - }, - { - "epoch": 2.353959499174685, - "grad_norm": 0.0012294218176975846, - "learning_rate": 0.00019999726949486072, - "loss": 46.0, - "step": 14617 - }, - { - "epoch": 2.3541205362534723, - "grad_norm": 0.005164221860468388, - "learning_rate": 0.00019999726912098707, - "loss": 46.0, - "step": 14618 - }, - { - "epoch": 2.3542815733322597, - "grad_norm": 0.0019406527280807495, - "learning_rate": 0.0001999972687470878, - "loss": 46.0, - "step": 14619 - }, - { - "epoch": 2.354442610411047, - "grad_norm": 0.0016347814816981554, - "learning_rate": 0.00019999726837316298, - "loss": 46.0, - "step": 14620 - }, - { - "epoch": 2.3546036474898346, - "grad_norm": 0.0034431861713528633, - "learning_rate": 0.00019999726799921257, - "loss": 46.0, - "step": 14621 - }, - { - "epoch": 2.354764684568622, - "grad_norm": 0.008472124114632607, - "learning_rate": 0.00019999726762523655, - "loss": 46.0, - "step": 14622 - }, - { - "epoch": 2.354925721647409, - "grad_norm": 0.003613944398239255, - "learning_rate": 0.0001999972672512349, - "loss": 46.0, - "step": 14623 - }, - { - "epoch": 2.3550867587261965, - "grad_norm": 0.007264977321028709, - "learning_rate": 0.00019999726687720771, - "loss": 46.0, - "step": 14624 - }, - { - "epoch": 2.355247795804984, - "grad_norm": 0.0028498023748397827, - "learning_rate": 0.0001999972665031549, - "loss": 46.0, - "step": 14625 - }, - { - "epoch": 2.3554088328837715, - "grad_norm": 0.002462163334712386, - "learning_rate": 0.0001999972661290765, - "loss": 46.0, - "step": 14626 - }, - { - "epoch": 2.355569869962559, - "grad_norm": 0.0006279117078520358, - "learning_rate": 0.00019999726575497252, - "loss": 46.0, - "step": 14627 - }, - { - "epoch": 2.3557309070413464, - "grad_norm": 0.004870972596108913, - "learning_rate": 0.00019999726538084294, - "loss": 46.0, - "step": 14628 - }, - { - "epoch": 2.355891944120134, - "grad_norm": 0.0027601835317909718, - "learning_rate": 0.00019999726500668773, - "loss": 46.0, - "step": 14629 - }, - { - "epoch": 2.3560529811989213, - "grad_norm": 0.004281362984329462, - "learning_rate": 0.00019999726463250695, - "loss": 46.0, - "step": 14630 - }, - { - "epoch": 2.3562140182777083, - "grad_norm": 0.0011230497620999813, - "learning_rate": 0.00019999726425830062, - "loss": 46.0, - "step": 14631 - }, - { - "epoch": 2.3563750553564957, - "grad_norm": 0.01176318246871233, - "learning_rate": 0.00019999726388406864, - "loss": 46.0, - "step": 14632 - }, - { - "epoch": 2.356536092435283, - "grad_norm": 0.011590281501412392, - "learning_rate": 0.00019999726350981107, - "loss": 46.0, - "step": 14633 - }, - { - "epoch": 2.3566971295140706, - "grad_norm": 0.0013203065609559417, - "learning_rate": 0.00019999726313552792, - "loss": 46.0, - "step": 14634 - }, - { - "epoch": 2.356858166592858, - "grad_norm": 0.0014492750633507967, - "learning_rate": 0.00019999726276121919, - "loss": 46.0, - "step": 14635 - }, - { - "epoch": 2.3570192036716455, - "grad_norm": 0.001195266260765493, - "learning_rate": 0.00019999726238688483, - "loss": 46.0, - "step": 14636 - }, - { - "epoch": 2.357180240750433, - "grad_norm": 0.007008660584688187, - "learning_rate": 0.0001999972620125249, - "loss": 46.0, - "step": 14637 - }, - { - "epoch": 2.35734127782922, - "grad_norm": 0.002571654040366411, - "learning_rate": 0.00019999726163813937, - "loss": 46.0, - "step": 14638 - }, - { - "epoch": 2.3575023149080074, - "grad_norm": 0.00040334215736947954, - "learning_rate": 0.00019999726126372825, - "loss": 46.0, - "step": 14639 - }, - { - "epoch": 2.357663351986795, - "grad_norm": 0.007106542121618986, - "learning_rate": 0.00019999726088929155, - "loss": 46.0, - "step": 14640 - }, - { - "epoch": 2.3578243890655823, - "grad_norm": 0.0012591092381626368, - "learning_rate": 0.0001999972605148292, - "loss": 46.0, - "step": 14641 - }, - { - "epoch": 2.35798542614437, - "grad_norm": 0.002261930610984564, - "learning_rate": 0.0001999972601403413, - "loss": 46.0, - "step": 14642 - }, - { - "epoch": 2.3581464632231572, - "grad_norm": 0.002048380207270384, - "learning_rate": 0.00019999725976582782, - "loss": 46.0, - "step": 14643 - }, - { - "epoch": 2.3583075003019447, - "grad_norm": 0.00038474483881145716, - "learning_rate": 0.0001999972593912887, - "loss": 46.0, - "step": 14644 - }, - { - "epoch": 2.358468537380732, - "grad_norm": 0.003740274580195546, - "learning_rate": 0.00019999725901672402, - "loss": 46.0, - "step": 14645 - }, - { - "epoch": 2.358629574459519, - "grad_norm": 0.0005207539070397615, - "learning_rate": 0.00019999725864213374, - "loss": 46.0, - "step": 14646 - }, - { - "epoch": 2.3587906115383066, - "grad_norm": 0.003268243046477437, - "learning_rate": 0.00019999725826751785, - "loss": 46.0, - "step": 14647 - }, - { - "epoch": 2.358951648617094, - "grad_norm": 0.0018632604042068124, - "learning_rate": 0.00019999725789287637, - "loss": 46.0, - "step": 14648 - }, - { - "epoch": 2.3591126856958815, - "grad_norm": 0.0018150507239624858, - "learning_rate": 0.00019999725751820933, - "loss": 46.0, - "step": 14649 - }, - { - "epoch": 2.359273722774669, - "grad_norm": 0.0014653692487627268, - "learning_rate": 0.00019999725714351665, - "loss": 46.0, - "step": 14650 - }, - { - "epoch": 2.3594347598534564, - "grad_norm": 0.005365099757909775, - "learning_rate": 0.00019999725676879838, - "loss": 46.0, - "step": 14651 - }, - { - "epoch": 2.3595957969322434, - "grad_norm": 0.0016364929033443332, - "learning_rate": 0.00019999725639405455, - "loss": 46.0, - "step": 14652 - }, - { - "epoch": 2.359756834011031, - "grad_norm": 0.0016520697390660644, - "learning_rate": 0.0001999972560192851, - "loss": 46.0, - "step": 14653 - }, - { - "epoch": 2.3599178710898183, - "grad_norm": 0.002825340023264289, - "learning_rate": 0.00019999725564449008, - "loss": 46.0, - "step": 14654 - }, - { - "epoch": 2.3600789081686058, - "grad_norm": 0.003544642822816968, - "learning_rate": 0.00019999725526966943, - "loss": 46.0, - "step": 14655 - }, - { - "epoch": 2.3602399452473932, - "grad_norm": 0.0015102059114724398, - "learning_rate": 0.00019999725489482323, - "loss": 46.0, - "step": 14656 - }, - { - "epoch": 2.3604009823261807, - "grad_norm": 0.01047484204173088, - "learning_rate": 0.0001999972545199514, - "loss": 46.0, - "step": 14657 - }, - { - "epoch": 2.360562019404968, - "grad_norm": 0.0022438461892306805, - "learning_rate": 0.000199997254145054, - "loss": 46.0, - "step": 14658 - }, - { - "epoch": 2.3607230564837556, - "grad_norm": 0.0013933316804468632, - "learning_rate": 0.00019999725377013098, - "loss": 46.0, - "step": 14659 - }, - { - "epoch": 2.3608840935625426, - "grad_norm": 0.0035694336984306574, - "learning_rate": 0.00019999725339518238, - "loss": 46.0, - "step": 14660 - }, - { - "epoch": 2.36104513064133, - "grad_norm": 0.005351288244128227, - "learning_rate": 0.0001999972530202082, - "loss": 46.0, - "step": 14661 - }, - { - "epoch": 2.3612061677201175, - "grad_norm": 0.0035978180821985006, - "learning_rate": 0.0001999972526452084, - "loss": 46.0, - "step": 14662 - }, - { - "epoch": 2.361367204798905, - "grad_norm": 0.006602867506444454, - "learning_rate": 0.000199997252270183, - "loss": 46.0, - "step": 14663 - }, - { - "epoch": 2.3615282418776924, - "grad_norm": 0.006617933511734009, - "learning_rate": 0.00019999725189513202, - "loss": 46.0, - "step": 14664 - }, - { - "epoch": 2.36168927895648, - "grad_norm": 0.001885879086330533, - "learning_rate": 0.00019999725152005545, - "loss": 46.0, - "step": 14665 - }, - { - "epoch": 2.3618503160352673, - "grad_norm": 0.0033384154085069895, - "learning_rate": 0.00019999725114495332, - "loss": 46.0, - "step": 14666 - }, - { - "epoch": 2.3620113531140543, - "grad_norm": 0.0019791023805737495, - "learning_rate": 0.00019999725076982555, - "loss": 46.0, - "step": 14667 - }, - { - "epoch": 2.3621723901928418, - "grad_norm": 0.005918820854276419, - "learning_rate": 0.0001999972503946722, - "loss": 46.0, - "step": 14668 - }, - { - "epoch": 2.362333427271629, - "grad_norm": 0.006328626070171595, - "learning_rate": 0.00019999725001949324, - "loss": 46.0, - "step": 14669 - }, - { - "epoch": 2.3624944643504167, - "grad_norm": 0.004716383758932352, - "learning_rate": 0.0001999972496442887, - "loss": 46.0, - "step": 14670 - }, - { - "epoch": 2.362655501429204, - "grad_norm": 0.007791764102876186, - "learning_rate": 0.00019999724926905854, - "loss": 46.0, - "step": 14671 - }, - { - "epoch": 2.3628165385079916, - "grad_norm": 0.0012216089526191354, - "learning_rate": 0.00019999724889380283, - "loss": 46.0, - "step": 14672 - }, - { - "epoch": 2.362977575586779, - "grad_norm": 0.010274586267769337, - "learning_rate": 0.00019999724851852148, - "loss": 46.0, - "step": 14673 - }, - { - "epoch": 2.3631386126655665, - "grad_norm": 0.002135322894901037, - "learning_rate": 0.0001999972481432146, - "loss": 46.0, - "step": 14674 - }, - { - "epoch": 2.3632996497443535, - "grad_norm": 0.001198525307700038, - "learning_rate": 0.00019999724776788207, - "loss": 46.0, - "step": 14675 - }, - { - "epoch": 2.363460686823141, - "grad_norm": 0.0005366932600736618, - "learning_rate": 0.00019999724739252396, - "loss": 46.0, - "step": 14676 - }, - { - "epoch": 2.3636217239019284, - "grad_norm": 0.004019653890281916, - "learning_rate": 0.00019999724701714024, - "loss": 46.0, - "step": 14677 - }, - { - "epoch": 2.363782760980716, - "grad_norm": 0.002869992284104228, - "learning_rate": 0.00019999724664173093, - "loss": 46.0, - "step": 14678 - }, - { - "epoch": 2.3639437980595033, - "grad_norm": 0.003484451211988926, - "learning_rate": 0.00019999724626629605, - "loss": 46.0, - "step": 14679 - }, - { - "epoch": 2.3641048351382907, - "grad_norm": 0.0034715859219431877, - "learning_rate": 0.0001999972458908356, - "loss": 46.0, - "step": 14680 - }, - { - "epoch": 2.3642658722170777, - "grad_norm": 0.0035185804590582848, - "learning_rate": 0.0001999972455153495, - "loss": 46.0, - "step": 14681 - }, - { - "epoch": 2.364426909295865, - "grad_norm": 0.0008727594395168126, - "learning_rate": 0.00019999724513983783, - "loss": 46.0, - "step": 14682 - }, - { - "epoch": 2.3645879463746526, - "grad_norm": 0.0005920006078667939, - "learning_rate": 0.00019999724476430056, - "loss": 46.0, - "step": 14683 - }, - { - "epoch": 2.36474898345344, - "grad_norm": 0.000762270123232156, - "learning_rate": 0.0001999972443887377, - "loss": 46.0, - "step": 14684 - }, - { - "epoch": 2.3649100205322275, - "grad_norm": 0.004592764191329479, - "learning_rate": 0.00019999724401314922, - "loss": 46.0, - "step": 14685 - }, - { - "epoch": 2.365071057611015, - "grad_norm": 0.005814696196466684, - "learning_rate": 0.0001999972436375352, - "loss": 46.0, - "step": 14686 - }, - { - "epoch": 2.3652320946898024, - "grad_norm": 0.01590869575738907, - "learning_rate": 0.00019999724326189556, - "loss": 46.0, - "step": 14687 - }, - { - "epoch": 2.36539313176859, - "grad_norm": 0.0025805397890508175, - "learning_rate": 0.00019999724288623032, - "loss": 46.0, - "step": 14688 - }, - { - "epoch": 2.3655541688473773, - "grad_norm": 0.0008912792545743287, - "learning_rate": 0.00019999724251053947, - "loss": 46.0, - "step": 14689 - }, - { - "epoch": 2.3657152059261644, - "grad_norm": 0.0018187996465712786, - "learning_rate": 0.00019999724213482306, - "loss": 46.0, - "step": 14690 - }, - { - "epoch": 2.365876243004952, - "grad_norm": 0.007388916332274675, - "learning_rate": 0.00019999724175908103, - "loss": 46.0, - "step": 14691 - }, - { - "epoch": 2.3660372800837393, - "grad_norm": 0.0017756903544068336, - "learning_rate": 0.00019999724138331342, - "loss": 46.0, - "step": 14692 - }, - { - "epoch": 2.3661983171625267, - "grad_norm": 0.0015834859805181623, - "learning_rate": 0.0001999972410075202, - "loss": 46.0, - "step": 14693 - }, - { - "epoch": 2.366359354241314, - "grad_norm": 0.00608128122985363, - "learning_rate": 0.0001999972406317014, - "loss": 46.0, - "step": 14694 - }, - { - "epoch": 2.3665203913201016, - "grad_norm": 0.008327258750796318, - "learning_rate": 0.00019999724025585703, - "loss": 46.0, - "step": 14695 - }, - { - "epoch": 2.3666814283988886, - "grad_norm": 0.0007105885888449848, - "learning_rate": 0.000199997239879987, - "loss": 46.0, - "step": 14696 - }, - { - "epoch": 2.366842465477676, - "grad_norm": 0.0014984381850808859, - "learning_rate": 0.00019999723950409143, - "loss": 46.0, - "step": 14697 - }, - { - "epoch": 2.3670035025564635, - "grad_norm": 0.0008859752560965717, - "learning_rate": 0.00019999723912817024, - "loss": 46.0, - "step": 14698 - }, - { - "epoch": 2.367164539635251, - "grad_norm": 0.002013867488130927, - "learning_rate": 0.0001999972387522235, - "loss": 46.0, - "step": 14699 - }, - { - "epoch": 2.3673255767140384, - "grad_norm": 0.0023132688365876675, - "learning_rate": 0.0001999972383762511, - "loss": 46.0, - "step": 14700 - }, - { - "epoch": 2.367486613792826, - "grad_norm": 0.0020256282296031713, - "learning_rate": 0.00019999723800025315, - "loss": 46.0, - "step": 14701 - }, - { - "epoch": 2.3676476508716133, - "grad_norm": 0.001999534899368882, - "learning_rate": 0.00019999723762422958, - "loss": 46.0, - "step": 14702 - }, - { - "epoch": 2.367808687950401, - "grad_norm": 0.00644769286736846, - "learning_rate": 0.00019999723724818043, - "loss": 46.0, - "step": 14703 - }, - { - "epoch": 2.367969725029188, - "grad_norm": 0.00218023918569088, - "learning_rate": 0.0001999972368721057, - "loss": 46.0, - "step": 14704 - }, - { - "epoch": 2.3681307621079752, - "grad_norm": 0.006280168425291777, - "learning_rate": 0.00019999723649600536, - "loss": 46.0, - "step": 14705 - }, - { - "epoch": 2.3682917991867627, - "grad_norm": 0.0030314770992845297, - "learning_rate": 0.00019999723611987942, - "loss": 46.0, - "step": 14706 - }, - { - "epoch": 2.36845283626555, - "grad_norm": 0.001299206051044166, - "learning_rate": 0.00019999723574372791, - "loss": 46.0, - "step": 14707 - }, - { - "epoch": 2.3686138733443376, - "grad_norm": 0.002936696633696556, - "learning_rate": 0.00019999723536755077, - "loss": 46.0, - "step": 14708 - }, - { - "epoch": 2.368774910423125, - "grad_norm": 0.0010150355519726872, - "learning_rate": 0.00019999723499134807, - "loss": 46.0, - "step": 14709 - }, - { - "epoch": 2.3689359475019125, - "grad_norm": 0.0006543753552250564, - "learning_rate": 0.00019999723461511975, - "loss": 46.0, - "step": 14710 - }, - { - "epoch": 2.3690969845806995, - "grad_norm": 0.0041079167276620865, - "learning_rate": 0.00019999723423886584, - "loss": 46.0, - "step": 14711 - }, - { - "epoch": 2.369258021659487, - "grad_norm": 0.0016417294973507524, - "learning_rate": 0.00019999723386258635, - "loss": 46.0, - "step": 14712 - }, - { - "epoch": 2.3694190587382744, - "grad_norm": 0.001662685303017497, - "learning_rate": 0.00019999723348628124, - "loss": 46.0, - "step": 14713 - }, - { - "epoch": 2.369580095817062, - "grad_norm": 0.0009432067163288593, - "learning_rate": 0.00019999723310995058, - "loss": 46.0, - "step": 14714 - }, - { - "epoch": 2.3697411328958493, - "grad_norm": 0.003060807939618826, - "learning_rate": 0.0001999972327335943, - "loss": 46.0, - "step": 14715 - }, - { - "epoch": 2.3699021699746368, - "grad_norm": 0.002123053651303053, - "learning_rate": 0.00019999723235721243, - "loss": 46.0, - "step": 14716 - }, - { - "epoch": 2.370063207053424, - "grad_norm": 0.007227302063256502, - "learning_rate": 0.00019999723198080494, - "loss": 46.0, - "step": 14717 - }, - { - "epoch": 2.3702242441322117, - "grad_norm": 0.005050660111010075, - "learning_rate": 0.0001999972316043719, - "loss": 46.0, - "step": 14718 - }, - { - "epoch": 2.3703852812109987, - "grad_norm": 0.0077496604062616825, - "learning_rate": 0.00019999723122791322, - "loss": 46.0, - "step": 14719 - }, - { - "epoch": 2.370546318289786, - "grad_norm": 0.004484896548092365, - "learning_rate": 0.00019999723085142897, - "loss": 46.0, - "step": 14720 - }, - { - "epoch": 2.3707073553685736, - "grad_norm": 0.003823069855570793, - "learning_rate": 0.00019999723047491914, - "loss": 46.0, - "step": 14721 - }, - { - "epoch": 2.370868392447361, - "grad_norm": 0.0010556193301454186, - "learning_rate": 0.0001999972300983837, - "loss": 46.0, - "step": 14722 - }, - { - "epoch": 2.3710294295261485, - "grad_norm": 0.000592644268181175, - "learning_rate": 0.00019999722972182266, - "loss": 46.0, - "step": 14723 - }, - { - "epoch": 2.371190466604936, - "grad_norm": 0.009649808518588543, - "learning_rate": 0.00019999722934523604, - "loss": 46.0, - "step": 14724 - }, - { - "epoch": 2.371351503683723, - "grad_norm": 0.003973050974309444, - "learning_rate": 0.0001999972289686238, - "loss": 46.0, - "step": 14725 - }, - { - "epoch": 2.3715125407625104, - "grad_norm": 0.0018054497195407748, - "learning_rate": 0.000199997228591986, - "loss": 46.0, - "step": 14726 - }, - { - "epoch": 2.371673577841298, - "grad_norm": 0.001413486199453473, - "learning_rate": 0.00019999722821532258, - "loss": 46.0, - "step": 14727 - }, - { - "epoch": 2.3718346149200853, - "grad_norm": 0.0015482193557545543, - "learning_rate": 0.00019999722783863358, - "loss": 46.0, - "step": 14728 - }, - { - "epoch": 2.3719956519988727, - "grad_norm": 0.0022801414597779512, - "learning_rate": 0.00019999722746191897, - "loss": 46.0, - "step": 14729 - }, - { - "epoch": 2.37215668907766, - "grad_norm": 0.0022812786046415567, - "learning_rate": 0.00019999722708517878, - "loss": 46.0, - "step": 14730 - }, - { - "epoch": 2.3723177261564476, - "grad_norm": 0.0009693917236290872, - "learning_rate": 0.00019999722670841302, - "loss": 46.0, - "step": 14731 - }, - { - "epoch": 2.372478763235235, - "grad_norm": 0.0022720452398061752, - "learning_rate": 0.00019999722633162162, - "loss": 46.0, - "step": 14732 - }, - { - "epoch": 2.3726398003140226, - "grad_norm": 0.0035725245252251625, - "learning_rate": 0.00019999722595480463, - "loss": 46.0, - "step": 14733 - }, - { - "epoch": 2.3728008373928096, - "grad_norm": 0.0024852852802723646, - "learning_rate": 0.0001999972255779621, - "loss": 46.0, - "step": 14734 - }, - { - "epoch": 2.372961874471597, - "grad_norm": 0.0014489275636151433, - "learning_rate": 0.0001999972252010939, - "loss": 46.0, - "step": 14735 - }, - { - "epoch": 2.3731229115503845, - "grad_norm": 0.003032973036170006, - "learning_rate": 0.00019999722482420013, - "loss": 46.0, - "step": 14736 - }, - { - "epoch": 2.373283948629172, - "grad_norm": 0.0027989703230559826, - "learning_rate": 0.0001999972244472808, - "loss": 46.0, - "step": 14737 - }, - { - "epoch": 2.3734449857079594, - "grad_norm": 0.0029548613820225, - "learning_rate": 0.00019999722407033584, - "loss": 46.0, - "step": 14738 - }, - { - "epoch": 2.373606022786747, - "grad_norm": 0.002398983808234334, - "learning_rate": 0.0001999972236933653, - "loss": 46.0, - "step": 14739 - }, - { - "epoch": 2.373767059865534, - "grad_norm": 0.003854044945910573, - "learning_rate": 0.00019999722331636916, - "loss": 46.0, - "step": 14740 - }, - { - "epoch": 2.3739280969443213, - "grad_norm": 0.001955259358510375, - "learning_rate": 0.00019999722293934747, - "loss": 46.0, - "step": 14741 - }, - { - "epoch": 2.3740891340231087, - "grad_norm": 0.002410275163128972, - "learning_rate": 0.00019999722256230012, - "loss": 46.0, - "step": 14742 - }, - { - "epoch": 2.374250171101896, - "grad_norm": 0.0031933600548654795, - "learning_rate": 0.0001999972221852272, - "loss": 46.0, - "step": 14743 - }, - { - "epoch": 2.3744112081806836, - "grad_norm": 0.005795541685074568, - "learning_rate": 0.0001999972218081287, - "loss": 46.0, - "step": 14744 - }, - { - "epoch": 2.374572245259471, - "grad_norm": 0.007466630078852177, - "learning_rate": 0.0001999972214310046, - "loss": 46.0, - "step": 14745 - }, - { - "epoch": 2.3747332823382585, - "grad_norm": 0.003981498070061207, - "learning_rate": 0.0001999972210538549, - "loss": 46.0, - "step": 14746 - }, - { - "epoch": 2.374894319417046, - "grad_norm": 0.0017899404047057033, - "learning_rate": 0.00019999722067667958, - "loss": 46.0, - "step": 14747 - }, - { - "epoch": 2.375055356495833, - "grad_norm": 0.007729220669716597, - "learning_rate": 0.00019999722029947868, - "loss": 46.0, - "step": 14748 - }, - { - "epoch": 2.3752163935746204, - "grad_norm": 0.0016655372455716133, - "learning_rate": 0.00019999721992225222, - "loss": 46.0, - "step": 14749 - }, - { - "epoch": 2.375377430653408, - "grad_norm": 0.0016331017250195146, - "learning_rate": 0.00019999721954500014, - "loss": 46.0, - "step": 14750 - }, - { - "epoch": 2.3755384677321953, - "grad_norm": 0.00180652248673141, - "learning_rate": 0.0001999972191677225, - "loss": 46.0, - "step": 14751 - }, - { - "epoch": 2.375699504810983, - "grad_norm": 0.0006934988196007907, - "learning_rate": 0.00019999721879041923, - "loss": 46.0, - "step": 14752 - }, - { - "epoch": 2.3758605418897702, - "grad_norm": 0.003972357604652643, - "learning_rate": 0.00019999721841309036, - "loss": 46.0, - "step": 14753 - }, - { - "epoch": 2.3760215789685577, - "grad_norm": 0.004908293019980192, - "learning_rate": 0.0001999972180357359, - "loss": 46.0, - "step": 14754 - }, - { - "epoch": 2.3761826160473447, - "grad_norm": 0.01182826142758131, - "learning_rate": 0.00019999721765835587, - "loss": 46.0, - "step": 14755 - }, - { - "epoch": 2.376343653126132, - "grad_norm": 0.00035638068220578134, - "learning_rate": 0.00019999721728095022, - "loss": 46.0, - "step": 14756 - }, - { - "epoch": 2.3765046902049196, - "grad_norm": 0.0019747004844248295, - "learning_rate": 0.00019999721690351898, - "loss": 46.0, - "step": 14757 - }, - { - "epoch": 2.376665727283707, - "grad_norm": 0.0015068651409819722, - "learning_rate": 0.00019999721652606215, - "loss": 46.0, - "step": 14758 - }, - { - "epoch": 2.3768267643624945, - "grad_norm": 0.000996322720311582, - "learning_rate": 0.00019999721614857974, - "loss": 46.0, - "step": 14759 - }, - { - "epoch": 2.376987801441282, - "grad_norm": 0.004778548143804073, - "learning_rate": 0.0001999972157710717, - "loss": 46.0, - "step": 14760 - }, - { - "epoch": 2.3771488385200694, - "grad_norm": 0.0013410389656201005, - "learning_rate": 0.00019999721539353812, - "loss": 46.0, - "step": 14761 - }, - { - "epoch": 2.377309875598857, - "grad_norm": 0.0065715196542441845, - "learning_rate": 0.0001999972150159789, - "loss": 46.0, - "step": 14762 - }, - { - "epoch": 2.377470912677644, - "grad_norm": 0.0010227446909993887, - "learning_rate": 0.0001999972146383941, - "loss": 46.0, - "step": 14763 - }, - { - "epoch": 2.3776319497564313, - "grad_norm": 0.001720933592878282, - "learning_rate": 0.0001999972142607837, - "loss": 46.0, - "step": 14764 - }, - { - "epoch": 2.377792986835219, - "grad_norm": 0.0023274989798665047, - "learning_rate": 0.00019999721388314773, - "loss": 46.0, - "step": 14765 - }, - { - "epoch": 2.3779540239140062, - "grad_norm": 0.002022327622398734, - "learning_rate": 0.00019999721350548612, - "loss": 46.0, - "step": 14766 - }, - { - "epoch": 2.3781150609927937, - "grad_norm": 0.0007186635630205274, - "learning_rate": 0.00019999721312779895, - "loss": 46.0, - "step": 14767 - }, - { - "epoch": 2.378276098071581, - "grad_norm": 0.0006771271000616252, - "learning_rate": 0.00019999721275008617, - "loss": 46.0, - "step": 14768 - }, - { - "epoch": 2.378437135150368, - "grad_norm": 0.007208125665783882, - "learning_rate": 0.00019999721237234783, - "loss": 46.0, - "step": 14769 - }, - { - "epoch": 2.3785981722291556, - "grad_norm": 0.001271755900233984, - "learning_rate": 0.00019999721199458385, - "loss": 46.0, - "step": 14770 - }, - { - "epoch": 2.378759209307943, - "grad_norm": 0.003842888167127967, - "learning_rate": 0.0001999972116167943, - "loss": 46.0, - "step": 14771 - }, - { - "epoch": 2.3789202463867305, - "grad_norm": 0.0008683003834448755, - "learning_rate": 0.00019999721123897915, - "loss": 46.0, - "step": 14772 - }, - { - "epoch": 2.379081283465518, - "grad_norm": 0.0010391527321189642, - "learning_rate": 0.0001999972108611384, - "loss": 46.0, - "step": 14773 - }, - { - "epoch": 2.3792423205443054, - "grad_norm": 0.0033191770780831575, - "learning_rate": 0.00019999721048327208, - "loss": 46.0, - "step": 14774 - }, - { - "epoch": 2.379403357623093, - "grad_norm": 0.0008084566215984523, - "learning_rate": 0.00019999721010538016, - "loss": 46.0, - "step": 14775 - }, - { - "epoch": 2.3795643947018803, - "grad_norm": 0.0020298969466239214, - "learning_rate": 0.00019999720972746263, - "loss": 46.0, - "step": 14776 - }, - { - "epoch": 2.3797254317806673, - "grad_norm": 0.0011275119613856077, - "learning_rate": 0.0001999972093495195, - "loss": 46.0, - "step": 14777 - }, - { - "epoch": 2.3798864688594548, - "grad_norm": 0.00478333467617631, - "learning_rate": 0.0001999972089715508, - "loss": 46.0, - "step": 14778 - }, - { - "epoch": 2.380047505938242, - "grad_norm": 0.002023993758484721, - "learning_rate": 0.00019999720859355648, - "loss": 46.0, - "step": 14779 - }, - { - "epoch": 2.3802085430170297, - "grad_norm": 0.008766070939600468, - "learning_rate": 0.00019999720821553657, - "loss": 46.0, - "step": 14780 - }, - { - "epoch": 2.380369580095817, - "grad_norm": 0.00048753657029010355, - "learning_rate": 0.0001999972078374911, - "loss": 46.0, - "step": 14781 - }, - { - "epoch": 2.3805306171746046, - "grad_norm": 0.016918152570724487, - "learning_rate": 0.00019999720745942, - "loss": 46.0, - "step": 14782 - }, - { - "epoch": 2.380691654253392, - "grad_norm": 0.009116408415138721, - "learning_rate": 0.00019999720708132332, - "loss": 46.0, - "step": 14783 - }, - { - "epoch": 2.380852691332179, - "grad_norm": 0.0025388484355062246, - "learning_rate": 0.00019999720670320104, - "loss": 46.0, - "step": 14784 - }, - { - "epoch": 2.3810137284109665, - "grad_norm": 0.0019006110960617661, - "learning_rate": 0.00019999720632505317, - "loss": 46.0, - "step": 14785 - }, - { - "epoch": 2.381174765489754, - "grad_norm": 0.002911211922764778, - "learning_rate": 0.0001999972059468797, - "loss": 46.0, - "step": 14786 - }, - { - "epoch": 2.3813358025685414, - "grad_norm": 0.001458571059629321, - "learning_rate": 0.00019999720556868064, - "loss": 46.0, - "step": 14787 - }, - { - "epoch": 2.381496839647329, - "grad_norm": 0.0031664129346609116, - "learning_rate": 0.000199997205190456, - "loss": 46.0, - "step": 14788 - }, - { - "epoch": 2.3816578767261163, - "grad_norm": 0.002005769405514002, - "learning_rate": 0.00019999720481220573, - "loss": 46.0, - "step": 14789 - }, - { - "epoch": 2.3818189138049037, - "grad_norm": 0.0038936263881623745, - "learning_rate": 0.0001999972044339299, - "loss": 46.0, - "step": 14790 - }, - { - "epoch": 2.381979950883691, - "grad_norm": 0.0027838812675327063, - "learning_rate": 0.00019999720405562848, - "loss": 46.0, - "step": 14791 - }, - { - "epoch": 2.382140987962478, - "grad_norm": 0.0007916048052720726, - "learning_rate": 0.00019999720367730144, - "loss": 46.0, - "step": 14792 - }, - { - "epoch": 2.3823020250412656, - "grad_norm": 0.0008116143872030079, - "learning_rate": 0.00019999720329894882, - "loss": 46.0, - "step": 14793 - }, - { - "epoch": 2.382463062120053, - "grad_norm": 0.0021078018471598625, - "learning_rate": 0.0001999972029205706, - "loss": 46.0, - "step": 14794 - }, - { - "epoch": 2.3826240991988406, - "grad_norm": 0.0019509702688083053, - "learning_rate": 0.00019999720254216676, - "loss": 46.0, - "step": 14795 - }, - { - "epoch": 2.382785136277628, - "grad_norm": 0.01486461702734232, - "learning_rate": 0.00019999720216373737, - "loss": 46.0, - "step": 14796 - }, - { - "epoch": 2.3829461733564155, - "grad_norm": 0.004105749074369669, - "learning_rate": 0.00019999720178528237, - "loss": 46.0, - "step": 14797 - }, - { - "epoch": 2.3831072104352025, - "grad_norm": 0.0022464694920927286, - "learning_rate": 0.00019999720140680176, - "loss": 46.0, - "step": 14798 - }, - { - "epoch": 2.38326824751399, - "grad_norm": 0.014298107475042343, - "learning_rate": 0.0001999972010282956, - "loss": 46.0, - "step": 14799 - }, - { - "epoch": 2.3834292845927774, - "grad_norm": 0.0015294221229851246, - "learning_rate": 0.00019999720064976383, - "loss": 46.0, - "step": 14800 - }, - { - "epoch": 2.383590321671565, - "grad_norm": 0.011498850770294666, - "learning_rate": 0.0001999972002712064, - "loss": 46.0, - "step": 14801 - }, - { - "epoch": 2.3837513587503523, - "grad_norm": 0.01088939979672432, - "learning_rate": 0.00019999719989262347, - "loss": 46.0, - "step": 14802 - }, - { - "epoch": 2.3839123958291397, - "grad_norm": 0.006420196499675512, - "learning_rate": 0.0001999971995140149, - "loss": 46.0, - "step": 14803 - }, - { - "epoch": 2.384073432907927, - "grad_norm": 0.002970593748614192, - "learning_rate": 0.0001999971991353807, - "loss": 46.0, - "step": 14804 - }, - { - "epoch": 2.3842344699867146, - "grad_norm": 0.0008491531480103731, - "learning_rate": 0.00019999719875672098, - "loss": 46.0, - "step": 14805 - }, - { - "epoch": 2.384395507065502, - "grad_norm": 0.0015757104847580194, - "learning_rate": 0.00019999719837803564, - "loss": 46.0, - "step": 14806 - }, - { - "epoch": 2.384556544144289, - "grad_norm": 0.0011876648059114814, - "learning_rate": 0.0001999971979993247, - "loss": 46.0, - "step": 14807 - }, - { - "epoch": 2.3847175812230765, - "grad_norm": 0.0011319088516756892, - "learning_rate": 0.00019999719762058815, - "loss": 46.0, - "step": 14808 - }, - { - "epoch": 2.384878618301864, - "grad_norm": 0.0015093715628609061, - "learning_rate": 0.00019999719724182602, - "loss": 46.0, - "step": 14809 - }, - { - "epoch": 2.3850396553806514, - "grad_norm": 0.0016423582565039396, - "learning_rate": 0.00019999719686303828, - "loss": 46.0, - "step": 14810 - }, - { - "epoch": 2.385200692459439, - "grad_norm": 0.0007229059119708836, - "learning_rate": 0.00019999719648422496, - "loss": 46.0, - "step": 14811 - }, - { - "epoch": 2.3853617295382263, - "grad_norm": 0.0011481448309496045, - "learning_rate": 0.00019999719610538604, - "loss": 46.0, - "step": 14812 - }, - { - "epoch": 2.3855227666170133, - "grad_norm": 0.006847640499472618, - "learning_rate": 0.00019999719572652157, - "loss": 46.0, - "step": 14813 - }, - { - "epoch": 2.385683803695801, - "grad_norm": 0.0017043565167114139, - "learning_rate": 0.00019999719534763143, - "loss": 46.0, - "step": 14814 - }, - { - "epoch": 2.3858448407745882, - "grad_norm": 0.002039613900706172, - "learning_rate": 0.00019999719496871575, - "loss": 46.0, - "step": 14815 - }, - { - "epoch": 2.3860058778533757, - "grad_norm": 0.002016628859564662, - "learning_rate": 0.00019999719458977446, - "loss": 46.0, - "step": 14816 - }, - { - "epoch": 2.386166914932163, - "grad_norm": 0.0033311331644654274, - "learning_rate": 0.00019999719421080755, - "loss": 46.0, - "step": 14817 - }, - { - "epoch": 2.3863279520109506, - "grad_norm": 0.0027217241004109383, - "learning_rate": 0.0001999971938318151, - "loss": 46.0, - "step": 14818 - }, - { - "epoch": 2.386488989089738, - "grad_norm": 0.0003538668970577419, - "learning_rate": 0.000199997193452797, - "loss": 46.0, - "step": 14819 - }, - { - "epoch": 2.3866500261685255, - "grad_norm": 0.007403582334518433, - "learning_rate": 0.00019999719307375334, - "loss": 46.0, - "step": 14820 - }, - { - "epoch": 2.3868110632473125, - "grad_norm": 0.0077627371065318584, - "learning_rate": 0.0001999971926946841, - "loss": 46.0, - "step": 14821 - }, - { - "epoch": 2.3869721003261, - "grad_norm": 0.006375632248818874, - "learning_rate": 0.00019999719231558922, - "loss": 46.0, - "step": 14822 - }, - { - "epoch": 2.3871331374048874, - "grad_norm": 0.009840652346611023, - "learning_rate": 0.0001999971919364688, - "loss": 46.0, - "step": 14823 - }, - { - "epoch": 2.387294174483675, - "grad_norm": 0.002215093933045864, - "learning_rate": 0.00019999719155732275, - "loss": 46.0, - "step": 14824 - }, - { - "epoch": 2.3874552115624623, - "grad_norm": 0.0007720871944911778, - "learning_rate": 0.00019999719117815113, - "loss": 46.0, - "step": 14825 - }, - { - "epoch": 2.3876162486412498, - "grad_norm": 0.00482658576220274, - "learning_rate": 0.00019999719079895388, - "loss": 46.0, - "step": 14826 - }, - { - "epoch": 2.3877772857200372, - "grad_norm": 0.001992464531213045, - "learning_rate": 0.00019999719041973105, - "loss": 46.0, - "step": 14827 - }, - { - "epoch": 2.3879383227988242, - "grad_norm": 0.002385077066719532, - "learning_rate": 0.00019999719004048263, - "loss": 46.0, - "step": 14828 - }, - { - "epoch": 2.3880993598776117, - "grad_norm": 0.005993297789245844, - "learning_rate": 0.00019999718966120863, - "loss": 46.0, - "step": 14829 - }, - { - "epoch": 2.388260396956399, - "grad_norm": 0.0031910741236060858, - "learning_rate": 0.000199997189281909, - "loss": 46.0, - "step": 14830 - }, - { - "epoch": 2.3884214340351866, - "grad_norm": 0.0024330129381269217, - "learning_rate": 0.00019999718890258378, - "loss": 46.0, - "step": 14831 - }, - { - "epoch": 2.388582471113974, - "grad_norm": 0.0025851314421743155, - "learning_rate": 0.000199997188523233, - "loss": 46.0, - "step": 14832 - }, - { - "epoch": 2.3887435081927615, - "grad_norm": 0.001485941931605339, - "learning_rate": 0.00019999718814385663, - "loss": 46.0, - "step": 14833 - }, - { - "epoch": 2.388904545271549, - "grad_norm": 0.004651450552046299, - "learning_rate": 0.00019999718776445464, - "loss": 46.0, - "step": 14834 - }, - { - "epoch": 2.3890655823503364, - "grad_norm": 0.00271437456831336, - "learning_rate": 0.00019999718738502705, - "loss": 46.0, - "step": 14835 - }, - { - "epoch": 2.3892266194291234, - "grad_norm": 0.0004945089458487928, - "learning_rate": 0.00019999718700557388, - "loss": 46.0, - "step": 14836 - }, - { - "epoch": 2.389387656507911, - "grad_norm": 0.001572782639414072, - "learning_rate": 0.0001999971866260951, - "loss": 46.0, - "step": 14837 - }, - { - "epoch": 2.3895486935866983, - "grad_norm": 0.0014373004669323564, - "learning_rate": 0.00019999718624659076, - "loss": 46.0, - "step": 14838 - }, - { - "epoch": 2.3897097306654858, - "grad_norm": 0.0012333723716437817, - "learning_rate": 0.0001999971858670608, - "loss": 46.0, - "step": 14839 - }, - { - "epoch": 2.389870767744273, - "grad_norm": 0.006921854335814714, - "learning_rate": 0.00019999718548750525, - "loss": 46.0, - "step": 14840 - }, - { - "epoch": 2.3900318048230607, - "grad_norm": 0.0035812475252896547, - "learning_rate": 0.0001999971851079241, - "loss": 46.0, - "step": 14841 - }, - { - "epoch": 2.3901928419018477, - "grad_norm": 0.0031608252320438623, - "learning_rate": 0.00019999718472831738, - "loss": 46.0, - "step": 14842 - }, - { - "epoch": 2.390353878980635, - "grad_norm": 0.0011296499287709594, - "learning_rate": 0.00019999718434868502, - "loss": 46.0, - "step": 14843 - }, - { - "epoch": 2.3905149160594226, - "grad_norm": 0.003385860240086913, - "learning_rate": 0.00019999718396902712, - "loss": 46.0, - "step": 14844 - }, - { - "epoch": 2.39067595313821, - "grad_norm": 0.0009854146046563983, - "learning_rate": 0.0001999971835893436, - "loss": 46.0, - "step": 14845 - }, - { - "epoch": 2.3908369902169975, - "grad_norm": 0.0012606142554432154, - "learning_rate": 0.00019999718320963446, - "loss": 46.0, - "step": 14846 - }, - { - "epoch": 2.390998027295785, - "grad_norm": 0.005932171829044819, - "learning_rate": 0.00019999718282989976, - "loss": 46.0, - "step": 14847 - }, - { - "epoch": 2.3911590643745724, - "grad_norm": 0.01149502582848072, - "learning_rate": 0.00019999718245013946, - "loss": 46.0, - "step": 14848 - }, - { - "epoch": 2.39132010145336, - "grad_norm": 0.0016206150176003575, - "learning_rate": 0.00019999718207035357, - "loss": 46.0, - "step": 14849 - }, - { - "epoch": 2.391481138532147, - "grad_norm": 0.010081893764436245, - "learning_rate": 0.00019999718169054208, - "loss": 46.0, - "step": 14850 - }, - { - "epoch": 2.3916421756109343, - "grad_norm": 0.004157517571002245, - "learning_rate": 0.00019999718131070496, - "loss": 46.0, - "step": 14851 - }, - { - "epoch": 2.3918032126897217, - "grad_norm": 0.00331402150914073, - "learning_rate": 0.0001999971809308423, - "loss": 46.0, - "step": 14852 - }, - { - "epoch": 2.391964249768509, - "grad_norm": 0.005962159018963575, - "learning_rate": 0.00019999718055095403, - "loss": 46.0, - "step": 14853 - }, - { - "epoch": 2.3921252868472966, - "grad_norm": 0.0069003477692604065, - "learning_rate": 0.00019999718017104016, - "loss": 46.0, - "step": 14854 - }, - { - "epoch": 2.392286323926084, - "grad_norm": 0.0042977905832231045, - "learning_rate": 0.0001999971797911007, - "loss": 46.0, - "step": 14855 - }, - { - "epoch": 2.3924473610048715, - "grad_norm": 0.002373104216530919, - "learning_rate": 0.00019999717941113565, - "loss": 46.0, - "step": 14856 - }, - { - "epoch": 2.3926083980836585, - "grad_norm": 0.0027774544432759285, - "learning_rate": 0.00019999717903114501, - "loss": 46.0, - "step": 14857 - }, - { - "epoch": 2.392769435162446, - "grad_norm": 0.004388848785310984, - "learning_rate": 0.00019999717865112874, - "loss": 46.0, - "step": 14858 - }, - { - "epoch": 2.3929304722412335, - "grad_norm": 0.007151155732572079, - "learning_rate": 0.0001999971782710869, - "loss": 46.0, - "step": 14859 - }, - { - "epoch": 2.393091509320021, - "grad_norm": 0.004671855829656124, - "learning_rate": 0.00019999717789101948, - "loss": 46.0, - "step": 14860 - }, - { - "epoch": 2.3932525463988084, - "grad_norm": 0.005775718484073877, - "learning_rate": 0.00019999717751092644, - "loss": 46.0, - "step": 14861 - }, - { - "epoch": 2.393413583477596, - "grad_norm": 0.003141038352623582, - "learning_rate": 0.00019999717713080784, - "loss": 46.0, - "step": 14862 - }, - { - "epoch": 2.3935746205563833, - "grad_norm": 0.00044997193617746234, - "learning_rate": 0.0001999971767506636, - "loss": 46.0, - "step": 14863 - }, - { - "epoch": 2.3937356576351707, - "grad_norm": 0.008000251837074757, - "learning_rate": 0.0001999971763704938, - "loss": 46.0, - "step": 14864 - }, - { - "epoch": 2.3938966947139577, - "grad_norm": 0.001150397234596312, - "learning_rate": 0.00019999717599029841, - "loss": 46.0, - "step": 14865 - }, - { - "epoch": 2.394057731792745, - "grad_norm": 0.005656852386891842, - "learning_rate": 0.0001999971756100774, - "loss": 46.0, - "step": 14866 - }, - { - "epoch": 2.3942187688715326, - "grad_norm": 0.0030843340791761875, - "learning_rate": 0.00019999717522983082, - "loss": 46.0, - "step": 14867 - }, - { - "epoch": 2.39437980595032, - "grad_norm": 0.000997645198367536, - "learning_rate": 0.00019999717484955862, - "loss": 46.0, - "step": 14868 - }, - { - "epoch": 2.3945408430291075, - "grad_norm": 0.007723720744252205, - "learning_rate": 0.00019999717446926086, - "loss": 46.0, - "step": 14869 - }, - { - "epoch": 2.394701880107895, - "grad_norm": 0.004024618770927191, - "learning_rate": 0.00019999717408893748, - "loss": 46.0, - "step": 14870 - }, - { - "epoch": 2.3948629171866824, - "grad_norm": 0.0029589401092380285, - "learning_rate": 0.0001999971737085885, - "loss": 46.0, - "step": 14871 - }, - { - "epoch": 2.3950239542654694, - "grad_norm": 0.006197699345648289, - "learning_rate": 0.00019999717332821394, - "loss": 46.0, - "step": 14872 - }, - { - "epoch": 2.395184991344257, - "grad_norm": 0.00799486506730318, - "learning_rate": 0.0001999971729478138, - "loss": 46.0, - "step": 14873 - }, - { - "epoch": 2.3953460284230443, - "grad_norm": 0.005815913435071707, - "learning_rate": 0.00019999717256738805, - "loss": 46.0, - "step": 14874 - }, - { - "epoch": 2.395507065501832, - "grad_norm": 0.0010205275611951947, - "learning_rate": 0.0001999971721869367, - "loss": 46.0, - "step": 14875 - }, - { - "epoch": 2.3956681025806192, - "grad_norm": 0.003628675825893879, - "learning_rate": 0.00019999717180645975, - "loss": 46.0, - "step": 14876 - }, - { - "epoch": 2.3958291396594067, - "grad_norm": 0.0016113735036924481, - "learning_rate": 0.0001999971714259572, - "loss": 46.0, - "step": 14877 - }, - { - "epoch": 2.395990176738194, - "grad_norm": 0.00639236718416214, - "learning_rate": 0.0001999971710454291, - "loss": 46.0, - "step": 14878 - }, - { - "epoch": 2.3961512138169816, - "grad_norm": 0.007282912265509367, - "learning_rate": 0.00019999717066487537, - "loss": 46.0, - "step": 14879 - }, - { - "epoch": 2.3963122508957686, - "grad_norm": 0.002596805104985833, - "learning_rate": 0.00019999717028429607, - "loss": 46.0, - "step": 14880 - }, - { - "epoch": 2.396473287974556, - "grad_norm": 0.003063717857003212, - "learning_rate": 0.00019999716990369115, - "loss": 46.0, - "step": 14881 - }, - { - "epoch": 2.3966343250533435, - "grad_norm": 0.0022629154846072197, - "learning_rate": 0.00019999716952306064, - "loss": 46.0, - "step": 14882 - }, - { - "epoch": 2.396795362132131, - "grad_norm": 0.00248700357042253, - "learning_rate": 0.00019999716914240453, - "loss": 46.0, - "step": 14883 - }, - { - "epoch": 2.3969563992109184, - "grad_norm": 0.002188142156228423, - "learning_rate": 0.00019999716876172285, - "loss": 46.0, - "step": 14884 - }, - { - "epoch": 2.397117436289706, - "grad_norm": 0.004589318763464689, - "learning_rate": 0.00019999716838101555, - "loss": 46.0, - "step": 14885 - }, - { - "epoch": 2.397278473368493, - "grad_norm": 0.001246978179551661, - "learning_rate": 0.00019999716800028267, - "loss": 46.0, - "step": 14886 - }, - { - "epoch": 2.3974395104472803, - "grad_norm": 0.00201410916633904, - "learning_rate": 0.0001999971676195242, - "loss": 46.0, - "step": 14887 - }, - { - "epoch": 2.3976005475260678, - "grad_norm": 0.0018756977515295148, - "learning_rate": 0.00019999716723874013, - "loss": 46.0, - "step": 14888 - }, - { - "epoch": 2.3977615846048552, - "grad_norm": 0.001583478064276278, - "learning_rate": 0.00019999716685793048, - "loss": 46.0, - "step": 14889 - }, - { - "epoch": 2.3979226216836427, - "grad_norm": 0.0005613099783658981, - "learning_rate": 0.0001999971664770952, - "loss": 46.0, - "step": 14890 - }, - { - "epoch": 2.39808365876243, - "grad_norm": 0.0013335056137293577, - "learning_rate": 0.00019999716609623436, - "loss": 46.0, - "step": 14891 - }, - { - "epoch": 2.3982446958412176, - "grad_norm": 0.0022021501790732145, - "learning_rate": 0.00019999716571534793, - "loss": 46.0, - "step": 14892 - }, - { - "epoch": 2.398405732920005, - "grad_norm": 0.004060426261276007, - "learning_rate": 0.00019999716533443586, - "loss": 46.0, - "step": 14893 - }, - { - "epoch": 2.398566769998792, - "grad_norm": 0.0009838812984526157, - "learning_rate": 0.00019999716495349825, - "loss": 46.0, - "step": 14894 - }, - { - "epoch": 2.3987278070775795, - "grad_norm": 0.0049540214240550995, - "learning_rate": 0.000199997164572535, - "loss": 46.0, - "step": 14895 - }, - { - "epoch": 2.398888844156367, - "grad_norm": 0.005037226714193821, - "learning_rate": 0.0001999971641915462, - "loss": 46.0, - "step": 14896 - }, - { - "epoch": 2.3990498812351544, - "grad_norm": 0.001430519507266581, - "learning_rate": 0.0001999971638105318, - "loss": 46.0, - "step": 14897 - }, - { - "epoch": 2.399210918313942, - "grad_norm": 0.0011447274591773748, - "learning_rate": 0.00019999716342949177, - "loss": 46.0, - "step": 14898 - }, - { - "epoch": 2.3993719553927293, - "grad_norm": 0.0023657106794416904, - "learning_rate": 0.00019999716304842618, - "loss": 46.0, - "step": 14899 - }, - { - "epoch": 2.3995329924715167, - "grad_norm": 0.0013266261667013168, - "learning_rate": 0.00019999716266733497, - "loss": 46.0, - "step": 14900 - }, - { - "epoch": 2.3996940295503038, - "grad_norm": 0.006027529016137123, - "learning_rate": 0.0001999971622862182, - "loss": 46.0, - "step": 14901 - }, - { - "epoch": 2.399855066629091, - "grad_norm": 0.0024401552509516478, - "learning_rate": 0.0001999971619050758, - "loss": 46.0, - "step": 14902 - }, - { - "epoch": 2.4000161037078787, - "grad_norm": 0.004408691078424454, - "learning_rate": 0.0001999971615239078, - "loss": 46.0, - "step": 14903 - }, - { - "epoch": 2.400177140786666, - "grad_norm": 0.007748245261609554, - "learning_rate": 0.00019999716114271423, - "loss": 46.0, - "step": 14904 - }, - { - "epoch": 2.4003381778654536, - "grad_norm": 0.0010851210681721568, - "learning_rate": 0.00019999716076149506, - "loss": 46.0, - "step": 14905 - }, - { - "epoch": 2.400499214944241, - "grad_norm": 0.0025137283373624086, - "learning_rate": 0.0001999971603802503, - "loss": 46.0, - "step": 14906 - }, - { - "epoch": 2.4006602520230285, - "grad_norm": 0.0047279237769544125, - "learning_rate": 0.00019999715999897996, - "loss": 46.0, - "step": 14907 - }, - { - "epoch": 2.400821289101816, - "grad_norm": 0.004317928571254015, - "learning_rate": 0.000199997159617684, - "loss": 46.0, - "step": 14908 - }, - { - "epoch": 2.400982326180603, - "grad_norm": 0.001495165517553687, - "learning_rate": 0.00019999715923636245, - "loss": 46.0, - "step": 14909 - }, - { - "epoch": 2.4011433632593904, - "grad_norm": 0.0009965263307094574, - "learning_rate": 0.0001999971588550153, - "loss": 46.0, - "step": 14910 - }, - { - "epoch": 2.401304400338178, - "grad_norm": 0.0025162403471767902, - "learning_rate": 0.00019999715847364256, - "loss": 46.0, - "step": 14911 - }, - { - "epoch": 2.4014654374169653, - "grad_norm": 0.002974242437630892, - "learning_rate": 0.00019999715809224423, - "loss": 46.0, - "step": 14912 - }, - { - "epoch": 2.4016264744957527, - "grad_norm": 0.002061853650957346, - "learning_rate": 0.0001999971577108203, - "loss": 46.0, - "step": 14913 - }, - { - "epoch": 2.40178751157454, - "grad_norm": 0.0032915319316089153, - "learning_rate": 0.00019999715732937082, - "loss": 46.0, - "step": 14914 - }, - { - "epoch": 2.401948548653327, - "grad_norm": 0.0010055049788206816, - "learning_rate": 0.0001999971569478957, - "loss": 46.0, - "step": 14915 - }, - { - "epoch": 2.4021095857321146, - "grad_norm": 0.0016115966718643904, - "learning_rate": 0.00019999715656639498, - "loss": 46.0, - "step": 14916 - }, - { - "epoch": 2.402270622810902, - "grad_norm": 0.0027726360131055117, - "learning_rate": 0.00019999715618486868, - "loss": 46.0, - "step": 14917 - }, - { - "epoch": 2.4024316598896895, - "grad_norm": 0.0062407683581113815, - "learning_rate": 0.0001999971558033168, - "loss": 46.0, - "step": 14918 - }, - { - "epoch": 2.402592696968477, - "grad_norm": 0.00368437054567039, - "learning_rate": 0.00019999715542173932, - "loss": 46.0, - "step": 14919 - }, - { - "epoch": 2.4027537340472644, - "grad_norm": 0.0019279866246506572, - "learning_rate": 0.00019999715504013624, - "loss": 46.0, - "step": 14920 - }, - { - "epoch": 2.402914771126052, - "grad_norm": 0.006922426633536816, - "learning_rate": 0.00019999715465850753, - "loss": 46.0, - "step": 14921 - }, - { - "epoch": 2.4030758082048393, - "grad_norm": 0.0036330455914139748, - "learning_rate": 0.00019999715427685327, - "loss": 46.0, - "step": 14922 - }, - { - "epoch": 2.403236845283627, - "grad_norm": 0.004200490657240152, - "learning_rate": 0.00019999715389517342, - "loss": 46.0, - "step": 14923 - }, - { - "epoch": 2.403397882362414, - "grad_norm": 0.00803413987159729, - "learning_rate": 0.00019999715351346796, - "loss": 46.0, - "step": 14924 - }, - { - "epoch": 2.4035589194412013, - "grad_norm": 0.003241396276280284, - "learning_rate": 0.0001999971531317369, - "loss": 46.0, - "step": 14925 - }, - { - "epoch": 2.4037199565199887, - "grad_norm": 0.0018149223178625107, - "learning_rate": 0.00019999715274998024, - "loss": 46.0, - "step": 14926 - }, - { - "epoch": 2.403880993598776, - "grad_norm": 0.012932018376886845, - "learning_rate": 0.000199997152368198, - "loss": 46.0, - "step": 14927 - }, - { - "epoch": 2.4040420306775636, - "grad_norm": 0.001356132561340928, - "learning_rate": 0.00019999715198639018, - "loss": 46.0, - "step": 14928 - }, - { - "epoch": 2.404203067756351, - "grad_norm": 0.001434671925380826, - "learning_rate": 0.00019999715160455675, - "loss": 46.0, - "step": 14929 - }, - { - "epoch": 2.404364104835138, - "grad_norm": 0.00057082693092525, - "learning_rate": 0.00019999715122269774, - "loss": 46.0, - "step": 14930 - }, - { - "epoch": 2.4045251419139255, - "grad_norm": 0.001431204262189567, - "learning_rate": 0.0001999971508408131, - "loss": 46.0, - "step": 14931 - }, - { - "epoch": 2.404686178992713, - "grad_norm": 0.002392623107880354, - "learning_rate": 0.0001999971504589029, - "loss": 46.0, - "step": 14932 - }, - { - "epoch": 2.4048472160715004, - "grad_norm": 0.0021177795715630054, - "learning_rate": 0.00019999715007696712, - "loss": 46.0, - "step": 14933 - }, - { - "epoch": 2.405008253150288, - "grad_norm": 0.0009942085016518831, - "learning_rate": 0.0001999971496950057, - "loss": 46.0, - "step": 14934 - }, - { - "epoch": 2.4051692902290753, - "grad_norm": 0.0022986209951341152, - "learning_rate": 0.0001999971493130187, - "loss": 46.0, - "step": 14935 - }, - { - "epoch": 2.405330327307863, - "grad_norm": 0.0008761899662204087, - "learning_rate": 0.0001999971489310061, - "loss": 46.0, - "step": 14936 - }, - { - "epoch": 2.4054913643866502, - "grad_norm": 0.0021304022520780563, - "learning_rate": 0.00019999714854896793, - "loss": 46.0, - "step": 14937 - }, - { - "epoch": 2.4056524014654372, - "grad_norm": 0.0006061995518393815, - "learning_rate": 0.00019999714816690416, - "loss": 46.0, - "step": 14938 - }, - { - "epoch": 2.4058134385442247, - "grad_norm": 0.0013981732772663236, - "learning_rate": 0.0001999971477848148, - "loss": 46.0, - "step": 14939 - }, - { - "epoch": 2.405974475623012, - "grad_norm": 0.0017136174719780684, - "learning_rate": 0.00019999714740269982, - "loss": 46.0, - "step": 14940 - }, - { - "epoch": 2.4061355127017996, - "grad_norm": 0.00294609391130507, - "learning_rate": 0.00019999714702055926, - "loss": 46.0, - "step": 14941 - }, - { - "epoch": 2.406296549780587, - "grad_norm": 0.009054875001311302, - "learning_rate": 0.0001999971466383931, - "loss": 46.0, - "step": 14942 - }, - { - "epoch": 2.4064575868593745, - "grad_norm": 0.0005580727593041956, - "learning_rate": 0.00019999714625620137, - "loss": 46.0, - "step": 14943 - }, - { - "epoch": 2.406618623938162, - "grad_norm": 0.004075147211551666, - "learning_rate": 0.00019999714587398402, - "loss": 46.0, - "step": 14944 - }, - { - "epoch": 2.406779661016949, - "grad_norm": 0.0031745126470923424, - "learning_rate": 0.0001999971454917411, - "loss": 46.0, - "step": 14945 - }, - { - "epoch": 2.4069406980957364, - "grad_norm": 0.00141071155667305, - "learning_rate": 0.00019999714510947255, - "loss": 46.0, - "step": 14946 - }, - { - "epoch": 2.407101735174524, - "grad_norm": 0.0023850006982684135, - "learning_rate": 0.00019999714472717845, - "loss": 46.0, - "step": 14947 - }, - { - "epoch": 2.4072627722533113, - "grad_norm": 0.002219144022092223, - "learning_rate": 0.00019999714434485873, - "loss": 46.0, - "step": 14948 - }, - { - "epoch": 2.4074238093320988, - "grad_norm": 0.0008797465707175434, - "learning_rate": 0.00019999714396251342, - "loss": 46.0, - "step": 14949 - }, - { - "epoch": 2.407584846410886, - "grad_norm": 0.006275119259953499, - "learning_rate": 0.00019999714358014253, - "loss": 46.0, - "step": 14950 - }, - { - "epoch": 2.4077458834896737, - "grad_norm": 0.0007379829185083508, - "learning_rate": 0.000199997143197746, - "loss": 46.0, - "step": 14951 - }, - { - "epoch": 2.407906920568461, - "grad_norm": 0.0011120568960905075, - "learning_rate": 0.0001999971428153239, - "loss": 46.0, - "step": 14952 - }, - { - "epoch": 2.408067957647248, - "grad_norm": 0.003335511777549982, - "learning_rate": 0.00019999714243287625, - "loss": 46.0, - "step": 14953 - }, - { - "epoch": 2.4082289947260356, - "grad_norm": 0.002802102128043771, - "learning_rate": 0.00019999714205040295, - "loss": 46.0, - "step": 14954 - }, - { - "epoch": 2.408390031804823, - "grad_norm": 0.002060298342257738, - "learning_rate": 0.00019999714166790407, - "loss": 46.0, - "step": 14955 - }, - { - "epoch": 2.4085510688836105, - "grad_norm": 0.0016143816756084561, - "learning_rate": 0.0001999971412853796, - "loss": 46.0, - "step": 14956 - }, - { - "epoch": 2.408712105962398, - "grad_norm": 0.0033419255632907152, - "learning_rate": 0.00019999714090282951, - "loss": 46.0, - "step": 14957 - }, - { - "epoch": 2.4088731430411854, - "grad_norm": 0.004869893193244934, - "learning_rate": 0.00019999714052025387, - "loss": 46.0, - "step": 14958 - }, - { - "epoch": 2.4090341801199724, - "grad_norm": 0.00765066035091877, - "learning_rate": 0.00019999714013765264, - "loss": 46.0, - "step": 14959 - }, - { - "epoch": 2.40919521719876, - "grad_norm": 0.009709512814879417, - "learning_rate": 0.00019999713975502577, - "loss": 46.0, - "step": 14960 - }, - { - "epoch": 2.4093562542775473, - "grad_norm": 0.005278721451759338, - "learning_rate": 0.00019999713937237333, - "loss": 46.0, - "step": 14961 - }, - { - "epoch": 2.4095172913563347, - "grad_norm": 0.008919495157897472, - "learning_rate": 0.00019999713898969529, - "loss": 46.0, - "step": 14962 - }, - { - "epoch": 2.409678328435122, - "grad_norm": 0.004585885908454657, - "learning_rate": 0.00019999713860699168, - "loss": 46.0, - "step": 14963 - }, - { - "epoch": 2.4098393655139096, - "grad_norm": 0.0020918655209243298, - "learning_rate": 0.00019999713822426246, - "loss": 46.0, - "step": 14964 - }, - { - "epoch": 2.410000402592697, - "grad_norm": 0.0037259364034980536, - "learning_rate": 0.00019999713784150762, - "loss": 46.0, - "step": 14965 - }, - { - "epoch": 2.4101614396714846, - "grad_norm": 0.002379780635237694, - "learning_rate": 0.00019999713745872722, - "loss": 46.0, - "step": 14966 - }, - { - "epoch": 2.4103224767502716, - "grad_norm": 0.0013787498464807868, - "learning_rate": 0.00019999713707592121, - "loss": 46.0, - "step": 14967 - }, - { - "epoch": 2.410483513829059, - "grad_norm": 0.003032082924619317, - "learning_rate": 0.00019999713669308962, - "loss": 46.0, - "step": 14968 - }, - { - "epoch": 2.4106445509078465, - "grad_norm": 0.0030817643273621798, - "learning_rate": 0.00019999713631023243, - "loss": 46.0, - "step": 14969 - }, - { - "epoch": 2.410805587986634, - "grad_norm": 0.0008349365671165287, - "learning_rate": 0.00019999713592734963, - "loss": 46.0, - "step": 14970 - }, - { - "epoch": 2.4109666250654214, - "grad_norm": 0.0023388417903333902, - "learning_rate": 0.00019999713554444125, - "loss": 46.0, - "step": 14971 - }, - { - "epoch": 2.411127662144209, - "grad_norm": 0.0008196062408387661, - "learning_rate": 0.00019999713516150727, - "loss": 46.0, - "step": 14972 - }, - { - "epoch": 2.4112886992229963, - "grad_norm": 0.0014353410806506872, - "learning_rate": 0.0001999971347785477, - "loss": 46.0, - "step": 14973 - }, - { - "epoch": 2.4114497363017833, - "grad_norm": 0.0014467175351455808, - "learning_rate": 0.00019999713439556254, - "loss": 46.0, - "step": 14974 - }, - { - "epoch": 2.4116107733805707, - "grad_norm": 0.0007559700752608478, - "learning_rate": 0.00019999713401255178, - "loss": 46.0, - "step": 14975 - }, - { - "epoch": 2.411771810459358, - "grad_norm": 0.0010023186914622784, - "learning_rate": 0.00019999713362951543, - "loss": 46.0, - "step": 14976 - }, - { - "epoch": 2.4119328475381456, - "grad_norm": 0.0034047209192067385, - "learning_rate": 0.00019999713324645347, - "loss": 46.0, - "step": 14977 - }, - { - "epoch": 2.412093884616933, - "grad_norm": 0.009829115122556686, - "learning_rate": 0.00019999713286336594, - "loss": 46.0, - "step": 14978 - }, - { - "epoch": 2.4122549216957205, - "grad_norm": 0.0035766204819083214, - "learning_rate": 0.0001999971324802528, - "loss": 46.0, - "step": 14979 - }, - { - "epoch": 2.412415958774508, - "grad_norm": 0.003498433856293559, - "learning_rate": 0.00019999713209711408, - "loss": 46.0, - "step": 14980 - }, - { - "epoch": 2.4125769958532954, - "grad_norm": 0.008506706915795803, - "learning_rate": 0.00019999713171394974, - "loss": 46.0, - "step": 14981 - }, - { - "epoch": 2.4127380329320824, - "grad_norm": 0.0007884965161792934, - "learning_rate": 0.00019999713133075981, - "loss": 46.0, - "step": 14982 - }, - { - "epoch": 2.41289907001087, - "grad_norm": 0.014257362112402916, - "learning_rate": 0.0001999971309475443, - "loss": 46.0, - "step": 14983 - }, - { - "epoch": 2.4130601070896573, - "grad_norm": 0.0014589891070500016, - "learning_rate": 0.0001999971305643032, - "loss": 46.0, - "step": 14984 - }, - { - "epoch": 2.413221144168445, - "grad_norm": 0.0008877819636836648, - "learning_rate": 0.0001999971301810365, - "loss": 46.0, - "step": 14985 - }, - { - "epoch": 2.4133821812472322, - "grad_norm": 0.00781265553086996, - "learning_rate": 0.0001999971297977442, - "loss": 46.0, - "step": 14986 - }, - { - "epoch": 2.4135432183260197, - "grad_norm": 0.007220007479190826, - "learning_rate": 0.00019999712941442632, - "loss": 46.0, - "step": 14987 - }, - { - "epoch": 2.4137042554048067, - "grad_norm": 0.0015027987537905574, - "learning_rate": 0.00019999712903108282, - "loss": 46.0, - "step": 14988 - }, - { - "epoch": 2.413865292483594, - "grad_norm": 0.003292992478236556, - "learning_rate": 0.00019999712864771375, - "loss": 46.0, - "step": 14989 - }, - { - "epoch": 2.4140263295623816, - "grad_norm": 0.003830128815025091, - "learning_rate": 0.00019999712826431908, - "loss": 46.0, - "step": 14990 - }, - { - "epoch": 2.414187366641169, - "grad_norm": 0.001486469409428537, - "learning_rate": 0.00019999712788089884, - "loss": 46.0, - "step": 14991 - }, - { - "epoch": 2.4143484037199565, - "grad_norm": 0.0014831129228696227, - "learning_rate": 0.00019999712749745296, - "loss": 46.0, - "step": 14992 - }, - { - "epoch": 2.414509440798744, - "grad_norm": 0.007500668056309223, - "learning_rate": 0.00019999712711398152, - "loss": 46.0, - "step": 14993 - }, - { - "epoch": 2.4146704778775314, - "grad_norm": 0.001474690856412053, - "learning_rate": 0.00019999712673048444, - "loss": 46.0, - "step": 14994 - }, - { - "epoch": 2.414831514956319, - "grad_norm": 0.0032330448739230633, - "learning_rate": 0.00019999712634696183, - "loss": 46.0, - "step": 14995 - }, - { - "epoch": 2.4149925520351063, - "grad_norm": 0.0046609858982264996, - "learning_rate": 0.0001999971259634136, - "loss": 46.0, - "step": 14996 - }, - { - "epoch": 2.4151535891138933, - "grad_norm": 0.004924978129565716, - "learning_rate": 0.00019999712557983976, - "loss": 46.0, - "step": 14997 - }, - { - "epoch": 2.415314626192681, - "grad_norm": 0.004856908228248358, - "learning_rate": 0.00019999712519624035, - "loss": 46.0, - "step": 14998 - }, - { - "epoch": 2.4154756632714682, - "grad_norm": 0.004884161055088043, - "learning_rate": 0.0001999971248126153, - "loss": 46.0, - "step": 14999 - }, - { - "epoch": 2.4156367003502557, - "grad_norm": 0.002176631474867463, - "learning_rate": 0.00019999712442896473, - "loss": 46.0, - "step": 15000 - }, - { - "epoch": 2.415797737429043, - "grad_norm": 0.002556507708504796, - "learning_rate": 0.0001999971240452885, - "loss": 46.0, - "step": 15001 - }, - { - "epoch": 2.4159587745078306, - "grad_norm": 0.002298701787367463, - "learning_rate": 0.00019999712366158668, - "loss": 46.0, - "step": 15002 - }, - { - "epoch": 2.4161198115866176, - "grad_norm": 0.0019604400731623173, - "learning_rate": 0.0001999971232778593, - "loss": 46.0, - "step": 15003 - }, - { - "epoch": 2.416280848665405, - "grad_norm": 0.0022047865204513073, - "learning_rate": 0.0001999971228941063, - "loss": 46.0, - "step": 15004 - }, - { - "epoch": 2.4164418857441925, - "grad_norm": 0.0036231528501957655, - "learning_rate": 0.00019999712251032775, - "loss": 46.0, - "step": 15005 - }, - { - "epoch": 2.41660292282298, - "grad_norm": 0.0019326030742377043, - "learning_rate": 0.00019999712212652354, - "loss": 46.0, - "step": 15006 - }, - { - "epoch": 2.4167639599017674, - "grad_norm": 0.004777549300342798, - "learning_rate": 0.00019999712174269377, - "loss": 46.0, - "step": 15007 - }, - { - "epoch": 2.416924996980555, - "grad_norm": 0.0011353356530889869, - "learning_rate": 0.00019999712135883841, - "loss": 46.0, - "step": 15008 - }, - { - "epoch": 2.4170860340593423, - "grad_norm": 0.00082369614392519, - "learning_rate": 0.00019999712097495747, - "loss": 46.0, - "step": 15009 - }, - { - "epoch": 2.4172470711381298, - "grad_norm": 0.0021870385389775038, - "learning_rate": 0.00019999712059105091, - "loss": 46.0, - "step": 15010 - }, - { - "epoch": 2.4174081082169168, - "grad_norm": 0.0009749503806233406, - "learning_rate": 0.00019999712020711877, - "loss": 46.0, - "step": 15011 - }, - { - "epoch": 2.417569145295704, - "grad_norm": 0.004610496107488871, - "learning_rate": 0.00019999711982316104, - "loss": 46.0, - "step": 15012 - }, - { - "epoch": 2.4177301823744917, - "grad_norm": 0.0026237955316901207, - "learning_rate": 0.0001999971194391777, - "loss": 46.0, - "step": 15013 - }, - { - "epoch": 2.417891219453279, - "grad_norm": 0.0005824874388054013, - "learning_rate": 0.00019999711905516873, - "loss": 46.0, - "step": 15014 - }, - { - "epoch": 2.4180522565320666, - "grad_norm": 0.002638162113726139, - "learning_rate": 0.00019999711867113422, - "loss": 46.0, - "step": 15015 - }, - { - "epoch": 2.418213293610854, - "grad_norm": 0.004464765544980764, - "learning_rate": 0.0001999971182870741, - "loss": 46.0, - "step": 15016 - }, - { - "epoch": 2.4183743306896415, - "grad_norm": 0.00509841600432992, - "learning_rate": 0.0001999971179029884, - "loss": 46.0, - "step": 15017 - }, - { - "epoch": 2.4185353677684285, - "grad_norm": 0.0038666953332722187, - "learning_rate": 0.00019999711751887708, - "loss": 46.0, - "step": 15018 - }, - { - "epoch": 2.418696404847216, - "grad_norm": 0.0007318682037293911, - "learning_rate": 0.00019999711713474018, - "loss": 46.0, - "step": 15019 - }, - { - "epoch": 2.4188574419260034, - "grad_norm": 0.0016925224335864186, - "learning_rate": 0.00019999711675057767, - "loss": 46.0, - "step": 15020 - }, - { - "epoch": 2.419018479004791, - "grad_norm": 0.004939534701406956, - "learning_rate": 0.0001999971163663896, - "loss": 46.0, - "step": 15021 - }, - { - "epoch": 2.4191795160835783, - "grad_norm": 0.00682695722207427, - "learning_rate": 0.00019999711598217592, - "loss": 46.0, - "step": 15022 - }, - { - "epoch": 2.4193405531623657, - "grad_norm": 0.001709630829282105, - "learning_rate": 0.00019999711559793665, - "loss": 46.0, - "step": 15023 - }, - { - "epoch": 2.419501590241153, - "grad_norm": 0.003777744248509407, - "learning_rate": 0.00019999711521367176, - "loss": 46.0, - "step": 15024 - }, - { - "epoch": 2.4196626273199406, - "grad_norm": 0.000905441353097558, - "learning_rate": 0.00019999711482938132, - "loss": 46.0, - "step": 15025 - }, - { - "epoch": 2.4198236643987276, - "grad_norm": 0.0019152357708662748, - "learning_rate": 0.00019999711444506523, - "loss": 46.0, - "step": 15026 - }, - { - "epoch": 2.419984701477515, - "grad_norm": 0.012113110162317753, - "learning_rate": 0.00019999711406072358, - "loss": 46.0, - "step": 15027 - }, - { - "epoch": 2.4201457385563026, - "grad_norm": 0.002100149402394891, - "learning_rate": 0.00019999711367635635, - "loss": 46.0, - "step": 15028 - }, - { - "epoch": 2.42030677563509, - "grad_norm": 0.006856370717287064, - "learning_rate": 0.0001999971132919635, - "loss": 46.0, - "step": 15029 - }, - { - "epoch": 2.4204678127138775, - "grad_norm": 0.0014345949748530984, - "learning_rate": 0.00019999711290754507, - "loss": 46.0, - "step": 15030 - }, - { - "epoch": 2.420628849792665, - "grad_norm": 0.004308851435780525, - "learning_rate": 0.00019999711252310104, - "loss": 46.0, - "step": 15031 - }, - { - "epoch": 2.420789886871452, - "grad_norm": 0.0012742943363264203, - "learning_rate": 0.0001999971121386314, - "loss": 46.0, - "step": 15032 - }, - { - "epoch": 2.4209509239502394, - "grad_norm": 0.005597689189016819, - "learning_rate": 0.0001999971117541362, - "loss": 46.0, - "step": 15033 - }, - { - "epoch": 2.421111961029027, - "grad_norm": 0.001218323246575892, - "learning_rate": 0.0001999971113696154, - "loss": 46.0, - "step": 15034 - }, - { - "epoch": 2.4212729981078143, - "grad_norm": 0.006999565754085779, - "learning_rate": 0.00019999711098506897, - "loss": 46.0, - "step": 15035 - }, - { - "epoch": 2.4214340351866017, - "grad_norm": 0.002158647170290351, - "learning_rate": 0.00019999711060049698, - "loss": 46.0, - "step": 15036 - }, - { - "epoch": 2.421595072265389, - "grad_norm": 0.0011474784696474671, - "learning_rate": 0.00019999711021589938, - "loss": 46.0, - "step": 15037 - }, - { - "epoch": 2.4217561093441766, - "grad_norm": 0.006823455449193716, - "learning_rate": 0.0001999971098312762, - "loss": 46.0, - "step": 15038 - }, - { - "epoch": 2.421917146422964, - "grad_norm": 0.0015572105767205358, - "learning_rate": 0.0001999971094466274, - "loss": 46.0, - "step": 15039 - }, - { - "epoch": 2.4220781835017515, - "grad_norm": 0.002025666180998087, - "learning_rate": 0.000199997109061953, - "loss": 46.0, - "step": 15040 - }, - { - "epoch": 2.4222392205805385, - "grad_norm": 0.006478309165686369, - "learning_rate": 0.00019999710867725306, - "loss": 46.0, - "step": 15041 - }, - { - "epoch": 2.422400257659326, - "grad_norm": 0.0015047671040520072, - "learning_rate": 0.00019999710829252747, - "loss": 46.0, - "step": 15042 - }, - { - "epoch": 2.4225612947381134, - "grad_norm": 0.00624757120385766, - "learning_rate": 0.00019999710790777632, - "loss": 46.0, - "step": 15043 - }, - { - "epoch": 2.422722331816901, - "grad_norm": 0.0006859186105430126, - "learning_rate": 0.00019999710752299955, - "loss": 46.0, - "step": 15044 - }, - { - "epoch": 2.4228833688956883, - "grad_norm": 0.0015119699528440833, - "learning_rate": 0.00019999710713819723, - "loss": 46.0, - "step": 15045 - }, - { - "epoch": 2.423044405974476, - "grad_norm": 0.0013596673961728811, - "learning_rate": 0.0001999971067533693, - "loss": 46.0, - "step": 15046 - }, - { - "epoch": 2.423205443053263, - "grad_norm": 0.006602685898542404, - "learning_rate": 0.00019999710636851574, - "loss": 46.0, - "step": 15047 - }, - { - "epoch": 2.4233664801320502, - "grad_norm": 0.0016174034681171179, - "learning_rate": 0.00019999710598363662, - "loss": 46.0, - "step": 15048 - }, - { - "epoch": 2.4235275172108377, - "grad_norm": 0.001133667421527207, - "learning_rate": 0.00019999710559873187, - "loss": 46.0, - "step": 15049 - }, - { - "epoch": 2.423688554289625, - "grad_norm": 0.0011858440702781081, - "learning_rate": 0.00019999710521380158, - "loss": 46.0, - "step": 15050 - }, - { - "epoch": 2.4238495913684126, - "grad_norm": 0.0017343696672469378, - "learning_rate": 0.00019999710482884565, - "loss": 46.0, - "step": 15051 - }, - { - "epoch": 2.4240106284472, - "grad_norm": 0.0026560635305941105, - "learning_rate": 0.00019999710444386414, - "loss": 46.0, - "step": 15052 - }, - { - "epoch": 2.4241716655259875, - "grad_norm": 0.005059913732111454, - "learning_rate": 0.00019999710405885703, - "loss": 46.0, - "step": 15053 - }, - { - "epoch": 2.424332702604775, - "grad_norm": 0.005734195001423359, - "learning_rate": 0.00019999710367382437, - "loss": 46.0, - "step": 15054 - }, - { - "epoch": 2.424493739683562, - "grad_norm": 0.004790952894836664, - "learning_rate": 0.00019999710328876607, - "loss": 46.0, - "step": 15055 - }, - { - "epoch": 2.4246547767623494, - "grad_norm": 0.001556164352223277, - "learning_rate": 0.00019999710290368217, - "loss": 46.0, - "step": 15056 - }, - { - "epoch": 2.424815813841137, - "grad_norm": 0.0006882189773023129, - "learning_rate": 0.0001999971025185727, - "loss": 46.0, - "step": 15057 - }, - { - "epoch": 2.4249768509199243, - "grad_norm": 0.000901675783097744, - "learning_rate": 0.00019999710213343763, - "loss": 46.0, - "step": 15058 - }, - { - "epoch": 2.4251378879987118, - "grad_norm": 0.002039017155766487, - "learning_rate": 0.00019999710174827695, - "loss": 46.0, - "step": 15059 - }, - { - "epoch": 2.4252989250774992, - "grad_norm": 0.010212313383817673, - "learning_rate": 0.0001999971013630907, - "loss": 46.0, - "step": 15060 - }, - { - "epoch": 2.4254599621562867, - "grad_norm": 0.00674798060208559, - "learning_rate": 0.00019999710097787885, - "loss": 46.0, - "step": 15061 - }, - { - "epoch": 2.4256209992350737, - "grad_norm": 0.003238980658352375, - "learning_rate": 0.0001999971005926414, - "loss": 46.0, - "step": 15062 - }, - { - "epoch": 2.425782036313861, - "grad_norm": 0.007025308907032013, - "learning_rate": 0.00019999710020737835, - "loss": 46.0, - "step": 15063 - }, - { - "epoch": 2.4259430733926486, - "grad_norm": 0.006400213576853275, - "learning_rate": 0.0001999970998220897, - "loss": 46.0, - "step": 15064 - }, - { - "epoch": 2.426104110471436, - "grad_norm": 0.003598931012675166, - "learning_rate": 0.00019999709943677548, - "loss": 46.0, - "step": 15065 - }, - { - "epoch": 2.4262651475502235, - "grad_norm": 0.002956765005365014, - "learning_rate": 0.00019999709905143566, - "loss": 46.0, - "step": 15066 - }, - { - "epoch": 2.426426184629011, - "grad_norm": 0.003289261134341359, - "learning_rate": 0.00019999709866607023, - "loss": 46.0, - "step": 15067 - }, - { - "epoch": 2.4265872217077984, - "grad_norm": 0.00262340996414423, - "learning_rate": 0.00019999709828067924, - "loss": 46.0, - "step": 15068 - }, - { - "epoch": 2.426748258786586, - "grad_norm": 0.002898283302783966, - "learning_rate": 0.00019999709789526263, - "loss": 46.0, - "step": 15069 - }, - { - "epoch": 2.426909295865373, - "grad_norm": 0.005563546437770128, - "learning_rate": 0.00019999709750982044, - "loss": 46.0, - "step": 15070 - }, - { - "epoch": 2.4270703329441603, - "grad_norm": 0.0016081866342574358, - "learning_rate": 0.00019999709712435263, - "loss": 46.0, - "step": 15071 - }, - { - "epoch": 2.4272313700229478, - "grad_norm": 0.001116144354455173, - "learning_rate": 0.00019999709673885924, - "loss": 46.0, - "step": 15072 - }, - { - "epoch": 2.427392407101735, - "grad_norm": 0.0029423756059259176, - "learning_rate": 0.00019999709635334025, - "loss": 46.0, - "step": 15073 - }, - { - "epoch": 2.4275534441805227, - "grad_norm": 0.002192451385781169, - "learning_rate": 0.00019999709596779568, - "loss": 46.0, - "step": 15074 - }, - { - "epoch": 2.42771448125931, - "grad_norm": 0.0009762877598404884, - "learning_rate": 0.0001999970955822255, - "loss": 46.0, - "step": 15075 - }, - { - "epoch": 2.427875518338097, - "grad_norm": 0.002556285820901394, - "learning_rate": 0.00019999709519662973, - "loss": 46.0, - "step": 15076 - }, - { - "epoch": 2.4280365554168846, - "grad_norm": 0.008240649476647377, - "learning_rate": 0.00019999709481100837, - "loss": 46.0, - "step": 15077 - }, - { - "epoch": 2.428197592495672, - "grad_norm": 0.0010634667705744505, - "learning_rate": 0.00019999709442536143, - "loss": 46.0, - "step": 15078 - }, - { - "epoch": 2.4283586295744595, - "grad_norm": 0.0036353811156004667, - "learning_rate": 0.00019999709403968887, - "loss": 46.0, - "step": 15079 - }, - { - "epoch": 2.428519666653247, - "grad_norm": 0.0009891962399706244, - "learning_rate": 0.00019999709365399072, - "loss": 46.0, - "step": 15080 - }, - { - "epoch": 2.4286807037320344, - "grad_norm": 0.002965726889669895, - "learning_rate": 0.00019999709326826701, - "loss": 46.0, - "step": 15081 - }, - { - "epoch": 2.428841740810822, - "grad_norm": 0.008746610954403877, - "learning_rate": 0.00019999709288251767, - "loss": 46.0, - "step": 15082 - }, - { - "epoch": 2.4290027778896093, - "grad_norm": 0.002273347694426775, - "learning_rate": 0.00019999709249674273, - "loss": 46.0, - "step": 15083 - }, - { - "epoch": 2.4291638149683963, - "grad_norm": 0.0005529571790248156, - "learning_rate": 0.0001999970921109422, - "loss": 46.0, - "step": 15084 - }, - { - "epoch": 2.4293248520471837, - "grad_norm": 0.005225272383540869, - "learning_rate": 0.00019999709172511613, - "loss": 46.0, - "step": 15085 - }, - { - "epoch": 2.429485889125971, - "grad_norm": 0.0026787114329636097, - "learning_rate": 0.0001999970913392644, - "loss": 46.0, - "step": 15086 - }, - { - "epoch": 2.4296469262047586, - "grad_norm": 0.0020417559426277876, - "learning_rate": 0.0001999970909533871, - "loss": 46.0, - "step": 15087 - }, - { - "epoch": 2.429807963283546, - "grad_norm": 0.0030723819509148598, - "learning_rate": 0.00019999709056748422, - "loss": 46.0, - "step": 15088 - }, - { - "epoch": 2.4299690003623335, - "grad_norm": 0.002557441359385848, - "learning_rate": 0.00019999709018155573, - "loss": 46.0, - "step": 15089 - }, - { - "epoch": 2.430130037441121, - "grad_norm": 0.0005858919466845691, - "learning_rate": 0.00019999708979560163, - "loss": 46.0, - "step": 15090 - }, - { - "epoch": 2.430291074519908, - "grad_norm": 0.0015735187334939837, - "learning_rate": 0.00019999708940962198, - "loss": 46.0, - "step": 15091 - }, - { - "epoch": 2.4304521115986955, - "grad_norm": 0.00510045513510704, - "learning_rate": 0.0001999970890236167, - "loss": 46.0, - "step": 15092 - }, - { - "epoch": 2.430613148677483, - "grad_norm": 0.0010346854105591774, - "learning_rate": 0.0001999970886375858, - "loss": 46.0, - "step": 15093 - }, - { - "epoch": 2.4307741857562704, - "grad_norm": 0.0014213686808943748, - "learning_rate": 0.00019999708825152936, - "loss": 46.0, - "step": 15094 - }, - { - "epoch": 2.430935222835058, - "grad_norm": 0.0025495279114693403, - "learning_rate": 0.0001999970878654473, - "loss": 46.0, - "step": 15095 - }, - { - "epoch": 2.4310962599138453, - "grad_norm": 0.004010245203971863, - "learning_rate": 0.00019999708747933965, - "loss": 46.0, - "step": 15096 - }, - { - "epoch": 2.4312572969926327, - "grad_norm": 0.002637492259964347, - "learning_rate": 0.00019999708709320642, - "loss": 46.0, - "step": 15097 - }, - { - "epoch": 2.43141833407142, - "grad_norm": 0.0017560409614816308, - "learning_rate": 0.00019999708670704756, - "loss": 46.0, - "step": 15098 - }, - { - "epoch": 2.431579371150207, - "grad_norm": 0.0018749248702079058, - "learning_rate": 0.00019999708632086313, - "loss": 46.0, - "step": 15099 - }, - { - "epoch": 2.4317404082289946, - "grad_norm": 0.0029514131601899862, - "learning_rate": 0.00019999708593465313, - "loss": 46.0, - "step": 15100 - }, - { - "epoch": 2.431901445307782, - "grad_norm": 0.0014235855778679252, - "learning_rate": 0.0001999970855484175, - "loss": 46.0, - "step": 15101 - }, - { - "epoch": 2.4320624823865695, - "grad_norm": 0.0014500432880595326, - "learning_rate": 0.0001999970851621563, - "loss": 46.0, - "step": 15102 - }, - { - "epoch": 2.432223519465357, - "grad_norm": 0.003533906303346157, - "learning_rate": 0.00019999708477586947, - "loss": 46.0, - "step": 15103 - }, - { - "epoch": 2.4323845565441444, - "grad_norm": 0.005040640011429787, - "learning_rate": 0.00019999708438955707, - "loss": 46.0, - "step": 15104 - }, - { - "epoch": 2.4325455936229314, - "grad_norm": 0.003788456553593278, - "learning_rate": 0.00019999708400321908, - "loss": 46.0, - "step": 15105 - }, - { - "epoch": 2.432706630701719, - "grad_norm": 0.002008729847148061, - "learning_rate": 0.00019999708361685548, - "loss": 46.0, - "step": 15106 - }, - { - "epoch": 2.4328676677805063, - "grad_norm": 0.0017324623186141253, - "learning_rate": 0.0001999970832304663, - "loss": 46.0, - "step": 15107 - }, - { - "epoch": 2.433028704859294, - "grad_norm": 0.0033673131838440895, - "learning_rate": 0.00019999708284405151, - "loss": 46.0, - "step": 15108 - }, - { - "epoch": 2.4331897419380812, - "grad_norm": 0.0046812789514660835, - "learning_rate": 0.00019999708245761115, - "loss": 46.0, - "step": 15109 - }, - { - "epoch": 2.4333507790168687, - "grad_norm": 0.00128064991440624, - "learning_rate": 0.00019999708207114517, - "loss": 46.0, - "step": 15110 - }, - { - "epoch": 2.433511816095656, - "grad_norm": 0.00835944153368473, - "learning_rate": 0.0001999970816846536, - "loss": 46.0, - "step": 15111 - }, - { - "epoch": 2.4336728531744436, - "grad_norm": 0.003728966461494565, - "learning_rate": 0.00019999708129813648, - "loss": 46.0, - "step": 15112 - }, - { - "epoch": 2.433833890253231, - "grad_norm": 0.001890342915430665, - "learning_rate": 0.00019999708091159374, - "loss": 46.0, - "step": 15113 - }, - { - "epoch": 2.433994927332018, - "grad_norm": 0.012927904725074768, - "learning_rate": 0.00019999708052502538, - "loss": 46.0, - "step": 15114 - }, - { - "epoch": 2.4341559644108055, - "grad_norm": 0.001587510691024363, - "learning_rate": 0.00019999708013843144, - "loss": 46.0, - "step": 15115 - }, - { - "epoch": 2.434317001489593, - "grad_norm": 0.0012859930284321308, - "learning_rate": 0.0001999970797518119, - "loss": 46.0, - "step": 15116 - }, - { - "epoch": 2.4344780385683804, - "grad_norm": 0.0025028849486261606, - "learning_rate": 0.00019999707936516677, - "loss": 46.0, - "step": 15117 - }, - { - "epoch": 2.434639075647168, - "grad_norm": 0.005002887919545174, - "learning_rate": 0.00019999707897849607, - "loss": 46.0, - "step": 15118 - }, - { - "epoch": 2.4348001127259553, - "grad_norm": 0.0005653257830999792, - "learning_rate": 0.00019999707859179975, - "loss": 46.0, - "step": 15119 - }, - { - "epoch": 2.4349611498047423, - "grad_norm": 0.001642723334953189, - "learning_rate": 0.00019999707820507785, - "loss": 46.0, - "step": 15120 - }, - { - "epoch": 2.4351221868835298, - "grad_norm": 0.002714628353714943, - "learning_rate": 0.00019999707781833036, - "loss": 46.0, - "step": 15121 - }, - { - "epoch": 2.4352832239623172, - "grad_norm": 0.001865481142885983, - "learning_rate": 0.00019999707743155725, - "loss": 46.0, - "step": 15122 - }, - { - "epoch": 2.4354442610411047, - "grad_norm": 0.009342843666672707, - "learning_rate": 0.00019999707704475856, - "loss": 46.0, - "step": 15123 - }, - { - "epoch": 2.435605298119892, - "grad_norm": 0.0034045828506350517, - "learning_rate": 0.00019999707665793425, - "loss": 46.0, - "step": 15124 - }, - { - "epoch": 2.4357663351986796, - "grad_norm": 0.0012691464507952332, - "learning_rate": 0.00019999707627108438, - "loss": 46.0, - "step": 15125 - }, - { - "epoch": 2.435927372277467, - "grad_norm": 0.0018481611041352153, - "learning_rate": 0.0001999970758842089, - "loss": 46.0, - "step": 15126 - }, - { - "epoch": 2.4360884093562545, - "grad_norm": 0.0030695737805217505, - "learning_rate": 0.00019999707549730786, - "loss": 46.0, - "step": 15127 - }, - { - "epoch": 2.4362494464350415, - "grad_norm": 0.0012932880781590939, - "learning_rate": 0.0001999970751103812, - "loss": 46.0, - "step": 15128 - }, - { - "epoch": 2.436410483513829, - "grad_norm": 0.0030933471862226725, - "learning_rate": 0.00019999707472342896, - "loss": 46.0, - "step": 15129 - }, - { - "epoch": 2.4365715205926164, - "grad_norm": 0.001895736437290907, - "learning_rate": 0.0001999970743364511, - "loss": 46.0, - "step": 15130 - }, - { - "epoch": 2.436732557671404, - "grad_norm": 0.0023412881419062614, - "learning_rate": 0.00019999707394944766, - "loss": 46.0, - "step": 15131 - }, - { - "epoch": 2.4368935947501913, - "grad_norm": 0.0022735181264579296, - "learning_rate": 0.00019999707356241862, - "loss": 46.0, - "step": 15132 - }, - { - "epoch": 2.4370546318289787, - "grad_norm": 0.0024098760914057493, - "learning_rate": 0.00019999707317536398, - "loss": 46.0, - "step": 15133 - }, - { - "epoch": 2.437215668907766, - "grad_norm": 0.0007730990764684975, - "learning_rate": 0.00019999707278828374, - "loss": 46.0, - "step": 15134 - }, - { - "epoch": 2.437376705986553, - "grad_norm": 0.004460522439330816, - "learning_rate": 0.00019999707240117792, - "loss": 46.0, - "step": 15135 - }, - { - "epoch": 2.4375377430653407, - "grad_norm": 0.005358945578336716, - "learning_rate": 0.00019999707201404652, - "loss": 46.0, - "step": 15136 - }, - { - "epoch": 2.437698780144128, - "grad_norm": 0.003004533238708973, - "learning_rate": 0.00019999707162688952, - "loss": 46.0, - "step": 15137 - }, - { - "epoch": 2.4378598172229156, - "grad_norm": 0.0032510908786207438, - "learning_rate": 0.0001999970712397069, - "loss": 46.0, - "step": 15138 - }, - { - "epoch": 2.438020854301703, - "grad_norm": 0.002162961522117257, - "learning_rate": 0.0001999970708524987, - "loss": 46.0, - "step": 15139 - }, - { - "epoch": 2.4381818913804905, - "grad_norm": 0.0015356499934569001, - "learning_rate": 0.00019999707046526493, - "loss": 46.0, - "step": 15140 - }, - { - "epoch": 2.438342928459278, - "grad_norm": 0.0042129456996917725, - "learning_rate": 0.00019999707007800554, - "loss": 46.0, - "step": 15141 - }, - { - "epoch": 2.4385039655380654, - "grad_norm": 0.0011561081046238542, - "learning_rate": 0.00019999706969072055, - "loss": 46.0, - "step": 15142 - }, - { - "epoch": 2.4386650026168524, - "grad_norm": 0.00693810498341918, - "learning_rate": 0.00019999706930340998, - "loss": 46.0, - "step": 15143 - }, - { - "epoch": 2.43882603969564, - "grad_norm": 0.0008224930497817695, - "learning_rate": 0.00019999706891607382, - "loss": 46.0, - "step": 15144 - }, - { - "epoch": 2.4389870767744273, - "grad_norm": 0.000820188841316849, - "learning_rate": 0.00019999706852871207, - "loss": 46.0, - "step": 15145 - }, - { - "epoch": 2.4391481138532147, - "grad_norm": 0.0014454848133027554, - "learning_rate": 0.0001999970681413247, - "loss": 46.0, - "step": 15146 - }, - { - "epoch": 2.439309150932002, - "grad_norm": 0.006936587393283844, - "learning_rate": 0.00019999706775391176, - "loss": 46.0, - "step": 15147 - }, - { - "epoch": 2.4394701880107896, - "grad_norm": 0.0022375667467713356, - "learning_rate": 0.0001999970673664732, - "loss": 46.0, - "step": 15148 - }, - { - "epoch": 2.4396312250895766, - "grad_norm": 0.005586666986346245, - "learning_rate": 0.00019999706697900908, - "loss": 46.0, - "step": 15149 - }, - { - "epoch": 2.439792262168364, - "grad_norm": 0.0030356997158378363, - "learning_rate": 0.00019999706659151934, - "loss": 46.0, - "step": 15150 - }, - { - "epoch": 2.4399532992471515, - "grad_norm": 0.006025013048201799, - "learning_rate": 0.00019999706620400402, - "loss": 46.0, - "step": 15151 - }, - { - "epoch": 2.440114336325939, - "grad_norm": 0.0006395922973752022, - "learning_rate": 0.0001999970658164631, - "loss": 46.0, - "step": 15152 - }, - { - "epoch": 2.4402753734047264, - "grad_norm": 0.016943635419011116, - "learning_rate": 0.0001999970654288966, - "loss": 46.0, - "step": 15153 - }, - { - "epoch": 2.440436410483514, - "grad_norm": 0.0020490989554673433, - "learning_rate": 0.0001999970650413045, - "loss": 46.0, - "step": 15154 - }, - { - "epoch": 2.4405974475623013, - "grad_norm": 0.003953774459660053, - "learning_rate": 0.0001999970646536868, - "loss": 46.0, - "step": 15155 - }, - { - "epoch": 2.440758484641089, - "grad_norm": 0.003514370182529092, - "learning_rate": 0.0001999970642660435, - "loss": 46.0, - "step": 15156 - }, - { - "epoch": 2.4409195217198763, - "grad_norm": 0.0014731710543856025, - "learning_rate": 0.00019999706387837458, - "loss": 46.0, - "step": 15157 - }, - { - "epoch": 2.4410805587986633, - "grad_norm": 0.002514501567929983, - "learning_rate": 0.00019999706349068012, - "loss": 46.0, - "step": 15158 - }, - { - "epoch": 2.4412415958774507, - "grad_norm": 0.0017521616537123919, - "learning_rate": 0.00019999706310296005, - "loss": 46.0, - "step": 15159 - }, - { - "epoch": 2.441402632956238, - "grad_norm": 0.007570066023617983, - "learning_rate": 0.00019999706271521433, - "loss": 46.0, - "step": 15160 - }, - { - "epoch": 2.4415636700350256, - "grad_norm": 0.007405099458992481, - "learning_rate": 0.0001999970623274431, - "loss": 46.0, - "step": 15161 - }, - { - "epoch": 2.441724707113813, - "grad_norm": 0.004569333977997303, - "learning_rate": 0.00019999706193964625, - "loss": 46.0, - "step": 15162 - }, - { - "epoch": 2.4418857441926005, - "grad_norm": 0.0010243856813758612, - "learning_rate": 0.0001999970615518238, - "loss": 46.0, - "step": 15163 - }, - { - "epoch": 2.4420467812713875, - "grad_norm": 0.0019582172390073538, - "learning_rate": 0.00019999706116397573, - "loss": 46.0, - "step": 15164 - }, - { - "epoch": 2.442207818350175, - "grad_norm": 0.002119868528097868, - "learning_rate": 0.0001999970607761021, - "loss": 46.0, - "step": 15165 - }, - { - "epoch": 2.4423688554289624, - "grad_norm": 0.0009905437473207712, - "learning_rate": 0.00019999706038820284, - "loss": 46.0, - "step": 15166 - }, - { - "epoch": 2.44252989250775, - "grad_norm": 0.0014513899805024266, - "learning_rate": 0.00019999706000027804, - "loss": 46.0, - "step": 15167 - }, - { - "epoch": 2.4426909295865373, - "grad_norm": 0.0010807986836880445, - "learning_rate": 0.0001999970596123276, - "loss": 46.0, - "step": 15168 - }, - { - "epoch": 2.442851966665325, - "grad_norm": 0.002748708939179778, - "learning_rate": 0.00019999705922435157, - "loss": 46.0, - "step": 15169 - }, - { - "epoch": 2.4430130037441122, - "grad_norm": 0.0014928893651813269, - "learning_rate": 0.00019999705883634995, - "loss": 46.0, - "step": 15170 - }, - { - "epoch": 2.4431740408228997, - "grad_norm": 0.0013083310332149267, - "learning_rate": 0.00019999705844832275, - "loss": 46.0, - "step": 15171 - }, - { - "epoch": 2.4433350779016867, - "grad_norm": 0.006076828110963106, - "learning_rate": 0.00019999705806026996, - "loss": 46.0, - "step": 15172 - }, - { - "epoch": 2.443496114980474, - "grad_norm": 0.003243883140385151, - "learning_rate": 0.00019999705767219156, - "loss": 46.0, - "step": 15173 - }, - { - "epoch": 2.4436571520592616, - "grad_norm": 0.0012554647400975227, - "learning_rate": 0.00019999705728408756, - "loss": 46.0, - "step": 15174 - }, - { - "epoch": 2.443818189138049, - "grad_norm": 0.0049966671504080296, - "learning_rate": 0.000199997056895958, - "loss": 46.0, - "step": 15175 - }, - { - "epoch": 2.4439792262168365, - "grad_norm": 0.009989946149289608, - "learning_rate": 0.0001999970565078028, - "loss": 46.0, - "step": 15176 - }, - { - "epoch": 2.444140263295624, - "grad_norm": 0.0031812230590730906, - "learning_rate": 0.00019999705611962204, - "loss": 46.0, - "step": 15177 - }, - { - "epoch": 2.4443013003744114, - "grad_norm": 0.0013804750051349401, - "learning_rate": 0.00019999705573141565, - "loss": 46.0, - "step": 15178 - }, - { - "epoch": 2.4444623374531984, - "grad_norm": 0.0021921817678958178, - "learning_rate": 0.00019999705534318372, - "loss": 46.0, - "step": 15179 - }, - { - "epoch": 2.444623374531986, - "grad_norm": 0.00106932467315346, - "learning_rate": 0.00019999705495492615, - "loss": 46.0, - "step": 15180 - }, - { - "epoch": 2.4447844116107733, - "grad_norm": 0.004719895776361227, - "learning_rate": 0.000199997054566643, - "loss": 46.0, - "step": 15181 - }, - { - "epoch": 2.4449454486895608, - "grad_norm": 0.0025353101082146168, - "learning_rate": 0.00019999705417833425, - "loss": 46.0, - "step": 15182 - }, - { - "epoch": 2.445106485768348, - "grad_norm": 0.0012668155832216144, - "learning_rate": 0.00019999705378999992, - "loss": 46.0, - "step": 15183 - }, - { - "epoch": 2.4452675228471357, - "grad_norm": 0.002183428732678294, - "learning_rate": 0.00019999705340163998, - "loss": 46.0, - "step": 15184 - }, - { - "epoch": 2.445428559925923, - "grad_norm": 0.0007727840566076338, - "learning_rate": 0.00019999705301325447, - "loss": 46.0, - "step": 15185 - }, - { - "epoch": 2.4455895970047106, - "grad_norm": 0.002086078282445669, - "learning_rate": 0.00019999705262484333, - "loss": 46.0, - "step": 15186 - }, - { - "epoch": 2.4457506340834976, - "grad_norm": 0.006999995559453964, - "learning_rate": 0.00019999705223640662, - "loss": 46.0, - "step": 15187 - }, - { - "epoch": 2.445911671162285, - "grad_norm": 0.004946023691445589, - "learning_rate": 0.00019999705184794433, - "loss": 46.0, - "step": 15188 - }, - { - "epoch": 2.4460727082410725, - "grad_norm": 0.0027686201501637697, - "learning_rate": 0.00019999705145945642, - "loss": 46.0, - "step": 15189 - }, - { - "epoch": 2.44623374531986, - "grad_norm": 0.002385171363130212, - "learning_rate": 0.00019999705107094293, - "loss": 46.0, - "step": 15190 - }, - { - "epoch": 2.4463947823986474, - "grad_norm": 0.009861724451184273, - "learning_rate": 0.00019999705068240382, - "loss": 46.0, - "step": 15191 - }, - { - "epoch": 2.446555819477435, - "grad_norm": 0.00755418511107564, - "learning_rate": 0.00019999705029383915, - "loss": 46.0, - "step": 15192 - }, - { - "epoch": 2.446716856556222, - "grad_norm": 0.0023931374307721853, - "learning_rate": 0.00019999704990524887, - "loss": 46.0, - "step": 15193 - }, - { - "epoch": 2.4468778936350093, - "grad_norm": 0.007331613451242447, - "learning_rate": 0.000199997049516633, - "loss": 46.0, - "step": 15194 - }, - { - "epoch": 2.4470389307137967, - "grad_norm": 0.00670094508677721, - "learning_rate": 0.00019999704912799154, - "loss": 46.0, - "step": 15195 - }, - { - "epoch": 2.447199967792584, - "grad_norm": 0.004451238550245762, - "learning_rate": 0.00019999704873932447, - "loss": 46.0, - "step": 15196 - }, - { - "epoch": 2.4473610048713716, - "grad_norm": 0.006701333448290825, - "learning_rate": 0.0001999970483506318, - "loss": 46.0, - "step": 15197 - }, - { - "epoch": 2.447522041950159, - "grad_norm": 0.003331623040139675, - "learning_rate": 0.00019999704796191356, - "loss": 46.0, - "step": 15198 - }, - { - "epoch": 2.4476830790289466, - "grad_norm": 0.009729933924973011, - "learning_rate": 0.00019999704757316973, - "loss": 46.0, - "step": 15199 - }, - { - "epoch": 2.447844116107734, - "grad_norm": 0.007817571051418781, - "learning_rate": 0.00019999704718440028, - "loss": 46.0, - "step": 15200 - }, - { - "epoch": 2.448005153186521, - "grad_norm": 0.0011042491532862186, - "learning_rate": 0.00019999704679560525, - "loss": 46.0, - "step": 15201 - }, - { - "epoch": 2.4481661902653085, - "grad_norm": 0.008335348218679428, - "learning_rate": 0.00019999704640678463, - "loss": 46.0, - "step": 15202 - }, - { - "epoch": 2.448327227344096, - "grad_norm": 0.0063887545838952065, - "learning_rate": 0.00019999704601793842, - "loss": 46.0, - "step": 15203 - }, - { - "epoch": 2.4484882644228834, - "grad_norm": 0.001595465000718832, - "learning_rate": 0.0001999970456290666, - "loss": 46.0, - "step": 15204 - }, - { - "epoch": 2.448649301501671, - "grad_norm": 0.002345220185816288, - "learning_rate": 0.0001999970452401692, - "loss": 46.0, - "step": 15205 - }, - { - "epoch": 2.4488103385804583, - "grad_norm": 0.0016180694801732898, - "learning_rate": 0.0001999970448512462, - "loss": 46.0, - "step": 15206 - }, - { - "epoch": 2.4489713756592457, - "grad_norm": 0.002247192431241274, - "learning_rate": 0.0001999970444622976, - "loss": 46.0, - "step": 15207 - }, - { - "epoch": 2.4491324127380327, - "grad_norm": 0.000935447053052485, - "learning_rate": 0.0001999970440733234, - "loss": 46.0, - "step": 15208 - }, - { - "epoch": 2.44929344981682, - "grad_norm": 0.002954952185973525, - "learning_rate": 0.00019999704368432362, - "loss": 46.0, - "step": 15209 - }, - { - "epoch": 2.4494544868956076, - "grad_norm": 0.004415550269186497, - "learning_rate": 0.00019999704329529824, - "loss": 46.0, - "step": 15210 - }, - { - "epoch": 2.449615523974395, - "grad_norm": 0.0007914695888757706, - "learning_rate": 0.00019999704290624726, - "loss": 46.0, - "step": 15211 - }, - { - "epoch": 2.4497765610531825, - "grad_norm": 0.001555940369144082, - "learning_rate": 0.0001999970425171707, - "loss": 46.0, - "step": 15212 - }, - { - "epoch": 2.44993759813197, - "grad_norm": 0.0003372641222085804, - "learning_rate": 0.00019999704212806855, - "loss": 46.0, - "step": 15213 - }, - { - "epoch": 2.4500986352107574, - "grad_norm": 0.009569228626787663, - "learning_rate": 0.00019999704173894077, - "loss": 46.0, - "step": 15214 - }, - { - "epoch": 2.450259672289545, - "grad_norm": 0.003237672382965684, - "learning_rate": 0.00019999704134978746, - "loss": 46.0, - "step": 15215 - }, - { - "epoch": 2.450420709368332, - "grad_norm": 0.003576381830498576, - "learning_rate": 0.00019999704096060848, - "loss": 46.0, - "step": 15216 - }, - { - "epoch": 2.4505817464471193, - "grad_norm": 0.005659961141645908, - "learning_rate": 0.00019999704057140395, - "loss": 46.0, - "step": 15217 - }, - { - "epoch": 2.450742783525907, - "grad_norm": 0.004003557842224836, - "learning_rate": 0.00019999704018217385, - "loss": 46.0, - "step": 15218 - }, - { - "epoch": 2.4509038206046942, - "grad_norm": 0.006936076562851667, - "learning_rate": 0.0001999970397929181, - "loss": 46.0, - "step": 15219 - }, - { - "epoch": 2.4510648576834817, - "grad_norm": 0.0014235333073884249, - "learning_rate": 0.00019999703940363678, - "loss": 46.0, - "step": 15220 - }, - { - "epoch": 2.451225894762269, - "grad_norm": 0.010424616746604443, - "learning_rate": 0.00019999703901432987, - "loss": 46.0, - "step": 15221 - }, - { - "epoch": 2.451386931841056, - "grad_norm": 0.0011427258141338825, - "learning_rate": 0.00019999703862499737, - "loss": 46.0, - "step": 15222 - }, - { - "epoch": 2.4515479689198436, - "grad_norm": 0.002132270485162735, - "learning_rate": 0.00019999703823563925, - "loss": 46.0, - "step": 15223 - }, - { - "epoch": 2.451709005998631, - "grad_norm": 0.0016713673248887062, - "learning_rate": 0.00019999703784625555, - "loss": 46.0, - "step": 15224 - }, - { - "epoch": 2.4518700430774185, - "grad_norm": 0.005765865091234446, - "learning_rate": 0.0001999970374568463, - "loss": 46.0, - "step": 15225 - }, - { - "epoch": 2.452031080156206, - "grad_norm": 0.0059107705019414425, - "learning_rate": 0.00019999703706741139, - "loss": 46.0, - "step": 15226 - }, - { - "epoch": 2.4521921172349934, - "grad_norm": 0.0017577632097527385, - "learning_rate": 0.00019999703667795092, - "loss": 46.0, - "step": 15227 - }, - { - "epoch": 2.452353154313781, - "grad_norm": 0.0013335597468540072, - "learning_rate": 0.00019999703628846484, - "loss": 46.0, - "step": 15228 - }, - { - "epoch": 2.4525141913925683, - "grad_norm": 0.0015751039609313011, - "learning_rate": 0.00019999703589895318, - "loss": 46.0, - "step": 15229 - }, - { - "epoch": 2.4526752284713558, - "grad_norm": 0.004719140939414501, - "learning_rate": 0.00019999703550941593, - "loss": 46.0, - "step": 15230 - }, - { - "epoch": 2.452836265550143, - "grad_norm": 0.0012358779786154628, - "learning_rate": 0.00019999703511985306, - "loss": 46.0, - "step": 15231 - }, - { - "epoch": 2.4529973026289302, - "grad_norm": 0.0019273769576102495, - "learning_rate": 0.0001999970347302646, - "loss": 46.0, - "step": 15232 - }, - { - "epoch": 2.4531583397077177, - "grad_norm": 0.0045170956291258335, - "learning_rate": 0.0001999970343406506, - "loss": 46.0, - "step": 15233 - }, - { - "epoch": 2.453319376786505, - "grad_norm": 0.0035083426628261805, - "learning_rate": 0.00019999703395101094, - "loss": 46.0, - "step": 15234 - }, - { - "epoch": 2.4534804138652926, - "grad_norm": 0.0018365541473031044, - "learning_rate": 0.0001999970335613457, - "loss": 46.0, - "step": 15235 - }, - { - "epoch": 2.45364145094408, - "grad_norm": 0.003354813437908888, - "learning_rate": 0.0001999970331716549, - "loss": 46.0, - "step": 15236 - }, - { - "epoch": 2.453802488022867, - "grad_norm": 0.002219960791990161, - "learning_rate": 0.00019999703278193845, - "loss": 46.0, - "step": 15237 - }, - { - "epoch": 2.4539635251016545, - "grad_norm": 0.0006025100010447204, - "learning_rate": 0.00019999703239219644, - "loss": 46.0, - "step": 15238 - }, - { - "epoch": 2.454124562180442, - "grad_norm": 0.0017555751837790012, - "learning_rate": 0.00019999703200242883, - "loss": 46.0, - "step": 15239 - }, - { - "epoch": 2.4542855992592294, - "grad_norm": 0.0005677935550920665, - "learning_rate": 0.00019999703161263562, - "loss": 46.0, - "step": 15240 - }, - { - "epoch": 2.454446636338017, - "grad_norm": 0.0015791229670867324, - "learning_rate": 0.00019999703122281686, - "loss": 46.0, - "step": 15241 - }, - { - "epoch": 2.4546076734168043, - "grad_norm": 0.00508342869579792, - "learning_rate": 0.00019999703083297245, - "loss": 46.0, - "step": 15242 - }, - { - "epoch": 2.4547687104955918, - "grad_norm": 0.0024866480380296707, - "learning_rate": 0.00019999703044310248, - "loss": 46.0, - "step": 15243 - }, - { - "epoch": 2.454929747574379, - "grad_norm": 0.0010701797436922789, - "learning_rate": 0.0001999970300532069, - "loss": 46.0, - "step": 15244 - }, - { - "epoch": 2.455090784653166, - "grad_norm": 0.001672895741648972, - "learning_rate": 0.00019999702966328573, - "loss": 46.0, - "step": 15245 - }, - { - "epoch": 2.4552518217319537, - "grad_norm": 0.0010022710775956511, - "learning_rate": 0.00019999702927333895, - "loss": 46.0, - "step": 15246 - }, - { - "epoch": 2.455412858810741, - "grad_norm": 0.0030301359947770834, - "learning_rate": 0.00019999702888336658, - "loss": 46.0, - "step": 15247 - }, - { - "epoch": 2.4555738958895286, - "grad_norm": 0.0033945185132324696, - "learning_rate": 0.00019999702849336862, - "loss": 46.0, - "step": 15248 - }, - { - "epoch": 2.455734932968316, - "grad_norm": 0.0018690304132178426, - "learning_rate": 0.00019999702810334508, - "loss": 46.0, - "step": 15249 - }, - { - "epoch": 2.4558959700471035, - "grad_norm": 0.006364199798554182, - "learning_rate": 0.00019999702771329595, - "loss": 46.0, - "step": 15250 - }, - { - "epoch": 2.456057007125891, - "grad_norm": 0.002885845024138689, - "learning_rate": 0.0001999970273232212, - "loss": 46.0, - "step": 15251 - }, - { - "epoch": 2.456218044204678, - "grad_norm": 0.0018506854539737105, - "learning_rate": 0.00019999702693312087, - "loss": 46.0, - "step": 15252 - }, - { - "epoch": 2.4563790812834654, - "grad_norm": 0.002193527529016137, - "learning_rate": 0.00019999702654299495, - "loss": 46.0, - "step": 15253 - }, - { - "epoch": 2.456540118362253, - "grad_norm": 0.008521744050085545, - "learning_rate": 0.00019999702615284344, - "loss": 46.0, - "step": 15254 - }, - { - "epoch": 2.4567011554410403, - "grad_norm": 0.0013411276740953326, - "learning_rate": 0.00019999702576266632, - "loss": 46.0, - "step": 15255 - }, - { - "epoch": 2.4568621925198277, - "grad_norm": 0.0075626433826982975, - "learning_rate": 0.00019999702537246361, - "loss": 46.0, - "step": 15256 - }, - { - "epoch": 2.457023229598615, - "grad_norm": 0.002869025571271777, - "learning_rate": 0.00019999702498223532, - "loss": 46.0, - "step": 15257 - }, - { - "epoch": 2.4571842666774026, - "grad_norm": 0.00453701987862587, - "learning_rate": 0.0001999970245919814, - "loss": 46.0, - "step": 15258 - }, - { - "epoch": 2.45734530375619, - "grad_norm": 0.0061545055359601974, - "learning_rate": 0.0001999970242017019, - "loss": 46.0, - "step": 15259 - }, - { - "epoch": 2.457506340834977, - "grad_norm": 0.0009747164440341294, - "learning_rate": 0.00019999702381139685, - "loss": 46.0, - "step": 15260 - }, - { - "epoch": 2.4576673779137646, - "grad_norm": 0.0009364444995298982, - "learning_rate": 0.00019999702342106616, - "loss": 46.0, - "step": 15261 - }, - { - "epoch": 2.457828414992552, - "grad_norm": 0.0016260718693956733, - "learning_rate": 0.0001999970230307099, - "loss": 46.0, - "step": 15262 - }, - { - "epoch": 2.4579894520713395, - "grad_norm": 0.0007474543526768684, - "learning_rate": 0.00019999702264032802, - "loss": 46.0, - "step": 15263 - }, - { - "epoch": 2.458150489150127, - "grad_norm": 0.00042786996345967054, - "learning_rate": 0.00019999702224992056, - "loss": 46.0, - "step": 15264 - }, - { - "epoch": 2.4583115262289144, - "grad_norm": 0.0020948343444615602, - "learning_rate": 0.0001999970218594875, - "loss": 46.0, - "step": 15265 - }, - { - "epoch": 2.4584725633077014, - "grad_norm": 0.004851227160543203, - "learning_rate": 0.00019999702146902886, - "loss": 46.0, - "step": 15266 - }, - { - "epoch": 2.458633600386489, - "grad_norm": 0.005663125775754452, - "learning_rate": 0.00019999702107854463, - "loss": 46.0, - "step": 15267 - }, - { - "epoch": 2.4587946374652763, - "grad_norm": 0.002608599839732051, - "learning_rate": 0.0001999970206880348, - "loss": 46.0, - "step": 15268 - }, - { - "epoch": 2.4589556745440637, - "grad_norm": 0.005585620645433664, - "learning_rate": 0.00019999702029749935, - "loss": 46.0, - "step": 15269 - }, - { - "epoch": 2.459116711622851, - "grad_norm": 0.001136680832132697, - "learning_rate": 0.00019999701990693834, - "loss": 46.0, - "step": 15270 - }, - { - "epoch": 2.4592777487016386, - "grad_norm": 0.0014970415504649282, - "learning_rate": 0.0001999970195163517, - "loss": 46.0, - "step": 15271 - }, - { - "epoch": 2.459438785780426, - "grad_norm": 0.025296634063124657, - "learning_rate": 0.00019999701912573953, - "loss": 46.0, - "step": 15272 - }, - { - "epoch": 2.4595998228592135, - "grad_norm": 0.0009249933646060526, - "learning_rate": 0.00019999701873510168, - "loss": 46.0, - "step": 15273 - }, - { - "epoch": 2.4597608599380005, - "grad_norm": 0.0022794988472014666, - "learning_rate": 0.0001999970183444383, - "loss": 46.0, - "step": 15274 - }, - { - "epoch": 2.459921897016788, - "grad_norm": 0.0019057078752666712, - "learning_rate": 0.0001999970179537493, - "loss": 46.0, - "step": 15275 - }, - { - "epoch": 2.4600829340955754, - "grad_norm": 0.0028241551481187344, - "learning_rate": 0.0001999970175630347, - "loss": 46.0, - "step": 15276 - }, - { - "epoch": 2.460243971174363, - "grad_norm": 0.000636660901363939, - "learning_rate": 0.0001999970171722945, - "loss": 46.0, - "step": 15277 - }, - { - "epoch": 2.4604050082531503, - "grad_norm": 0.001617730362340808, - "learning_rate": 0.00019999701678152874, - "loss": 46.0, - "step": 15278 - }, - { - "epoch": 2.460566045331938, - "grad_norm": 0.0014377145562320948, - "learning_rate": 0.0001999970163907374, - "loss": 46.0, - "step": 15279 - }, - { - "epoch": 2.4607270824107252, - "grad_norm": 0.00459147896617651, - "learning_rate": 0.0001999970159999204, - "loss": 46.0, - "step": 15280 - }, - { - "epoch": 2.4608881194895122, - "grad_norm": 0.0019778336863964796, - "learning_rate": 0.00019999701560907785, - "loss": 46.0, - "step": 15281 - }, - { - "epoch": 2.4610491565682997, - "grad_norm": 0.002988354302942753, - "learning_rate": 0.0001999970152182097, - "loss": 46.0, - "step": 15282 - }, - { - "epoch": 2.461210193647087, - "grad_norm": 0.0014327128883451223, - "learning_rate": 0.00019999701482731593, - "loss": 46.0, - "step": 15283 - }, - { - "epoch": 2.4613712307258746, - "grad_norm": 0.00252849911339581, - "learning_rate": 0.0001999970144363966, - "loss": 46.0, - "step": 15284 - }, - { - "epoch": 2.461532267804662, - "grad_norm": 0.0063990941271185875, - "learning_rate": 0.00019999701404545167, - "loss": 46.0, - "step": 15285 - }, - { - "epoch": 2.4616933048834495, - "grad_norm": 0.0021387904416769743, - "learning_rate": 0.00019999701365448113, - "loss": 46.0, - "step": 15286 - }, - { - "epoch": 2.461854341962237, - "grad_norm": 0.0062814331613481045, - "learning_rate": 0.000199997013263485, - "loss": 46.0, - "step": 15287 - }, - { - "epoch": 2.4620153790410244, - "grad_norm": 0.0012795914663001895, - "learning_rate": 0.0001999970128724633, - "loss": 46.0, - "step": 15288 - }, - { - "epoch": 2.4621764161198114, - "grad_norm": 0.00279766833409667, - "learning_rate": 0.00019999701248141598, - "loss": 46.0, - "step": 15289 - }, - { - "epoch": 2.462337453198599, - "grad_norm": 0.00167724653147161, - "learning_rate": 0.00019999701209034307, - "loss": 46.0, - "step": 15290 - }, - { - "epoch": 2.4624984902773863, - "grad_norm": 0.0009418915724381804, - "learning_rate": 0.0001999970116992446, - "loss": 46.0, - "step": 15291 - }, - { - "epoch": 2.4626595273561738, - "grad_norm": 0.0020624124445021152, - "learning_rate": 0.00019999701130812047, - "loss": 46.0, - "step": 15292 - }, - { - "epoch": 2.4628205644349612, - "grad_norm": 0.0005897238734178245, - "learning_rate": 0.0001999970109169708, - "loss": 46.0, - "step": 15293 - }, - { - "epoch": 2.4629816015137487, - "grad_norm": 0.0012851186329498887, - "learning_rate": 0.0001999970105257955, - "loss": 46.0, - "step": 15294 - }, - { - "epoch": 2.4631426385925357, - "grad_norm": 0.005485603120177984, - "learning_rate": 0.00019999701013459462, - "loss": 46.0, - "step": 15295 - }, - { - "epoch": 2.463303675671323, - "grad_norm": 0.008505761623382568, - "learning_rate": 0.00019999700974336816, - "loss": 46.0, - "step": 15296 - }, - { - "epoch": 2.4634647127501106, - "grad_norm": 0.0012100688181817532, - "learning_rate": 0.0001999970093521161, - "loss": 46.0, - "step": 15297 - }, - { - "epoch": 2.463625749828898, - "grad_norm": 0.0034214206971228123, - "learning_rate": 0.00019999700896083844, - "loss": 46.0, - "step": 15298 - }, - { - "epoch": 2.4637867869076855, - "grad_norm": 0.0004850381810683757, - "learning_rate": 0.00019999700856953518, - "loss": 46.0, - "step": 15299 - }, - { - "epoch": 2.463947823986473, - "grad_norm": 0.004832991864532232, - "learning_rate": 0.0001999970081782063, - "loss": 46.0, - "step": 15300 - }, - { - "epoch": 2.4641088610652604, - "grad_norm": 0.0006791831692680717, - "learning_rate": 0.00019999700778685188, - "loss": 46.0, - "step": 15301 - }, - { - "epoch": 2.464269898144048, - "grad_norm": 0.005785138346254826, - "learning_rate": 0.00019999700739547187, - "loss": 46.0, - "step": 15302 - }, - { - "epoch": 2.4644309352228353, - "grad_norm": 0.0032062644604593515, - "learning_rate": 0.00019999700700406623, - "loss": 46.0, - "step": 15303 - }, - { - "epoch": 2.4645919723016223, - "grad_norm": 0.002940522972494364, - "learning_rate": 0.00019999700661263502, - "loss": 46.0, - "step": 15304 - }, - { - "epoch": 2.4647530093804098, - "grad_norm": 0.004605089779943228, - "learning_rate": 0.00019999700622117818, - "loss": 46.0, - "step": 15305 - }, - { - "epoch": 2.464914046459197, - "grad_norm": 0.001077226479537785, - "learning_rate": 0.0001999970058296958, - "loss": 46.0, - "step": 15306 - }, - { - "epoch": 2.4650750835379847, - "grad_norm": 0.011306436732411385, - "learning_rate": 0.00019999700543818776, - "loss": 46.0, - "step": 15307 - }, - { - "epoch": 2.465236120616772, - "grad_norm": 0.012863697484135628, - "learning_rate": 0.00019999700504665414, - "loss": 46.0, - "step": 15308 - }, - { - "epoch": 2.4653971576955596, - "grad_norm": 0.0012285165721550584, - "learning_rate": 0.00019999700465509496, - "loss": 46.0, - "step": 15309 - }, - { - "epoch": 2.4655581947743466, - "grad_norm": 0.003248112043365836, - "learning_rate": 0.0001999970042635102, - "loss": 46.0, - "step": 15310 - }, - { - "epoch": 2.465719231853134, - "grad_norm": 0.0025695119984447956, - "learning_rate": 0.00019999700387189978, - "loss": 46.0, - "step": 15311 - }, - { - "epoch": 2.4658802689319215, - "grad_norm": 0.003024042584002018, - "learning_rate": 0.0001999970034802638, - "loss": 46.0, - "step": 15312 - }, - { - "epoch": 2.466041306010709, - "grad_norm": 0.0055190748535096645, - "learning_rate": 0.00019999700308860225, - "loss": 46.0, - "step": 15313 - }, - { - "epoch": 2.4662023430894964, - "grad_norm": 0.001415694016031921, - "learning_rate": 0.00019999700269691508, - "loss": 46.0, - "step": 15314 - }, - { - "epoch": 2.466363380168284, - "grad_norm": 0.004782662261277437, - "learning_rate": 0.00019999700230520232, - "loss": 46.0, - "step": 15315 - }, - { - "epoch": 2.4665244172470713, - "grad_norm": 0.0019428686937317252, - "learning_rate": 0.00019999700191346395, - "loss": 46.0, - "step": 15316 - }, - { - "epoch": 2.4666854543258587, - "grad_norm": 0.0031769087072461843, - "learning_rate": 0.00019999700152170002, - "loss": 46.0, - "step": 15317 - }, - { - "epoch": 2.4668464914046457, - "grad_norm": 0.003012104192748666, - "learning_rate": 0.00019999700112991047, - "loss": 46.0, - "step": 15318 - }, - { - "epoch": 2.467007528483433, - "grad_norm": 0.002719815354794264, - "learning_rate": 0.00019999700073809537, - "loss": 46.0, - "step": 15319 - }, - { - "epoch": 2.4671685655622206, - "grad_norm": 0.0027671062853187323, - "learning_rate": 0.00019999700034625462, - "loss": 46.0, - "step": 15320 - }, - { - "epoch": 2.467329602641008, - "grad_norm": 0.0009664149256423116, - "learning_rate": 0.0001999969999543883, - "loss": 46.0, - "step": 15321 - }, - { - "epoch": 2.4674906397197955, - "grad_norm": 0.0020425315015017986, - "learning_rate": 0.00019999699956249636, - "loss": 46.0, - "step": 15322 - }, - { - "epoch": 2.467651676798583, - "grad_norm": 0.008751987479627132, - "learning_rate": 0.00019999699917057885, - "loss": 46.0, - "step": 15323 - }, - { - "epoch": 2.4678127138773704, - "grad_norm": 0.0017249657539650798, - "learning_rate": 0.00019999699877863575, - "loss": 46.0, - "step": 15324 - }, - { - "epoch": 2.4679737509561575, - "grad_norm": 0.0011153070954605937, - "learning_rate": 0.00019999699838666707, - "loss": 46.0, - "step": 15325 - }, - { - "epoch": 2.468134788034945, - "grad_norm": 0.0008131980430334806, - "learning_rate": 0.00019999699799467274, - "loss": 46.0, - "step": 15326 - }, - { - "epoch": 2.4682958251137324, - "grad_norm": 0.0010892102727666497, - "learning_rate": 0.00019999699760265286, - "loss": 46.0, - "step": 15327 - }, - { - "epoch": 2.46845686219252, - "grad_norm": 0.0010309504577890038, - "learning_rate": 0.00019999699721060738, - "loss": 46.0, - "step": 15328 - }, - { - "epoch": 2.4686178992713073, - "grad_norm": 0.003638485912233591, - "learning_rate": 0.00019999699681853632, - "loss": 46.0, - "step": 15329 - }, - { - "epoch": 2.4687789363500947, - "grad_norm": 0.0011200020089745522, - "learning_rate": 0.00019999699642643965, - "loss": 46.0, - "step": 15330 - }, - { - "epoch": 2.468939973428882, - "grad_norm": 0.007710890378803015, - "learning_rate": 0.00019999699603431736, - "loss": 46.0, - "step": 15331 - }, - { - "epoch": 2.4691010105076696, - "grad_norm": 0.007329565938562155, - "learning_rate": 0.00019999699564216951, - "loss": 46.0, - "step": 15332 - }, - { - "epoch": 2.4692620475864566, - "grad_norm": 0.0017656265990808606, - "learning_rate": 0.00019999699524999608, - "loss": 46.0, - "step": 15333 - }, - { - "epoch": 2.469423084665244, - "grad_norm": 0.0009448722121305764, - "learning_rate": 0.00019999699485779703, - "loss": 46.0, - "step": 15334 - }, - { - "epoch": 2.4695841217440315, - "grad_norm": 0.0010442073689773679, - "learning_rate": 0.00019999699446557236, - "loss": 46.0, - "step": 15335 - }, - { - "epoch": 2.469745158822819, - "grad_norm": 0.009211316704750061, - "learning_rate": 0.00019999699407332214, - "loss": 46.0, - "step": 15336 - }, - { - "epoch": 2.4699061959016064, - "grad_norm": 0.007101106457412243, - "learning_rate": 0.00019999699368104633, - "loss": 46.0, - "step": 15337 - }, - { - "epoch": 2.470067232980394, - "grad_norm": 0.005491578485816717, - "learning_rate": 0.00019999699328874488, - "loss": 46.0, - "step": 15338 - }, - { - "epoch": 2.470228270059181, - "grad_norm": 0.007363234180957079, - "learning_rate": 0.00019999699289641786, - "loss": 46.0, - "step": 15339 - }, - { - "epoch": 2.4703893071379683, - "grad_norm": 0.001275977585464716, - "learning_rate": 0.00019999699250406526, - "loss": 46.0, - "step": 15340 - }, - { - "epoch": 2.470550344216756, - "grad_norm": 0.005832497496157885, - "learning_rate": 0.00019999699211168705, - "loss": 46.0, - "step": 15341 - }, - { - "epoch": 2.4707113812955432, - "grad_norm": 0.0006780567346140742, - "learning_rate": 0.00019999699171928325, - "loss": 46.0, - "step": 15342 - }, - { - "epoch": 2.4708724183743307, - "grad_norm": 0.0016721902647987008, - "learning_rate": 0.00019999699132685386, - "loss": 46.0, - "step": 15343 - }, - { - "epoch": 2.471033455453118, - "grad_norm": 0.0041099428199231625, - "learning_rate": 0.00019999699093439888, - "loss": 46.0, - "step": 15344 - }, - { - "epoch": 2.4711944925319056, - "grad_norm": 0.011762848123908043, - "learning_rate": 0.0001999969905419183, - "loss": 46.0, - "step": 15345 - }, - { - "epoch": 2.471355529610693, - "grad_norm": 0.0023676606360822916, - "learning_rate": 0.00019999699014941211, - "loss": 46.0, - "step": 15346 - }, - { - "epoch": 2.4715165666894805, - "grad_norm": 0.000821719819214195, - "learning_rate": 0.00019999698975688035, - "loss": 46.0, - "step": 15347 - }, - { - "epoch": 2.4716776037682675, - "grad_norm": 0.0027383260894566774, - "learning_rate": 0.00019999698936432297, - "loss": 46.0, - "step": 15348 - }, - { - "epoch": 2.471838640847055, - "grad_norm": 0.005373249761760235, - "learning_rate": 0.00019999698897174, - "loss": 46.0, - "step": 15349 - }, - { - "epoch": 2.4719996779258424, - "grad_norm": 0.0031262037809938192, - "learning_rate": 0.00019999698857913148, - "loss": 46.0, - "step": 15350 - }, - { - "epoch": 2.47216071500463, - "grad_norm": 0.002600323176011443, - "learning_rate": 0.0001999969881864973, - "loss": 46.0, - "step": 15351 - }, - { - "epoch": 2.4723217520834173, - "grad_norm": 0.0008210457162931561, - "learning_rate": 0.00019999698779383758, - "loss": 46.0, - "step": 15352 - }, - { - "epoch": 2.4724827891622048, - "grad_norm": 0.0038396476302295923, - "learning_rate": 0.00019999698740115224, - "loss": 46.0, - "step": 15353 - }, - { - "epoch": 2.4726438262409918, - "grad_norm": 0.004013812635093927, - "learning_rate": 0.00019999698700844131, - "loss": 46.0, - "step": 15354 - }, - { - "epoch": 2.4728048633197792, - "grad_norm": 0.0030101195443421602, - "learning_rate": 0.00019999698661570477, - "loss": 46.0, - "step": 15355 - }, - { - "epoch": 2.4729659003985667, - "grad_norm": 0.003551741596311331, - "learning_rate": 0.00019999698622294267, - "loss": 46.0, - "step": 15356 - }, - { - "epoch": 2.473126937477354, - "grad_norm": 0.006579568609595299, - "learning_rate": 0.00019999698583015495, - "loss": 46.0, - "step": 15357 - }, - { - "epoch": 2.4732879745561416, - "grad_norm": 0.0028752542566508055, - "learning_rate": 0.00019999698543734165, - "loss": 46.0, - "step": 15358 - }, - { - "epoch": 2.473449011634929, - "grad_norm": 0.003337211674079299, - "learning_rate": 0.00019999698504450275, - "loss": 46.0, - "step": 15359 - }, - { - "epoch": 2.4736100487137165, - "grad_norm": 0.004037004429847002, - "learning_rate": 0.00019999698465163825, - "loss": 46.0, - "step": 15360 - }, - { - "epoch": 2.473771085792504, - "grad_norm": 0.0017564335139468312, - "learning_rate": 0.00019999698425874818, - "loss": 46.0, - "step": 15361 - }, - { - "epoch": 2.473932122871291, - "grad_norm": 0.0026432075537741184, - "learning_rate": 0.00019999698386583247, - "loss": 46.0, - "step": 15362 - }, - { - "epoch": 2.4740931599500784, - "grad_norm": 0.006666726898401976, - "learning_rate": 0.0001999969834728912, - "loss": 46.0, - "step": 15363 - }, - { - "epoch": 2.474254197028866, - "grad_norm": 0.0017813860904425383, - "learning_rate": 0.00019999698307992435, - "loss": 46.0, - "step": 15364 - }, - { - "epoch": 2.4744152341076533, - "grad_norm": 0.0011548922630026937, - "learning_rate": 0.00019999698268693188, - "loss": 46.0, - "step": 15365 - }, - { - "epoch": 2.4745762711864407, - "grad_norm": 0.01014232449233532, - "learning_rate": 0.00019999698229391383, - "loss": 46.0, - "step": 15366 - }, - { - "epoch": 2.474737308265228, - "grad_norm": 0.0008472424815408885, - "learning_rate": 0.00019999698190087016, - "loss": 46.0, - "step": 15367 - }, - { - "epoch": 2.4748983453440156, - "grad_norm": 0.0013406119542196393, - "learning_rate": 0.00019999698150780093, - "loss": 46.0, - "step": 15368 - }, - { - "epoch": 2.4750593824228027, - "grad_norm": 0.002712155692279339, - "learning_rate": 0.00019999698111470608, - "loss": 46.0, - "step": 15369 - }, - { - "epoch": 2.47522041950159, - "grad_norm": 0.0007146503776311874, - "learning_rate": 0.00019999698072158565, - "loss": 46.0, - "step": 15370 - }, - { - "epoch": 2.4753814565803776, - "grad_norm": 0.0066480678506195545, - "learning_rate": 0.0001999969803284396, - "loss": 46.0, - "step": 15371 - }, - { - "epoch": 2.475542493659165, - "grad_norm": 0.0040450431406497955, - "learning_rate": 0.00019999697993526797, - "loss": 46.0, - "step": 15372 - }, - { - "epoch": 2.4757035307379525, - "grad_norm": 0.0003057619323953986, - "learning_rate": 0.00019999697954207077, - "loss": 46.0, - "step": 15373 - }, - { - "epoch": 2.47586456781674, - "grad_norm": 0.0017980325501412153, - "learning_rate": 0.00019999697914884797, - "loss": 46.0, - "step": 15374 - }, - { - "epoch": 2.4760256048955274, - "grad_norm": 0.0051462966948747635, - "learning_rate": 0.00019999697875559954, - "loss": 46.0, - "step": 15375 - }, - { - "epoch": 2.476186641974315, - "grad_norm": 0.0020662748720496893, - "learning_rate": 0.00019999697836232556, - "loss": 46.0, - "step": 15376 - }, - { - "epoch": 2.476347679053102, - "grad_norm": 0.0019423920894041657, - "learning_rate": 0.00019999697796902597, - "loss": 46.0, - "step": 15377 - }, - { - "epoch": 2.4765087161318893, - "grad_norm": 0.002050557406619191, - "learning_rate": 0.00019999697757570078, - "loss": 46.0, - "step": 15378 - }, - { - "epoch": 2.4766697532106767, - "grad_norm": 0.008099086582660675, - "learning_rate": 0.00019999697718234998, - "loss": 46.0, - "step": 15379 - }, - { - "epoch": 2.476830790289464, - "grad_norm": 0.011153250932693481, - "learning_rate": 0.00019999697678897363, - "loss": 46.0, - "step": 15380 - }, - { - "epoch": 2.4769918273682516, - "grad_norm": 0.004111459478735924, - "learning_rate": 0.00019999697639557165, - "loss": 46.0, - "step": 15381 - }, - { - "epoch": 2.477152864447039, - "grad_norm": 0.002271935110911727, - "learning_rate": 0.0001999969760021441, - "loss": 46.0, - "step": 15382 - }, - { - "epoch": 2.477313901525826, - "grad_norm": 0.0054304893128573895, - "learning_rate": 0.00019999697560869092, - "loss": 46.0, - "step": 15383 - }, - { - "epoch": 2.4774749386046135, - "grad_norm": 0.007937449030578136, - "learning_rate": 0.00019999697521521216, - "loss": 46.0, - "step": 15384 - }, - { - "epoch": 2.477635975683401, - "grad_norm": 0.0027499040588736534, - "learning_rate": 0.00019999697482170784, - "loss": 46.0, - "step": 15385 - }, - { - "epoch": 2.4777970127621884, - "grad_norm": 0.00043278661905787885, - "learning_rate": 0.00019999697442817788, - "loss": 46.0, - "step": 15386 - }, - { - "epoch": 2.477958049840976, - "grad_norm": 0.0015744095435366035, - "learning_rate": 0.00019999697403462235, - "loss": 46.0, - "step": 15387 - }, - { - "epoch": 2.4781190869197633, - "grad_norm": 0.0023089037276804447, - "learning_rate": 0.00019999697364104125, - "loss": 46.0, - "step": 15388 - }, - { - "epoch": 2.478280123998551, - "grad_norm": 0.004893219098448753, - "learning_rate": 0.0001999969732474345, - "loss": 46.0, - "step": 15389 - }, - { - "epoch": 2.4784411610773383, - "grad_norm": 0.002226802986115217, - "learning_rate": 0.00019999697285380218, - "loss": 46.0, - "step": 15390 - }, - { - "epoch": 2.4786021981561253, - "grad_norm": 0.0015342776896432042, - "learning_rate": 0.00019999697246014426, - "loss": 46.0, - "step": 15391 - }, - { - "epoch": 2.4787632352349127, - "grad_norm": 0.0013666960876435041, - "learning_rate": 0.00019999697206646077, - "loss": 46.0, - "step": 15392 - }, - { - "epoch": 2.4789242723137, - "grad_norm": 0.006661290302872658, - "learning_rate": 0.00019999697167275167, - "loss": 46.0, - "step": 15393 - }, - { - "epoch": 2.4790853093924876, - "grad_norm": 0.003398258937522769, - "learning_rate": 0.000199996971279017, - "loss": 46.0, - "step": 15394 - }, - { - "epoch": 2.479246346471275, - "grad_norm": 0.003983153495937586, - "learning_rate": 0.00019999697088525669, - "loss": 46.0, - "step": 15395 - }, - { - "epoch": 2.4794073835500625, - "grad_norm": 0.0010558993089944124, - "learning_rate": 0.00019999697049147082, - "loss": 46.0, - "step": 15396 - }, - { - "epoch": 2.47956842062885, - "grad_norm": 0.001031601568683982, - "learning_rate": 0.00019999697009765935, - "loss": 46.0, - "step": 15397 - }, - { - "epoch": 2.479729457707637, - "grad_norm": 0.0014107549795880914, - "learning_rate": 0.00019999696970382226, - "loss": 46.0, - "step": 15398 - }, - { - "epoch": 2.4798904947864244, - "grad_norm": 0.0007549440488219261, - "learning_rate": 0.0001999969693099596, - "loss": 46.0, - "step": 15399 - }, - { - "epoch": 2.480051531865212, - "grad_norm": 0.005329488310962915, - "learning_rate": 0.00019999696891607135, - "loss": 46.0, - "step": 15400 - }, - { - "epoch": 2.4802125689439993, - "grad_norm": 0.00309107918292284, - "learning_rate": 0.00019999696852215747, - "loss": 46.0, - "step": 15401 - }, - { - "epoch": 2.480373606022787, - "grad_norm": 0.0018073205137625337, - "learning_rate": 0.00019999696812821806, - "loss": 46.0, - "step": 15402 - }, - { - "epoch": 2.4805346431015742, - "grad_norm": 0.0013899158220738173, - "learning_rate": 0.000199996967734253, - "loss": 46.0, - "step": 15403 - }, - { - "epoch": 2.4806956801803617, - "grad_norm": 0.0008333500591106713, - "learning_rate": 0.00019999696734026236, - "loss": 46.0, - "step": 15404 - }, - { - "epoch": 2.480856717259149, - "grad_norm": 0.004558071959763765, - "learning_rate": 0.00019999696694624614, - "loss": 46.0, - "step": 15405 - }, - { - "epoch": 2.481017754337936, - "grad_norm": 0.007042016834020615, - "learning_rate": 0.00019999696655220432, - "loss": 46.0, - "step": 15406 - }, - { - "epoch": 2.4811787914167236, - "grad_norm": 0.0023991644848138094, - "learning_rate": 0.0001999969661581369, - "loss": 46.0, - "step": 15407 - }, - { - "epoch": 2.481339828495511, - "grad_norm": 0.01589331403374672, - "learning_rate": 0.0001999969657640439, - "loss": 46.0, - "step": 15408 - }, - { - "epoch": 2.4815008655742985, - "grad_norm": 0.004673053044825792, - "learning_rate": 0.0001999969653699253, - "loss": 46.0, - "step": 15409 - }, - { - "epoch": 2.481661902653086, - "grad_norm": 0.004645536653697491, - "learning_rate": 0.00019999696497578109, - "loss": 46.0, - "step": 15410 - }, - { - "epoch": 2.4818229397318734, - "grad_norm": 0.0024915996473282576, - "learning_rate": 0.0001999969645816113, - "loss": 46.0, - "step": 15411 - }, - { - "epoch": 2.4819839768106604, - "grad_norm": 0.0017545539885759354, - "learning_rate": 0.00019999696418741592, - "loss": 46.0, - "step": 15412 - }, - { - "epoch": 2.482145013889448, - "grad_norm": 0.002297217957675457, - "learning_rate": 0.0001999969637931949, - "loss": 46.0, - "step": 15413 - }, - { - "epoch": 2.4823060509682353, - "grad_norm": 0.007391363848000765, - "learning_rate": 0.00019999696339894835, - "loss": 46.0, - "step": 15414 - }, - { - "epoch": 2.4824670880470228, - "grad_norm": 0.0032937314826995134, - "learning_rate": 0.0001999969630046762, - "loss": 46.0, - "step": 15415 - }, - { - "epoch": 2.48262812512581, - "grad_norm": 0.0007750993245281279, - "learning_rate": 0.00019999696261037843, - "loss": 46.0, - "step": 15416 - }, - { - "epoch": 2.4827891622045977, - "grad_norm": 0.00166051066480577, - "learning_rate": 0.00019999696221605507, - "loss": 46.0, - "step": 15417 - }, - { - "epoch": 2.482950199283385, - "grad_norm": 0.006945026572793722, - "learning_rate": 0.00019999696182170613, - "loss": 46.0, - "step": 15418 - }, - { - "epoch": 2.4831112363621726, - "grad_norm": 0.002210700884461403, - "learning_rate": 0.00019999696142733155, - "loss": 46.0, - "step": 15419 - }, - { - "epoch": 2.48327227344096, - "grad_norm": 0.005010916851460934, - "learning_rate": 0.00019999696103293143, - "loss": 46.0, - "step": 15420 - }, - { - "epoch": 2.483433310519747, - "grad_norm": 0.0005126605392433703, - "learning_rate": 0.0001999969606385057, - "loss": 46.0, - "step": 15421 - }, - { - "epoch": 2.4835943475985345, - "grad_norm": 0.0022305878810584545, - "learning_rate": 0.00019999696024405436, - "loss": 46.0, - "step": 15422 - }, - { - "epoch": 2.483755384677322, - "grad_norm": 0.0021804654970765114, - "learning_rate": 0.00019999695984957745, - "loss": 46.0, - "step": 15423 - }, - { - "epoch": 2.4839164217561094, - "grad_norm": 0.0023431049194186926, - "learning_rate": 0.00019999695945507493, - "loss": 46.0, - "step": 15424 - }, - { - "epoch": 2.484077458834897, - "grad_norm": 0.0016466027591377497, - "learning_rate": 0.00019999695906054683, - "loss": 46.0, - "step": 15425 - }, - { - "epoch": 2.4842384959136843, - "grad_norm": 0.0035464242100715637, - "learning_rate": 0.0001999969586659931, - "loss": 46.0, - "step": 15426 - }, - { - "epoch": 2.4843995329924713, - "grad_norm": 0.002741391770541668, - "learning_rate": 0.0001999969582714138, - "loss": 46.0, - "step": 15427 - }, - { - "epoch": 2.4845605700712587, - "grad_norm": 0.001966150477528572, - "learning_rate": 0.00019999695787680893, - "loss": 46.0, - "step": 15428 - }, - { - "epoch": 2.484721607150046, - "grad_norm": 0.00907079130411148, - "learning_rate": 0.00019999695748217845, - "loss": 46.0, - "step": 15429 - }, - { - "epoch": 2.4848826442288336, - "grad_norm": 0.000851382443215698, - "learning_rate": 0.00019999695708752238, - "loss": 46.0, - "step": 15430 - }, - { - "epoch": 2.485043681307621, - "grad_norm": 0.004795973654836416, - "learning_rate": 0.0001999969566928407, - "loss": 46.0, - "step": 15431 - }, - { - "epoch": 2.4852047183864086, - "grad_norm": 0.000985900405794382, - "learning_rate": 0.00019999695629813343, - "loss": 46.0, - "step": 15432 - }, - { - "epoch": 2.485365755465196, - "grad_norm": 0.006527276244014502, - "learning_rate": 0.00019999695590340054, - "loss": 46.0, - "step": 15433 - }, - { - "epoch": 2.4855267925439835, - "grad_norm": 0.006105063483119011, - "learning_rate": 0.0001999969555086421, - "loss": 46.0, - "step": 15434 - }, - { - "epoch": 2.4856878296227705, - "grad_norm": 0.0022340097930282354, - "learning_rate": 0.00019999695511385804, - "loss": 46.0, - "step": 15435 - }, - { - "epoch": 2.485848866701558, - "grad_norm": 0.001533149159513414, - "learning_rate": 0.0001999969547190484, - "loss": 46.0, - "step": 15436 - }, - { - "epoch": 2.4860099037803454, - "grad_norm": 0.008326280862092972, - "learning_rate": 0.00019999695432421316, - "loss": 46.0, - "step": 15437 - }, - { - "epoch": 2.486170940859133, - "grad_norm": 0.00044093994074501097, - "learning_rate": 0.00019999695392935234, - "loss": 46.0, - "step": 15438 - }, - { - "epoch": 2.4863319779379203, - "grad_norm": 0.0013709098566323519, - "learning_rate": 0.0001999969535344659, - "loss": 46.0, - "step": 15439 - }, - { - "epoch": 2.4864930150167077, - "grad_norm": 0.0010089291026815772, - "learning_rate": 0.00019999695313955388, - "loss": 46.0, - "step": 15440 - }, - { - "epoch": 2.486654052095495, - "grad_norm": 0.00929345190525055, - "learning_rate": 0.00019999695274461625, - "loss": 46.0, - "step": 15441 - }, - { - "epoch": 2.486815089174282, - "grad_norm": 0.005143813788890839, - "learning_rate": 0.00019999695234965305, - "loss": 46.0, - "step": 15442 - }, - { - "epoch": 2.4869761262530696, - "grad_norm": 0.001325281453318894, - "learning_rate": 0.00019999695195466424, - "loss": 46.0, - "step": 15443 - }, - { - "epoch": 2.487137163331857, - "grad_norm": 0.0017885920824483037, - "learning_rate": 0.00019999695155964987, - "loss": 46.0, - "step": 15444 - }, - { - "epoch": 2.4872982004106445, - "grad_norm": 0.0013921266654506326, - "learning_rate": 0.00019999695116460986, - "loss": 46.0, - "step": 15445 - }, - { - "epoch": 2.487459237489432, - "grad_norm": 0.004191992804408073, - "learning_rate": 0.00019999695076954429, - "loss": 46.0, - "step": 15446 - }, - { - "epoch": 2.4876202745682194, - "grad_norm": 0.002598048886284232, - "learning_rate": 0.0001999969503744531, - "loss": 46.0, - "step": 15447 - }, - { - "epoch": 2.487781311647007, - "grad_norm": 0.002194049069657922, - "learning_rate": 0.00019999694997933633, - "loss": 46.0, - "step": 15448 - }, - { - "epoch": 2.4879423487257943, - "grad_norm": 0.002046563196927309, - "learning_rate": 0.00019999694958419394, - "loss": 46.0, - "step": 15449 - }, - { - "epoch": 2.4881033858045813, - "grad_norm": 0.0022202348336577415, - "learning_rate": 0.000199996949189026, - "loss": 46.0, - "step": 15450 - }, - { - "epoch": 2.488264422883369, - "grad_norm": 0.000771569088101387, - "learning_rate": 0.00019999694879383243, - "loss": 46.0, - "step": 15451 - }, - { - "epoch": 2.4884254599621562, - "grad_norm": 0.0006596861639991403, - "learning_rate": 0.00019999694839861328, - "loss": 46.0, - "step": 15452 - }, - { - "epoch": 2.4885864970409437, - "grad_norm": 0.001168744289316237, - "learning_rate": 0.00019999694800336854, - "loss": 46.0, - "step": 15453 - }, - { - "epoch": 2.488747534119731, - "grad_norm": 0.0031377053819596767, - "learning_rate": 0.0001999969476080982, - "loss": 46.0, - "step": 15454 - }, - { - "epoch": 2.4889085711985186, - "grad_norm": 0.0023955118376761675, - "learning_rate": 0.00019999694721280225, - "loss": 46.0, - "step": 15455 - }, - { - "epoch": 2.4890696082773056, - "grad_norm": 0.0022035203874111176, - "learning_rate": 0.00019999694681748076, - "loss": 46.0, - "step": 15456 - }, - { - "epoch": 2.489230645356093, - "grad_norm": 0.004684357903897762, - "learning_rate": 0.00019999694642213362, - "loss": 46.0, - "step": 15457 - }, - { - "epoch": 2.4893916824348805, - "grad_norm": 0.004034834448248148, - "learning_rate": 0.00019999694602676092, - "loss": 46.0, - "step": 15458 - }, - { - "epoch": 2.489552719513668, - "grad_norm": 0.0030605599749833345, - "learning_rate": 0.0001999969456313626, - "loss": 46.0, - "step": 15459 - }, - { - "epoch": 2.4897137565924554, - "grad_norm": 0.00829601101577282, - "learning_rate": 0.0001999969452359387, - "loss": 46.0, - "step": 15460 - }, - { - "epoch": 2.489874793671243, - "grad_norm": 0.00346462894231081, - "learning_rate": 0.00019999694484048921, - "loss": 46.0, - "step": 15461 - }, - { - "epoch": 2.4900358307500303, - "grad_norm": 0.00260978564620018, - "learning_rate": 0.00019999694444501408, - "loss": 46.0, - "step": 15462 - }, - { - "epoch": 2.4901968678288178, - "grad_norm": 0.0013203290291130543, - "learning_rate": 0.00019999694404951342, - "loss": 46.0, - "step": 15463 - }, - { - "epoch": 2.4903579049076052, - "grad_norm": 0.007363055367022753, - "learning_rate": 0.00019999694365398715, - "loss": 46.0, - "step": 15464 - }, - { - "epoch": 2.4905189419863922, - "grad_norm": 0.004174639470875263, - "learning_rate": 0.00019999694325843525, - "loss": 46.0, - "step": 15465 - }, - { - "epoch": 2.4906799790651797, - "grad_norm": 0.0006472582463175058, - "learning_rate": 0.0001999969428628578, - "loss": 46.0, - "step": 15466 - }, - { - "epoch": 2.490841016143967, - "grad_norm": 0.001878245733678341, - "learning_rate": 0.00019999694246725474, - "loss": 46.0, - "step": 15467 - }, - { - "epoch": 2.4910020532227546, - "grad_norm": 0.0013203345006331801, - "learning_rate": 0.00019999694207162606, - "loss": 46.0, - "step": 15468 - }, - { - "epoch": 2.491163090301542, - "grad_norm": 0.00864538736641407, - "learning_rate": 0.00019999694167597182, - "loss": 46.0, - "step": 15469 - }, - { - "epoch": 2.4913241273803295, - "grad_norm": 0.0008616847335360944, - "learning_rate": 0.000199996941280292, - "loss": 46.0, - "step": 15470 - }, - { - "epoch": 2.4914851644591165, - "grad_norm": 0.011509637348353863, - "learning_rate": 0.00019999694088458652, - "loss": 46.0, - "step": 15471 - }, - { - "epoch": 2.491646201537904, - "grad_norm": 0.006392920855432749, - "learning_rate": 0.00019999694048885552, - "loss": 46.0, - "step": 15472 - }, - { - "epoch": 2.4918072386166914, - "grad_norm": 0.0009942905744537711, - "learning_rate": 0.0001999969400930989, - "loss": 46.0, - "step": 15473 - }, - { - "epoch": 2.491968275695479, - "grad_norm": 0.0076873814687132835, - "learning_rate": 0.00019999693969731668, - "loss": 46.0, - "step": 15474 - }, - { - "epoch": 2.4921293127742663, - "grad_norm": 0.0010989124421030283, - "learning_rate": 0.00019999693930150886, - "loss": 46.0, - "step": 15475 - }, - { - "epoch": 2.4922903498530538, - "grad_norm": 0.001140486216172576, - "learning_rate": 0.00019999693890567546, - "loss": 46.0, - "step": 15476 - }, - { - "epoch": 2.492451386931841, - "grad_norm": 0.002440092386677861, - "learning_rate": 0.00019999693850981644, - "loss": 46.0, - "step": 15477 - }, - { - "epoch": 2.4926124240106287, - "grad_norm": 0.0034286475274711847, - "learning_rate": 0.00019999693811393186, - "loss": 46.0, - "step": 15478 - }, - { - "epoch": 2.4927734610894157, - "grad_norm": 0.0026829675771296024, - "learning_rate": 0.00019999693771802167, - "loss": 46.0, - "step": 15479 - }, - { - "epoch": 2.492934498168203, - "grad_norm": 0.009589627385139465, - "learning_rate": 0.0001999969373220859, - "loss": 46.0, - "step": 15480 - }, - { - "epoch": 2.4930955352469906, - "grad_norm": 0.0019724280573427677, - "learning_rate": 0.0001999969369261245, - "loss": 46.0, - "step": 15481 - }, - { - "epoch": 2.493256572325778, - "grad_norm": 0.001608406426385045, - "learning_rate": 0.0001999969365301375, - "loss": 46.0, - "step": 15482 - }, - { - "epoch": 2.4934176094045655, - "grad_norm": 0.0018170453840866685, - "learning_rate": 0.00019999693613412494, - "loss": 46.0, - "step": 15483 - }, - { - "epoch": 2.493578646483353, - "grad_norm": 0.0027995998971164227, - "learning_rate": 0.0001999969357380868, - "loss": 46.0, - "step": 15484 - }, - { - "epoch": 2.4937396835621404, - "grad_norm": 0.0012566003715619445, - "learning_rate": 0.00019999693534202305, - "loss": 46.0, - "step": 15485 - }, - { - "epoch": 2.4939007206409274, - "grad_norm": 0.0020886787679046392, - "learning_rate": 0.0001999969349459337, - "loss": 46.0, - "step": 15486 - }, - { - "epoch": 2.494061757719715, - "grad_norm": 0.0009640268399380147, - "learning_rate": 0.00019999693454981877, - "loss": 46.0, - "step": 15487 - }, - { - "epoch": 2.4942227947985023, - "grad_norm": 0.00232880562543869, - "learning_rate": 0.0001999969341536782, - "loss": 46.0, - "step": 15488 - }, - { - "epoch": 2.4943838318772897, - "grad_norm": 0.0019670519977808, - "learning_rate": 0.00019999693375751207, - "loss": 46.0, - "step": 15489 - }, - { - "epoch": 2.494544868956077, - "grad_norm": 0.0031430241651833057, - "learning_rate": 0.00019999693336132036, - "loss": 46.0, - "step": 15490 - }, - { - "epoch": 2.4947059060348646, - "grad_norm": 0.0027613763231784105, - "learning_rate": 0.00019999693296510301, - "loss": 46.0, - "step": 15491 - }, - { - "epoch": 2.494866943113652, - "grad_norm": 0.0004651047638617456, - "learning_rate": 0.0001999969325688601, - "loss": 46.0, - "step": 15492 - }, - { - "epoch": 2.4950279801924395, - "grad_norm": 0.0029555028304457664, - "learning_rate": 0.0001999969321725916, - "loss": 46.0, - "step": 15493 - }, - { - "epoch": 2.4951890172712266, - "grad_norm": 0.0048257531598210335, - "learning_rate": 0.0001999969317762975, - "loss": 46.0, - "step": 15494 - }, - { - "epoch": 2.495350054350014, - "grad_norm": 0.0019671109039336443, - "learning_rate": 0.0001999969313799778, - "loss": 46.0, - "step": 15495 - }, - { - "epoch": 2.4955110914288015, - "grad_norm": 0.0012454113457351923, - "learning_rate": 0.00019999693098363253, - "loss": 46.0, - "step": 15496 - }, - { - "epoch": 2.495672128507589, - "grad_norm": 0.004872001241892576, - "learning_rate": 0.00019999693058726163, - "loss": 46.0, - "step": 15497 - }, - { - "epoch": 2.4958331655863764, - "grad_norm": 0.0027476674877107143, - "learning_rate": 0.00019999693019086517, - "loss": 46.0, - "step": 15498 - }, - { - "epoch": 2.495994202665164, - "grad_norm": 0.0028232336044311523, - "learning_rate": 0.0001999969297944431, - "loss": 46.0, - "step": 15499 - }, - { - "epoch": 2.496155239743951, - "grad_norm": 0.002674079267308116, - "learning_rate": 0.00019999692939799544, - "loss": 46.0, - "step": 15500 - }, - { - "epoch": 2.4963162768227383, - "grad_norm": 0.001613573869690299, - "learning_rate": 0.00019999692900152217, - "loss": 46.0, - "step": 15501 - }, - { - "epoch": 2.4964773139015257, - "grad_norm": 0.002364043379202485, - "learning_rate": 0.0001999969286050233, - "loss": 46.0, - "step": 15502 - }, - { - "epoch": 2.496638350980313, - "grad_norm": 0.0010843501659110188, - "learning_rate": 0.00019999692820849883, - "loss": 46.0, - "step": 15503 - }, - { - "epoch": 2.4967993880591006, - "grad_norm": 0.00433829240500927, - "learning_rate": 0.0001999969278119488, - "loss": 46.0, - "step": 15504 - }, - { - "epoch": 2.496960425137888, - "grad_norm": 0.0006053737597540021, - "learning_rate": 0.0001999969274153732, - "loss": 46.0, - "step": 15505 - }, - { - "epoch": 2.4971214622166755, - "grad_norm": 0.004616125021129847, - "learning_rate": 0.00019999692701877194, - "loss": 46.0, - "step": 15506 - }, - { - "epoch": 2.497282499295463, - "grad_norm": 0.001427347306162119, - "learning_rate": 0.00019999692662214512, - "loss": 46.0, - "step": 15507 - }, - { - "epoch": 2.49744353637425, - "grad_norm": 0.0005079785478301346, - "learning_rate": 0.00019999692622549268, - "loss": 46.0, - "step": 15508 - }, - { - "epoch": 2.4976045734530374, - "grad_norm": 0.006529994774609804, - "learning_rate": 0.00019999692582881468, - "loss": 46.0, - "step": 15509 - }, - { - "epoch": 2.497765610531825, - "grad_norm": 0.003117122920230031, - "learning_rate": 0.00019999692543211107, - "loss": 46.0, - "step": 15510 - }, - { - "epoch": 2.4979266476106123, - "grad_norm": 0.010971128940582275, - "learning_rate": 0.00019999692503538187, - "loss": 46.0, - "step": 15511 - }, - { - "epoch": 2.4980876846894, - "grad_norm": 0.003298766678199172, - "learning_rate": 0.00019999692463862706, - "loss": 46.0, - "step": 15512 - }, - { - "epoch": 2.4982487217681872, - "grad_norm": 0.008274292573332787, - "learning_rate": 0.00019999692424184669, - "loss": 46.0, - "step": 15513 - }, - { - "epoch": 2.4984097588469747, - "grad_norm": 0.0023968422319740057, - "learning_rate": 0.00019999692384504072, - "loss": 46.0, - "step": 15514 - }, - { - "epoch": 2.4985707959257617, - "grad_norm": 0.0019090829882770777, - "learning_rate": 0.00019999692344820912, - "loss": 46.0, - "step": 15515 - }, - { - "epoch": 2.498731833004549, - "grad_norm": 0.0037231524474918842, - "learning_rate": 0.00019999692305135196, - "loss": 46.0, - "step": 15516 - }, - { - "epoch": 2.4988928700833366, - "grad_norm": 0.006756780203431845, - "learning_rate": 0.0001999969226544692, - "loss": 46.0, - "step": 15517 - }, - { - "epoch": 2.499053907162124, - "grad_norm": 0.005277234595268965, - "learning_rate": 0.00019999692225756085, - "loss": 46.0, - "step": 15518 - }, - { - "epoch": 2.4992149442409115, - "grad_norm": 0.001203195541165769, - "learning_rate": 0.0001999969218606269, - "loss": 46.0, - "step": 15519 - }, - { - "epoch": 2.499375981319699, - "grad_norm": 0.002336875768378377, - "learning_rate": 0.00019999692146366733, - "loss": 46.0, - "step": 15520 - }, - { - "epoch": 2.4995370183984864, - "grad_norm": 0.009139231406152248, - "learning_rate": 0.0001999969210666822, - "loss": 46.0, - "step": 15521 - }, - { - "epoch": 2.499698055477274, - "grad_norm": 0.0034921576734632254, - "learning_rate": 0.00019999692066967147, - "loss": 46.0, - "step": 15522 - }, - { - "epoch": 2.499859092556061, - "grad_norm": 0.0009065789636224508, - "learning_rate": 0.00019999692027263512, - "loss": 46.0, - "step": 15523 - }, - { - "epoch": 2.5000201296348483, - "grad_norm": 0.0011126770405098796, - "learning_rate": 0.0001999969198755732, - "loss": 46.0, - "step": 15524 - }, - { - "epoch": 2.5001811667136358, - "grad_norm": 0.0009970295941457152, - "learning_rate": 0.0001999969194784857, - "loss": 46.0, - "step": 15525 - }, - { - "epoch": 2.5003422037924232, - "grad_norm": 0.006593253929167986, - "learning_rate": 0.00019999691908137256, - "loss": 46.0, - "step": 15526 - }, - { - "epoch": 2.5005032408712107, - "grad_norm": 0.0012017093831673265, - "learning_rate": 0.00019999691868423386, - "loss": 46.0, - "step": 15527 - }, - { - "epoch": 2.500664277949998, - "grad_norm": 0.00404240470379591, - "learning_rate": 0.00019999691828706957, - "loss": 46.0, - "step": 15528 - }, - { - "epoch": 2.500825315028785, - "grad_norm": 0.001371907303109765, - "learning_rate": 0.00019999691788987966, - "loss": 46.0, - "step": 15529 - }, - { - "epoch": 2.5009863521075726, - "grad_norm": 0.002074048388749361, - "learning_rate": 0.00019999691749266417, - "loss": 46.0, - "step": 15530 - }, - { - "epoch": 2.50114738918636, - "grad_norm": 0.007534390315413475, - "learning_rate": 0.0001999969170954231, - "loss": 46.0, - "step": 15531 - }, - { - "epoch": 2.5013084262651475, - "grad_norm": 0.002561690052971244, - "learning_rate": 0.0001999969166981564, - "loss": 46.0, - "step": 15532 - }, - { - "epoch": 2.501469463343935, - "grad_norm": 0.0016198353841900826, - "learning_rate": 0.00019999691630086415, - "loss": 46.0, - "step": 15533 - }, - { - "epoch": 2.5016305004227224, - "grad_norm": 0.004757654387503862, - "learning_rate": 0.00019999691590354628, - "loss": 46.0, - "step": 15534 - }, - { - "epoch": 2.50179153750151, - "grad_norm": 0.0013877920573577285, - "learning_rate": 0.00019999691550620283, - "loss": 46.0, - "step": 15535 - }, - { - "epoch": 2.5019525745802973, - "grad_norm": 0.0051576984114944935, - "learning_rate": 0.00019999691510883376, - "loss": 46.0, - "step": 15536 - }, - { - "epoch": 2.5021136116590847, - "grad_norm": 0.00174257205799222, - "learning_rate": 0.0001999969147114391, - "loss": 46.0, - "step": 15537 - }, - { - "epoch": 2.5022746487378718, - "grad_norm": 0.0060266535729169846, - "learning_rate": 0.0001999969143140189, - "loss": 46.0, - "step": 15538 - }, - { - "epoch": 2.502435685816659, - "grad_norm": 0.008352944627404213, - "learning_rate": 0.00019999691391657304, - "loss": 46.0, - "step": 15539 - }, - { - "epoch": 2.5025967228954467, - "grad_norm": 0.0027539320290088654, - "learning_rate": 0.00019999691351910162, - "loss": 46.0, - "step": 15540 - }, - { - "epoch": 2.502757759974234, - "grad_norm": 0.002372451825067401, - "learning_rate": 0.0001999969131216046, - "loss": 46.0, - "step": 15541 - }, - { - "epoch": 2.5029187970530216, - "grad_norm": 0.00531651871278882, - "learning_rate": 0.000199996912724082, - "loss": 46.0, - "step": 15542 - }, - { - "epoch": 2.503079834131809, - "grad_norm": 0.0063546826131641865, - "learning_rate": 0.00019999691232653376, - "loss": 46.0, - "step": 15543 - }, - { - "epoch": 2.503240871210596, - "grad_norm": 0.0009585191146470606, - "learning_rate": 0.00019999691192895995, - "loss": 46.0, - "step": 15544 - }, - { - "epoch": 2.5034019082893835, - "grad_norm": 0.0018841332057490945, - "learning_rate": 0.00019999691153136057, - "loss": 46.0, - "step": 15545 - }, - { - "epoch": 2.503562945368171, - "grad_norm": 0.003862903220579028, - "learning_rate": 0.00019999691113373554, - "loss": 46.0, - "step": 15546 - }, - { - "epoch": 2.5037239824469584, - "grad_norm": 0.0038681388832628727, - "learning_rate": 0.00019999691073608496, - "loss": 46.0, - "step": 15547 - }, - { - "epoch": 2.503885019525746, - "grad_norm": 0.001058459747582674, - "learning_rate": 0.00019999691033840877, - "loss": 46.0, - "step": 15548 - }, - { - "epoch": 2.5040460566045333, - "grad_norm": 0.0011678003938868642, - "learning_rate": 0.000199996909940707, - "loss": 46.0, - "step": 15549 - }, - { - "epoch": 2.5042070936833207, - "grad_norm": 0.0016315385000780225, - "learning_rate": 0.00019999690954297962, - "loss": 46.0, - "step": 15550 - }, - { - "epoch": 2.504368130762108, - "grad_norm": 0.0024730025324970484, - "learning_rate": 0.00019999690914522669, - "loss": 46.0, - "step": 15551 - }, - { - "epoch": 2.5045291678408956, - "grad_norm": 0.0024479362182319164, - "learning_rate": 0.00019999690874744812, - "loss": 46.0, - "step": 15552 - }, - { - "epoch": 2.5046902049196826, - "grad_norm": 0.0019860391039401293, - "learning_rate": 0.00019999690834964398, - "loss": 46.0, - "step": 15553 - }, - { - "epoch": 2.50485124199847, - "grad_norm": 0.0030104625038802624, - "learning_rate": 0.0001999969079518142, - "loss": 46.0, - "step": 15554 - }, - { - "epoch": 2.5050122790772575, - "grad_norm": 0.0040182070806622505, - "learning_rate": 0.00019999690755395888, - "loss": 46.0, - "step": 15555 - }, - { - "epoch": 2.505173316156045, - "grad_norm": 0.0012150746770203114, - "learning_rate": 0.00019999690715607793, - "loss": 46.0, - "step": 15556 - }, - { - "epoch": 2.5053343532348324, - "grad_norm": 0.010076258331537247, - "learning_rate": 0.00019999690675817142, - "loss": 46.0, - "step": 15557 - }, - { - "epoch": 2.5054953903136195, - "grad_norm": 0.0036031913477927446, - "learning_rate": 0.00019999690636023928, - "loss": 46.0, - "step": 15558 - }, - { - "epoch": 2.505656427392407, - "grad_norm": 0.0033157074358314276, - "learning_rate": 0.00019999690596228157, - "loss": 46.0, - "step": 15559 - }, - { - "epoch": 2.5058174644711944, - "grad_norm": 0.0029015454929322004, - "learning_rate": 0.00019999690556429827, - "loss": 46.0, - "step": 15560 - }, - { - "epoch": 2.505978501549982, - "grad_norm": 0.0010072861332446337, - "learning_rate": 0.00019999690516628936, - "loss": 46.0, - "step": 15561 - }, - { - "epoch": 2.5061395386287693, - "grad_norm": 0.003890155116096139, - "learning_rate": 0.00019999690476825487, - "loss": 46.0, - "step": 15562 - }, - { - "epoch": 2.5063005757075567, - "grad_norm": 0.001681747380644083, - "learning_rate": 0.00019999690437019475, - "loss": 46.0, - "step": 15563 - }, - { - "epoch": 2.506461612786344, - "grad_norm": 0.0047065881080925465, - "learning_rate": 0.00019999690397210906, - "loss": 46.0, - "step": 15564 - }, - { - "epoch": 2.5066226498651316, - "grad_norm": 0.002220695838332176, - "learning_rate": 0.0001999969035739978, - "loss": 46.0, - "step": 15565 - }, - { - "epoch": 2.506783686943919, - "grad_norm": 0.0048478953540325165, - "learning_rate": 0.00019999690317586092, - "loss": 46.0, - "step": 15566 - }, - { - "epoch": 2.506944724022706, - "grad_norm": 0.004355294164270163, - "learning_rate": 0.00019999690277769844, - "loss": 46.0, - "step": 15567 - }, - { - "epoch": 2.5071057611014935, - "grad_norm": 0.004388112109154463, - "learning_rate": 0.0001999969023795104, - "loss": 46.0, - "step": 15568 - }, - { - "epoch": 2.507266798180281, - "grad_norm": 0.01185726746916771, - "learning_rate": 0.00019999690198129673, - "loss": 46.0, - "step": 15569 - }, - { - "epoch": 2.5074278352590684, - "grad_norm": 0.001428323914296925, - "learning_rate": 0.00019999690158305748, - "loss": 46.0, - "step": 15570 - }, - { - "epoch": 2.507588872337856, - "grad_norm": 0.01658983901143074, - "learning_rate": 0.00019999690118479264, - "loss": 46.0, - "step": 15571 - }, - { - "epoch": 2.5077499094166433, - "grad_norm": 0.0010460596531629562, - "learning_rate": 0.00019999690078650217, - "loss": 46.0, - "step": 15572 - }, - { - "epoch": 2.5079109464954303, - "grad_norm": 0.0032196880783885717, - "learning_rate": 0.00019999690038818616, - "loss": 46.0, - "step": 15573 - }, - { - "epoch": 2.508071983574218, - "grad_norm": 0.001326313940808177, - "learning_rate": 0.00019999689998984453, - "loss": 46.0, - "step": 15574 - }, - { - "epoch": 2.5082330206530052, - "grad_norm": 0.0025898038875311613, - "learning_rate": 0.0001999968995914773, - "loss": 46.0, - "step": 15575 - }, - { - "epoch": 2.5083940577317927, - "grad_norm": 0.007924860343337059, - "learning_rate": 0.0001999968991930845, - "loss": 46.0, - "step": 15576 - }, - { - "epoch": 2.50855509481058, - "grad_norm": 0.0020931025501340628, - "learning_rate": 0.00019999689879466608, - "loss": 46.0, - "step": 15577 - }, - { - "epoch": 2.5087161318893676, - "grad_norm": 0.0003746419388335198, - "learning_rate": 0.00019999689839622205, - "loss": 46.0, - "step": 15578 - }, - { - "epoch": 2.508877168968155, - "grad_norm": 0.009165162220597267, - "learning_rate": 0.00019999689799775247, - "loss": 46.0, - "step": 15579 - }, - { - "epoch": 2.5090382060469425, - "grad_norm": 0.001062337658368051, - "learning_rate": 0.0001999968975992573, - "loss": 46.0, - "step": 15580 - }, - { - "epoch": 2.50919924312573, - "grad_norm": 0.006670957896858454, - "learning_rate": 0.00019999689720073648, - "loss": 46.0, - "step": 15581 - }, - { - "epoch": 2.509360280204517, - "grad_norm": 0.0018045203760266304, - "learning_rate": 0.00019999689680219013, - "loss": 46.0, - "step": 15582 - }, - { - "epoch": 2.5095213172833044, - "grad_norm": 0.0033146946225315332, - "learning_rate": 0.00019999689640361814, - "loss": 46.0, - "step": 15583 - }, - { - "epoch": 2.509682354362092, - "grad_norm": 0.008988477289676666, - "learning_rate": 0.00019999689600502056, - "loss": 46.0, - "step": 15584 - }, - { - "epoch": 2.5098433914408793, - "grad_norm": 0.0010040607303380966, - "learning_rate": 0.0001999968956063974, - "loss": 46.0, - "step": 15585 - }, - { - "epoch": 2.5100044285196668, - "grad_norm": 0.0007079103961586952, - "learning_rate": 0.00019999689520774864, - "loss": 46.0, - "step": 15586 - }, - { - "epoch": 2.5101654655984538, - "grad_norm": 0.0005958284600637853, - "learning_rate": 0.00019999689480907428, - "loss": 46.0, - "step": 15587 - }, - { - "epoch": 2.5103265026772412, - "grad_norm": 0.007474029902368784, - "learning_rate": 0.00019999689441037435, - "loss": 46.0, - "step": 15588 - }, - { - "epoch": 2.5104875397560287, - "grad_norm": 0.0017049798043444753, - "learning_rate": 0.0001999968940116488, - "loss": 46.0, - "step": 15589 - }, - { - "epoch": 2.510648576834816, - "grad_norm": 0.0053446246311068535, - "learning_rate": 0.00019999689361289766, - "loss": 46.0, - "step": 15590 - }, - { - "epoch": 2.5108096139136036, - "grad_norm": 0.007703241426497698, - "learning_rate": 0.00019999689321412097, - "loss": 46.0, - "step": 15591 - }, - { - "epoch": 2.510970650992391, - "grad_norm": 0.000802951748482883, - "learning_rate": 0.00019999689281531864, - "loss": 46.0, - "step": 15592 - }, - { - "epoch": 2.5111316880711785, - "grad_norm": 0.002584241796284914, - "learning_rate": 0.00019999689241649072, - "loss": 46.0, - "step": 15593 - }, - { - "epoch": 2.511292725149966, - "grad_norm": 0.001001784228719771, - "learning_rate": 0.0001999968920176372, - "loss": 46.0, - "step": 15594 - }, - { - "epoch": 2.5114537622287534, - "grad_norm": 0.0016463365172967315, - "learning_rate": 0.0001999968916187581, - "loss": 46.0, - "step": 15595 - }, - { - "epoch": 2.511614799307541, - "grad_norm": 0.0009321488323621452, - "learning_rate": 0.00019999689121985342, - "loss": 46.0, - "step": 15596 - }, - { - "epoch": 2.511775836386328, - "grad_norm": 0.00227958126924932, - "learning_rate": 0.0001999968908209231, - "loss": 46.0, - "step": 15597 - }, - { - "epoch": 2.5119368734651153, - "grad_norm": 0.0020615821704268456, - "learning_rate": 0.0001999968904219672, - "loss": 46.0, - "step": 15598 - }, - { - "epoch": 2.5120979105439027, - "grad_norm": 0.0005884967977181077, - "learning_rate": 0.00019999689002298573, - "loss": 46.0, - "step": 15599 - }, - { - "epoch": 2.51225894762269, - "grad_norm": 0.0010356927523389459, - "learning_rate": 0.00019999688962397868, - "loss": 46.0, - "step": 15600 - }, - { - "epoch": 2.5124199847014776, - "grad_norm": 0.010368126444518566, - "learning_rate": 0.00019999688922494598, - "loss": 46.0, - "step": 15601 - }, - { - "epoch": 2.5125810217802647, - "grad_norm": 0.0043180398643016815, - "learning_rate": 0.00019999688882588773, - "loss": 46.0, - "step": 15602 - }, - { - "epoch": 2.512742058859052, - "grad_norm": 0.0012822233838960528, - "learning_rate": 0.00019999688842680389, - "loss": 46.0, - "step": 15603 - }, - { - "epoch": 2.5129030959378396, - "grad_norm": 0.0037582844961434603, - "learning_rate": 0.00019999688802769443, - "loss": 46.0, - "step": 15604 - }, - { - "epoch": 2.513064133016627, - "grad_norm": 0.006926591973751783, - "learning_rate": 0.00019999688762855939, - "loss": 46.0, - "step": 15605 - }, - { - "epoch": 2.5132251700954145, - "grad_norm": 0.002861557062715292, - "learning_rate": 0.00019999688722939873, - "loss": 46.0, - "step": 15606 - }, - { - "epoch": 2.513386207174202, - "grad_norm": 0.0034386483021080494, - "learning_rate": 0.0001999968868302125, - "loss": 46.0, - "step": 15607 - }, - { - "epoch": 2.5135472442529894, - "grad_norm": 0.007441123481839895, - "learning_rate": 0.00019999688643100068, - "loss": 46.0, - "step": 15608 - }, - { - "epoch": 2.513708281331777, - "grad_norm": 0.0021146193612366915, - "learning_rate": 0.00019999688603176329, - "loss": 46.0, - "step": 15609 - }, - { - "epoch": 2.5138693184105643, - "grad_norm": 0.00563927972689271, - "learning_rate": 0.00019999688563250025, - "loss": 46.0, - "step": 15610 - }, - { - "epoch": 2.5140303554893513, - "grad_norm": 0.002676117466762662, - "learning_rate": 0.00019999688523321166, - "loss": 46.0, - "step": 15611 - }, - { - "epoch": 2.5141913925681387, - "grad_norm": 0.009635621681809425, - "learning_rate": 0.00019999688483389742, - "loss": 46.0, - "step": 15612 - }, - { - "epoch": 2.514352429646926, - "grad_norm": 0.005568181164562702, - "learning_rate": 0.00019999688443455763, - "loss": 46.0, - "step": 15613 - }, - { - "epoch": 2.5145134667257136, - "grad_norm": 0.0030140401795506477, - "learning_rate": 0.00019999688403519225, - "loss": 46.0, - "step": 15614 - }, - { - "epoch": 2.514674503804501, - "grad_norm": 0.003594675799831748, - "learning_rate": 0.00019999688363580128, - "loss": 46.0, - "step": 15615 - }, - { - "epoch": 2.5148355408832885, - "grad_norm": 0.004549028351902962, - "learning_rate": 0.0001999968832363847, - "loss": 46.0, - "step": 15616 - }, - { - "epoch": 2.5149965779620755, - "grad_norm": 0.001601845258846879, - "learning_rate": 0.00019999688283694252, - "loss": 46.0, - "step": 15617 - }, - { - "epoch": 2.515157615040863, - "grad_norm": 0.0016188089502975345, - "learning_rate": 0.00019999688243747474, - "loss": 46.0, - "step": 15618 - }, - { - "epoch": 2.5153186521196504, - "grad_norm": 0.009368407540023327, - "learning_rate": 0.0001999968820379814, - "loss": 46.0, - "step": 15619 - }, - { - "epoch": 2.515479689198438, - "grad_norm": 0.0025999932549893856, - "learning_rate": 0.00019999688163846243, - "loss": 46.0, - "step": 15620 - }, - { - "epoch": 2.5156407262772253, - "grad_norm": 0.0028097087051719427, - "learning_rate": 0.00019999688123891786, - "loss": 46.0, - "step": 15621 - }, - { - "epoch": 2.515801763356013, - "grad_norm": 0.005572411231696606, - "learning_rate": 0.00019999688083934772, - "loss": 46.0, - "step": 15622 - }, - { - "epoch": 2.5159628004348003, - "grad_norm": 0.0024771352764219046, - "learning_rate": 0.000199996880439752, - "loss": 46.0, - "step": 15623 - }, - { - "epoch": 2.5161238375135877, - "grad_norm": 0.0062670293264091015, - "learning_rate": 0.00019999688004013067, - "loss": 46.0, - "step": 15624 - }, - { - "epoch": 2.516284874592375, - "grad_norm": 0.0016047862591221929, - "learning_rate": 0.00019999687964048372, - "loss": 46.0, - "step": 15625 - }, - { - "epoch": 2.516445911671162, - "grad_norm": 0.0021048770286142826, - "learning_rate": 0.0001999968792408112, - "loss": 46.0, - "step": 15626 - }, - { - "epoch": 2.5166069487499496, - "grad_norm": 0.006055477075278759, - "learning_rate": 0.0001999968788411131, - "loss": 46.0, - "step": 15627 - }, - { - "epoch": 2.516767985828737, - "grad_norm": 0.0007138599758036435, - "learning_rate": 0.0001999968784413894, - "loss": 46.0, - "step": 15628 - }, - { - "epoch": 2.5169290229075245, - "grad_norm": 0.0019480041228234768, - "learning_rate": 0.0001999968780416401, - "loss": 46.0, - "step": 15629 - }, - { - "epoch": 2.517090059986312, - "grad_norm": 0.004484706092625856, - "learning_rate": 0.00019999687764186522, - "loss": 46.0, - "step": 15630 - }, - { - "epoch": 2.517251097065099, - "grad_norm": 0.002487529069185257, - "learning_rate": 0.00019999687724206472, - "loss": 46.0, - "step": 15631 - }, - { - "epoch": 2.5174121341438864, - "grad_norm": 0.00363441975787282, - "learning_rate": 0.00019999687684223863, - "loss": 46.0, - "step": 15632 - }, - { - "epoch": 2.517573171222674, - "grad_norm": 0.0015664249658584595, - "learning_rate": 0.00019999687644238693, - "loss": 46.0, - "step": 15633 - }, - { - "epoch": 2.5177342083014613, - "grad_norm": 0.0047117420472204685, - "learning_rate": 0.00019999687604250967, - "loss": 46.0, - "step": 15634 - }, - { - "epoch": 2.517895245380249, - "grad_norm": 0.0007667237659916282, - "learning_rate": 0.00019999687564260682, - "loss": 46.0, - "step": 15635 - }, - { - "epoch": 2.5180562824590362, - "grad_norm": 0.0014587810728698969, - "learning_rate": 0.00019999687524267833, - "loss": 46.0, - "step": 15636 - }, - { - "epoch": 2.5182173195378237, - "grad_norm": 0.0013479279587045312, - "learning_rate": 0.00019999687484272428, - "loss": 46.0, - "step": 15637 - }, - { - "epoch": 2.518378356616611, - "grad_norm": 0.0033141302410513163, - "learning_rate": 0.00019999687444274464, - "loss": 46.0, - "step": 15638 - }, - { - "epoch": 2.5185393936953986, - "grad_norm": 0.006300832610577345, - "learning_rate": 0.0001999968740427394, - "loss": 46.0, - "step": 15639 - }, - { - "epoch": 2.5187004307741856, - "grad_norm": 0.0022819561418145895, - "learning_rate": 0.00019999687364270855, - "loss": 46.0, - "step": 15640 - }, - { - "epoch": 2.518861467852973, - "grad_norm": 0.003823867067694664, - "learning_rate": 0.00019999687324265213, - "loss": 46.0, - "step": 15641 - }, - { - "epoch": 2.5190225049317605, - "grad_norm": 0.0007260690326802433, - "learning_rate": 0.0001999968728425701, - "loss": 46.0, - "step": 15642 - }, - { - "epoch": 2.519183542010548, - "grad_norm": 0.0003675887128338218, - "learning_rate": 0.00019999687244246249, - "loss": 46.0, - "step": 15643 - }, - { - "epoch": 2.5193445790893354, - "grad_norm": 0.003494833828881383, - "learning_rate": 0.00019999687204232927, - "loss": 46.0, - "step": 15644 - }, - { - "epoch": 2.519505616168123, - "grad_norm": 0.002476646564900875, - "learning_rate": 0.00019999687164217047, - "loss": 46.0, - "step": 15645 - }, - { - "epoch": 2.51966665324691, - "grad_norm": 0.0030917986296117306, - "learning_rate": 0.00019999687124198605, - "loss": 46.0, - "step": 15646 - }, - { - "epoch": 2.5198276903256973, - "grad_norm": 0.001155301695689559, - "learning_rate": 0.00019999687084177605, - "loss": 46.0, - "step": 15647 - }, - { - "epoch": 2.5199887274044848, - "grad_norm": 0.0011219088919460773, - "learning_rate": 0.00019999687044154046, - "loss": 46.0, - "step": 15648 - }, - { - "epoch": 2.520149764483272, - "grad_norm": 0.005977956112474203, - "learning_rate": 0.00019999687004127928, - "loss": 46.0, - "step": 15649 - }, - { - "epoch": 2.5203108015620597, - "grad_norm": 0.002195816021412611, - "learning_rate": 0.00019999686964099252, - "loss": 46.0, - "step": 15650 - }, - { - "epoch": 2.520471838640847, - "grad_norm": 0.006962589919567108, - "learning_rate": 0.00019999686924068014, - "loss": 46.0, - "step": 15651 - }, - { - "epoch": 2.5206328757196346, - "grad_norm": 0.0008813330787234008, - "learning_rate": 0.0001999968688403422, - "loss": 46.0, - "step": 15652 - }, - { - "epoch": 2.520793912798422, - "grad_norm": 0.004322392400354147, - "learning_rate": 0.00019999686843997862, - "loss": 46.0, - "step": 15653 - }, - { - "epoch": 2.5209549498772095, - "grad_norm": 0.005127814132720232, - "learning_rate": 0.00019999686803958945, - "loss": 46.0, - "step": 15654 - }, - { - "epoch": 2.5211159869559965, - "grad_norm": 0.0008404377149417996, - "learning_rate": 0.0001999968676391747, - "loss": 46.0, - "step": 15655 - }, - { - "epoch": 2.521277024034784, - "grad_norm": 0.003995373845100403, - "learning_rate": 0.00019999686723873436, - "loss": 46.0, - "step": 15656 - }, - { - "epoch": 2.5214380611135714, - "grad_norm": 0.006385750137269497, - "learning_rate": 0.00019999686683826843, - "loss": 46.0, - "step": 15657 - }, - { - "epoch": 2.521599098192359, - "grad_norm": 0.005439152475446463, - "learning_rate": 0.00019999686643777689, - "loss": 46.0, - "step": 15658 - }, - { - "epoch": 2.5217601352711463, - "grad_norm": 0.0017037151847034693, - "learning_rate": 0.00019999686603725978, - "loss": 46.0, - "step": 15659 - }, - { - "epoch": 2.5219211723499337, - "grad_norm": 0.0009728716686367989, - "learning_rate": 0.00019999686563671704, - "loss": 46.0, - "step": 15660 - }, - { - "epoch": 2.5220822094287207, - "grad_norm": 0.009522090665996075, - "learning_rate": 0.00019999686523614876, - "loss": 46.0, - "step": 15661 - }, - { - "epoch": 2.522243246507508, - "grad_norm": 0.0010149332229048014, - "learning_rate": 0.00019999686483555484, - "loss": 46.0, - "step": 15662 - }, - { - "epoch": 2.5224042835862956, - "grad_norm": 0.0019164497498422861, - "learning_rate": 0.00019999686443493534, - "loss": 46.0, - "step": 15663 - }, - { - "epoch": 2.522565320665083, - "grad_norm": 0.00240589608438313, - "learning_rate": 0.00019999686403429024, - "loss": 46.0, - "step": 15664 - }, - { - "epoch": 2.5227263577438706, - "grad_norm": 0.002015412785112858, - "learning_rate": 0.00019999686363361956, - "loss": 46.0, - "step": 15665 - }, - { - "epoch": 2.522887394822658, - "grad_norm": 0.00035457449848763645, - "learning_rate": 0.00019999686323292327, - "loss": 46.0, - "step": 15666 - }, - { - "epoch": 2.5230484319014455, - "grad_norm": 0.011164935305714607, - "learning_rate": 0.0001999968628322014, - "loss": 46.0, - "step": 15667 - }, - { - "epoch": 2.523209468980233, - "grad_norm": 0.004561011679470539, - "learning_rate": 0.00019999686243145392, - "loss": 46.0, - "step": 15668 - }, - { - "epoch": 2.5233705060590204, - "grad_norm": 0.003374389372766018, - "learning_rate": 0.00019999686203068086, - "loss": 46.0, - "step": 15669 - }, - { - "epoch": 2.5235315431378074, - "grad_norm": 0.0009957189904525876, - "learning_rate": 0.00019999686162988222, - "loss": 46.0, - "step": 15670 - }, - { - "epoch": 2.523692580216595, - "grad_norm": 0.004869405180215836, - "learning_rate": 0.00019999686122905793, - "loss": 46.0, - "step": 15671 - }, - { - "epoch": 2.5238536172953823, - "grad_norm": 0.009459338150918484, - "learning_rate": 0.0001999968608282081, - "loss": 46.0, - "step": 15672 - }, - { - "epoch": 2.5240146543741697, - "grad_norm": 0.0032983017154037952, - "learning_rate": 0.00019999686042733266, - "loss": 46.0, - "step": 15673 - }, - { - "epoch": 2.524175691452957, - "grad_norm": 0.00269155390560627, - "learning_rate": 0.0001999968600264316, - "loss": 46.0, - "step": 15674 - }, - { - "epoch": 2.524336728531744, - "grad_norm": 0.008832383900880814, - "learning_rate": 0.00019999685962550498, - "loss": 46.0, - "step": 15675 - }, - { - "epoch": 2.5244977656105316, - "grad_norm": 0.002459681360051036, - "learning_rate": 0.00019999685922455278, - "loss": 46.0, - "step": 15676 - }, - { - "epoch": 2.524658802689319, - "grad_norm": 0.008503999561071396, - "learning_rate": 0.00019999685882357495, - "loss": 46.0, - "step": 15677 - }, - { - "epoch": 2.5248198397681065, - "grad_norm": 0.001488016452640295, - "learning_rate": 0.00019999685842257155, - "loss": 46.0, - "step": 15678 - }, - { - "epoch": 2.524980876846894, - "grad_norm": 0.001180895371362567, - "learning_rate": 0.00019999685802154254, - "loss": 46.0, - "step": 15679 - }, - { - "epoch": 2.5251419139256814, - "grad_norm": 0.0034441722091287374, - "learning_rate": 0.00019999685762048795, - "loss": 46.0, - "step": 15680 - }, - { - "epoch": 2.525302951004469, - "grad_norm": 0.0035903742536902428, - "learning_rate": 0.00019999685721940776, - "loss": 46.0, - "step": 15681 - }, - { - "epoch": 2.5254639880832563, - "grad_norm": 0.0016748111229389906, - "learning_rate": 0.00019999685681830194, - "loss": 46.0, - "step": 15682 - }, - { - "epoch": 2.525625025162044, - "grad_norm": 0.0018760936800390482, - "learning_rate": 0.00019999685641717058, - "loss": 46.0, - "step": 15683 - }, - { - "epoch": 2.525786062240831, - "grad_norm": 0.00208725081756711, - "learning_rate": 0.00019999685601601358, - "loss": 46.0, - "step": 15684 - }, - { - "epoch": 2.5259470993196182, - "grad_norm": 0.0019400343298912048, - "learning_rate": 0.00019999685561483102, - "loss": 46.0, - "step": 15685 - }, - { - "epoch": 2.5261081363984057, - "grad_norm": 0.0032016015611588955, - "learning_rate": 0.00019999685521362285, - "loss": 46.0, - "step": 15686 - }, - { - "epoch": 2.526269173477193, - "grad_norm": 0.0012566620716825128, - "learning_rate": 0.00019999685481238911, - "loss": 46.0, - "step": 15687 - }, - { - "epoch": 2.5264302105559806, - "grad_norm": 0.002221318194642663, - "learning_rate": 0.00019999685441112974, - "loss": 46.0, - "step": 15688 - }, - { - "epoch": 2.526591247634768, - "grad_norm": 0.0016371557721868157, - "learning_rate": 0.0001999968540098448, - "loss": 46.0, - "step": 15689 - }, - { - "epoch": 2.526752284713555, - "grad_norm": 0.0012239270145073533, - "learning_rate": 0.00019999685360853425, - "loss": 46.0, - "step": 15690 - }, - { - "epoch": 2.5269133217923425, - "grad_norm": 0.001530595705844462, - "learning_rate": 0.00019999685320719812, - "loss": 46.0, - "step": 15691 - }, - { - "epoch": 2.52707435887113, - "grad_norm": 0.0008028285228647292, - "learning_rate": 0.0001999968528058364, - "loss": 46.0, - "step": 15692 - }, - { - "epoch": 2.5272353959499174, - "grad_norm": 0.007375949993729591, - "learning_rate": 0.00019999685240444908, - "loss": 46.0, - "step": 15693 - }, - { - "epoch": 2.527396433028705, - "grad_norm": 0.002284626942127943, - "learning_rate": 0.00019999685200303616, - "loss": 46.0, - "step": 15694 - }, - { - "epoch": 2.5275574701074923, - "grad_norm": 0.0019130881410092115, - "learning_rate": 0.00019999685160159764, - "loss": 46.0, - "step": 15695 - }, - { - "epoch": 2.5277185071862798, - "grad_norm": 0.0006606450770050287, - "learning_rate": 0.00019999685120013355, - "loss": 46.0, - "step": 15696 - }, - { - "epoch": 2.5278795442650672, - "grad_norm": 0.0025943140499293804, - "learning_rate": 0.00019999685079864383, - "loss": 46.0, - "step": 15697 - }, - { - "epoch": 2.5280405813438547, - "grad_norm": 0.0006648980197496712, - "learning_rate": 0.00019999685039712856, - "loss": 46.0, - "step": 15698 - }, - { - "epoch": 2.5282016184226417, - "grad_norm": 0.0018283718964084983, - "learning_rate": 0.00019999684999558767, - "loss": 46.0, - "step": 15699 - }, - { - "epoch": 2.528362655501429, - "grad_norm": 0.0010951616568490863, - "learning_rate": 0.0001999968495940212, - "loss": 46.0, - "step": 15700 - }, - { - "epoch": 2.5285236925802166, - "grad_norm": 0.005087762605398893, - "learning_rate": 0.0001999968491924291, - "loss": 46.0, - "step": 15701 - }, - { - "epoch": 2.528684729659004, - "grad_norm": 0.004206470213830471, - "learning_rate": 0.00019999684879081145, - "loss": 46.0, - "step": 15702 - }, - { - "epoch": 2.5288457667377915, - "grad_norm": 0.0051929946057498455, - "learning_rate": 0.0001999968483891682, - "loss": 46.0, - "step": 15703 - }, - { - "epoch": 2.5290068038165785, - "grad_norm": 0.0040488215163350105, - "learning_rate": 0.0001999968479874993, - "loss": 46.0, - "step": 15704 - }, - { - "epoch": 2.529167840895366, - "grad_norm": 0.007053784094750881, - "learning_rate": 0.00019999684758580485, - "loss": 46.0, - "step": 15705 - }, - { - "epoch": 2.5293288779741534, - "grad_norm": 0.008661018684506416, - "learning_rate": 0.00019999684718408482, - "loss": 46.0, - "step": 15706 - }, - { - "epoch": 2.529489915052941, - "grad_norm": 0.001179200946353376, - "learning_rate": 0.00019999684678233918, - "loss": 46.0, - "step": 15707 - }, - { - "epoch": 2.5296509521317283, - "grad_norm": 0.0025416710413992405, - "learning_rate": 0.00019999684638056795, - "loss": 46.0, - "step": 15708 - }, - { - "epoch": 2.5298119892105158, - "grad_norm": 0.003737241495400667, - "learning_rate": 0.0001999968459787711, - "loss": 46.0, - "step": 15709 - }, - { - "epoch": 2.529973026289303, - "grad_norm": 0.007871951907873154, - "learning_rate": 0.0001999968455769487, - "loss": 46.0, - "step": 15710 - }, - { - "epoch": 2.5301340633680907, - "grad_norm": 0.0009385569137521088, - "learning_rate": 0.0001999968451751007, - "loss": 46.0, - "step": 15711 - }, - { - "epoch": 2.530295100446878, - "grad_norm": 0.0017049235757440329, - "learning_rate": 0.0001999968447732271, - "loss": 46.0, - "step": 15712 - }, - { - "epoch": 2.5304561375256656, - "grad_norm": 0.002088160952553153, - "learning_rate": 0.00019999684437132785, - "loss": 46.0, - "step": 15713 - }, - { - "epoch": 2.5306171746044526, - "grad_norm": 0.0007619318785145879, - "learning_rate": 0.00019999684396940305, - "loss": 46.0, - "step": 15714 - }, - { - "epoch": 2.53077821168324, - "grad_norm": 0.0006408804329112172, - "learning_rate": 0.00019999684356745268, - "loss": 46.0, - "step": 15715 - }, - { - "epoch": 2.5309392487620275, - "grad_norm": 0.004502313677221537, - "learning_rate": 0.0001999968431654767, - "loss": 46.0, - "step": 15716 - }, - { - "epoch": 2.531100285840815, - "grad_norm": 0.0038554640486836433, - "learning_rate": 0.0001999968427634751, - "loss": 46.0, - "step": 15717 - }, - { - "epoch": 2.5312613229196024, - "grad_norm": 0.0018654678715392947, - "learning_rate": 0.00019999684236144796, - "loss": 46.0, - "step": 15718 - }, - { - "epoch": 2.5314223599983894, - "grad_norm": 0.004255733452737331, - "learning_rate": 0.00019999684195939516, - "loss": 46.0, - "step": 15719 - }, - { - "epoch": 2.531583397077177, - "grad_norm": 0.0025084849912673235, - "learning_rate": 0.00019999684155731678, - "loss": 46.0, - "step": 15720 - }, - { - "epoch": 2.5317444341559643, - "grad_norm": 0.004545204341411591, - "learning_rate": 0.00019999684115521284, - "loss": 46.0, - "step": 15721 - }, - { - "epoch": 2.5319054712347517, - "grad_norm": 0.0012935440754517913, - "learning_rate": 0.0001999968407530833, - "loss": 46.0, - "step": 15722 - }, - { - "epoch": 2.532066508313539, - "grad_norm": 0.0109461210668087, - "learning_rate": 0.00019999684035092817, - "loss": 46.0, - "step": 15723 - }, - { - "epoch": 2.5322275453923266, - "grad_norm": 0.0017023389227688313, - "learning_rate": 0.00019999683994874744, - "loss": 46.0, - "step": 15724 - }, - { - "epoch": 2.532388582471114, - "grad_norm": 0.0012963255867362022, - "learning_rate": 0.00019999683954654107, - "loss": 46.0, - "step": 15725 - }, - { - "epoch": 2.5325496195499015, - "grad_norm": 0.004191033076494932, - "learning_rate": 0.00019999683914430916, - "loss": 46.0, - "step": 15726 - }, - { - "epoch": 2.532710656628689, - "grad_norm": 0.0024496472906321287, - "learning_rate": 0.00019999683874205164, - "loss": 46.0, - "step": 15727 - }, - { - "epoch": 2.532871693707476, - "grad_norm": 0.0008307733805850148, - "learning_rate": 0.0001999968383397685, - "loss": 46.0, - "step": 15728 - }, - { - "epoch": 2.5330327307862635, - "grad_norm": 0.0008192848763428628, - "learning_rate": 0.0001999968379374598, - "loss": 46.0, - "step": 15729 - }, - { - "epoch": 2.533193767865051, - "grad_norm": 0.003604383207857609, - "learning_rate": 0.0001999968375351255, - "loss": 46.0, - "step": 15730 - }, - { - "epoch": 2.5333548049438384, - "grad_norm": 0.005528864450752735, - "learning_rate": 0.00019999683713276561, - "loss": 46.0, - "step": 15731 - }, - { - "epoch": 2.533515842022626, - "grad_norm": 0.005030097905546427, - "learning_rate": 0.00019999683673038013, - "loss": 46.0, - "step": 15732 - }, - { - "epoch": 2.5336768791014133, - "grad_norm": 0.0011510084150359035, - "learning_rate": 0.00019999683632796904, - "loss": 46.0, - "step": 15733 - }, - { - "epoch": 2.5338379161802003, - "grad_norm": 0.0024968741927295923, - "learning_rate": 0.00019999683592553235, - "loss": 46.0, - "step": 15734 - }, - { - "epoch": 2.5339989532589877, - "grad_norm": 0.0025567039847373962, - "learning_rate": 0.00019999683552307008, - "loss": 46.0, - "step": 15735 - }, - { - "epoch": 2.534159990337775, - "grad_norm": 0.008249803446233273, - "learning_rate": 0.00019999683512058223, - "loss": 46.0, - "step": 15736 - }, - { - "epoch": 2.5343210274165626, - "grad_norm": 0.006332262884825468, - "learning_rate": 0.00019999683471806875, - "loss": 46.0, - "step": 15737 - }, - { - "epoch": 2.53448206449535, - "grad_norm": 0.0017382610822096467, - "learning_rate": 0.00019999683431552972, - "loss": 46.0, - "step": 15738 - }, - { - "epoch": 2.5346431015741375, - "grad_norm": 0.0012596466112881899, - "learning_rate": 0.00019999683391296508, - "loss": 46.0, - "step": 15739 - }, - { - "epoch": 2.534804138652925, - "grad_norm": 0.0007674636435694993, - "learning_rate": 0.00019999683351037482, - "loss": 46.0, - "step": 15740 - }, - { - "epoch": 2.5349651757317124, - "grad_norm": 0.0037965932860970497, - "learning_rate": 0.000199996833107759, - "loss": 46.0, - "step": 15741 - }, - { - "epoch": 2.5351262128105, - "grad_norm": 0.0009536274010315537, - "learning_rate": 0.00019999683270511756, - "loss": 46.0, - "step": 15742 - }, - { - "epoch": 2.535287249889287, - "grad_norm": 0.004552791360765696, - "learning_rate": 0.0001999968323024505, - "loss": 46.0, - "step": 15743 - }, - { - "epoch": 2.5354482869680743, - "grad_norm": 0.0035578443203121424, - "learning_rate": 0.0001999968318997579, - "loss": 46.0, - "step": 15744 - }, - { - "epoch": 2.535609324046862, - "grad_norm": 0.0018478657584637403, - "learning_rate": 0.0001999968314970397, - "loss": 46.0, - "step": 15745 - }, - { - "epoch": 2.5357703611256492, - "grad_norm": 0.0032003531232476234, - "learning_rate": 0.00019999683109429587, - "loss": 46.0, - "step": 15746 - }, - { - "epoch": 2.5359313982044367, - "grad_norm": 0.0011525305453687906, - "learning_rate": 0.00019999683069152647, - "loss": 46.0, - "step": 15747 - }, - { - "epoch": 2.5360924352832237, - "grad_norm": 0.0024513343814760447, - "learning_rate": 0.00019999683028873149, - "loss": 46.0, - "step": 15748 - }, - { - "epoch": 2.536253472362011, - "grad_norm": 0.0009150298428721726, - "learning_rate": 0.0001999968298859109, - "loss": 46.0, - "step": 15749 - }, - { - "epoch": 2.5364145094407986, - "grad_norm": 0.008602021262049675, - "learning_rate": 0.0001999968294830647, - "loss": 46.0, - "step": 15750 - }, - { - "epoch": 2.536575546519586, - "grad_norm": 0.0025046514347195625, - "learning_rate": 0.00019999682908019293, - "loss": 46.0, - "step": 15751 - }, - { - "epoch": 2.5367365835983735, - "grad_norm": 0.004044221714138985, - "learning_rate": 0.00019999682867729557, - "loss": 46.0, - "step": 15752 - }, - { - "epoch": 2.536897620677161, - "grad_norm": 0.0028303838334977627, - "learning_rate": 0.0001999968282743726, - "loss": 46.0, - "step": 15753 - }, - { - "epoch": 2.5370586577559484, - "grad_norm": 0.003672609804198146, - "learning_rate": 0.00019999682787142403, - "loss": 46.0, - "step": 15754 - }, - { - "epoch": 2.537219694834736, - "grad_norm": 0.0014963069697842002, - "learning_rate": 0.00019999682746844988, - "loss": 46.0, - "step": 15755 - }, - { - "epoch": 2.5373807319135233, - "grad_norm": 0.0022372633684426546, - "learning_rate": 0.00019999682706545012, - "loss": 46.0, - "step": 15756 - }, - { - "epoch": 2.5375417689923103, - "grad_norm": 0.012474932707846165, - "learning_rate": 0.00019999682666242477, - "loss": 46.0, - "step": 15757 - }, - { - "epoch": 2.5377028060710978, - "grad_norm": 0.006394247990101576, - "learning_rate": 0.00019999682625937383, - "loss": 46.0, - "step": 15758 - }, - { - "epoch": 2.5378638431498852, - "grad_norm": 0.0035678413696587086, - "learning_rate": 0.0001999968258562973, - "loss": 46.0, - "step": 15759 - }, - { - "epoch": 2.5380248802286727, - "grad_norm": 0.0031321633141487837, - "learning_rate": 0.0001999968254531952, - "loss": 46.0, - "step": 15760 - }, - { - "epoch": 2.53818591730746, - "grad_norm": 0.0026392501313239336, - "learning_rate": 0.00019999682505006746, - "loss": 46.0, - "step": 15761 - }, - { - "epoch": 2.5383469543862476, - "grad_norm": 0.0024764128029346466, - "learning_rate": 0.00019999682464691415, - "loss": 46.0, - "step": 15762 - }, - { - "epoch": 2.5385079914650346, - "grad_norm": 0.0013382750330492854, - "learning_rate": 0.00019999682424373525, - "loss": 46.0, - "step": 15763 - }, - { - "epoch": 2.538669028543822, - "grad_norm": 0.004467046353965998, - "learning_rate": 0.00019999682384053076, - "loss": 46.0, - "step": 15764 - }, - { - "epoch": 2.5388300656226095, - "grad_norm": 0.009162072092294693, - "learning_rate": 0.00019999682343730063, - "loss": 46.0, - "step": 15765 - }, - { - "epoch": 2.538991102701397, - "grad_norm": 0.00311396480537951, - "learning_rate": 0.00019999682303404497, - "loss": 46.0, - "step": 15766 - }, - { - "epoch": 2.5391521397801844, - "grad_norm": 0.005254401359707117, - "learning_rate": 0.00019999682263076367, - "loss": 46.0, - "step": 15767 - }, - { - "epoch": 2.539313176858972, - "grad_norm": 0.0014897024957463145, - "learning_rate": 0.0001999968222274568, - "loss": 46.0, - "step": 15768 - }, - { - "epoch": 2.5394742139377593, - "grad_norm": 0.004809871781617403, - "learning_rate": 0.00019999682182412432, - "loss": 46.0, - "step": 15769 - }, - { - "epoch": 2.5396352510165467, - "grad_norm": 0.002005638089030981, - "learning_rate": 0.00019999682142076629, - "loss": 46.0, - "step": 15770 - }, - { - "epoch": 2.539796288095334, - "grad_norm": 0.016677599400281906, - "learning_rate": 0.0001999968210173826, - "loss": 46.0, - "step": 15771 - }, - { - "epoch": 2.539957325174121, - "grad_norm": 0.0007019637268967927, - "learning_rate": 0.00019999682061397337, - "loss": 46.0, - "step": 15772 - }, - { - "epoch": 2.5401183622529087, - "grad_norm": 0.0022845263592898846, - "learning_rate": 0.00019999682021053849, - "loss": 46.0, - "step": 15773 - }, - { - "epoch": 2.540279399331696, - "grad_norm": 0.0008481305558234453, - "learning_rate": 0.00019999681980707807, - "loss": 46.0, - "step": 15774 - }, - { - "epoch": 2.5404404364104836, - "grad_norm": 0.0017113655339926481, - "learning_rate": 0.00019999681940359202, - "loss": 46.0, - "step": 15775 - }, - { - "epoch": 2.540601473489271, - "grad_norm": 0.006946255918592215, - "learning_rate": 0.0001999968190000804, - "loss": 46.0, - "step": 15776 - }, - { - "epoch": 2.540762510568058, - "grad_norm": 0.0022909340914338827, - "learning_rate": 0.00019999681859654317, - "loss": 46.0, - "step": 15777 - }, - { - "epoch": 2.5409235476468455, - "grad_norm": 0.0024680462665855885, - "learning_rate": 0.00019999681819298038, - "loss": 46.0, - "step": 15778 - }, - { - "epoch": 2.541084584725633, - "grad_norm": 0.003536262083798647, - "learning_rate": 0.00019999681778939195, - "loss": 46.0, - "step": 15779 - }, - { - "epoch": 2.5412456218044204, - "grad_norm": 0.0143395084887743, - "learning_rate": 0.00019999681738577793, - "loss": 46.0, - "step": 15780 - }, - { - "epoch": 2.541406658883208, - "grad_norm": 0.0016347103519365191, - "learning_rate": 0.00019999681698213835, - "loss": 46.0, - "step": 15781 - }, - { - "epoch": 2.5415676959619953, - "grad_norm": 0.001784398453310132, - "learning_rate": 0.00019999681657847313, - "loss": 46.0, - "step": 15782 - }, - { - "epoch": 2.5417287330407827, - "grad_norm": 0.003143638838082552, - "learning_rate": 0.00019999681617478235, - "loss": 46.0, - "step": 15783 - }, - { - "epoch": 2.54188977011957, - "grad_norm": 0.001713858568109572, - "learning_rate": 0.00019999681577106596, - "loss": 46.0, - "step": 15784 - }, - { - "epoch": 2.5420508071983576, - "grad_norm": 0.000525288050994277, - "learning_rate": 0.000199996815367324, - "loss": 46.0, - "step": 15785 - }, - { - "epoch": 2.542211844277145, - "grad_norm": 0.0009199513588100672, - "learning_rate": 0.00019999681496355644, - "loss": 46.0, - "step": 15786 - }, - { - "epoch": 2.542372881355932, - "grad_norm": 0.0026275611016899347, - "learning_rate": 0.00019999681455976326, - "loss": 46.0, - "step": 15787 - }, - { - "epoch": 2.5425339184347195, - "grad_norm": 0.0056701297871768475, - "learning_rate": 0.0001999968141559445, - "loss": 46.0, - "step": 15788 - }, - { - "epoch": 2.542694955513507, - "grad_norm": 0.004300985485315323, - "learning_rate": 0.00019999681375210016, - "loss": 46.0, - "step": 15789 - }, - { - "epoch": 2.5428559925922944, - "grad_norm": 0.0016123721143230796, - "learning_rate": 0.0001999968133482302, - "loss": 46.0, - "step": 15790 - }, - { - "epoch": 2.543017029671082, - "grad_norm": 0.002945336978882551, - "learning_rate": 0.00019999681294433468, - "loss": 46.0, - "step": 15791 - }, - { - "epoch": 2.543178066749869, - "grad_norm": 0.006586259696632624, - "learning_rate": 0.00019999681254041353, - "loss": 46.0, - "step": 15792 - }, - { - "epoch": 2.5433391038286564, - "grad_norm": 0.005842157639563084, - "learning_rate": 0.00019999681213646683, - "loss": 46.0, - "step": 15793 - }, - { - "epoch": 2.543500140907444, - "grad_norm": 0.0006493720575235784, - "learning_rate": 0.00019999681173249448, - "loss": 46.0, - "step": 15794 - }, - { - "epoch": 2.5436611779862313, - "grad_norm": 0.005443824455142021, - "learning_rate": 0.00019999681132849657, - "loss": 46.0, - "step": 15795 - }, - { - "epoch": 2.5438222150650187, - "grad_norm": 0.016953447833657265, - "learning_rate": 0.00019999681092447305, - "loss": 46.0, - "step": 15796 - }, - { - "epoch": 2.543983252143806, - "grad_norm": 0.0009105742210522294, - "learning_rate": 0.00019999681052042397, - "loss": 46.0, - "step": 15797 - }, - { - "epoch": 2.5441442892225936, - "grad_norm": 0.005569086410105228, - "learning_rate": 0.00019999681011634925, - "loss": 46.0, - "step": 15798 - }, - { - "epoch": 2.544305326301381, - "grad_norm": 0.0008822057861834764, - "learning_rate": 0.00019999680971224897, - "loss": 46.0, - "step": 15799 - }, - { - "epoch": 2.5444663633801685, - "grad_norm": 0.0061337389051914215, - "learning_rate": 0.00019999680930812307, - "loss": 46.0, - "step": 15800 - }, - { - "epoch": 2.5446274004589555, - "grad_norm": 0.005552330985665321, - "learning_rate": 0.00019999680890397158, - "loss": 46.0, - "step": 15801 - }, - { - "epoch": 2.544788437537743, - "grad_norm": 0.005401782225817442, - "learning_rate": 0.0001999968084997945, - "loss": 46.0, - "step": 15802 - }, - { - "epoch": 2.5449494746165304, - "grad_norm": 0.0020015074405819178, - "learning_rate": 0.00019999680809559185, - "loss": 46.0, - "step": 15803 - }, - { - "epoch": 2.545110511695318, - "grad_norm": 0.00097870163153857, - "learning_rate": 0.0001999968076913636, - "loss": 46.0, - "step": 15804 - }, - { - "epoch": 2.5452715487741053, - "grad_norm": 0.003985776100307703, - "learning_rate": 0.00019999680728710972, - "loss": 46.0, - "step": 15805 - }, - { - "epoch": 2.545432585852893, - "grad_norm": 0.0039864699356257915, - "learning_rate": 0.00019999680688283027, - "loss": 46.0, - "step": 15806 - }, - { - "epoch": 2.54559362293168, - "grad_norm": 0.010370858013629913, - "learning_rate": 0.00019999680647852524, - "loss": 46.0, - "step": 15807 - }, - { - "epoch": 2.5457546600104672, - "grad_norm": 0.0010867168894037604, - "learning_rate": 0.0001999968060741946, - "loss": 46.0, - "step": 15808 - }, - { - "epoch": 2.5459156970892547, - "grad_norm": 0.006409656722098589, - "learning_rate": 0.00019999680566983835, - "loss": 46.0, - "step": 15809 - }, - { - "epoch": 2.546076734168042, - "grad_norm": 0.007434573955833912, - "learning_rate": 0.00019999680526545653, - "loss": 46.0, - "step": 15810 - }, - { - "epoch": 2.5462377712468296, - "grad_norm": 0.0016469808761030436, - "learning_rate": 0.00019999680486104912, - "loss": 46.0, - "step": 15811 - }, - { - "epoch": 2.546398808325617, - "grad_norm": 0.00819756742566824, - "learning_rate": 0.0001999968044566161, - "loss": 46.0, - "step": 15812 - }, - { - "epoch": 2.5465598454044045, - "grad_norm": 0.0026401225477457047, - "learning_rate": 0.00019999680405215748, - "loss": 46.0, - "step": 15813 - }, - { - "epoch": 2.546720882483192, - "grad_norm": 0.001077825902029872, - "learning_rate": 0.00019999680364767328, - "loss": 46.0, - "step": 15814 - }, - { - "epoch": 2.5468819195619794, - "grad_norm": 0.001470989198423922, - "learning_rate": 0.00019999680324316347, - "loss": 46.0, - "step": 15815 - }, - { - "epoch": 2.5470429566407664, - "grad_norm": 0.005357936955988407, - "learning_rate": 0.0001999968028386281, - "loss": 46.0, - "step": 15816 - }, - { - "epoch": 2.547203993719554, - "grad_norm": 0.0006705376436002553, - "learning_rate": 0.0001999968024340671, - "loss": 46.0, - "step": 15817 - }, - { - "epoch": 2.5473650307983413, - "grad_norm": 0.010283122770488262, - "learning_rate": 0.00019999680202948054, - "loss": 46.0, - "step": 15818 - }, - { - "epoch": 2.5475260678771288, - "grad_norm": 0.002999626798555255, - "learning_rate": 0.00019999680162486835, - "loss": 46.0, - "step": 15819 - }, - { - "epoch": 2.547687104955916, - "grad_norm": 0.0015617627650499344, - "learning_rate": 0.0001999968012202306, - "loss": 46.0, - "step": 15820 - }, - { - "epoch": 2.5478481420347032, - "grad_norm": 0.003764509689062834, - "learning_rate": 0.0001999968008155672, - "loss": 46.0, - "step": 15821 - }, - { - "epoch": 2.5480091791134907, - "grad_norm": 0.0030546344351023436, - "learning_rate": 0.00019999680041087828, - "loss": 46.0, - "step": 15822 - }, - { - "epoch": 2.548170216192278, - "grad_norm": 0.0035476309712976217, - "learning_rate": 0.00019999680000616372, - "loss": 46.0, - "step": 15823 - }, - { - "epoch": 2.5483312532710656, - "grad_norm": 0.004039666149765253, - "learning_rate": 0.0001999967996014236, - "loss": 46.0, - "step": 15824 - }, - { - "epoch": 2.548492290349853, - "grad_norm": 0.002095070667564869, - "learning_rate": 0.00019999679919665783, - "loss": 46.0, - "step": 15825 - }, - { - "epoch": 2.5486533274286405, - "grad_norm": 0.009739681147038937, - "learning_rate": 0.0001999967987918665, - "loss": 46.0, - "step": 15826 - }, - { - "epoch": 2.548814364507428, - "grad_norm": 0.0004628387396223843, - "learning_rate": 0.00019999679838704956, - "loss": 46.0, - "step": 15827 - }, - { - "epoch": 2.5489754015862154, - "grad_norm": 0.0025958239566534758, - "learning_rate": 0.00019999679798220706, - "loss": 46.0, - "step": 15828 - }, - { - "epoch": 2.549136438665003, - "grad_norm": 0.0018363907001912594, - "learning_rate": 0.00019999679757733895, - "loss": 46.0, - "step": 15829 - }, - { - "epoch": 2.54929747574379, - "grad_norm": 0.007587160915136337, - "learning_rate": 0.00019999679717244524, - "loss": 46.0, - "step": 15830 - }, - { - "epoch": 2.5494585128225773, - "grad_norm": 0.0028079207986593246, - "learning_rate": 0.00019999679676752593, - "loss": 46.0, - "step": 15831 - }, - { - "epoch": 2.5496195499013647, - "grad_norm": 0.0011105615412816405, - "learning_rate": 0.00019999679636258105, - "loss": 46.0, - "step": 15832 - }, - { - "epoch": 2.549780586980152, - "grad_norm": 0.005107423290610313, - "learning_rate": 0.00019999679595761056, - "loss": 46.0, - "step": 15833 - }, - { - "epoch": 2.5499416240589396, - "grad_norm": 0.0034990699496120214, - "learning_rate": 0.00019999679555261445, - "loss": 46.0, - "step": 15834 - }, - { - "epoch": 2.550102661137727, - "grad_norm": 0.01129177026450634, - "learning_rate": 0.00019999679514759276, - "loss": 46.0, - "step": 15835 - }, - { - "epoch": 2.550263698216514, - "grad_norm": 0.005279368255287409, - "learning_rate": 0.0001999967947425455, - "loss": 46.0, - "step": 15836 - }, - { - "epoch": 2.5504247352953016, - "grad_norm": 0.0017063929699361324, - "learning_rate": 0.00019999679433747262, - "loss": 46.0, - "step": 15837 - }, - { - "epoch": 2.550585772374089, - "grad_norm": 0.0009304551058448851, - "learning_rate": 0.0001999967939323742, - "loss": 46.0, - "step": 15838 - }, - { - "epoch": 2.5507468094528765, - "grad_norm": 0.01194256916642189, - "learning_rate": 0.00019999679352725012, - "loss": 46.0, - "step": 15839 - }, - { - "epoch": 2.550907846531664, - "grad_norm": 0.001702172332443297, - "learning_rate": 0.00019999679312210047, - "loss": 46.0, - "step": 15840 - }, - { - "epoch": 2.5510688836104514, - "grad_norm": 0.0012780199758708477, - "learning_rate": 0.00019999679271692525, - "loss": 46.0, - "step": 15841 - }, - { - "epoch": 2.551229920689239, - "grad_norm": 0.0014256832655519247, - "learning_rate": 0.0001999967923117244, - "loss": 46.0, - "step": 15842 - }, - { - "epoch": 2.5513909577680263, - "grad_norm": 0.005153740756213665, - "learning_rate": 0.00019999679190649795, - "loss": 46.0, - "step": 15843 - }, - { - "epoch": 2.5515519948468137, - "grad_norm": 0.003195536555722356, - "learning_rate": 0.00019999679150124595, - "loss": 46.0, - "step": 15844 - }, - { - "epoch": 2.5517130319256007, - "grad_norm": 0.007489427458494902, - "learning_rate": 0.00019999679109596833, - "loss": 46.0, - "step": 15845 - }, - { - "epoch": 2.551874069004388, - "grad_norm": 0.001013877335935831, - "learning_rate": 0.0001999967906906651, - "loss": 46.0, - "step": 15846 - }, - { - "epoch": 2.5520351060831756, - "grad_norm": 0.0009140408365055919, - "learning_rate": 0.0001999967902853363, - "loss": 46.0, - "step": 15847 - }, - { - "epoch": 2.552196143161963, - "grad_norm": 0.0032715012785047293, - "learning_rate": 0.0001999967898799819, - "loss": 46.0, - "step": 15848 - }, - { - "epoch": 2.5523571802407505, - "grad_norm": 0.004841975402086973, - "learning_rate": 0.0001999967894746019, - "loss": 46.0, - "step": 15849 - }, - { - "epoch": 2.552518217319538, - "grad_norm": 0.004513719584792852, - "learning_rate": 0.00019999678906919632, - "loss": 46.0, - "step": 15850 - }, - { - "epoch": 2.552679254398325, - "grad_norm": 0.0011710169492289424, - "learning_rate": 0.00019999678866376513, - "loss": 46.0, - "step": 15851 - }, - { - "epoch": 2.5528402914771124, - "grad_norm": 0.004223324358463287, - "learning_rate": 0.00019999678825830834, - "loss": 46.0, - "step": 15852 - }, - { - "epoch": 2.5530013285559, - "grad_norm": 0.013558804988861084, - "learning_rate": 0.00019999678785282598, - "loss": 46.0, - "step": 15853 - }, - { - "epoch": 2.5531623656346873, - "grad_norm": 0.01734413020312786, - "learning_rate": 0.00019999678744731802, - "loss": 46.0, - "step": 15854 - }, - { - "epoch": 2.553323402713475, - "grad_norm": 0.0027816323563456535, - "learning_rate": 0.00019999678704178447, - "loss": 46.0, - "step": 15855 - }, - { - "epoch": 2.5534844397922623, - "grad_norm": 0.010898136533796787, - "learning_rate": 0.0001999967866362253, - "loss": 46.0, - "step": 15856 - }, - { - "epoch": 2.5536454768710497, - "grad_norm": 0.009072975255548954, - "learning_rate": 0.00019999678623064054, - "loss": 46.0, - "step": 15857 - }, - { - "epoch": 2.553806513949837, - "grad_norm": 0.0017958837561309338, - "learning_rate": 0.00019999678582503024, - "loss": 46.0, - "step": 15858 - }, - { - "epoch": 2.5539675510286246, - "grad_norm": 0.0006768766324967146, - "learning_rate": 0.0001999967854193943, - "loss": 46.0, - "step": 15859 - }, - { - "epoch": 2.5541285881074116, - "grad_norm": 0.006000916939228773, - "learning_rate": 0.00019999678501373276, - "loss": 46.0, - "step": 15860 - }, - { - "epoch": 2.554289625186199, - "grad_norm": 0.005242292769253254, - "learning_rate": 0.00019999678460804564, - "loss": 46.0, - "step": 15861 - }, - { - "epoch": 2.5544506622649865, - "grad_norm": 0.009133460000157356, - "learning_rate": 0.00019999678420233293, - "loss": 46.0, - "step": 15862 - }, - { - "epoch": 2.554611699343774, - "grad_norm": 0.0031896091531962156, - "learning_rate": 0.00019999678379659458, - "loss": 46.0, - "step": 15863 - }, - { - "epoch": 2.5547727364225614, - "grad_norm": 0.0012896254193037748, - "learning_rate": 0.00019999678339083067, - "loss": 46.0, - "step": 15864 - }, - { - "epoch": 2.5549337735013484, - "grad_norm": 0.001649995450861752, - "learning_rate": 0.0001999967829850412, - "loss": 46.0, - "step": 15865 - }, - { - "epoch": 2.555094810580136, - "grad_norm": 0.004237490706145763, - "learning_rate": 0.00019999678257922609, - "loss": 46.0, - "step": 15866 - }, - { - "epoch": 2.5552558476589233, - "grad_norm": 0.001801868318580091, - "learning_rate": 0.00019999678217338541, - "loss": 46.0, - "step": 15867 - }, - { - "epoch": 2.555416884737711, - "grad_norm": 0.0009800817351788282, - "learning_rate": 0.00019999678176751913, - "loss": 46.0, - "step": 15868 - }, - { - "epoch": 2.5555779218164982, - "grad_norm": 0.0007903212099336088, - "learning_rate": 0.00019999678136162726, - "loss": 46.0, - "step": 15869 - }, - { - "epoch": 2.5557389588952857, - "grad_norm": 0.0025581642985343933, - "learning_rate": 0.00019999678095570977, - "loss": 46.0, - "step": 15870 - }, - { - "epoch": 2.555899995974073, - "grad_norm": 0.01888599619269371, - "learning_rate": 0.00019999678054976672, - "loss": 46.0, - "step": 15871 - }, - { - "epoch": 2.5560610330528606, - "grad_norm": 0.000634224561508745, - "learning_rate": 0.00019999678014379806, - "loss": 46.0, - "step": 15872 - }, - { - "epoch": 2.556222070131648, - "grad_norm": 0.004928269423544407, - "learning_rate": 0.0001999967797378038, - "loss": 46.0, - "step": 15873 - }, - { - "epoch": 2.556383107210435, - "grad_norm": 0.009454751387238503, - "learning_rate": 0.00019999677933178395, - "loss": 46.0, - "step": 15874 - }, - { - "epoch": 2.5565441442892225, - "grad_norm": 0.005087968427687883, - "learning_rate": 0.00019999677892573853, - "loss": 46.0, - "step": 15875 - }, - { - "epoch": 2.55670518136801, - "grad_norm": 0.0016252817586064339, - "learning_rate": 0.00019999677851966746, - "loss": 46.0, - "step": 15876 - }, - { - "epoch": 2.5568662184467974, - "grad_norm": 0.0022396978456526995, - "learning_rate": 0.00019999677811357084, - "loss": 46.0, - "step": 15877 - }, - { - "epoch": 2.557027255525585, - "grad_norm": 0.001236640033312142, - "learning_rate": 0.00019999677770744865, - "loss": 46.0, - "step": 15878 - }, - { - "epoch": 2.5571882926043723, - "grad_norm": 0.0020077975932508707, - "learning_rate": 0.00019999677730130083, - "loss": 46.0, - "step": 15879 - }, - { - "epoch": 2.5573493296831593, - "grad_norm": 0.0029608351178467274, - "learning_rate": 0.00019999677689512741, - "loss": 46.0, - "step": 15880 - }, - { - "epoch": 2.5575103667619468, - "grad_norm": 0.0064052557572722435, - "learning_rate": 0.0001999967764889284, - "loss": 46.0, - "step": 15881 - }, - { - "epoch": 2.557671403840734, - "grad_norm": 0.0032616094686090946, - "learning_rate": 0.0001999967760827038, - "loss": 46.0, - "step": 15882 - }, - { - "epoch": 2.5578324409195217, - "grad_norm": 0.0058501046150922775, - "learning_rate": 0.0001999967756764536, - "loss": 46.0, - "step": 15883 - }, - { - "epoch": 2.557993477998309, - "grad_norm": 0.004450731445103884, - "learning_rate": 0.0001999967752701778, - "loss": 46.0, - "step": 15884 - }, - { - "epoch": 2.5581545150770966, - "grad_norm": 0.002734450390562415, - "learning_rate": 0.00019999677486387643, - "loss": 46.0, - "step": 15885 - }, - { - "epoch": 2.558315552155884, - "grad_norm": 0.004494440741837025, - "learning_rate": 0.00019999677445754944, - "loss": 46.0, - "step": 15886 - }, - { - "epoch": 2.5584765892346715, - "grad_norm": 0.0019422023324295878, - "learning_rate": 0.0001999967740511969, - "loss": 46.0, - "step": 15887 - }, - { - "epoch": 2.558637626313459, - "grad_norm": 0.0033686214592307806, - "learning_rate": 0.00019999677364481873, - "loss": 46.0, - "step": 15888 - }, - { - "epoch": 2.558798663392246, - "grad_norm": 0.00145612598862499, - "learning_rate": 0.00019999677323841498, - "loss": 46.0, - "step": 15889 - }, - { - "epoch": 2.5589597004710334, - "grad_norm": 0.0010881536873057485, - "learning_rate": 0.00019999677283198564, - "loss": 46.0, - "step": 15890 - }, - { - "epoch": 2.559120737549821, - "grad_norm": 0.0022914018481969833, - "learning_rate": 0.00019999677242553068, - "loss": 46.0, - "step": 15891 - }, - { - "epoch": 2.5592817746286083, - "grad_norm": 0.0010935964528471231, - "learning_rate": 0.00019999677201905014, - "loss": 46.0, - "step": 15892 - }, - { - "epoch": 2.5594428117073957, - "grad_norm": 0.00904909148812294, - "learning_rate": 0.00019999677161254402, - "loss": 46.0, - "step": 15893 - }, - { - "epoch": 2.5596038487861827, - "grad_norm": 0.00311499391682446, - "learning_rate": 0.00019999677120601227, - "loss": 46.0, - "step": 15894 - }, - { - "epoch": 2.55976488586497, - "grad_norm": 0.0005413953913375735, - "learning_rate": 0.00019999677079945494, - "loss": 46.0, - "step": 15895 - }, - { - "epoch": 2.5599259229437576, - "grad_norm": 0.00992573518306017, - "learning_rate": 0.00019999677039287206, - "loss": 46.0, - "step": 15896 - }, - { - "epoch": 2.560086960022545, - "grad_norm": 0.0009334718342870474, - "learning_rate": 0.00019999676998626352, - "loss": 46.0, - "step": 15897 - }, - { - "epoch": 2.5602479971013326, - "grad_norm": 0.0042174989357590675, - "learning_rate": 0.00019999676957962943, - "loss": 46.0, - "step": 15898 - }, - { - "epoch": 2.56040903418012, - "grad_norm": 0.002316203899681568, - "learning_rate": 0.00019999676917296976, - "loss": 46.0, - "step": 15899 - }, - { - "epoch": 2.5605700712589075, - "grad_norm": 0.0018187230452895164, - "learning_rate": 0.00019999676876628444, - "loss": 46.0, - "step": 15900 - }, - { - "epoch": 2.560731108337695, - "grad_norm": 0.0043976749293506145, - "learning_rate": 0.00019999676835957356, - "loss": 46.0, - "step": 15901 - }, - { - "epoch": 2.5608921454164824, - "grad_norm": 0.0006605296512134373, - "learning_rate": 0.0001999967679528371, - "loss": 46.0, - "step": 15902 - }, - { - "epoch": 2.56105318249527, - "grad_norm": 0.0021460733842104673, - "learning_rate": 0.00019999676754607498, - "loss": 46.0, - "step": 15903 - }, - { - "epoch": 2.561214219574057, - "grad_norm": 0.004835005849599838, - "learning_rate": 0.00019999676713928732, - "loss": 46.0, - "step": 15904 - }, - { - "epoch": 2.5613752566528443, - "grad_norm": 0.000948254601098597, - "learning_rate": 0.00019999676673247406, - "loss": 46.0, - "step": 15905 - }, - { - "epoch": 2.5615362937316317, - "grad_norm": 0.0014655952109023929, - "learning_rate": 0.00019999676632563522, - "loss": 46.0, - "step": 15906 - }, - { - "epoch": 2.561697330810419, - "grad_norm": 0.0012304936535656452, - "learning_rate": 0.00019999676591877076, - "loss": 46.0, - "step": 15907 - }, - { - "epoch": 2.5618583678892066, - "grad_norm": 0.0010085111716762185, - "learning_rate": 0.00019999676551188072, - "loss": 46.0, - "step": 15908 - }, - { - "epoch": 2.5620194049679936, - "grad_norm": 0.0017989878542721272, - "learning_rate": 0.0001999967651049651, - "loss": 46.0, - "step": 15909 - }, - { - "epoch": 2.562180442046781, - "grad_norm": 0.0007498011691495776, - "learning_rate": 0.00019999676469802387, - "loss": 46.0, - "step": 15910 - }, - { - "epoch": 2.5623414791255685, - "grad_norm": 0.00799358170479536, - "learning_rate": 0.00019999676429105704, - "loss": 46.0, - "step": 15911 - }, - { - "epoch": 2.562502516204356, - "grad_norm": 0.0011987709440290928, - "learning_rate": 0.00019999676388406462, - "loss": 46.0, - "step": 15912 - }, - { - "epoch": 2.5626635532831434, - "grad_norm": 0.0016713483491912484, - "learning_rate": 0.00019999676347704659, - "loss": 46.0, - "step": 15913 - }, - { - "epoch": 2.562824590361931, - "grad_norm": 0.0010146618587896228, - "learning_rate": 0.000199996763070003, - "loss": 46.0, - "step": 15914 - }, - { - "epoch": 2.5629856274407183, - "grad_norm": 0.0006958627491258085, - "learning_rate": 0.00019999676266293376, - "loss": 46.0, - "step": 15915 - }, - { - "epoch": 2.563146664519506, - "grad_norm": 0.002969681750983, - "learning_rate": 0.000199996762255839, - "loss": 46.0, - "step": 15916 - }, - { - "epoch": 2.5633077015982932, - "grad_norm": 0.00719532510265708, - "learning_rate": 0.00019999676184871858, - "loss": 46.0, - "step": 15917 - }, - { - "epoch": 2.5634687386770802, - "grad_norm": 0.00809550005942583, - "learning_rate": 0.0001999967614415726, - "loss": 46.0, - "step": 15918 - }, - { - "epoch": 2.5636297757558677, - "grad_norm": 0.0008598658023402095, - "learning_rate": 0.00019999676103440103, - "loss": 46.0, - "step": 15919 - }, - { - "epoch": 2.563790812834655, - "grad_norm": 0.0015236702747642994, - "learning_rate": 0.00019999676062720388, - "loss": 46.0, - "step": 15920 - }, - { - "epoch": 2.5639518499134426, - "grad_norm": 0.007354318164288998, - "learning_rate": 0.00019999676021998107, - "loss": 46.0, - "step": 15921 - }, - { - "epoch": 2.56411288699223, - "grad_norm": 0.004132983740419149, - "learning_rate": 0.00019999675981273272, - "loss": 46.0, - "step": 15922 - }, - { - "epoch": 2.5642739240710175, - "grad_norm": 0.0035002650693058968, - "learning_rate": 0.00019999675940545876, - "loss": 46.0, - "step": 15923 - }, - { - "epoch": 2.5644349611498045, - "grad_norm": 0.007502646651118994, - "learning_rate": 0.00019999675899815925, - "loss": 46.0, - "step": 15924 - }, - { - "epoch": 2.564595998228592, - "grad_norm": 0.007813737727701664, - "learning_rate": 0.00019999675859083408, - "loss": 46.0, - "step": 15925 - }, - { - "epoch": 2.5647570353073794, - "grad_norm": 0.00215246737934649, - "learning_rate": 0.00019999675818348336, - "loss": 46.0, - "step": 15926 - }, - { - "epoch": 2.564918072386167, - "grad_norm": 0.002319880062714219, - "learning_rate": 0.00019999675777610703, - "loss": 46.0, - "step": 15927 - }, - { - "epoch": 2.5650791094649543, - "grad_norm": 0.0009606433450244367, - "learning_rate": 0.0001999967573687051, - "loss": 46.0, - "step": 15928 - }, - { - "epoch": 2.5652401465437418, - "grad_norm": 0.0005641624447889626, - "learning_rate": 0.00019999675696127757, - "loss": 46.0, - "step": 15929 - }, - { - "epoch": 2.5654011836225292, - "grad_norm": 0.0009018388227559626, - "learning_rate": 0.00019999675655382447, - "loss": 46.0, - "step": 15930 - }, - { - "epoch": 2.5655622207013167, - "grad_norm": 0.004454219713807106, - "learning_rate": 0.00019999675614634573, - "loss": 46.0, - "step": 15931 - }, - { - "epoch": 2.565723257780104, - "grad_norm": 0.0028883586637675762, - "learning_rate": 0.00019999675573884144, - "loss": 46.0, - "step": 15932 - }, - { - "epoch": 2.565884294858891, - "grad_norm": 0.009811880998313427, - "learning_rate": 0.00019999675533131158, - "loss": 46.0, - "step": 15933 - }, - { - "epoch": 2.5660453319376786, - "grad_norm": 0.0022590751759707928, - "learning_rate": 0.00019999675492375608, - "loss": 46.0, - "step": 15934 - }, - { - "epoch": 2.566206369016466, - "grad_norm": 0.002512830076739192, - "learning_rate": 0.000199996754516175, - "loss": 46.0, - "step": 15935 - }, - { - "epoch": 2.5663674060952535, - "grad_norm": 0.0011828721035271883, - "learning_rate": 0.00019999675410856832, - "loss": 46.0, - "step": 15936 - }, - { - "epoch": 2.566528443174041, - "grad_norm": 0.004234505817294121, - "learning_rate": 0.00019999675370093605, - "loss": 46.0, - "step": 15937 - }, - { - "epoch": 2.566689480252828, - "grad_norm": 0.0024975440464913845, - "learning_rate": 0.00019999675329327818, - "loss": 46.0, - "step": 15938 - }, - { - "epoch": 2.5668505173316154, - "grad_norm": 0.006123023573309183, - "learning_rate": 0.00019999675288559472, - "loss": 46.0, - "step": 15939 - }, - { - "epoch": 2.567011554410403, - "grad_norm": 0.0021886578761041164, - "learning_rate": 0.00019999675247788567, - "loss": 46.0, - "step": 15940 - }, - { - "epoch": 2.5671725914891903, - "grad_norm": 0.0011832137824967504, - "learning_rate": 0.00019999675207015103, - "loss": 46.0, - "step": 15941 - }, - { - "epoch": 2.5673336285679778, - "grad_norm": 0.0157995093613863, - "learning_rate": 0.00019999675166239078, - "loss": 46.0, - "step": 15942 - }, - { - "epoch": 2.567494665646765, - "grad_norm": 0.001965870847925544, - "learning_rate": 0.00019999675125460494, - "loss": 46.0, - "step": 15943 - }, - { - "epoch": 2.5676557027255527, - "grad_norm": 0.0033829710446298122, - "learning_rate": 0.00019999675084679351, - "loss": 46.0, - "step": 15944 - }, - { - "epoch": 2.56781673980434, - "grad_norm": 0.0032487178687006235, - "learning_rate": 0.0001999967504389565, - "loss": 46.0, - "step": 15945 - }, - { - "epoch": 2.5679777768831276, - "grad_norm": 0.0016249909531325102, - "learning_rate": 0.00019999675003109387, - "loss": 46.0, - "step": 15946 - }, - { - "epoch": 2.5681388139619146, - "grad_norm": 0.004730153363198042, - "learning_rate": 0.00019999674962320566, - "loss": 46.0, - "step": 15947 - }, - { - "epoch": 2.568299851040702, - "grad_norm": 0.0010158108780160546, - "learning_rate": 0.00019999674921529186, - "loss": 46.0, - "step": 15948 - }, - { - "epoch": 2.5684608881194895, - "grad_norm": 0.002735650632530451, - "learning_rate": 0.00019999674880735247, - "loss": 46.0, - "step": 15949 - }, - { - "epoch": 2.568621925198277, - "grad_norm": 0.008639516308903694, - "learning_rate": 0.00019999674839938747, - "loss": 46.0, - "step": 15950 - }, - { - "epoch": 2.5687829622770644, - "grad_norm": 0.0016998723149299622, - "learning_rate": 0.00019999674799139688, - "loss": 46.0, - "step": 15951 - }, - { - "epoch": 2.568943999355852, - "grad_norm": 0.0037123789079487324, - "learning_rate": 0.0001999967475833807, - "loss": 46.0, - "step": 15952 - }, - { - "epoch": 2.569105036434639, - "grad_norm": 0.0032129765022546053, - "learning_rate": 0.0001999967471753389, - "loss": 46.0, - "step": 15953 - }, - { - "epoch": 2.5692660735134263, - "grad_norm": 0.002516761887818575, - "learning_rate": 0.00019999674676727156, - "loss": 46.0, - "step": 15954 - }, - { - "epoch": 2.5694271105922137, - "grad_norm": 0.0021904422901570797, - "learning_rate": 0.0001999967463591786, - "loss": 46.0, - "step": 15955 - }, - { - "epoch": 2.569588147671001, - "grad_norm": 0.0009876337135210633, - "learning_rate": 0.00019999674595106, - "loss": 46.0, - "step": 15956 - }, - { - "epoch": 2.5697491847497886, - "grad_norm": 0.005452492740005255, - "learning_rate": 0.00019999674554291587, - "loss": 46.0, - "step": 15957 - }, - { - "epoch": 2.569910221828576, - "grad_norm": 0.0014200947480276227, - "learning_rate": 0.00019999674513474612, - "loss": 46.0, - "step": 15958 - }, - { - "epoch": 2.5700712589073635, - "grad_norm": 0.0021983920596539974, - "learning_rate": 0.0001999967447265508, - "loss": 46.0, - "step": 15959 - }, - { - "epoch": 2.570232295986151, - "grad_norm": 0.002464075107127428, - "learning_rate": 0.00019999674431832984, - "loss": 46.0, - "step": 15960 - }, - { - "epoch": 2.5703933330649384, - "grad_norm": 0.0007721377769485116, - "learning_rate": 0.00019999674391008333, - "loss": 46.0, - "step": 15961 - }, - { - "epoch": 2.5705543701437255, - "grad_norm": 0.002863567089661956, - "learning_rate": 0.00019999674350181123, - "loss": 46.0, - "step": 15962 - }, - { - "epoch": 2.570715407222513, - "grad_norm": 0.002807021141052246, - "learning_rate": 0.00019999674309351348, - "loss": 46.0, - "step": 15963 - }, - { - "epoch": 2.5708764443013004, - "grad_norm": 0.002399713033810258, - "learning_rate": 0.00019999674268519018, - "loss": 46.0, - "step": 15964 - }, - { - "epoch": 2.571037481380088, - "grad_norm": 0.006840342655777931, - "learning_rate": 0.00019999674227684126, - "loss": 46.0, - "step": 15965 - }, - { - "epoch": 2.5711985184588753, - "grad_norm": 0.0045151119120419025, - "learning_rate": 0.00019999674186846678, - "loss": 46.0, - "step": 15966 - }, - { - "epoch": 2.5713595555376627, - "grad_norm": 0.0021470310166478157, - "learning_rate": 0.00019999674146006669, - "loss": 46.0, - "step": 15967 - }, - { - "epoch": 2.5715205926164497, - "grad_norm": 0.0029068486765027046, - "learning_rate": 0.000199996741051641, - "loss": 46.0, - "step": 15968 - }, - { - "epoch": 2.571681629695237, - "grad_norm": 0.002300978172570467, - "learning_rate": 0.0001999967406431897, - "loss": 46.0, - "step": 15969 - }, - { - "epoch": 2.5718426667740246, - "grad_norm": 0.006745205260813236, - "learning_rate": 0.00019999674023471286, - "loss": 46.0, - "step": 15970 - }, - { - "epoch": 2.572003703852812, - "grad_norm": 0.0007168450392782688, - "learning_rate": 0.00019999673982621036, - "loss": 46.0, - "step": 15971 - }, - { - "epoch": 2.5721647409315995, - "grad_norm": 0.001623555552214384, - "learning_rate": 0.0001999967394176823, - "loss": 46.0, - "step": 15972 - }, - { - "epoch": 2.572325778010387, - "grad_norm": 0.0015095233684405684, - "learning_rate": 0.00019999673900912866, - "loss": 46.0, - "step": 15973 - }, - { - "epoch": 2.5724868150891744, - "grad_norm": 0.003773897420614958, - "learning_rate": 0.00019999673860054943, - "loss": 46.0, - "step": 15974 - }, - { - "epoch": 2.572647852167962, - "grad_norm": 0.0010073219891637564, - "learning_rate": 0.00019999673819194456, - "loss": 46.0, - "step": 15975 - }, - { - "epoch": 2.5728088892467493, - "grad_norm": 0.004301685839891434, - "learning_rate": 0.00019999673778331412, - "loss": 46.0, - "step": 15976 - }, - { - "epoch": 2.5729699263255363, - "grad_norm": 0.001037120702676475, - "learning_rate": 0.00019999673737465808, - "loss": 46.0, - "step": 15977 - }, - { - "epoch": 2.573130963404324, - "grad_norm": 0.0012596957385540009, - "learning_rate": 0.00019999673696597645, - "loss": 46.0, - "step": 15978 - }, - { - "epoch": 2.5732920004831112, - "grad_norm": 0.002911132061854005, - "learning_rate": 0.00019999673655726925, - "loss": 46.0, - "step": 15979 - }, - { - "epoch": 2.5734530375618987, - "grad_norm": 0.003323172451928258, - "learning_rate": 0.00019999673614853642, - "loss": 46.0, - "step": 15980 - }, - { - "epoch": 2.573614074640686, - "grad_norm": 0.004930560011416674, - "learning_rate": 0.00019999673573977802, - "loss": 46.0, - "step": 15981 - }, - { - "epoch": 2.573775111719473, - "grad_norm": 0.0009155586594715714, - "learning_rate": 0.000199996735330994, - "loss": 46.0, - "step": 15982 - }, - { - "epoch": 2.5739361487982606, - "grad_norm": 0.0032252734526991844, - "learning_rate": 0.00019999673492218442, - "loss": 46.0, - "step": 15983 - }, - { - "epoch": 2.574097185877048, - "grad_norm": 0.0023324915673583746, - "learning_rate": 0.00019999673451334923, - "loss": 46.0, - "step": 15984 - }, - { - "epoch": 2.5742582229558355, - "grad_norm": 0.002216553781181574, - "learning_rate": 0.00019999673410448843, - "loss": 46.0, - "step": 15985 - }, - { - "epoch": 2.574419260034623, - "grad_norm": 0.0008827780256979167, - "learning_rate": 0.00019999673369560208, - "loss": 46.0, - "step": 15986 - }, - { - "epoch": 2.5745802971134104, - "grad_norm": 0.002947204513475299, - "learning_rate": 0.0001999967332866901, - "loss": 46.0, - "step": 15987 - }, - { - "epoch": 2.574741334192198, - "grad_norm": 0.0026153482031077147, - "learning_rate": 0.00019999673287775252, - "loss": 46.0, - "step": 15988 - }, - { - "epoch": 2.5749023712709853, - "grad_norm": 0.0019219820387661457, - "learning_rate": 0.00019999673246878937, - "loss": 46.0, - "step": 15989 - }, - { - "epoch": 2.5750634083497728, - "grad_norm": 0.002843607449904084, - "learning_rate": 0.0001999967320598006, - "loss": 46.0, - "step": 15990 - }, - { - "epoch": 2.5752244454285598, - "grad_norm": 0.000634106807410717, - "learning_rate": 0.00019999673165078626, - "loss": 46.0, - "step": 15991 - }, - { - "epoch": 2.5753854825073472, - "grad_norm": 0.0032939857337623835, - "learning_rate": 0.00019999673124174633, - "loss": 46.0, - "step": 15992 - }, - { - "epoch": 2.5755465195861347, - "grad_norm": 0.0035462365485727787, - "learning_rate": 0.00019999673083268078, - "loss": 46.0, - "step": 15993 - }, - { - "epoch": 2.575707556664922, - "grad_norm": 0.001677834428846836, - "learning_rate": 0.00019999673042358964, - "loss": 46.0, - "step": 15994 - }, - { - "epoch": 2.5758685937437096, - "grad_norm": 0.0054684896022081375, - "learning_rate": 0.00019999673001447292, - "loss": 46.0, - "step": 15995 - }, - { - "epoch": 2.576029630822497, - "grad_norm": 0.003958237823098898, - "learning_rate": 0.0001999967296053306, - "loss": 46.0, - "step": 15996 - }, - { - "epoch": 2.576190667901284, - "grad_norm": 0.00340511417016387, - "learning_rate": 0.00019999672919616268, - "loss": 46.0, - "step": 15997 - }, - { - "epoch": 2.5763517049800715, - "grad_norm": 0.004183250013738871, - "learning_rate": 0.00019999672878696917, - "loss": 46.0, - "step": 15998 - }, - { - "epoch": 2.576512742058859, - "grad_norm": 0.0023353854194283485, - "learning_rate": 0.0001999967283777501, - "loss": 46.0, - "step": 15999 - }, - { - "epoch": 2.5766737791376464, - "grad_norm": 0.0015198108740150928, - "learning_rate": 0.00019999672796850538, - "loss": 46.0, - "step": 16000 - }, - { - "epoch": 2.576834816216434, - "grad_norm": 0.0014344934606924653, - "learning_rate": 0.0001999967275592351, - "loss": 46.0, - "step": 16001 - }, - { - "epoch": 2.5769958532952213, - "grad_norm": 0.0011377371847629547, - "learning_rate": 0.00019999672714993922, - "loss": 46.0, - "step": 16002 - }, - { - "epoch": 2.5771568903740087, - "grad_norm": 0.0012556801084429026, - "learning_rate": 0.00019999672674061775, - "loss": 46.0, - "step": 16003 - }, - { - "epoch": 2.577317927452796, - "grad_norm": 0.0008868277072906494, - "learning_rate": 0.00019999672633127066, - "loss": 46.0, - "step": 16004 - }, - { - "epoch": 2.5774789645315836, - "grad_norm": 0.0026483850087970495, - "learning_rate": 0.000199996725921898, - "loss": 46.0, - "step": 16005 - }, - { - "epoch": 2.5776400016103707, - "grad_norm": 0.0015649127308279276, - "learning_rate": 0.00019999672551249974, - "loss": 46.0, - "step": 16006 - }, - { - "epoch": 2.577801038689158, - "grad_norm": 0.000816620362456888, - "learning_rate": 0.00019999672510307586, - "loss": 46.0, - "step": 16007 - }, - { - "epoch": 2.5779620757679456, - "grad_norm": 0.0012617866741493344, - "learning_rate": 0.00019999672469362643, - "loss": 46.0, - "step": 16008 - }, - { - "epoch": 2.578123112846733, - "grad_norm": 0.00266861985437572, - "learning_rate": 0.0001999967242841514, - "loss": 46.0, - "step": 16009 - }, - { - "epoch": 2.5782841499255205, - "grad_norm": 0.005883903242647648, - "learning_rate": 0.00019999672387465076, - "loss": 46.0, - "step": 16010 - }, - { - "epoch": 2.5784451870043075, - "grad_norm": 0.001797378994524479, - "learning_rate": 0.00019999672346512456, - "loss": 46.0, - "step": 16011 - }, - { - "epoch": 2.578606224083095, - "grad_norm": 0.0016678535612300038, - "learning_rate": 0.00019999672305557272, - "loss": 46.0, - "step": 16012 - }, - { - "epoch": 2.5787672611618824, - "grad_norm": 0.002299239858984947, - "learning_rate": 0.0001999967226459953, - "loss": 46.0, - "step": 16013 - }, - { - "epoch": 2.57892829824067, - "grad_norm": 0.0014426435809582472, - "learning_rate": 0.00019999672223639227, - "loss": 46.0, - "step": 16014 - }, - { - "epoch": 2.5790893353194573, - "grad_norm": 0.0012001210125163198, - "learning_rate": 0.00019999672182676367, - "loss": 46.0, - "step": 16015 - }, - { - "epoch": 2.5792503723982447, - "grad_norm": 0.0030750101432204247, - "learning_rate": 0.00019999672141710948, - "loss": 46.0, - "step": 16016 - }, - { - "epoch": 2.579411409477032, - "grad_norm": 0.005097745917737484, - "learning_rate": 0.00019999672100742968, - "loss": 46.0, - "step": 16017 - }, - { - "epoch": 2.5795724465558196, - "grad_norm": 0.0013655796647071838, - "learning_rate": 0.0001999967205977243, - "loss": 46.0, - "step": 16018 - }, - { - "epoch": 2.579733483634607, - "grad_norm": 0.002719012089073658, - "learning_rate": 0.0001999967201879933, - "loss": 46.0, - "step": 16019 - }, - { - "epoch": 2.5798945207133945, - "grad_norm": 0.0022950624115765095, - "learning_rate": 0.00019999671977823674, - "loss": 46.0, - "step": 16020 - }, - { - "epoch": 2.5800555577921815, - "grad_norm": 0.0007586430292576551, - "learning_rate": 0.00019999671936845456, - "loss": 46.0, - "step": 16021 - }, - { - "epoch": 2.580216594870969, - "grad_norm": 0.0025612623430788517, - "learning_rate": 0.0001999967189586468, - "loss": 46.0, - "step": 16022 - }, - { - "epoch": 2.5803776319497564, - "grad_norm": 0.0009127302910201252, - "learning_rate": 0.00019999671854881347, - "loss": 46.0, - "step": 16023 - }, - { - "epoch": 2.580538669028544, - "grad_norm": 0.0006393424118869007, - "learning_rate": 0.0001999967181389545, - "loss": 46.0, - "step": 16024 - }, - { - "epoch": 2.5806997061073313, - "grad_norm": 0.011125031858682632, - "learning_rate": 0.00019999671772906994, - "loss": 46.0, - "step": 16025 - }, - { - "epoch": 2.5808607431861184, - "grad_norm": 0.000548688170965761, - "learning_rate": 0.0001999967173191598, - "loss": 46.0, - "step": 16026 - }, - { - "epoch": 2.581021780264906, - "grad_norm": 0.001740701962262392, - "learning_rate": 0.00019999671690922407, - "loss": 46.0, - "step": 16027 - }, - { - "epoch": 2.5811828173436933, - "grad_norm": 0.0031184684485197067, - "learning_rate": 0.00019999671649926275, - "loss": 46.0, - "step": 16028 - }, - { - "epoch": 2.5813438544224807, - "grad_norm": 0.002004853216931224, - "learning_rate": 0.00019999671608927582, - "loss": 46.0, - "step": 16029 - }, - { - "epoch": 2.581504891501268, - "grad_norm": 0.002701597986742854, - "learning_rate": 0.0001999967156792633, - "loss": 46.0, - "step": 16030 - }, - { - "epoch": 2.5816659285800556, - "grad_norm": 0.0029638635460287333, - "learning_rate": 0.0001999967152692252, - "loss": 46.0, - "step": 16031 - }, - { - "epoch": 2.581826965658843, - "grad_norm": 0.00609456654638052, - "learning_rate": 0.0001999967148591615, - "loss": 46.0, - "step": 16032 - }, - { - "epoch": 2.5819880027376305, - "grad_norm": 0.0033189135137945414, - "learning_rate": 0.0001999967144490722, - "loss": 46.0, - "step": 16033 - }, - { - "epoch": 2.582149039816418, - "grad_norm": 0.004438711795955896, - "learning_rate": 0.0001999967140389573, - "loss": 46.0, - "step": 16034 - }, - { - "epoch": 2.582310076895205, - "grad_norm": 0.002540210960432887, - "learning_rate": 0.00019999671362881682, - "loss": 46.0, - "step": 16035 - }, - { - "epoch": 2.5824711139739924, - "grad_norm": 0.0055688670836389065, - "learning_rate": 0.00019999671321865075, - "loss": 46.0, - "step": 16036 - }, - { - "epoch": 2.58263215105278, - "grad_norm": 0.0027148216031491756, - "learning_rate": 0.00019999671280845907, - "loss": 46.0, - "step": 16037 - }, - { - "epoch": 2.5827931881315673, - "grad_norm": 0.002419590251520276, - "learning_rate": 0.0001999967123982418, - "loss": 46.0, - "step": 16038 - }, - { - "epoch": 2.582954225210355, - "grad_norm": 0.0013533795718103647, - "learning_rate": 0.00019999671198799894, - "loss": 46.0, - "step": 16039 - }, - { - "epoch": 2.5831152622891422, - "grad_norm": 0.0010592100443318486, - "learning_rate": 0.00019999671157773047, - "loss": 46.0, - "step": 16040 - }, - { - "epoch": 2.5832762993679292, - "grad_norm": 0.0006331202457658947, - "learning_rate": 0.00019999671116743644, - "loss": 46.0, - "step": 16041 - }, - { - "epoch": 2.5834373364467167, - "grad_norm": 0.000930843991227448, - "learning_rate": 0.0001999967107571168, - "loss": 46.0, - "step": 16042 - }, - { - "epoch": 2.583598373525504, - "grad_norm": 0.003928042482584715, - "learning_rate": 0.00019999671034677153, - "loss": 46.0, - "step": 16043 - }, - { - "epoch": 2.5837594106042916, - "grad_norm": 0.003953359555453062, - "learning_rate": 0.0001999967099364007, - "loss": 46.0, - "step": 16044 - }, - { - "epoch": 2.583920447683079, - "grad_norm": 0.0010099763749167323, - "learning_rate": 0.00019999670952600428, - "loss": 46.0, - "step": 16045 - }, - { - "epoch": 2.5840814847618665, - "grad_norm": 0.011977887712419033, - "learning_rate": 0.00019999670911558225, - "loss": 46.0, - "step": 16046 - }, - { - "epoch": 2.584242521840654, - "grad_norm": 0.0047042942605912685, - "learning_rate": 0.00019999670870513464, - "loss": 46.0, - "step": 16047 - }, - { - "epoch": 2.5844035589194414, - "grad_norm": 0.004303669091314077, - "learning_rate": 0.00019999670829466145, - "loss": 46.0, - "step": 16048 - }, - { - "epoch": 2.584564595998229, - "grad_norm": 0.0009295951458625495, - "learning_rate": 0.00019999670788416264, - "loss": 46.0, - "step": 16049 - }, - { - "epoch": 2.584725633077016, - "grad_norm": 0.0008656475692987442, - "learning_rate": 0.00019999670747363824, - "loss": 46.0, - "step": 16050 - }, - { - "epoch": 2.5848866701558033, - "grad_norm": 0.0015673545422032475, - "learning_rate": 0.00019999670706308823, - "loss": 46.0, - "step": 16051 - }, - { - "epoch": 2.5850477072345908, - "grad_norm": 0.00130812160205096, - "learning_rate": 0.00019999670665251266, - "loss": 46.0, - "step": 16052 - }, - { - "epoch": 2.585208744313378, - "grad_norm": 0.0018158942693844438, - "learning_rate": 0.00019999670624191147, - "loss": 46.0, - "step": 16053 - }, - { - "epoch": 2.5853697813921657, - "grad_norm": 0.0024436796084046364, - "learning_rate": 0.0001999967058312847, - "loss": 46.0, - "step": 16054 - }, - { - "epoch": 2.5855308184709527, - "grad_norm": 0.0010908261174336076, - "learning_rate": 0.00019999670542063234, - "loss": 46.0, - "step": 16055 - }, - { - "epoch": 2.58569185554974, - "grad_norm": 0.001932608662173152, - "learning_rate": 0.0001999967050099544, - "loss": 46.0, - "step": 16056 - }, - { - "epoch": 2.5858528926285276, - "grad_norm": 0.0035636143293231726, - "learning_rate": 0.00019999670459925083, - "loss": 46.0, - "step": 16057 - }, - { - "epoch": 2.586013929707315, - "grad_norm": 0.008837430737912655, - "learning_rate": 0.00019999670418852168, - "loss": 46.0, - "step": 16058 - }, - { - "epoch": 2.5861749667861025, - "grad_norm": 0.012465517036616802, - "learning_rate": 0.00019999670377776694, - "loss": 46.0, - "step": 16059 - }, - { - "epoch": 2.58633600386489, - "grad_norm": 0.009264561347663403, - "learning_rate": 0.0001999967033669866, - "loss": 46.0, - "step": 16060 - }, - { - "epoch": 2.5864970409436774, - "grad_norm": 0.0013143039541319013, - "learning_rate": 0.00019999670295618068, - "loss": 46.0, - "step": 16061 - }, - { - "epoch": 2.586658078022465, - "grad_norm": 0.0039000045508146286, - "learning_rate": 0.00019999670254534915, - "loss": 46.0, - "step": 16062 - }, - { - "epoch": 2.5868191151012523, - "grad_norm": 0.00105371349491179, - "learning_rate": 0.00019999670213449201, - "loss": 46.0, - "step": 16063 - }, - { - "epoch": 2.5869801521800393, - "grad_norm": 0.005493657663464546, - "learning_rate": 0.0001999967017236093, - "loss": 46.0, - "step": 16064 - }, - { - "epoch": 2.5871411892588267, - "grad_norm": 0.0008403283427469432, - "learning_rate": 0.000199996701312701, - "loss": 46.0, - "step": 16065 - }, - { - "epoch": 2.587302226337614, - "grad_norm": 0.002454224741086364, - "learning_rate": 0.0001999967009017671, - "loss": 46.0, - "step": 16066 - }, - { - "epoch": 2.5874632634164016, - "grad_norm": 0.001540362136438489, - "learning_rate": 0.0001999967004908076, - "loss": 46.0, - "step": 16067 - }, - { - "epoch": 2.587624300495189, - "grad_norm": 0.005297347903251648, - "learning_rate": 0.0001999967000798225, - "loss": 46.0, - "step": 16068 - }, - { - "epoch": 2.5877853375739766, - "grad_norm": 0.0014679948799312115, - "learning_rate": 0.00019999669966881185, - "loss": 46.0, - "step": 16069 - }, - { - "epoch": 2.5879463746527636, - "grad_norm": 0.0024977836292237043, - "learning_rate": 0.00019999669925777554, - "loss": 46.0, - "step": 16070 - }, - { - "epoch": 2.588107411731551, - "grad_norm": 0.0032964071724563837, - "learning_rate": 0.00019999669884671368, - "loss": 46.0, - "step": 16071 - }, - { - "epoch": 2.5882684488103385, - "grad_norm": 0.004492423962801695, - "learning_rate": 0.0001999966984356262, - "loss": 46.0, - "step": 16072 - }, - { - "epoch": 2.588429485889126, - "grad_norm": 0.0006257380591705441, - "learning_rate": 0.00019999669802451316, - "loss": 46.0, - "step": 16073 - }, - { - "epoch": 2.5885905229679134, - "grad_norm": 0.0009773288620635867, - "learning_rate": 0.0001999966976133745, - "loss": 46.0, - "step": 16074 - }, - { - "epoch": 2.588751560046701, - "grad_norm": 0.002816564403474331, - "learning_rate": 0.00019999669720221027, - "loss": 46.0, - "step": 16075 - }, - { - "epoch": 2.5889125971254883, - "grad_norm": 0.0033466166350990534, - "learning_rate": 0.00019999669679102042, - "loss": 46.0, - "step": 16076 - }, - { - "epoch": 2.5890736342042757, - "grad_norm": 0.0023996103554964066, - "learning_rate": 0.00019999669637980498, - "loss": 46.0, - "step": 16077 - }, - { - "epoch": 2.589234671283063, - "grad_norm": 0.0008122390718199313, - "learning_rate": 0.00019999669596856398, - "loss": 46.0, - "step": 16078 - }, - { - "epoch": 2.58939570836185, - "grad_norm": 0.0015022241277620196, - "learning_rate": 0.00019999669555729733, - "loss": 46.0, - "step": 16079 - }, - { - "epoch": 2.5895567454406376, - "grad_norm": 0.0010299028363078833, - "learning_rate": 0.0001999966951460051, - "loss": 46.0, - "step": 16080 - }, - { - "epoch": 2.589717782519425, - "grad_norm": 0.006779077462852001, - "learning_rate": 0.0001999966947346873, - "loss": 46.0, - "step": 16081 - }, - { - "epoch": 2.5898788195982125, - "grad_norm": 0.0018228732515126467, - "learning_rate": 0.00019999669432334388, - "loss": 46.0, - "step": 16082 - }, - { - "epoch": 2.590039856677, - "grad_norm": 0.005500158295035362, - "learning_rate": 0.0001999966939119749, - "loss": 46.0, - "step": 16083 - }, - { - "epoch": 2.590200893755787, - "grad_norm": 0.004686194006353617, - "learning_rate": 0.00019999669350058031, - "loss": 46.0, - "step": 16084 - }, - { - "epoch": 2.5903619308345744, - "grad_norm": 0.0014073998900130391, - "learning_rate": 0.00019999669308916012, - "loss": 46.0, - "step": 16085 - }, - { - "epoch": 2.590522967913362, - "grad_norm": 0.0015975123969838023, - "learning_rate": 0.00019999669267771431, - "loss": 46.0, - "step": 16086 - }, - { - "epoch": 2.5906840049921493, - "grad_norm": 0.0017667760839685798, - "learning_rate": 0.00019999669226624295, - "loss": 46.0, - "step": 16087 - }, - { - "epoch": 2.590845042070937, - "grad_norm": 0.0007539974758401513, - "learning_rate": 0.000199996691854746, - "loss": 46.0, - "step": 16088 - }, - { - "epoch": 2.5910060791497243, - "grad_norm": 0.0030309869907796383, - "learning_rate": 0.00019999669144322342, - "loss": 46.0, - "step": 16089 - }, - { - "epoch": 2.5911671162285117, - "grad_norm": 0.0014217736897990108, - "learning_rate": 0.00019999669103167527, - "loss": 46.0, - "step": 16090 - }, - { - "epoch": 2.591328153307299, - "grad_norm": 0.0010116341290995479, - "learning_rate": 0.0001999966906201015, - "loss": 46.0, - "step": 16091 - }, - { - "epoch": 2.5914891903860866, - "grad_norm": 0.0036425767466425896, - "learning_rate": 0.00019999669020850217, - "loss": 46.0, - "step": 16092 - }, - { - "epoch": 2.591650227464874, - "grad_norm": 0.0017039704835042357, - "learning_rate": 0.00019999668979687723, - "loss": 46.0, - "step": 16093 - }, - { - "epoch": 2.591811264543661, - "grad_norm": 0.0012954877456650138, - "learning_rate": 0.00019999668938522672, - "loss": 46.0, - "step": 16094 - }, - { - "epoch": 2.5919723016224485, - "grad_norm": 0.001290460117161274, - "learning_rate": 0.00019999668897355058, - "loss": 46.0, - "step": 16095 - }, - { - "epoch": 2.592133338701236, - "grad_norm": 0.0036992921959608793, - "learning_rate": 0.00019999668856184884, - "loss": 46.0, - "step": 16096 - }, - { - "epoch": 2.5922943757800234, - "grad_norm": 0.0026420990470796824, - "learning_rate": 0.00019999668815012155, - "loss": 46.0, - "step": 16097 - }, - { - "epoch": 2.592455412858811, - "grad_norm": 0.0012626516399905086, - "learning_rate": 0.00019999668773836862, - "loss": 46.0, - "step": 16098 - }, - { - "epoch": 2.592616449937598, - "grad_norm": 0.0025810569059103727, - "learning_rate": 0.00019999668732659012, - "loss": 46.0, - "step": 16099 - }, - { - "epoch": 2.5927774870163853, - "grad_norm": 0.0027801140677183867, - "learning_rate": 0.000199996686914786, - "loss": 46.0, - "step": 16100 - }, - { - "epoch": 2.592938524095173, - "grad_norm": 0.0021034367382526398, - "learning_rate": 0.00019999668650295634, - "loss": 46.0, - "step": 16101 - }, - { - "epoch": 2.5930995611739602, - "grad_norm": 0.007078695576637983, - "learning_rate": 0.00019999668609110103, - "loss": 46.0, - "step": 16102 - }, - { - "epoch": 2.5932605982527477, - "grad_norm": 0.0016240933910012245, - "learning_rate": 0.00019999668567922016, - "loss": 46.0, - "step": 16103 - }, - { - "epoch": 2.593421635331535, - "grad_norm": 0.0008488434832543135, - "learning_rate": 0.00019999668526731368, - "loss": 46.0, - "step": 16104 - }, - { - "epoch": 2.5935826724103226, - "grad_norm": 0.0046164835803210735, - "learning_rate": 0.00019999668485538163, - "loss": 46.0, - "step": 16105 - }, - { - "epoch": 2.59374370948911, - "grad_norm": 0.0018604089273139834, - "learning_rate": 0.00019999668444342398, - "loss": 46.0, - "step": 16106 - }, - { - "epoch": 2.5939047465678975, - "grad_norm": 0.0017046486027538776, - "learning_rate": 0.0001999966840314407, - "loss": 46.0, - "step": 16107 - }, - { - "epoch": 2.5940657836466845, - "grad_norm": 0.007591297850012779, - "learning_rate": 0.00019999668361943184, - "loss": 46.0, - "step": 16108 - }, - { - "epoch": 2.594226820725472, - "grad_norm": 0.002911948598921299, - "learning_rate": 0.0001999966832073974, - "loss": 46.0, - "step": 16109 - }, - { - "epoch": 2.5943878578042594, - "grad_norm": 0.0032055643387138844, - "learning_rate": 0.00019999668279533736, - "loss": 46.0, - "step": 16110 - }, - { - "epoch": 2.594548894883047, - "grad_norm": 0.00234204507432878, - "learning_rate": 0.00019999668238325174, - "loss": 46.0, - "step": 16111 - }, - { - "epoch": 2.5947099319618343, - "grad_norm": 0.004169377498328686, - "learning_rate": 0.00019999668197114053, - "loss": 46.0, - "step": 16112 - }, - { - "epoch": 2.5948709690406218, - "grad_norm": 0.00201976066455245, - "learning_rate": 0.00019999668155900368, - "loss": 46.0, - "step": 16113 - }, - { - "epoch": 2.5950320061194088, - "grad_norm": 0.00225282390601933, - "learning_rate": 0.00019999668114684127, - "loss": 46.0, - "step": 16114 - }, - { - "epoch": 2.595193043198196, - "grad_norm": 0.001989091048017144, - "learning_rate": 0.00019999668073465324, - "loss": 46.0, - "step": 16115 - }, - { - "epoch": 2.5953540802769837, - "grad_norm": 0.00479503720998764, - "learning_rate": 0.00019999668032243966, - "loss": 46.0, - "step": 16116 - }, - { - "epoch": 2.595515117355771, - "grad_norm": 0.003390047699213028, - "learning_rate": 0.0001999966799102005, - "loss": 46.0, - "step": 16117 - }, - { - "epoch": 2.5956761544345586, - "grad_norm": 0.0019227584125474095, - "learning_rate": 0.00019999667949793568, - "loss": 46.0, - "step": 16118 - }, - { - "epoch": 2.595837191513346, - "grad_norm": 0.0043724942952394485, - "learning_rate": 0.0001999966790856453, - "loss": 46.0, - "step": 16119 - }, - { - "epoch": 2.5959982285921335, - "grad_norm": 0.0021974837873131037, - "learning_rate": 0.00019999667867332931, - "loss": 46.0, - "step": 16120 - }, - { - "epoch": 2.596159265670921, - "grad_norm": 0.011448969133198261, - "learning_rate": 0.00019999667826098777, - "loss": 46.0, - "step": 16121 - }, - { - "epoch": 2.5963203027497084, - "grad_norm": 0.005732130259275436, - "learning_rate": 0.00019999667784862058, - "loss": 46.0, - "step": 16122 - }, - { - "epoch": 2.5964813398284954, - "grad_norm": 0.0028530533891171217, - "learning_rate": 0.0001999966774362278, - "loss": 46.0, - "step": 16123 - }, - { - "epoch": 2.596642376907283, - "grad_norm": 0.007159261964261532, - "learning_rate": 0.00019999667702380947, - "loss": 46.0, - "step": 16124 - }, - { - "epoch": 2.5968034139860703, - "grad_norm": 0.009948200546205044, - "learning_rate": 0.00019999667661136551, - "loss": 46.0, - "step": 16125 - }, - { - "epoch": 2.5969644510648577, - "grad_norm": 0.00519247492775321, - "learning_rate": 0.00019999667619889598, - "loss": 46.0, - "step": 16126 - }, - { - "epoch": 2.597125488143645, - "grad_norm": 0.015372388996183872, - "learning_rate": 0.00019999667578640083, - "loss": 46.0, - "step": 16127 - }, - { - "epoch": 2.597286525222432, - "grad_norm": 0.003131683450192213, - "learning_rate": 0.00019999667537388009, - "loss": 46.0, - "step": 16128 - }, - { - "epoch": 2.5974475623012196, - "grad_norm": 0.0027298214845359325, - "learning_rate": 0.0001999966749613338, - "loss": 46.0, - "step": 16129 - }, - { - "epoch": 2.597608599380007, - "grad_norm": 0.0033878039103001356, - "learning_rate": 0.00019999667454876185, - "loss": 46.0, - "step": 16130 - }, - { - "epoch": 2.5977696364587946, - "grad_norm": 0.0021357000805437565, - "learning_rate": 0.00019999667413616435, - "loss": 46.0, - "step": 16131 - }, - { - "epoch": 2.597930673537582, - "grad_norm": 0.003726204624399543, - "learning_rate": 0.00019999667372354126, - "loss": 46.0, - "step": 16132 - }, - { - "epoch": 2.5980917106163695, - "grad_norm": 0.003822133643552661, - "learning_rate": 0.00019999667331089253, - "loss": 46.0, - "step": 16133 - }, - { - "epoch": 2.598252747695157, - "grad_norm": 0.0016124286921694875, - "learning_rate": 0.00019999667289821824, - "loss": 46.0, - "step": 16134 - }, - { - "epoch": 2.5984137847739444, - "grad_norm": 0.0036133346147835255, - "learning_rate": 0.00019999667248551836, - "loss": 46.0, - "step": 16135 - }, - { - "epoch": 2.598574821852732, - "grad_norm": 0.0014677965082228184, - "learning_rate": 0.00019999667207279287, - "loss": 46.0, - "step": 16136 - }, - { - "epoch": 2.598735858931519, - "grad_norm": 0.0017298039747402072, - "learning_rate": 0.0001999966716600418, - "loss": 46.0, - "step": 16137 - }, - { - "epoch": 2.5988968960103063, - "grad_norm": 0.0009745733113959432, - "learning_rate": 0.00019999667124726513, - "loss": 46.0, - "step": 16138 - }, - { - "epoch": 2.5990579330890937, - "grad_norm": 0.0032255914993584156, - "learning_rate": 0.00019999667083446285, - "loss": 46.0, - "step": 16139 - }, - { - "epoch": 2.599218970167881, - "grad_norm": 0.007755223195999861, - "learning_rate": 0.00019999667042163498, - "loss": 46.0, - "step": 16140 - }, - { - "epoch": 2.5993800072466686, - "grad_norm": 0.003692959202453494, - "learning_rate": 0.00019999667000878156, - "loss": 46.0, - "step": 16141 - }, - { - "epoch": 2.599541044325456, - "grad_norm": 0.0010905322851613164, - "learning_rate": 0.00019999666959590252, - "loss": 46.0, - "step": 16142 - }, - { - "epoch": 2.599702081404243, - "grad_norm": 0.00252927141264081, - "learning_rate": 0.00019999666918299786, - "loss": 46.0, - "step": 16143 - }, - { - "epoch": 2.5998631184830305, - "grad_norm": 0.0017952235648408532, - "learning_rate": 0.00019999666877006762, - "loss": 46.0, - "step": 16144 - }, - { - "epoch": 2.600024155561818, - "grad_norm": 0.002521948888897896, - "learning_rate": 0.00019999666835711182, - "loss": 46.0, - "step": 16145 - }, - { - "epoch": 2.6001851926406054, - "grad_norm": 0.0033577398862689734, - "learning_rate": 0.0001999966679441304, - "loss": 46.0, - "step": 16146 - }, - { - "epoch": 2.600346229719393, - "grad_norm": 0.0005824739346280694, - "learning_rate": 0.00019999666753112337, - "loss": 46.0, - "step": 16147 - }, - { - "epoch": 2.6005072667981803, - "grad_norm": 0.0018328281585127115, - "learning_rate": 0.00019999666711809075, - "loss": 46.0, - "step": 16148 - }, - { - "epoch": 2.600668303876968, - "grad_norm": 0.0020289828535169363, - "learning_rate": 0.00019999666670503255, - "loss": 46.0, - "step": 16149 - }, - { - "epoch": 2.6008293409557552, - "grad_norm": 0.010007748380303383, - "learning_rate": 0.00019999666629194875, - "loss": 46.0, - "step": 16150 - }, - { - "epoch": 2.6009903780345427, - "grad_norm": 0.0024312378372997046, - "learning_rate": 0.00019999666587883935, - "loss": 46.0, - "step": 16151 - }, - { - "epoch": 2.6011514151133297, - "grad_norm": 0.00319654680788517, - "learning_rate": 0.00019999666546570438, - "loss": 46.0, - "step": 16152 - }, - { - "epoch": 2.601312452192117, - "grad_norm": 0.005473264493048191, - "learning_rate": 0.00019999666505254377, - "loss": 46.0, - "step": 16153 - }, - { - "epoch": 2.6014734892709046, - "grad_norm": 0.0007003513746894896, - "learning_rate": 0.00019999666463935763, - "loss": 46.0, - "step": 16154 - }, - { - "epoch": 2.601634526349692, - "grad_norm": 0.003339738119393587, - "learning_rate": 0.00019999666422614585, - "loss": 46.0, - "step": 16155 - }, - { - "epoch": 2.6017955634284795, - "grad_norm": 0.0023215902037918568, - "learning_rate": 0.00019999666381290848, - "loss": 46.0, - "step": 16156 - }, - { - "epoch": 2.601956600507267, - "grad_norm": 0.001549035543575883, - "learning_rate": 0.00019999666339964552, - "loss": 46.0, - "step": 16157 - }, - { - "epoch": 2.602117637586054, - "grad_norm": 0.0012019840069115162, - "learning_rate": 0.00019999666298635698, - "loss": 46.0, - "step": 16158 - }, - { - "epoch": 2.6022786746648414, - "grad_norm": 0.00277071725577116, - "learning_rate": 0.00019999666257304282, - "loss": 46.0, - "step": 16159 - }, - { - "epoch": 2.602439711743629, - "grad_norm": 0.0015820463886484504, - "learning_rate": 0.0001999966621597031, - "loss": 46.0, - "step": 16160 - }, - { - "epoch": 2.6026007488224163, - "grad_norm": 0.006370109040290117, - "learning_rate": 0.00019999666174633777, - "loss": 46.0, - "step": 16161 - }, - { - "epoch": 2.6027617859012038, - "grad_norm": 0.0070269908756017685, - "learning_rate": 0.00019999666133294682, - "loss": 46.0, - "step": 16162 - }, - { - "epoch": 2.6029228229799912, - "grad_norm": 0.0020513904746621847, - "learning_rate": 0.00019999666091953031, - "loss": 46.0, - "step": 16163 - }, - { - "epoch": 2.6030838600587787, - "grad_norm": 0.006313891615718603, - "learning_rate": 0.00019999666050608822, - "loss": 46.0, - "step": 16164 - }, - { - "epoch": 2.603244897137566, - "grad_norm": 0.0008577312109991908, - "learning_rate": 0.00019999666009262048, - "loss": 46.0, - "step": 16165 - }, - { - "epoch": 2.6034059342163536, - "grad_norm": 0.007441112305969, - "learning_rate": 0.00019999665967912716, - "loss": 46.0, - "step": 16166 - }, - { - "epoch": 2.6035669712951406, - "grad_norm": 0.0008037133375182748, - "learning_rate": 0.00019999665926560828, - "loss": 46.0, - "step": 16167 - }, - { - "epoch": 2.603728008373928, - "grad_norm": 0.0006772925844416022, - "learning_rate": 0.00019999665885206378, - "loss": 46.0, - "step": 16168 - }, - { - "epoch": 2.6038890454527155, - "grad_norm": 0.007704761810600758, - "learning_rate": 0.00019999665843849372, - "loss": 46.0, - "step": 16169 - }, - { - "epoch": 2.604050082531503, - "grad_norm": 0.001972059952095151, - "learning_rate": 0.00019999665802489803, - "loss": 46.0, - "step": 16170 - }, - { - "epoch": 2.6042111196102904, - "grad_norm": 0.0006038892897777259, - "learning_rate": 0.00019999665761127674, - "loss": 46.0, - "step": 16171 - }, - { - "epoch": 2.6043721566890774, - "grad_norm": 0.0017083912389352918, - "learning_rate": 0.0001999966571976299, - "loss": 46.0, - "step": 16172 - }, - { - "epoch": 2.604533193767865, - "grad_norm": 0.0024328369181603193, - "learning_rate": 0.00019999665678395743, - "loss": 46.0, - "step": 16173 - }, - { - "epoch": 2.6046942308466523, - "grad_norm": 0.002566979033872485, - "learning_rate": 0.00019999665637025936, - "loss": 46.0, - "step": 16174 - }, - { - "epoch": 2.6048552679254398, - "grad_norm": 0.0035113622434437275, - "learning_rate": 0.00019999665595653573, - "loss": 46.0, - "step": 16175 - }, - { - "epoch": 2.605016305004227, - "grad_norm": 0.0013542110100388527, - "learning_rate": 0.00019999665554278648, - "loss": 46.0, - "step": 16176 - }, - { - "epoch": 2.6051773420830147, - "grad_norm": 0.002967189298942685, - "learning_rate": 0.00019999665512901164, - "loss": 46.0, - "step": 16177 - }, - { - "epoch": 2.605338379161802, - "grad_norm": 0.0010982576059177518, - "learning_rate": 0.00019999665471521122, - "loss": 46.0, - "step": 16178 - }, - { - "epoch": 2.6054994162405896, - "grad_norm": 0.003811403177678585, - "learning_rate": 0.00019999665430138518, - "loss": 46.0, - "step": 16179 - }, - { - "epoch": 2.605660453319377, - "grad_norm": 0.0016570906154811382, - "learning_rate": 0.00019999665388753356, - "loss": 46.0, - "step": 16180 - }, - { - "epoch": 2.605821490398164, - "grad_norm": 0.0015925776679068804, - "learning_rate": 0.00019999665347365635, - "loss": 46.0, - "step": 16181 - }, - { - "epoch": 2.6059825274769515, - "grad_norm": 0.006657804828137159, - "learning_rate": 0.00019999665305975355, - "loss": 46.0, - "step": 16182 - }, - { - "epoch": 2.606143564555739, - "grad_norm": 0.005006028339266777, - "learning_rate": 0.00019999665264582513, - "loss": 46.0, - "step": 16183 - }, - { - "epoch": 2.6063046016345264, - "grad_norm": 0.00213949684984982, - "learning_rate": 0.00019999665223187116, - "loss": 46.0, - "step": 16184 - }, - { - "epoch": 2.606465638713314, - "grad_norm": 0.0017056141514331102, - "learning_rate": 0.00019999665181789155, - "loss": 46.0, - "step": 16185 - }, - { - "epoch": 2.6066266757921013, - "grad_norm": 0.01463579386472702, - "learning_rate": 0.00019999665140388637, - "loss": 46.0, - "step": 16186 - }, - { - "epoch": 2.6067877128708883, - "grad_norm": 0.005354403052479029, - "learning_rate": 0.0001999966509898556, - "loss": 46.0, - "step": 16187 - }, - { - "epoch": 2.6069487499496757, - "grad_norm": 0.0022061849012970924, - "learning_rate": 0.0001999966505757992, - "loss": 46.0, - "step": 16188 - }, - { - "epoch": 2.607109787028463, - "grad_norm": 0.024766871705651283, - "learning_rate": 0.00019999665016171724, - "loss": 46.0, - "step": 16189 - }, - { - "epoch": 2.6072708241072506, - "grad_norm": 0.0014470957685261965, - "learning_rate": 0.0001999966497476097, - "loss": 46.0, - "step": 16190 - }, - { - "epoch": 2.607431861186038, - "grad_norm": 0.009810199029743671, - "learning_rate": 0.00019999664933347656, - "loss": 46.0, - "step": 16191 - }, - { - "epoch": 2.6075928982648255, - "grad_norm": 0.008456065319478512, - "learning_rate": 0.0001999966489193178, - "loss": 46.0, - "step": 16192 - }, - { - "epoch": 2.607753935343613, - "grad_norm": 0.0027164542116224766, - "learning_rate": 0.00019999664850513344, - "loss": 46.0, - "step": 16193 - }, - { - "epoch": 2.6079149724224004, - "grad_norm": 0.0014124843291938305, - "learning_rate": 0.00019999664809092354, - "loss": 46.0, - "step": 16194 - }, - { - "epoch": 2.608076009501188, - "grad_norm": 0.005745426286011934, - "learning_rate": 0.000199996647676688, - "loss": 46.0, - "step": 16195 - }, - { - "epoch": 2.608237046579975, - "grad_norm": 0.0013142259558662772, - "learning_rate": 0.00019999664726242687, - "loss": 46.0, - "step": 16196 - }, - { - "epoch": 2.6083980836587624, - "grad_norm": 0.003737054066732526, - "learning_rate": 0.00019999664684814016, - "loss": 46.0, - "step": 16197 - }, - { - "epoch": 2.60855912073755, - "grad_norm": 0.0020233402028679848, - "learning_rate": 0.00019999664643382783, - "loss": 46.0, - "step": 16198 - }, - { - "epoch": 2.6087201578163373, - "grad_norm": 0.002590385265648365, - "learning_rate": 0.00019999664601948994, - "loss": 46.0, - "step": 16199 - }, - { - "epoch": 2.6088811948951247, - "grad_norm": 0.004802698735147715, - "learning_rate": 0.00019999664560512646, - "loss": 46.0, - "step": 16200 - }, - { - "epoch": 2.6090422319739117, - "grad_norm": 0.0017088550375774503, - "learning_rate": 0.00019999664519073734, - "loss": 46.0, - "step": 16201 - }, - { - "epoch": 2.609203269052699, - "grad_norm": 0.0033691904973238707, - "learning_rate": 0.00019999664477632267, - "loss": 46.0, - "step": 16202 - }, - { - "epoch": 2.6093643061314866, - "grad_norm": 0.007423325441777706, - "learning_rate": 0.00019999664436188237, - "loss": 46.0, - "step": 16203 - }, - { - "epoch": 2.609525343210274, - "grad_norm": 0.0019671330228447914, - "learning_rate": 0.0001999966439474165, - "loss": 46.0, - "step": 16204 - }, - { - "epoch": 2.6096863802890615, - "grad_norm": 0.0012009054189547896, - "learning_rate": 0.00019999664353292503, - "loss": 46.0, - "step": 16205 - }, - { - "epoch": 2.609847417367849, - "grad_norm": 0.0010096444748342037, - "learning_rate": 0.00019999664311840798, - "loss": 46.0, - "step": 16206 - }, - { - "epoch": 2.6100084544466364, - "grad_norm": 0.0012120584724470973, - "learning_rate": 0.0001999966427038653, - "loss": 46.0, - "step": 16207 - }, - { - "epoch": 2.610169491525424, - "grad_norm": 0.0019064630614593625, - "learning_rate": 0.00019999664228929705, - "loss": 46.0, - "step": 16208 - }, - { - "epoch": 2.6103305286042113, - "grad_norm": 0.008776336908340454, - "learning_rate": 0.0001999966418747032, - "loss": 46.0, - "step": 16209 - }, - { - "epoch": 2.610491565682999, - "grad_norm": 0.003920337185263634, - "learning_rate": 0.00019999664146008378, - "loss": 46.0, - "step": 16210 - }, - { - "epoch": 2.610652602761786, - "grad_norm": 0.0019731582142412663, - "learning_rate": 0.00019999664104543874, - "loss": 46.0, - "step": 16211 - }, - { - "epoch": 2.6108136398405732, - "grad_norm": 0.0006241137161850929, - "learning_rate": 0.0001999966406307681, - "loss": 46.0, - "step": 16212 - }, - { - "epoch": 2.6109746769193607, - "grad_norm": 0.02755666896700859, - "learning_rate": 0.00019999664021607186, - "loss": 46.0, - "step": 16213 - }, - { - "epoch": 2.611135713998148, - "grad_norm": 0.0020803448278456926, - "learning_rate": 0.00019999663980135006, - "loss": 46.0, - "step": 16214 - }, - { - "epoch": 2.6112967510769356, - "grad_norm": 0.0009348891908302903, - "learning_rate": 0.00019999663938660266, - "loss": 46.0, - "step": 16215 - }, - { - "epoch": 2.6114577881557226, - "grad_norm": 0.002950292080640793, - "learning_rate": 0.00019999663897182966, - "loss": 46.0, - "step": 16216 - }, - { - "epoch": 2.61161882523451, - "grad_norm": 0.0013186151627451181, - "learning_rate": 0.00019999663855703106, - "loss": 46.0, - "step": 16217 - }, - { - "epoch": 2.6117798623132975, - "grad_norm": 0.000810387427918613, - "learning_rate": 0.00019999663814220688, - "loss": 46.0, - "step": 16218 - }, - { - "epoch": 2.611940899392085, - "grad_norm": 0.000788987148553133, - "learning_rate": 0.0001999966377273571, - "loss": 46.0, - "step": 16219 - }, - { - "epoch": 2.6121019364708724, - "grad_norm": 0.00318744033575058, - "learning_rate": 0.0001999966373124817, - "loss": 46.0, - "step": 16220 - }, - { - "epoch": 2.61226297354966, - "grad_norm": 0.0044906553812325, - "learning_rate": 0.00019999663689758074, - "loss": 46.0, - "step": 16221 - }, - { - "epoch": 2.6124240106284473, - "grad_norm": 0.00794381182640791, - "learning_rate": 0.00019999663648265415, - "loss": 46.0, - "step": 16222 - }, - { - "epoch": 2.6125850477072348, - "grad_norm": 0.001446620561182499, - "learning_rate": 0.000199996636067702, - "loss": 46.0, - "step": 16223 - }, - { - "epoch": 2.612746084786022, - "grad_norm": 0.004321327432990074, - "learning_rate": 0.00019999663565272425, - "loss": 46.0, - "step": 16224 - }, - { - "epoch": 2.6129071218648092, - "grad_norm": 0.0009887315100058913, - "learning_rate": 0.0001999966352377209, - "loss": 46.0, - "step": 16225 - }, - { - "epoch": 2.6130681589435967, - "grad_norm": 0.0034912792034447193, - "learning_rate": 0.00019999663482269195, - "loss": 46.0, - "step": 16226 - }, - { - "epoch": 2.613229196022384, - "grad_norm": 0.0031000266317278147, - "learning_rate": 0.00019999663440763743, - "loss": 46.0, - "step": 16227 - }, - { - "epoch": 2.6133902331011716, - "grad_norm": 0.00028321624267846346, - "learning_rate": 0.0001999966339925573, - "loss": 46.0, - "step": 16228 - }, - { - "epoch": 2.613551270179959, - "grad_norm": 0.0017997113754972816, - "learning_rate": 0.00019999663357745154, - "loss": 46.0, - "step": 16229 - }, - { - "epoch": 2.6137123072587465, - "grad_norm": 0.002274578670039773, - "learning_rate": 0.00019999663316232024, - "loss": 46.0, - "step": 16230 - }, - { - "epoch": 2.6138733443375335, - "grad_norm": 0.0009919337462633848, - "learning_rate": 0.00019999663274716331, - "loss": 46.0, - "step": 16231 - }, - { - "epoch": 2.614034381416321, - "grad_norm": 0.0007810642127878964, - "learning_rate": 0.0001999966323319808, - "loss": 46.0, - "step": 16232 - }, - { - "epoch": 2.6141954184951084, - "grad_norm": 0.003255092306062579, - "learning_rate": 0.0001999966319167727, - "loss": 46.0, - "step": 16233 - }, - { - "epoch": 2.614356455573896, - "grad_norm": 0.0023576554376631975, - "learning_rate": 0.00019999663150153902, - "loss": 46.0, - "step": 16234 - }, - { - "epoch": 2.6145174926526833, - "grad_norm": 0.0025528348051011562, - "learning_rate": 0.00019999663108627972, - "loss": 46.0, - "step": 16235 - }, - { - "epoch": 2.6146785297314707, - "grad_norm": 0.0012402617139741778, - "learning_rate": 0.00019999663067099484, - "loss": 46.0, - "step": 16236 - }, - { - "epoch": 2.614839566810258, - "grad_norm": 0.0059888362884521484, - "learning_rate": 0.0001999966302556844, - "loss": 46.0, - "step": 16237 - }, - { - "epoch": 2.6150006038890456, - "grad_norm": 0.0010131146991625428, - "learning_rate": 0.0001999966298403483, - "loss": 46.0, - "step": 16238 - }, - { - "epoch": 2.615161640967833, - "grad_norm": 0.007483948487788439, - "learning_rate": 0.00019999662942498663, - "loss": 46.0, - "step": 16239 - }, - { - "epoch": 2.61532267804662, - "grad_norm": 0.0021092721726745367, - "learning_rate": 0.00019999662900959937, - "loss": 46.0, - "step": 16240 - }, - { - "epoch": 2.6154837151254076, - "grad_norm": 0.007061942480504513, - "learning_rate": 0.0001999966285941865, - "loss": 46.0, - "step": 16241 - }, - { - "epoch": 2.615644752204195, - "grad_norm": 0.0026891794987022877, - "learning_rate": 0.00019999662817874806, - "loss": 46.0, - "step": 16242 - }, - { - "epoch": 2.6158057892829825, - "grad_norm": 0.005143879912793636, - "learning_rate": 0.00019999662776328404, - "loss": 46.0, - "step": 16243 - }, - { - "epoch": 2.61596682636177, - "grad_norm": 0.002124219899997115, - "learning_rate": 0.0001999966273477944, - "loss": 46.0, - "step": 16244 - }, - { - "epoch": 2.616127863440557, - "grad_norm": 0.0013617079239338636, - "learning_rate": 0.00019999662693227918, - "loss": 46.0, - "step": 16245 - }, - { - "epoch": 2.6162889005193444, - "grad_norm": 0.0014032250037416816, - "learning_rate": 0.00019999662651673834, - "loss": 46.0, - "step": 16246 - }, - { - "epoch": 2.616449937598132, - "grad_norm": 0.0035202796570956707, - "learning_rate": 0.0001999966261011719, - "loss": 46.0, - "step": 16247 - }, - { - "epoch": 2.6166109746769193, - "grad_norm": 0.000518196087796241, - "learning_rate": 0.0001999966256855799, - "loss": 46.0, - "step": 16248 - }, - { - "epoch": 2.6167720117557067, - "grad_norm": 0.001013385714031756, - "learning_rate": 0.0001999966252699623, - "loss": 46.0, - "step": 16249 - }, - { - "epoch": 2.616933048834494, - "grad_norm": 0.0013176481006667018, - "learning_rate": 0.0001999966248543191, - "loss": 46.0, - "step": 16250 - }, - { - "epoch": 2.6170940859132816, - "grad_norm": 0.001168896909803152, - "learning_rate": 0.0001999966244386503, - "loss": 46.0, - "step": 16251 - }, - { - "epoch": 2.617255122992069, - "grad_norm": 0.011545871384441853, - "learning_rate": 0.00019999662402295592, - "loss": 46.0, - "step": 16252 - }, - { - "epoch": 2.6174161600708565, - "grad_norm": 0.007152176462113857, - "learning_rate": 0.00019999662360723592, - "loss": 46.0, - "step": 16253 - }, - { - "epoch": 2.6175771971496435, - "grad_norm": 0.001512520364485681, - "learning_rate": 0.00019999662319149036, - "loss": 46.0, - "step": 16254 - }, - { - "epoch": 2.617738234228431, - "grad_norm": 0.0027331719174981117, - "learning_rate": 0.0001999966227757192, - "loss": 46.0, - "step": 16255 - }, - { - "epoch": 2.6178992713072184, - "grad_norm": 0.0008448728476651013, - "learning_rate": 0.00019999662235992241, - "loss": 46.0, - "step": 16256 - }, - { - "epoch": 2.618060308386006, - "grad_norm": 0.0049787405878305435, - "learning_rate": 0.00019999662194410006, - "loss": 46.0, - "step": 16257 - }, - { - "epoch": 2.6182213454647933, - "grad_norm": 0.004492932464927435, - "learning_rate": 0.00019999662152825212, - "loss": 46.0, - "step": 16258 - }, - { - "epoch": 2.618382382543581, - "grad_norm": 0.0031222079414874315, - "learning_rate": 0.00019999662111237857, - "loss": 46.0, - "step": 16259 - }, - { - "epoch": 2.618543419622368, - "grad_norm": 0.010371342301368713, - "learning_rate": 0.00019999662069647943, - "loss": 46.0, - "step": 16260 - }, - { - "epoch": 2.6187044567011553, - "grad_norm": 0.002968019340187311, - "learning_rate": 0.0001999966202805547, - "loss": 46.0, - "step": 16261 - }, - { - "epoch": 2.6188654937799427, - "grad_norm": 0.011174728162586689, - "learning_rate": 0.0001999966198646044, - "loss": 46.0, - "step": 16262 - }, - { - "epoch": 2.61902653085873, - "grad_norm": 0.001504142303019762, - "learning_rate": 0.00019999661944862846, - "loss": 46.0, - "step": 16263 - }, - { - "epoch": 2.6191875679375176, - "grad_norm": 0.0010710597271099687, - "learning_rate": 0.00019999661903262692, - "loss": 46.0, - "step": 16264 - }, - { - "epoch": 2.619348605016305, - "grad_norm": 0.0027598801534622908, - "learning_rate": 0.00019999661861659981, - "loss": 46.0, - "step": 16265 - }, - { - "epoch": 2.6195096420950925, - "grad_norm": 0.002100490964949131, - "learning_rate": 0.00019999661820054712, - "loss": 46.0, - "step": 16266 - }, - { - "epoch": 2.61967067917388, - "grad_norm": 0.00361101096495986, - "learning_rate": 0.00019999661778446882, - "loss": 46.0, - "step": 16267 - }, - { - "epoch": 2.6198317162526674, - "grad_norm": 0.0012040912406519055, - "learning_rate": 0.00019999661736836493, - "loss": 46.0, - "step": 16268 - }, - { - "epoch": 2.6199927533314544, - "grad_norm": 0.0016230870969593525, - "learning_rate": 0.00019999661695223545, - "loss": 46.0, - "step": 16269 - }, - { - "epoch": 2.620153790410242, - "grad_norm": 0.006094573996961117, - "learning_rate": 0.00019999661653608035, - "loss": 46.0, - "step": 16270 - }, - { - "epoch": 2.6203148274890293, - "grad_norm": 0.0020876533817499876, - "learning_rate": 0.0001999966161198997, - "loss": 46.0, - "step": 16271 - }, - { - "epoch": 2.620475864567817, - "grad_norm": 0.014123592525720596, - "learning_rate": 0.00019999661570369343, - "loss": 46.0, - "step": 16272 - }, - { - "epoch": 2.6206369016466042, - "grad_norm": 0.0008789273560978472, - "learning_rate": 0.00019999661528746155, - "loss": 46.0, - "step": 16273 - }, - { - "epoch": 2.6207979387253917, - "grad_norm": 0.0037456608843058348, - "learning_rate": 0.0001999966148712041, - "loss": 46.0, - "step": 16274 - }, - { - "epoch": 2.6209589758041787, - "grad_norm": 0.004488199949264526, - "learning_rate": 0.00019999661445492105, - "loss": 46.0, - "step": 16275 - }, - { - "epoch": 2.621120012882966, - "grad_norm": 0.005014956928789616, - "learning_rate": 0.0001999966140386124, - "loss": 46.0, - "step": 16276 - }, - { - "epoch": 2.6212810499617536, - "grad_norm": 0.003245734144002199, - "learning_rate": 0.00019999661362227818, - "loss": 46.0, - "step": 16277 - }, - { - "epoch": 2.621442087040541, - "grad_norm": 0.004849734716117382, - "learning_rate": 0.00019999661320591834, - "loss": 46.0, - "step": 16278 - }, - { - "epoch": 2.6216031241193285, - "grad_norm": 0.0035920669324696064, - "learning_rate": 0.00019999661278953293, - "loss": 46.0, - "step": 16279 - }, - { - "epoch": 2.621764161198116, - "grad_norm": 0.0021898269187659025, - "learning_rate": 0.00019999661237312188, - "loss": 46.0, - "step": 16280 - }, - { - "epoch": 2.6219251982769034, - "grad_norm": 0.001706786802969873, - "learning_rate": 0.00019999661195668528, - "loss": 46.0, - "step": 16281 - }, - { - "epoch": 2.622086235355691, - "grad_norm": 0.002933080308139324, - "learning_rate": 0.00019999661154022306, - "loss": 46.0, - "step": 16282 - }, - { - "epoch": 2.6222472724344783, - "grad_norm": 0.002068144269287586, - "learning_rate": 0.00019999661112373528, - "loss": 46.0, - "step": 16283 - }, - { - "epoch": 2.6224083095132653, - "grad_norm": 0.007859934121370316, - "learning_rate": 0.00019999661070722188, - "loss": 46.0, - "step": 16284 - }, - { - "epoch": 2.6225693465920528, - "grad_norm": 0.00048201545723713934, - "learning_rate": 0.00019999661029068287, - "loss": 46.0, - "step": 16285 - }, - { - "epoch": 2.62273038367084, - "grad_norm": 0.010907069779932499, - "learning_rate": 0.0001999966098741183, - "loss": 46.0, - "step": 16286 - }, - { - "epoch": 2.6228914207496277, - "grad_norm": 0.004363309592008591, - "learning_rate": 0.00019999660945752812, - "loss": 46.0, - "step": 16287 - }, - { - "epoch": 2.623052457828415, - "grad_norm": 0.009198774583637714, - "learning_rate": 0.00019999660904091235, - "loss": 46.0, - "step": 16288 - }, - { - "epoch": 2.623213494907202, - "grad_norm": 0.0019503688672557473, - "learning_rate": 0.000199996608624271, - "loss": 46.0, - "step": 16289 - }, - { - "epoch": 2.6233745319859896, - "grad_norm": 0.0022886747028678656, - "learning_rate": 0.00019999660820760402, - "loss": 46.0, - "step": 16290 - }, - { - "epoch": 2.623535569064777, - "grad_norm": 0.0052734678611159325, - "learning_rate": 0.00019999660779091146, - "loss": 46.0, - "step": 16291 - }, - { - "epoch": 2.6236966061435645, - "grad_norm": 0.0016602105461061, - "learning_rate": 0.0001999966073741933, - "loss": 46.0, - "step": 16292 - }, - { - "epoch": 2.623857643222352, - "grad_norm": 0.0042768982239067554, - "learning_rate": 0.00019999660695744958, - "loss": 46.0, - "step": 16293 - }, - { - "epoch": 2.6240186803011394, - "grad_norm": 0.007833343930542469, - "learning_rate": 0.00019999660654068023, - "loss": 46.0, - "step": 16294 - }, - { - "epoch": 2.624179717379927, - "grad_norm": 0.0031322180293500423, - "learning_rate": 0.00019999660612388532, - "loss": 46.0, - "step": 16295 - }, - { - "epoch": 2.6243407544587143, - "grad_norm": 0.0011031373869627714, - "learning_rate": 0.0001999966057070648, - "loss": 46.0, - "step": 16296 - }, - { - "epoch": 2.6245017915375017, - "grad_norm": 0.003968015778809786, - "learning_rate": 0.0001999966052902187, - "loss": 46.0, - "step": 16297 - }, - { - "epoch": 2.6246628286162887, - "grad_norm": 0.0004365532658994198, - "learning_rate": 0.00019999660487334694, - "loss": 46.0, - "step": 16298 - }, - { - "epoch": 2.624823865695076, - "grad_norm": 0.0008367628324776888, - "learning_rate": 0.00019999660445644966, - "loss": 46.0, - "step": 16299 - }, - { - "epoch": 2.6249849027738636, - "grad_norm": 0.007303046062588692, - "learning_rate": 0.00019999660403952676, - "loss": 46.0, - "step": 16300 - }, - { - "epoch": 2.625145939852651, - "grad_norm": 0.0011429721489548683, - "learning_rate": 0.00019999660362257827, - "loss": 46.0, - "step": 16301 - }, - { - "epoch": 2.6253069769314386, - "grad_norm": 0.004899218678474426, - "learning_rate": 0.00019999660320560417, - "loss": 46.0, - "step": 16302 - }, - { - "epoch": 2.625468014010226, - "grad_norm": 0.00478737335652113, - "learning_rate": 0.0001999966027886045, - "loss": 46.0, - "step": 16303 - }, - { - "epoch": 2.625629051089013, - "grad_norm": 0.001814377843402326, - "learning_rate": 0.0001999966023715792, - "loss": 46.0, - "step": 16304 - }, - { - "epoch": 2.6257900881678005, - "grad_norm": 0.0023631909862160683, - "learning_rate": 0.00019999660195452835, - "loss": 46.0, - "step": 16305 - }, - { - "epoch": 2.625951125246588, - "grad_norm": 0.010905549861490726, - "learning_rate": 0.00019999660153745188, - "loss": 46.0, - "step": 16306 - }, - { - "epoch": 2.6261121623253754, - "grad_norm": 0.019005555659532547, - "learning_rate": 0.00019999660112034984, - "loss": 46.0, - "step": 16307 - }, - { - "epoch": 2.626273199404163, - "grad_norm": 0.0016651147743687034, - "learning_rate": 0.00019999660070322216, - "loss": 46.0, - "step": 16308 - }, - { - "epoch": 2.6264342364829503, - "grad_norm": 0.003206402761861682, - "learning_rate": 0.00019999660028606893, - "loss": 46.0, - "step": 16309 - }, - { - "epoch": 2.6265952735617377, - "grad_norm": 0.013352095149457455, - "learning_rate": 0.00019999659986889008, - "loss": 46.0, - "step": 16310 - }, - { - "epoch": 2.626756310640525, - "grad_norm": 0.0025833584368228912, - "learning_rate": 0.00019999659945168564, - "loss": 46.0, - "step": 16311 - }, - { - "epoch": 2.6269173477193126, - "grad_norm": 0.003954403102397919, - "learning_rate": 0.0001999965990344556, - "loss": 46.0, - "step": 16312 - }, - { - "epoch": 2.6270783847980996, - "grad_norm": 0.006242019589990377, - "learning_rate": 0.00019999659861720003, - "loss": 46.0, - "step": 16313 - }, - { - "epoch": 2.627239421876887, - "grad_norm": 0.005097391549497843, - "learning_rate": 0.00019999659819991877, - "loss": 46.0, - "step": 16314 - }, - { - "epoch": 2.6274004589556745, - "grad_norm": 0.0010899916524067521, - "learning_rate": 0.00019999659778261196, - "loss": 46.0, - "step": 16315 - }, - { - "epoch": 2.627561496034462, - "grad_norm": 0.006583353504538536, - "learning_rate": 0.00019999659736527958, - "loss": 46.0, - "step": 16316 - }, - { - "epoch": 2.6277225331132494, - "grad_norm": 0.0010600329842418432, - "learning_rate": 0.00019999659694792157, - "loss": 46.0, - "step": 16317 - }, - { - "epoch": 2.6278835701920364, - "grad_norm": 0.001082659000530839, - "learning_rate": 0.00019999659653053797, - "loss": 46.0, - "step": 16318 - }, - { - "epoch": 2.628044607270824, - "grad_norm": 0.004644326400011778, - "learning_rate": 0.0001999965961131288, - "loss": 46.0, - "step": 16319 - }, - { - "epoch": 2.6282056443496113, - "grad_norm": 0.003597764065489173, - "learning_rate": 0.000199996595695694, - "loss": 46.0, - "step": 16320 - }, - { - "epoch": 2.628366681428399, - "grad_norm": 0.0030136071145534515, - "learning_rate": 0.00019999659527823364, - "loss": 46.0, - "step": 16321 - }, - { - "epoch": 2.6285277185071863, - "grad_norm": 0.0018900932045653462, - "learning_rate": 0.00019999659486074766, - "loss": 46.0, - "step": 16322 - }, - { - "epoch": 2.6286887555859737, - "grad_norm": 0.0007270133937709033, - "learning_rate": 0.00019999659444323612, - "loss": 46.0, - "step": 16323 - }, - { - "epoch": 2.628849792664761, - "grad_norm": 0.0011344680096954107, - "learning_rate": 0.00019999659402569897, - "loss": 46.0, - "step": 16324 - }, - { - "epoch": 2.6290108297435486, - "grad_norm": 0.005970783997327089, - "learning_rate": 0.0001999965936081362, - "loss": 46.0, - "step": 16325 - }, - { - "epoch": 2.629171866822336, - "grad_norm": 0.001079970272257924, - "learning_rate": 0.00019999659319054787, - "loss": 46.0, - "step": 16326 - }, - { - "epoch": 2.6293329039011235, - "grad_norm": 0.001130154705606401, - "learning_rate": 0.00019999659277293393, - "loss": 46.0, - "step": 16327 - }, - { - "epoch": 2.6294939409799105, - "grad_norm": 0.003891661064699292, - "learning_rate": 0.0001999965923552944, - "loss": 46.0, - "step": 16328 - }, - { - "epoch": 2.629654978058698, - "grad_norm": 0.0006424893508665264, - "learning_rate": 0.00019999659193762926, - "loss": 46.0, - "step": 16329 - }, - { - "epoch": 2.6298160151374854, - "grad_norm": 0.003793942043557763, - "learning_rate": 0.00019999659151993855, - "loss": 46.0, - "step": 16330 - }, - { - "epoch": 2.629977052216273, - "grad_norm": 0.004247124306857586, - "learning_rate": 0.0001999965911022222, - "loss": 46.0, - "step": 16331 - }, - { - "epoch": 2.6301380892950603, - "grad_norm": 0.010247626341879368, - "learning_rate": 0.00019999659068448033, - "loss": 46.0, - "step": 16332 - }, - { - "epoch": 2.6302991263738473, - "grad_norm": 0.012627347372472286, - "learning_rate": 0.00019999659026671281, - "loss": 46.0, - "step": 16333 - }, - { - "epoch": 2.630460163452635, - "grad_norm": 0.00166188960429281, - "learning_rate": 0.00019999658984891974, - "loss": 46.0, - "step": 16334 - }, - { - "epoch": 2.6306212005314222, - "grad_norm": 0.0021874040830880404, - "learning_rate": 0.00019999658943110102, - "loss": 46.0, - "step": 16335 - }, - { - "epoch": 2.6307822376102097, - "grad_norm": 0.006883494555950165, - "learning_rate": 0.00019999658901325676, - "loss": 46.0, - "step": 16336 - }, - { - "epoch": 2.630943274688997, - "grad_norm": 0.003918439615517855, - "learning_rate": 0.00019999658859538687, - "loss": 46.0, - "step": 16337 - }, - { - "epoch": 2.6311043117677846, - "grad_norm": 0.0025312795769423246, - "learning_rate": 0.0001999965881774914, - "loss": 46.0, - "step": 16338 - }, - { - "epoch": 2.631265348846572, - "grad_norm": 0.01011947263032198, - "learning_rate": 0.00019999658775957034, - "loss": 46.0, - "step": 16339 - }, - { - "epoch": 2.6314263859253595, - "grad_norm": 0.00772851100191474, - "learning_rate": 0.0001999965873416237, - "loss": 46.0, - "step": 16340 - }, - { - "epoch": 2.631587423004147, - "grad_norm": 0.0011113417567685246, - "learning_rate": 0.00019999658692365142, - "loss": 46.0, - "step": 16341 - }, - { - "epoch": 2.631748460082934, - "grad_norm": 0.0014731999253854156, - "learning_rate": 0.0001999965865056536, - "loss": 46.0, - "step": 16342 - }, - { - "epoch": 2.6319094971617214, - "grad_norm": 0.0020360820926725864, - "learning_rate": 0.00019999658608763014, - "loss": 46.0, - "step": 16343 - }, - { - "epoch": 2.632070534240509, - "grad_norm": 0.003389952937141061, - "learning_rate": 0.00019999658566958108, - "loss": 46.0, - "step": 16344 - }, - { - "epoch": 2.6322315713192963, - "grad_norm": 0.007669869810342789, - "learning_rate": 0.00019999658525150647, - "loss": 46.0, - "step": 16345 - }, - { - "epoch": 2.6323926083980838, - "grad_norm": 0.0030061539728194475, - "learning_rate": 0.00019999658483340623, - "loss": 46.0, - "step": 16346 - }, - { - "epoch": 2.632553645476871, - "grad_norm": 0.011876963078975677, - "learning_rate": 0.0001999965844152804, - "loss": 46.0, - "step": 16347 - }, - { - "epoch": 2.632714682555658, - "grad_norm": 0.0028468468226492405, - "learning_rate": 0.000199996583997129, - "loss": 46.0, - "step": 16348 - }, - { - "epoch": 2.6328757196344457, - "grad_norm": 0.003951325546950102, - "learning_rate": 0.00019999658357895198, - "loss": 46.0, - "step": 16349 - }, - { - "epoch": 2.633036756713233, - "grad_norm": 0.0010181076359003782, - "learning_rate": 0.0001999965831607494, - "loss": 46.0, - "step": 16350 - }, - { - "epoch": 2.6331977937920206, - "grad_norm": 0.005227404646575451, - "learning_rate": 0.0001999965827425212, - "loss": 46.0, - "step": 16351 - }, - { - "epoch": 2.633358830870808, - "grad_norm": 0.0031755631789565086, - "learning_rate": 0.00019999658232426742, - "loss": 46.0, - "step": 16352 - }, - { - "epoch": 2.6335198679495955, - "grad_norm": 0.008981127291917801, - "learning_rate": 0.00019999658190598805, - "loss": 46.0, - "step": 16353 - }, - { - "epoch": 2.633680905028383, - "grad_norm": 0.0023199168499559164, - "learning_rate": 0.00019999658148768307, - "loss": 46.0, - "step": 16354 - }, - { - "epoch": 2.6338419421071704, - "grad_norm": 0.0010527439881116152, - "learning_rate": 0.0001999965810693525, - "loss": 46.0, - "step": 16355 - }, - { - "epoch": 2.634002979185958, - "grad_norm": 0.0025388584472239017, - "learning_rate": 0.00019999658065099633, - "loss": 46.0, - "step": 16356 - }, - { - "epoch": 2.634164016264745, - "grad_norm": 0.0014624694595113397, - "learning_rate": 0.00019999658023261456, - "loss": 46.0, - "step": 16357 - }, - { - "epoch": 2.6343250533435323, - "grad_norm": 0.0018244596431031823, - "learning_rate": 0.0001999965798142072, - "loss": 46.0, - "step": 16358 - }, - { - "epoch": 2.6344860904223197, - "grad_norm": 0.0009498837753199041, - "learning_rate": 0.00019999657939577428, - "loss": 46.0, - "step": 16359 - }, - { - "epoch": 2.634647127501107, - "grad_norm": 0.0043435897678136826, - "learning_rate": 0.00019999657897731574, - "loss": 46.0, - "step": 16360 - }, - { - "epoch": 2.6348081645798946, - "grad_norm": 0.0012757699005305767, - "learning_rate": 0.0001999965785588316, - "loss": 46.0, - "step": 16361 - }, - { - "epoch": 2.6349692016586816, - "grad_norm": 0.004824684467166662, - "learning_rate": 0.00019999657814032186, - "loss": 46.0, - "step": 16362 - }, - { - "epoch": 2.635130238737469, - "grad_norm": 0.006517571397125721, - "learning_rate": 0.00019999657772178656, - "loss": 46.0, - "step": 16363 - }, - { - "epoch": 2.6352912758162566, - "grad_norm": 0.0072199320420622826, - "learning_rate": 0.00019999657730322565, - "loss": 46.0, - "step": 16364 - }, - { - "epoch": 2.635452312895044, - "grad_norm": 0.0005466794827952981, - "learning_rate": 0.00019999657688463913, - "loss": 46.0, - "step": 16365 - }, - { - "epoch": 2.6356133499738315, - "grad_norm": 0.002083239611238241, - "learning_rate": 0.00019999657646602704, - "loss": 46.0, - "step": 16366 - }, - { - "epoch": 2.635774387052619, - "grad_norm": 0.0070607662200927734, - "learning_rate": 0.00019999657604738931, - "loss": 46.0, - "step": 16367 - }, - { - "epoch": 2.6359354241314064, - "grad_norm": 0.00488640321418643, - "learning_rate": 0.00019999657562872606, - "loss": 46.0, - "step": 16368 - }, - { - "epoch": 2.636096461210194, - "grad_norm": 0.0016258973628282547, - "learning_rate": 0.00019999657521003713, - "loss": 46.0, - "step": 16369 - }, - { - "epoch": 2.6362574982889813, - "grad_norm": 0.0017399077769368887, - "learning_rate": 0.00019999657479132267, - "loss": 46.0, - "step": 16370 - }, - { - "epoch": 2.6364185353677683, - "grad_norm": 0.003619509283453226, - "learning_rate": 0.0001999965743725826, - "loss": 46.0, - "step": 16371 - }, - { - "epoch": 2.6365795724465557, - "grad_norm": 0.00186413642950356, - "learning_rate": 0.00019999657395381693, - "loss": 46.0, - "step": 16372 - }, - { - "epoch": 2.636740609525343, - "grad_norm": 0.0023832248989492655, - "learning_rate": 0.00019999657353502568, - "loss": 46.0, - "step": 16373 - }, - { - "epoch": 2.6369016466041306, - "grad_norm": 0.0005428649019449949, - "learning_rate": 0.00019999657311620882, - "loss": 46.0, - "step": 16374 - }, - { - "epoch": 2.637062683682918, - "grad_norm": 0.000989394960924983, - "learning_rate": 0.00019999657269736636, - "loss": 46.0, - "step": 16375 - }, - { - "epoch": 2.6372237207617055, - "grad_norm": 0.0031070520635694265, - "learning_rate": 0.0001999965722784983, - "loss": 46.0, - "step": 16376 - }, - { - "epoch": 2.6373847578404925, - "grad_norm": 0.0014868798898532987, - "learning_rate": 0.00019999657185960467, - "loss": 46.0, - "step": 16377 - }, - { - "epoch": 2.63754579491928, - "grad_norm": 0.005715236533433199, - "learning_rate": 0.00019999657144068543, - "loss": 46.0, - "step": 16378 - }, - { - "epoch": 2.6377068319980674, - "grad_norm": 0.0018072109669446945, - "learning_rate": 0.00019999657102174063, - "loss": 46.0, - "step": 16379 - }, - { - "epoch": 2.637867869076855, - "grad_norm": 0.0011455584317445755, - "learning_rate": 0.00019999657060277022, - "loss": 46.0, - "step": 16380 - }, - { - "epoch": 2.6380289061556423, - "grad_norm": 0.0022247028537094593, - "learning_rate": 0.00019999657018377417, - "loss": 46.0, - "step": 16381 - }, - { - "epoch": 2.63818994323443, - "grad_norm": 0.0022691092453897, - "learning_rate": 0.00019999656976475258, - "loss": 46.0, - "step": 16382 - }, - { - "epoch": 2.6383509803132172, - "grad_norm": 0.0019194961059838533, - "learning_rate": 0.00019999656934570537, - "loss": 46.0, - "step": 16383 - }, - { - "epoch": 2.6385120173920047, - "grad_norm": 0.0031690141186118126, - "learning_rate": 0.00019999656892663259, - "loss": 46.0, - "step": 16384 - }, - { - "epoch": 2.638673054470792, - "grad_norm": 0.0012817782117053866, - "learning_rate": 0.00019999656850753418, - "loss": 46.0, - "step": 16385 - }, - { - "epoch": 2.638834091549579, - "grad_norm": 0.01577017828822136, - "learning_rate": 0.00019999656808841022, - "loss": 46.0, - "step": 16386 - }, - { - "epoch": 2.6389951286283666, - "grad_norm": 0.0030287159606814384, - "learning_rate": 0.00019999656766926064, - "loss": 46.0, - "step": 16387 - }, - { - "epoch": 2.639156165707154, - "grad_norm": 0.009831307455897331, - "learning_rate": 0.00019999656725008547, - "loss": 46.0, - "step": 16388 - }, - { - "epoch": 2.6393172027859415, - "grad_norm": 0.005242469254881144, - "learning_rate": 0.0001999965668308847, - "loss": 46.0, - "step": 16389 - }, - { - "epoch": 2.639478239864729, - "grad_norm": 0.0026832332368940115, - "learning_rate": 0.00019999656641165833, - "loss": 46.0, - "step": 16390 - }, - { - "epoch": 2.6396392769435164, - "grad_norm": 0.003587692975997925, - "learning_rate": 0.0001999965659924064, - "loss": 46.0, - "step": 16391 - }, - { - "epoch": 2.6398003140223034, - "grad_norm": 0.005455745849758387, - "learning_rate": 0.00019999656557312886, - "loss": 46.0, - "step": 16392 - }, - { - "epoch": 2.639961351101091, - "grad_norm": 0.0031906720250844955, - "learning_rate": 0.00019999656515382568, - "loss": 46.0, - "step": 16393 - }, - { - "epoch": 2.6401223881798783, - "grad_norm": 0.00945283193141222, - "learning_rate": 0.00019999656473449696, - "loss": 46.0, - "step": 16394 - }, - { - "epoch": 2.6402834252586658, - "grad_norm": 0.0037267126608639956, - "learning_rate": 0.00019999656431514263, - "loss": 46.0, - "step": 16395 - }, - { - "epoch": 2.6404444623374532, - "grad_norm": 0.0008340401109308004, - "learning_rate": 0.00019999656389576269, - "loss": 46.0, - "step": 16396 - }, - { - "epoch": 2.6406054994162407, - "grad_norm": 0.0025725211016833782, - "learning_rate": 0.0001999965634763572, - "loss": 46.0, - "step": 16397 - }, - { - "epoch": 2.640766536495028, - "grad_norm": 0.0006869092467240989, - "learning_rate": 0.0001999965630569261, - "loss": 46.0, - "step": 16398 - }, - { - "epoch": 2.6409275735738156, - "grad_norm": 0.0019361295271664858, - "learning_rate": 0.00019999656263746936, - "loss": 46.0, - "step": 16399 - }, - { - "epoch": 2.641088610652603, - "grad_norm": 0.008035583421587944, - "learning_rate": 0.0001999965622179871, - "loss": 46.0, - "step": 16400 - }, - { - "epoch": 2.64124964773139, - "grad_norm": 0.003356541972607374, - "learning_rate": 0.00019999656179847918, - "loss": 46.0, - "step": 16401 - }, - { - "epoch": 2.6414106848101775, - "grad_norm": 0.003267604624852538, - "learning_rate": 0.0001999965613789457, - "loss": 46.0, - "step": 16402 - }, - { - "epoch": 2.641571721888965, - "grad_norm": 0.0013382801553234458, - "learning_rate": 0.0001999965609593866, - "loss": 46.0, - "step": 16403 - }, - { - "epoch": 2.6417327589677524, - "grad_norm": 0.010065632872283459, - "learning_rate": 0.00019999656053980194, - "loss": 46.0, - "step": 16404 - }, - { - "epoch": 2.64189379604654, - "grad_norm": 0.0026141307316720486, - "learning_rate": 0.00019999656012019168, - "loss": 46.0, - "step": 16405 - }, - { - "epoch": 2.642054833125327, - "grad_norm": 0.0028353743255138397, - "learning_rate": 0.0001999965597005558, - "loss": 46.0, - "step": 16406 - }, - { - "epoch": 2.6422158702041143, - "grad_norm": 0.009485107846558094, - "learning_rate": 0.00019999655928089433, - "loss": 46.0, - "step": 16407 - }, - { - "epoch": 2.6423769072829018, - "grad_norm": 0.0012441881699487567, - "learning_rate": 0.00019999655886120729, - "loss": 46.0, - "step": 16408 - }, - { - "epoch": 2.642537944361689, - "grad_norm": 0.0043039084412157536, - "learning_rate": 0.00019999655844149463, - "loss": 46.0, - "step": 16409 - }, - { - "epoch": 2.6426989814404767, - "grad_norm": 0.004510168451815844, - "learning_rate": 0.0001999965580217564, - "loss": 46.0, - "step": 16410 - }, - { - "epoch": 2.642860018519264, - "grad_norm": 0.0011374372988939285, - "learning_rate": 0.00019999655760199257, - "loss": 46.0, - "step": 16411 - }, - { - "epoch": 2.6430210555980516, - "grad_norm": 0.0026707665529102087, - "learning_rate": 0.00019999655718220315, - "loss": 46.0, - "step": 16412 - }, - { - "epoch": 2.643182092676839, - "grad_norm": 0.0004974910407327116, - "learning_rate": 0.00019999655676238812, - "loss": 46.0, - "step": 16413 - }, - { - "epoch": 2.6433431297556265, - "grad_norm": 0.0034076026640832424, - "learning_rate": 0.0001999965563425475, - "loss": 46.0, - "step": 16414 - }, - { - "epoch": 2.6435041668344135, - "grad_norm": 0.0019066615495830774, - "learning_rate": 0.0001999965559226813, - "loss": 46.0, - "step": 16415 - }, - { - "epoch": 2.643665203913201, - "grad_norm": 0.005310556851327419, - "learning_rate": 0.0001999965555027895, - "loss": 46.0, - "step": 16416 - }, - { - "epoch": 2.6438262409919884, - "grad_norm": 0.003847218118607998, - "learning_rate": 0.00019999655508287208, - "loss": 46.0, - "step": 16417 - }, - { - "epoch": 2.643987278070776, - "grad_norm": 0.015537664294242859, - "learning_rate": 0.0001999965546629291, - "loss": 46.0, - "step": 16418 - }, - { - "epoch": 2.6441483151495633, - "grad_norm": 0.0014112257631495595, - "learning_rate": 0.00019999655424296053, - "loss": 46.0, - "step": 16419 - }, - { - "epoch": 2.6443093522283507, - "grad_norm": 0.0019733665976673365, - "learning_rate": 0.00019999655382296636, - "loss": 46.0, - "step": 16420 - }, - { - "epoch": 2.6444703893071377, - "grad_norm": 0.0028667664155364037, - "learning_rate": 0.00019999655340294657, - "loss": 46.0, - "step": 16421 - }, - { - "epoch": 2.644631426385925, - "grad_norm": 0.0013341999147087336, - "learning_rate": 0.0001999965529829012, - "loss": 46.0, - "step": 16422 - }, - { - "epoch": 2.6447924634647126, - "grad_norm": 0.0019776078406721354, - "learning_rate": 0.00019999655256283024, - "loss": 46.0, - "step": 16423 - }, - { - "epoch": 2.6449535005435, - "grad_norm": 0.0022960947826504707, - "learning_rate": 0.0001999965521427337, - "loss": 46.0, - "step": 16424 - }, - { - "epoch": 2.6451145376222875, - "grad_norm": 0.0014121850254014134, - "learning_rate": 0.00019999655172261153, - "loss": 46.0, - "step": 16425 - }, - { - "epoch": 2.645275574701075, - "grad_norm": 0.0008594037499278784, - "learning_rate": 0.0001999965513024638, - "loss": 46.0, - "step": 16426 - }, - { - "epoch": 2.6454366117798624, - "grad_norm": 0.0009809122420847416, - "learning_rate": 0.00019999655088229047, - "loss": 46.0, - "step": 16427 - }, - { - "epoch": 2.64559764885865, - "grad_norm": 0.0013061140198260546, - "learning_rate": 0.00019999655046209155, - "loss": 46.0, - "step": 16428 - }, - { - "epoch": 2.6457586859374373, - "grad_norm": 0.006480553187429905, - "learning_rate": 0.000199996550041867, - "loss": 46.0, - "step": 16429 - }, - { - "epoch": 2.6459197230162244, - "grad_norm": 0.0007608724990859628, - "learning_rate": 0.00019999654962161686, - "loss": 46.0, - "step": 16430 - }, - { - "epoch": 2.646080760095012, - "grad_norm": 0.0014058458618819714, - "learning_rate": 0.00019999654920134117, - "loss": 46.0, - "step": 16431 - }, - { - "epoch": 2.6462417971737993, - "grad_norm": 0.004803846590220928, - "learning_rate": 0.00019999654878103987, - "loss": 46.0, - "step": 16432 - }, - { - "epoch": 2.6464028342525867, - "grad_norm": 0.0008249848615378141, - "learning_rate": 0.00019999654836071296, - "loss": 46.0, - "step": 16433 - }, - { - "epoch": 2.646563871331374, - "grad_norm": 0.0007507161935791373, - "learning_rate": 0.00019999654794036046, - "loss": 46.0, - "step": 16434 - }, - { - "epoch": 2.646724908410161, - "grad_norm": 0.0018223902443423867, - "learning_rate": 0.00019999654751998237, - "loss": 46.0, - "step": 16435 - }, - { - "epoch": 2.6468859454889486, - "grad_norm": 0.0004306924529373646, - "learning_rate": 0.0001999965470995787, - "loss": 46.0, - "step": 16436 - }, - { - "epoch": 2.647046982567736, - "grad_norm": 0.004863760434091091, - "learning_rate": 0.00019999654667914943, - "loss": 46.0, - "step": 16437 - }, - { - "epoch": 2.6472080196465235, - "grad_norm": 0.005212968215346336, - "learning_rate": 0.00019999654625869453, - "loss": 46.0, - "step": 16438 - }, - { - "epoch": 2.647369056725311, - "grad_norm": 0.0017275518039241433, - "learning_rate": 0.00019999654583821407, - "loss": 46.0, - "step": 16439 - }, - { - "epoch": 2.6475300938040984, - "grad_norm": 0.004870988428592682, - "learning_rate": 0.00019999654541770801, - "loss": 46.0, - "step": 16440 - }, - { - "epoch": 2.647691130882886, - "grad_norm": 0.0032627710606902838, - "learning_rate": 0.00019999654499717638, - "loss": 46.0, - "step": 16441 - }, - { - "epoch": 2.6478521679616733, - "grad_norm": 0.001680032815784216, - "learning_rate": 0.00019999654457661912, - "loss": 46.0, - "step": 16442 - }, - { - "epoch": 2.648013205040461, - "grad_norm": 0.0036580024752765894, - "learning_rate": 0.00019999654415603629, - "loss": 46.0, - "step": 16443 - }, - { - "epoch": 2.6481742421192482, - "grad_norm": 0.002199007896706462, - "learning_rate": 0.00019999654373542783, - "loss": 46.0, - "step": 16444 - }, - { - "epoch": 2.6483352791980352, - "grad_norm": 0.001317737391218543, - "learning_rate": 0.00019999654331479382, - "loss": 46.0, - "step": 16445 - }, - { - "epoch": 2.6484963162768227, - "grad_norm": 0.007964441552758217, - "learning_rate": 0.00019999654289413422, - "loss": 46.0, - "step": 16446 - }, - { - "epoch": 2.64865735335561, - "grad_norm": 0.0014704192290082574, - "learning_rate": 0.000199996542473449, - "loss": 46.0, - "step": 16447 - }, - { - "epoch": 2.6488183904343976, - "grad_norm": 0.0015728285070508718, - "learning_rate": 0.00019999654205273817, - "loss": 46.0, - "step": 16448 - }, - { - "epoch": 2.648979427513185, - "grad_norm": 0.004670578520745039, - "learning_rate": 0.00019999654163200176, - "loss": 46.0, - "step": 16449 - }, - { - "epoch": 2.649140464591972, - "grad_norm": 0.0008197281276807189, - "learning_rate": 0.00019999654121123975, - "loss": 46.0, - "step": 16450 - }, - { - "epoch": 2.6493015016707595, - "grad_norm": 0.0018704469548538327, - "learning_rate": 0.0001999965407904522, - "loss": 46.0, - "step": 16451 - }, - { - "epoch": 2.649462538749547, - "grad_norm": 0.004465532023459673, - "learning_rate": 0.00019999654036963898, - "loss": 46.0, - "step": 16452 - }, - { - "epoch": 2.6496235758283344, - "grad_norm": 0.0020445717964321375, - "learning_rate": 0.0001999965399488002, - "loss": 46.0, - "step": 16453 - }, - { - "epoch": 2.649784612907122, - "grad_norm": 0.006952350027859211, - "learning_rate": 0.0001999965395279358, - "loss": 46.0, - "step": 16454 - }, - { - "epoch": 2.6499456499859093, - "grad_norm": 0.0012763794511556625, - "learning_rate": 0.00019999653910704584, - "loss": 46.0, - "step": 16455 - }, - { - "epoch": 2.6501066870646968, - "grad_norm": 0.0048883287236094475, - "learning_rate": 0.00019999653868613032, - "loss": 46.0, - "step": 16456 - }, - { - "epoch": 2.650267724143484, - "grad_norm": 0.005108126439154148, - "learning_rate": 0.00019999653826518915, - "loss": 46.0, - "step": 16457 - }, - { - "epoch": 2.6504287612222717, - "grad_norm": 0.0032781974878162146, - "learning_rate": 0.0001999965378442224, - "loss": 46.0, - "step": 16458 - }, - { - "epoch": 2.6505897983010587, - "grad_norm": 0.002586958697065711, - "learning_rate": 0.00019999653742323002, - "loss": 46.0, - "step": 16459 - }, - { - "epoch": 2.650750835379846, - "grad_norm": 0.0010242596035823226, - "learning_rate": 0.0001999965370022121, - "loss": 46.0, - "step": 16460 - }, - { - "epoch": 2.6509118724586336, - "grad_norm": 0.0031969924457371235, - "learning_rate": 0.00019999653658116858, - "loss": 46.0, - "step": 16461 - }, - { - "epoch": 2.651072909537421, - "grad_norm": 0.003769185161218047, - "learning_rate": 0.00019999653616009945, - "loss": 46.0, - "step": 16462 - }, - { - "epoch": 2.6512339466162085, - "grad_norm": 0.009780793450772762, - "learning_rate": 0.00019999653573900473, - "loss": 46.0, - "step": 16463 - }, - { - "epoch": 2.651394983694996, - "grad_norm": 0.0017458347138017416, - "learning_rate": 0.0001999965353178844, - "loss": 46.0, - "step": 16464 - }, - { - "epoch": 2.651556020773783, - "grad_norm": 0.0035560824908316135, - "learning_rate": 0.0001999965348967385, - "loss": 46.0, - "step": 16465 - }, - { - "epoch": 2.6517170578525704, - "grad_norm": 0.007063970435410738, - "learning_rate": 0.000199996534475567, - "loss": 46.0, - "step": 16466 - }, - { - "epoch": 2.651878094931358, - "grad_norm": 0.0006652420852333307, - "learning_rate": 0.0001999965340543699, - "loss": 46.0, - "step": 16467 - }, - { - "epoch": 2.6520391320101453, - "grad_norm": 0.0009053508983924985, - "learning_rate": 0.00019999653363314722, - "loss": 46.0, - "step": 16468 - }, - { - "epoch": 2.6522001690889327, - "grad_norm": 0.004392145201563835, - "learning_rate": 0.00019999653321189892, - "loss": 46.0, - "step": 16469 - }, - { - "epoch": 2.65236120616772, - "grad_norm": 0.006357663776725531, - "learning_rate": 0.00019999653279062504, - "loss": 46.0, - "step": 16470 - }, - { - "epoch": 2.6525222432465076, - "grad_norm": 0.0048867687582969666, - "learning_rate": 0.00019999653236932557, - "loss": 46.0, - "step": 16471 - }, - { - "epoch": 2.652683280325295, - "grad_norm": 0.006181687116622925, - "learning_rate": 0.00019999653194800051, - "loss": 46.0, - "step": 16472 - }, - { - "epoch": 2.6528443174040826, - "grad_norm": 0.0037122254725545645, - "learning_rate": 0.00019999653152664984, - "loss": 46.0, - "step": 16473 - }, - { - "epoch": 2.6530053544828696, - "grad_norm": 0.006071495357900858, - "learning_rate": 0.0001999965311052736, - "loss": 46.0, - "step": 16474 - }, - { - "epoch": 2.653166391561657, - "grad_norm": 0.0028920916374772787, - "learning_rate": 0.00019999653068387177, - "loss": 46.0, - "step": 16475 - }, - { - "epoch": 2.6533274286404445, - "grad_norm": 0.0030148280784487724, - "learning_rate": 0.0001999965302624443, - "loss": 46.0, - "step": 16476 - }, - { - "epoch": 2.653488465719232, - "grad_norm": 0.004583471920341253, - "learning_rate": 0.00019999652984099126, - "loss": 46.0, - "step": 16477 - }, - { - "epoch": 2.6536495027980194, - "grad_norm": 0.004693299997597933, - "learning_rate": 0.00019999652941951262, - "loss": 46.0, - "step": 16478 - }, - { - "epoch": 2.6538105398768064, - "grad_norm": 0.0014970963820815086, - "learning_rate": 0.0001999965289980084, - "loss": 46.0, - "step": 16479 - }, - { - "epoch": 2.653971576955594, - "grad_norm": 0.009805742651224136, - "learning_rate": 0.0001999965285764786, - "loss": 46.0, - "step": 16480 - }, - { - "epoch": 2.6541326140343813, - "grad_norm": 0.0033442846033722162, - "learning_rate": 0.00019999652815492317, - "loss": 46.0, - "step": 16481 - }, - { - "epoch": 2.6542936511131687, - "grad_norm": 0.0008747574174776673, - "learning_rate": 0.00019999652773334216, - "loss": 46.0, - "step": 16482 - }, - { - "epoch": 2.654454688191956, - "grad_norm": 0.0018716448685154319, - "learning_rate": 0.0001999965273117356, - "loss": 46.0, - "step": 16483 - }, - { - "epoch": 2.6546157252707436, - "grad_norm": 0.000537240644916892, - "learning_rate": 0.0001999965268901034, - "loss": 46.0, - "step": 16484 - }, - { - "epoch": 2.654776762349531, - "grad_norm": 0.0037817684933543205, - "learning_rate": 0.0001999965264684456, - "loss": 46.0, - "step": 16485 - }, - { - "epoch": 2.6549377994283185, - "grad_norm": 0.005018272902816534, - "learning_rate": 0.00019999652604676222, - "loss": 46.0, - "step": 16486 - }, - { - "epoch": 2.655098836507106, - "grad_norm": 0.0030136287678033113, - "learning_rate": 0.00019999652562505325, - "loss": 46.0, - "step": 16487 - }, - { - "epoch": 2.655259873585893, - "grad_norm": 0.0029723059851676226, - "learning_rate": 0.00019999652520331866, - "loss": 46.0, - "step": 16488 - }, - { - "epoch": 2.6554209106646804, - "grad_norm": 0.00226042652502656, - "learning_rate": 0.00019999652478155851, - "loss": 46.0, - "step": 16489 - }, - { - "epoch": 2.655581947743468, - "grad_norm": 0.0018430107738822699, - "learning_rate": 0.00019999652435977275, - "loss": 46.0, - "step": 16490 - }, - { - "epoch": 2.6557429848222553, - "grad_norm": 0.0017959438264369965, - "learning_rate": 0.0001999965239379614, - "loss": 46.0, - "step": 16491 - }, - { - "epoch": 2.655904021901043, - "grad_norm": 0.0033439442049711943, - "learning_rate": 0.00019999652351612444, - "loss": 46.0, - "step": 16492 - }, - { - "epoch": 2.6560650589798303, - "grad_norm": 0.0006606042152270675, - "learning_rate": 0.00019999652309426192, - "loss": 46.0, - "step": 16493 - }, - { - "epoch": 2.6562260960586173, - "grad_norm": 0.001083419076167047, - "learning_rate": 0.00019999652267237375, - "loss": 46.0, - "step": 16494 - }, - { - "epoch": 2.6563871331374047, - "grad_norm": 0.0024354367051273584, - "learning_rate": 0.00019999652225046006, - "loss": 46.0, - "step": 16495 - }, - { - "epoch": 2.656548170216192, - "grad_norm": 0.008430845104157925, - "learning_rate": 0.00019999652182852074, - "loss": 46.0, - "step": 16496 - }, - { - "epoch": 2.6567092072949796, - "grad_norm": 0.0030144881457090378, - "learning_rate": 0.0001999965214065558, - "loss": 46.0, - "step": 16497 - }, - { - "epoch": 2.656870244373767, - "grad_norm": 0.0049331337213516235, - "learning_rate": 0.0001999965209845653, - "loss": 46.0, - "step": 16498 - }, - { - "epoch": 2.6570312814525545, - "grad_norm": 0.0020491608884185553, - "learning_rate": 0.00019999652056254918, - "loss": 46.0, - "step": 16499 - }, - { - "epoch": 2.657192318531342, - "grad_norm": 0.006340573076158762, - "learning_rate": 0.0001999965201405075, - "loss": 46.0, - "step": 16500 - }, - { - "epoch": 2.6573533556101294, - "grad_norm": 0.014400620944797993, - "learning_rate": 0.00019999651971844021, - "loss": 46.0, - "step": 16501 - }, - { - "epoch": 2.657514392688917, - "grad_norm": 0.0008878426742739975, - "learning_rate": 0.00019999651929634732, - "loss": 46.0, - "step": 16502 - }, - { - "epoch": 2.657675429767704, - "grad_norm": 0.0006870725192129612, - "learning_rate": 0.00019999651887422882, - "loss": 46.0, - "step": 16503 - }, - { - "epoch": 2.6578364668464913, - "grad_norm": 0.011201038025319576, - "learning_rate": 0.00019999651845208476, - "loss": 46.0, - "step": 16504 - }, - { - "epoch": 2.657997503925279, - "grad_norm": 0.0021067147608846426, - "learning_rate": 0.00019999651802991508, - "loss": 46.0, - "step": 16505 - }, - { - "epoch": 2.6581585410040662, - "grad_norm": 0.002912551397457719, - "learning_rate": 0.00019999651760771982, - "loss": 46.0, - "step": 16506 - }, - { - "epoch": 2.6583195780828537, - "grad_norm": 0.002470936393365264, - "learning_rate": 0.000199996517185499, - "loss": 46.0, - "step": 16507 - }, - { - "epoch": 2.6584806151616407, - "grad_norm": 0.0007000712212175131, - "learning_rate": 0.00019999651676325252, - "loss": 46.0, - "step": 16508 - }, - { - "epoch": 2.658641652240428, - "grad_norm": 0.001906967838294804, - "learning_rate": 0.0001999965163409805, - "loss": 46.0, - "step": 16509 - }, - { - "epoch": 2.6588026893192156, - "grad_norm": 0.010152967646718025, - "learning_rate": 0.00019999651591868286, - "loss": 46.0, - "step": 16510 - }, - { - "epoch": 2.658963726398003, - "grad_norm": 0.004215776454657316, - "learning_rate": 0.00019999651549635963, - "loss": 46.0, - "step": 16511 - }, - { - "epoch": 2.6591247634767905, - "grad_norm": 0.0021116817370057106, - "learning_rate": 0.00019999651507401081, - "loss": 46.0, - "step": 16512 - }, - { - "epoch": 2.659285800555578, - "grad_norm": 0.005937662906944752, - "learning_rate": 0.00019999651465163636, - "loss": 46.0, - "step": 16513 - }, - { - "epoch": 2.6594468376343654, - "grad_norm": 0.0008011190802790225, - "learning_rate": 0.00019999651422923634, - "loss": 46.0, - "step": 16514 - }, - { - "epoch": 2.659607874713153, - "grad_norm": 0.0013191973557695746, - "learning_rate": 0.00019999651380681074, - "loss": 46.0, - "step": 16515 - }, - { - "epoch": 2.6597689117919403, - "grad_norm": 0.003918023779988289, - "learning_rate": 0.00019999651338435955, - "loss": 46.0, - "step": 16516 - }, - { - "epoch": 2.6599299488707278, - "grad_norm": 0.004984579514712095, - "learning_rate": 0.00019999651296188274, - "loss": 46.0, - "step": 16517 - }, - { - "epoch": 2.6600909859495148, - "grad_norm": 0.0013943187659606338, - "learning_rate": 0.00019999651253938038, - "loss": 46.0, - "step": 16518 - }, - { - "epoch": 2.660252023028302, - "grad_norm": 0.010174227878451347, - "learning_rate": 0.00019999651211685237, - "loss": 46.0, - "step": 16519 - }, - { - "epoch": 2.6604130601070897, - "grad_norm": 0.0063035693019628525, - "learning_rate": 0.0001999965116942988, - "loss": 46.0, - "step": 16520 - }, - { - "epoch": 2.660574097185877, - "grad_norm": 0.0035327086225152016, - "learning_rate": 0.00019999651127171962, - "loss": 46.0, - "step": 16521 - }, - { - "epoch": 2.6607351342646646, - "grad_norm": 0.0023248244542628527, - "learning_rate": 0.00019999651084911486, - "loss": 46.0, - "step": 16522 - }, - { - "epoch": 2.6608961713434516, - "grad_norm": 0.000654800096526742, - "learning_rate": 0.0001999965104264845, - "loss": 46.0, - "step": 16523 - }, - { - "epoch": 2.661057208422239, - "grad_norm": 0.0014816172188147902, - "learning_rate": 0.00019999651000382856, - "loss": 46.0, - "step": 16524 - }, - { - "epoch": 2.6612182455010265, - "grad_norm": 0.00244570872746408, - "learning_rate": 0.000199996509581147, - "loss": 46.0, - "step": 16525 - }, - { - "epoch": 2.661379282579814, - "grad_norm": 0.0023470923770219088, - "learning_rate": 0.00019999650915843986, - "loss": 46.0, - "step": 16526 - }, - { - "epoch": 2.6615403196586014, - "grad_norm": 0.004165975842624903, - "learning_rate": 0.00019999650873570715, - "loss": 46.0, - "step": 16527 - }, - { - "epoch": 2.661701356737389, - "grad_norm": 0.017686238512396812, - "learning_rate": 0.0001999965083129488, - "loss": 46.0, - "step": 16528 - }, - { - "epoch": 2.6618623938161763, - "grad_norm": 0.003436204744502902, - "learning_rate": 0.00019999650789016488, - "loss": 46.0, - "step": 16529 - }, - { - "epoch": 2.6620234308949637, - "grad_norm": 0.0025715362280607224, - "learning_rate": 0.00019999650746735536, - "loss": 46.0, - "step": 16530 - }, - { - "epoch": 2.662184467973751, - "grad_norm": 0.0018459368729963899, - "learning_rate": 0.00019999650704452028, - "loss": 46.0, - "step": 16531 - }, - { - "epoch": 2.662345505052538, - "grad_norm": 0.0055804126895964146, - "learning_rate": 0.00019999650662165953, - "loss": 46.0, - "step": 16532 - }, - { - "epoch": 2.6625065421313256, - "grad_norm": 0.0012605715310201049, - "learning_rate": 0.00019999650619877325, - "loss": 46.0, - "step": 16533 - }, - { - "epoch": 2.662667579210113, - "grad_norm": 0.009583881124854088, - "learning_rate": 0.00019999650577586135, - "loss": 46.0, - "step": 16534 - }, - { - "epoch": 2.6628286162889006, - "grad_norm": 0.0037427470088005066, - "learning_rate": 0.00019999650535292387, - "loss": 46.0, - "step": 16535 - }, - { - "epoch": 2.662989653367688, - "grad_norm": 0.004453995730727911, - "learning_rate": 0.00019999650492996077, - "loss": 46.0, - "step": 16536 - }, - { - "epoch": 2.6631506904464755, - "grad_norm": 0.0017059422098100185, - "learning_rate": 0.0001999965045069721, - "loss": 46.0, - "step": 16537 - }, - { - "epoch": 2.6633117275252625, - "grad_norm": 0.0028158461209386587, - "learning_rate": 0.00019999650408395784, - "loss": 46.0, - "step": 16538 - }, - { - "epoch": 2.66347276460405, - "grad_norm": 0.004828350618481636, - "learning_rate": 0.00019999650366091799, - "loss": 46.0, - "step": 16539 - }, - { - "epoch": 2.6636338016828374, - "grad_norm": 0.0024651936255395412, - "learning_rate": 0.00019999650323785254, - "loss": 46.0, - "step": 16540 - }, - { - "epoch": 2.663794838761625, - "grad_norm": 0.004665489308536053, - "learning_rate": 0.00019999650281476145, - "loss": 46.0, - "step": 16541 - }, - { - "epoch": 2.6639558758404123, - "grad_norm": 0.0008253231062553823, - "learning_rate": 0.00019999650239164483, - "loss": 46.0, - "step": 16542 - }, - { - "epoch": 2.6641169129191997, - "grad_norm": 0.0009690801962278783, - "learning_rate": 0.00019999650196850257, - "loss": 46.0, - "step": 16543 - }, - { - "epoch": 2.664277949997987, - "grad_norm": 0.0010703481966629624, - "learning_rate": 0.00019999650154533475, - "loss": 46.0, - "step": 16544 - }, - { - "epoch": 2.6644389870767746, - "grad_norm": 0.006409147754311562, - "learning_rate": 0.00019999650112214132, - "loss": 46.0, - "step": 16545 - }, - { - "epoch": 2.664600024155562, - "grad_norm": 0.0008583225426264107, - "learning_rate": 0.00019999650069892232, - "loss": 46.0, - "step": 16546 - }, - { - "epoch": 2.664761061234349, - "grad_norm": 0.005577814765274525, - "learning_rate": 0.00019999650027567768, - "loss": 46.0, - "step": 16547 - }, - { - "epoch": 2.6649220983131365, - "grad_norm": 0.002359146950766444, - "learning_rate": 0.00019999649985240749, - "loss": 46.0, - "step": 16548 - }, - { - "epoch": 2.665083135391924, - "grad_norm": 0.0012180627090856433, - "learning_rate": 0.0001999964994291117, - "loss": 46.0, - "step": 16549 - }, - { - "epoch": 2.6652441724707114, - "grad_norm": 0.0014827927807345986, - "learning_rate": 0.00019999649900579028, - "loss": 46.0, - "step": 16550 - }, - { - "epoch": 2.665405209549499, - "grad_norm": 0.006505758035928011, - "learning_rate": 0.0001999964985824433, - "loss": 46.0, - "step": 16551 - }, - { - "epoch": 2.665566246628286, - "grad_norm": 0.0057903132401406765, - "learning_rate": 0.00019999649815907072, - "loss": 46.0, - "step": 16552 - }, - { - "epoch": 2.6657272837070733, - "grad_norm": 0.006674161646515131, - "learning_rate": 0.00019999649773567253, - "loss": 46.0, - "step": 16553 - }, - { - "epoch": 2.665888320785861, - "grad_norm": 0.009513991884887218, - "learning_rate": 0.00019999649731224876, - "loss": 46.0, - "step": 16554 - }, - { - "epoch": 2.6660493578646483, - "grad_norm": 0.008672289550304413, - "learning_rate": 0.00019999649688879937, - "loss": 46.0, - "step": 16555 - }, - { - "epoch": 2.6662103949434357, - "grad_norm": 0.0022527240216732025, - "learning_rate": 0.00019999649646532445, - "loss": 46.0, - "step": 16556 - }, - { - "epoch": 2.666371432022223, - "grad_norm": 0.005996780004352331, - "learning_rate": 0.00019999649604182386, - "loss": 46.0, - "step": 16557 - }, - { - "epoch": 2.6665324691010106, - "grad_norm": 0.001205631298944354, - "learning_rate": 0.00019999649561829773, - "loss": 46.0, - "step": 16558 - }, - { - "epoch": 2.666693506179798, - "grad_norm": 0.002092850860208273, - "learning_rate": 0.000199996495194746, - "loss": 46.0, - "step": 16559 - }, - { - "epoch": 2.6668545432585855, - "grad_norm": 0.002493825741112232, - "learning_rate": 0.00019999649477116864, - "loss": 46.0, - "step": 16560 - }, - { - "epoch": 2.6670155803373725, - "grad_norm": 0.0016343793831765652, - "learning_rate": 0.0001999964943475657, - "loss": 46.0, - "step": 16561 - }, - { - "epoch": 2.66717661741616, - "grad_norm": 0.001397967105731368, - "learning_rate": 0.0001999964939239372, - "loss": 46.0, - "step": 16562 - }, - { - "epoch": 2.6673376544949474, - "grad_norm": 0.003354318905621767, - "learning_rate": 0.00019999649350028307, - "loss": 46.0, - "step": 16563 - }, - { - "epoch": 2.667498691573735, - "grad_norm": 0.0009036913397721946, - "learning_rate": 0.00019999649307660337, - "loss": 46.0, - "step": 16564 - }, - { - "epoch": 2.6676597286525223, - "grad_norm": 0.0015470064245164394, - "learning_rate": 0.00019999649265289805, - "loss": 46.0, - "step": 16565 - }, - { - "epoch": 2.6678207657313098, - "grad_norm": 0.005684650503098965, - "learning_rate": 0.00019999649222916715, - "loss": 46.0, - "step": 16566 - }, - { - "epoch": 2.667981802810097, - "grad_norm": 0.0037616060581058264, - "learning_rate": 0.00019999649180541063, - "loss": 46.0, - "step": 16567 - }, - { - "epoch": 2.6681428398888842, - "grad_norm": 0.005537806544452906, - "learning_rate": 0.00019999649138162856, - "loss": 46.0, - "step": 16568 - }, - { - "epoch": 2.6683038769676717, - "grad_norm": 0.000822942063678056, - "learning_rate": 0.0001999964909578209, - "loss": 46.0, - "step": 16569 - }, - { - "epoch": 2.668464914046459, - "grad_norm": 0.004411452449858189, - "learning_rate": 0.00019999649053398762, - "loss": 46.0, - "step": 16570 - }, - { - "epoch": 2.6686259511252466, - "grad_norm": 0.005157234612852335, - "learning_rate": 0.00019999649011012872, - "loss": 46.0, - "step": 16571 - }, - { - "epoch": 2.668786988204034, - "grad_norm": 0.0033159011509269476, - "learning_rate": 0.0001999964896862443, - "loss": 46.0, - "step": 16572 - }, - { - "epoch": 2.6689480252828215, - "grad_norm": 0.009530625306069851, - "learning_rate": 0.00019999648926233423, - "loss": 46.0, - "step": 16573 - }, - { - "epoch": 2.669109062361609, - "grad_norm": 0.0070307631976902485, - "learning_rate": 0.00019999648883839855, - "loss": 46.0, - "step": 16574 - }, - { - "epoch": 2.6692700994403964, - "grad_norm": 0.0067693330347537994, - "learning_rate": 0.0001999964884144373, - "loss": 46.0, - "step": 16575 - }, - { - "epoch": 2.6694311365191834, - "grad_norm": 0.008922696113586426, - "learning_rate": 0.00019999648799045049, - "loss": 46.0, - "step": 16576 - }, - { - "epoch": 2.669592173597971, - "grad_norm": 0.003183950437232852, - "learning_rate": 0.00019999648756643804, - "loss": 46.0, - "step": 16577 - }, - { - "epoch": 2.6697532106767583, - "grad_norm": 0.008305241353809834, - "learning_rate": 0.00019999648714240001, - "loss": 46.0, - "step": 16578 - }, - { - "epoch": 2.6699142477555458, - "grad_norm": 0.0006374673102982342, - "learning_rate": 0.00019999648671833643, - "loss": 46.0, - "step": 16579 - }, - { - "epoch": 2.670075284834333, - "grad_norm": 0.003601290052756667, - "learning_rate": 0.0001999964862942472, - "loss": 46.0, - "step": 16580 - }, - { - "epoch": 2.6702363219131207, - "grad_norm": 0.002714934991672635, - "learning_rate": 0.00019999648587013238, - "loss": 46.0, - "step": 16581 - }, - { - "epoch": 2.6703973589919077, - "grad_norm": 0.00787340011447668, - "learning_rate": 0.00019999648544599197, - "loss": 46.0, - "step": 16582 - }, - { - "epoch": 2.670558396070695, - "grad_norm": 0.003337181406095624, - "learning_rate": 0.00019999648502182598, - "loss": 46.0, - "step": 16583 - }, - { - "epoch": 2.6707194331494826, - "grad_norm": 0.0009643948287703097, - "learning_rate": 0.00019999648459763437, - "loss": 46.0, - "step": 16584 - }, - { - "epoch": 2.67088047022827, - "grad_norm": 0.0015101605094969273, - "learning_rate": 0.0001999964841734172, - "loss": 46.0, - "step": 16585 - }, - { - "epoch": 2.6710415073070575, - "grad_norm": 0.002747853985056281, - "learning_rate": 0.0001999964837491744, - "loss": 46.0, - "step": 16586 - }, - { - "epoch": 2.671202544385845, - "grad_norm": 0.005050466395914555, - "learning_rate": 0.00019999648332490603, - "loss": 46.0, - "step": 16587 - }, - { - "epoch": 2.6713635814646324, - "grad_norm": 0.000775398570112884, - "learning_rate": 0.00019999648290061208, - "loss": 46.0, - "step": 16588 - }, - { - "epoch": 2.67152461854342, - "grad_norm": 0.003925311379134655, - "learning_rate": 0.0001999964824762925, - "loss": 46.0, - "step": 16589 - }, - { - "epoch": 2.6716856556222073, - "grad_norm": 0.0026923860423266888, - "learning_rate": 0.00019999648205194738, - "loss": 46.0, - "step": 16590 - }, - { - "epoch": 2.6718466927009943, - "grad_norm": 0.0015187134267762303, - "learning_rate": 0.0001999964816275766, - "loss": 46.0, - "step": 16591 - }, - { - "epoch": 2.6720077297797817, - "grad_norm": 0.0023581660352647305, - "learning_rate": 0.00019999648120318028, - "loss": 46.0, - "step": 16592 - }, - { - "epoch": 2.672168766858569, - "grad_norm": 0.002250324934720993, - "learning_rate": 0.00019999648077875833, - "loss": 46.0, - "step": 16593 - }, - { - "epoch": 2.6723298039373566, - "grad_norm": 0.002318785758689046, - "learning_rate": 0.0001999964803543108, - "loss": 46.0, - "step": 16594 - }, - { - "epoch": 2.672490841016144, - "grad_norm": 0.0013619036180898547, - "learning_rate": 0.00019999647992983768, - "loss": 46.0, - "step": 16595 - }, - { - "epoch": 2.672651878094931, - "grad_norm": 0.0015416962560266256, - "learning_rate": 0.00019999647950533895, - "loss": 46.0, - "step": 16596 - }, - { - "epoch": 2.6728129151737186, - "grad_norm": 0.0010195387294515967, - "learning_rate": 0.00019999647908081465, - "loss": 46.0, - "step": 16597 - }, - { - "epoch": 2.672973952252506, - "grad_norm": 0.00118343741632998, - "learning_rate": 0.00019999647865626474, - "loss": 46.0, - "step": 16598 - }, - { - "epoch": 2.6731349893312935, - "grad_norm": 0.001540732104331255, - "learning_rate": 0.00019999647823168922, - "loss": 46.0, - "step": 16599 - }, - { - "epoch": 2.673296026410081, - "grad_norm": 0.0022478620521724224, - "learning_rate": 0.00019999647780708814, - "loss": 46.0, - "step": 16600 - }, - { - "epoch": 2.6734570634888684, - "grad_norm": 0.0011191079393029213, - "learning_rate": 0.00019999647738246144, - "loss": 46.0, - "step": 16601 - }, - { - "epoch": 2.673618100567656, - "grad_norm": 0.0008199792937375605, - "learning_rate": 0.00019999647695780916, - "loss": 46.0, - "step": 16602 - }, - { - "epoch": 2.6737791376464433, - "grad_norm": 0.0017260703025385737, - "learning_rate": 0.0001999964765331313, - "loss": 46.0, - "step": 16603 - }, - { - "epoch": 2.6739401747252307, - "grad_norm": 0.007604490499943495, - "learning_rate": 0.00019999647610842783, - "loss": 46.0, - "step": 16604 - }, - { - "epoch": 2.6741012118040177, - "grad_norm": 0.0042333463206887245, - "learning_rate": 0.00019999647568369876, - "loss": 46.0, - "step": 16605 - }, - { - "epoch": 2.674262248882805, - "grad_norm": 0.002328177448362112, - "learning_rate": 0.0001999964752589441, - "loss": 46.0, - "step": 16606 - }, - { - "epoch": 2.6744232859615926, - "grad_norm": 0.0010306008625775576, - "learning_rate": 0.00019999647483416385, - "loss": 46.0, - "step": 16607 - }, - { - "epoch": 2.67458432304038, - "grad_norm": 0.0008662262698635459, - "learning_rate": 0.000199996474409358, - "loss": 46.0, - "step": 16608 - }, - { - "epoch": 2.6747453601191675, - "grad_norm": 0.0031616119667887688, - "learning_rate": 0.00019999647398452654, - "loss": 46.0, - "step": 16609 - }, - { - "epoch": 2.674906397197955, - "grad_norm": 0.012205038219690323, - "learning_rate": 0.0001999964735596695, - "loss": 46.0, - "step": 16610 - }, - { - "epoch": 2.675067434276742, - "grad_norm": 0.0018624645890668035, - "learning_rate": 0.00019999647313478688, - "loss": 46.0, - "step": 16611 - }, - { - "epoch": 2.6752284713555294, - "grad_norm": 0.001965672941878438, - "learning_rate": 0.00019999647270987867, - "loss": 46.0, - "step": 16612 - }, - { - "epoch": 2.675389508434317, - "grad_norm": 0.0015672518638893962, - "learning_rate": 0.00019999647228494485, - "loss": 46.0, - "step": 16613 - }, - { - "epoch": 2.6755505455131043, - "grad_norm": 0.003995127975940704, - "learning_rate": 0.0001999964718599854, - "loss": 46.0, - "step": 16614 - }, - { - "epoch": 2.675711582591892, - "grad_norm": 0.002233319915831089, - "learning_rate": 0.00019999647143500044, - "loss": 46.0, - "step": 16615 - }, - { - "epoch": 2.6758726196706792, - "grad_norm": 0.0017141100252047181, - "learning_rate": 0.00019999647100998983, - "loss": 46.0, - "step": 16616 - }, - { - "epoch": 2.6760336567494667, - "grad_norm": 0.002074493793770671, - "learning_rate": 0.00019999647058495366, - "loss": 46.0, - "step": 16617 - }, - { - "epoch": 2.676194693828254, - "grad_norm": 0.002965449821203947, - "learning_rate": 0.00019999647015989184, - "loss": 46.0, - "step": 16618 - }, - { - "epoch": 2.6763557309070416, - "grad_norm": 0.004293539095669985, - "learning_rate": 0.00019999646973480447, - "loss": 46.0, - "step": 16619 - }, - { - "epoch": 2.6765167679858286, - "grad_norm": 0.0017037526704370975, - "learning_rate": 0.0001999964693096915, - "loss": 46.0, - "step": 16620 - }, - { - "epoch": 2.676677805064616, - "grad_norm": 0.0015041976002976298, - "learning_rate": 0.00019999646888455296, - "loss": 46.0, - "step": 16621 - }, - { - "epoch": 2.6768388421434035, - "grad_norm": 0.004690492991358042, - "learning_rate": 0.00019999646845938877, - "loss": 46.0, - "step": 16622 - }, - { - "epoch": 2.676999879222191, - "grad_norm": 0.0028543814551085234, - "learning_rate": 0.00019999646803419902, - "loss": 46.0, - "step": 16623 - }, - { - "epoch": 2.6771609163009784, - "grad_norm": 0.00493650184944272, - "learning_rate": 0.00019999646760898365, - "loss": 46.0, - "step": 16624 - }, - { - "epoch": 2.6773219533797654, - "grad_norm": 0.019306128844618797, - "learning_rate": 0.00019999646718374273, - "loss": 46.0, - "step": 16625 - }, - { - "epoch": 2.677482990458553, - "grad_norm": 0.00233146152459085, - "learning_rate": 0.00019999646675847616, - "loss": 46.0, - "step": 16626 - }, - { - "epoch": 2.6776440275373403, - "grad_norm": 0.0014080959372222424, - "learning_rate": 0.00019999646633318406, - "loss": 46.0, - "step": 16627 - }, - { - "epoch": 2.6778050646161278, - "grad_norm": 0.0031507282983511686, - "learning_rate": 0.00019999646590786632, - "loss": 46.0, - "step": 16628 - }, - { - "epoch": 2.6779661016949152, - "grad_norm": 0.0016994421603158116, - "learning_rate": 0.000199996465482523, - "loss": 46.0, - "step": 16629 - }, - { - "epoch": 2.6781271387737027, - "grad_norm": 0.0038471859879791737, - "learning_rate": 0.0001999964650571541, - "loss": 46.0, - "step": 16630 - }, - { - "epoch": 2.67828817585249, - "grad_norm": 0.0016469808761030436, - "learning_rate": 0.00019999646463175958, - "loss": 46.0, - "step": 16631 - }, - { - "epoch": 2.6784492129312776, - "grad_norm": 0.0036466640885919333, - "learning_rate": 0.0001999964642063395, - "loss": 46.0, - "step": 16632 - }, - { - "epoch": 2.678610250010065, - "grad_norm": 0.00043725993600673974, - "learning_rate": 0.0001999964637808938, - "loss": 46.0, - "step": 16633 - }, - { - "epoch": 2.6787712870888525, - "grad_norm": 0.0007817916921339929, - "learning_rate": 0.0001999964633554225, - "loss": 46.0, - "step": 16634 - }, - { - "epoch": 2.6789323241676395, - "grad_norm": 0.0016522860387340188, - "learning_rate": 0.00019999646292992562, - "loss": 46.0, - "step": 16635 - }, - { - "epoch": 2.679093361246427, - "grad_norm": 0.0026875450275838375, - "learning_rate": 0.0001999964625044031, - "loss": 46.0, - "step": 16636 - }, - { - "epoch": 2.6792543983252144, - "grad_norm": 0.0026240721344947815, - "learning_rate": 0.00019999646207885508, - "loss": 46.0, - "step": 16637 - }, - { - "epoch": 2.679415435404002, - "grad_norm": 0.002033310243859887, - "learning_rate": 0.00019999646165328139, - "loss": 46.0, - "step": 16638 - }, - { - "epoch": 2.6795764724827893, - "grad_norm": 0.006061685737222433, - "learning_rate": 0.0001999964612276821, - "loss": 46.0, - "step": 16639 - }, - { - "epoch": 2.6797375095615763, - "grad_norm": 0.0009897294221445918, - "learning_rate": 0.00019999646080205727, - "loss": 46.0, - "step": 16640 - }, - { - "epoch": 2.6798985466403638, - "grad_norm": 0.011613779701292515, - "learning_rate": 0.00019999646037640684, - "loss": 46.0, - "step": 16641 - }, - { - "epoch": 2.680059583719151, - "grad_norm": 0.0005371918086893857, - "learning_rate": 0.00019999645995073077, - "loss": 46.0, - "step": 16642 - }, - { - "epoch": 2.6802206207979387, - "grad_norm": 0.004039292223751545, - "learning_rate": 0.00019999645952502917, - "loss": 46.0, - "step": 16643 - }, - { - "epoch": 2.680381657876726, - "grad_norm": 0.008642755448818207, - "learning_rate": 0.00019999645909930192, - "loss": 46.0, - "step": 16644 - }, - { - "epoch": 2.6805426949555136, - "grad_norm": 0.0037548034451901913, - "learning_rate": 0.0001999964586735491, - "loss": 46.0, - "step": 16645 - }, - { - "epoch": 2.680703732034301, - "grad_norm": 0.0025954842567443848, - "learning_rate": 0.0001999964582477707, - "loss": 46.0, - "step": 16646 - }, - { - "epoch": 2.6808647691130885, - "grad_norm": 0.0008959742845036089, - "learning_rate": 0.00019999645782196667, - "loss": 46.0, - "step": 16647 - }, - { - "epoch": 2.681025806191876, - "grad_norm": 0.003832322545349598, - "learning_rate": 0.00019999645739613705, - "loss": 46.0, - "step": 16648 - }, - { - "epoch": 2.681186843270663, - "grad_norm": 0.000573773228097707, - "learning_rate": 0.00019999645697028187, - "loss": 46.0, - "step": 16649 - }, - { - "epoch": 2.6813478803494504, - "grad_norm": 0.001437184982933104, - "learning_rate": 0.00019999645654440108, - "loss": 46.0, - "step": 16650 - }, - { - "epoch": 2.681508917428238, - "grad_norm": 0.002907233312726021, - "learning_rate": 0.0001999964561184947, - "loss": 46.0, - "step": 16651 - }, - { - "epoch": 2.6816699545070253, - "grad_norm": 0.0009279531077481806, - "learning_rate": 0.00019999645569256268, - "loss": 46.0, - "step": 16652 - }, - { - "epoch": 2.6818309915858127, - "grad_norm": 0.0007110547740012407, - "learning_rate": 0.00019999645526660512, - "loss": 46.0, - "step": 16653 - }, - { - "epoch": 2.6819920286646, - "grad_norm": 0.0028687051963061094, - "learning_rate": 0.00019999645484062195, - "loss": 46.0, - "step": 16654 - }, - { - "epoch": 2.682153065743387, - "grad_norm": 0.0021604718640446663, - "learning_rate": 0.0001999964544146132, - "loss": 46.0, - "step": 16655 - }, - { - "epoch": 2.6823141028221746, - "grad_norm": 0.0013892232673242688, - "learning_rate": 0.00019999645398857885, - "loss": 46.0, - "step": 16656 - }, - { - "epoch": 2.682475139900962, - "grad_norm": 0.0012530168751254678, - "learning_rate": 0.0001999964535625189, - "loss": 46.0, - "step": 16657 - }, - { - "epoch": 2.6826361769797495, - "grad_norm": 0.0016381407622247934, - "learning_rate": 0.00019999645313643332, - "loss": 46.0, - "step": 16658 - }, - { - "epoch": 2.682797214058537, - "grad_norm": 0.0024876834359019995, - "learning_rate": 0.0001999964527103222, - "loss": 46.0, - "step": 16659 - }, - { - "epoch": 2.6829582511373244, - "grad_norm": 0.0011124046286568046, - "learning_rate": 0.00019999645228418547, - "loss": 46.0, - "step": 16660 - }, - { - "epoch": 2.683119288216112, - "grad_norm": 0.002676828298717737, - "learning_rate": 0.00019999645185802317, - "loss": 46.0, - "step": 16661 - }, - { - "epoch": 2.6832803252948993, - "grad_norm": 0.0034460797905921936, - "learning_rate": 0.00019999645143183522, - "loss": 46.0, - "step": 16662 - }, - { - "epoch": 2.683441362373687, - "grad_norm": 0.002822249662131071, - "learning_rate": 0.0001999964510056217, - "loss": 46.0, - "step": 16663 - }, - { - "epoch": 2.683602399452474, - "grad_norm": 0.0006364263244904578, - "learning_rate": 0.0001999964505793826, - "loss": 46.0, - "step": 16664 - }, - { - "epoch": 2.6837634365312613, - "grad_norm": 0.0015507631469517946, - "learning_rate": 0.0001999964501531179, - "loss": 46.0, - "step": 16665 - }, - { - "epoch": 2.6839244736100487, - "grad_norm": 0.0025115031749010086, - "learning_rate": 0.00019999644972682758, - "loss": 46.0, - "step": 16666 - }, - { - "epoch": 2.684085510688836, - "grad_norm": 0.005299859680235386, - "learning_rate": 0.0001999964493005117, - "loss": 46.0, - "step": 16667 - }, - { - "epoch": 2.6842465477676236, - "grad_norm": 0.0016116708284243941, - "learning_rate": 0.00019999644887417023, - "loss": 46.0, - "step": 16668 - }, - { - "epoch": 2.6844075848464106, - "grad_norm": 0.0013413728447631001, - "learning_rate": 0.00019999644844780314, - "loss": 46.0, - "step": 16669 - }, - { - "epoch": 2.684568621925198, - "grad_norm": 0.0014985156012699008, - "learning_rate": 0.0001999964480214105, - "loss": 46.0, - "step": 16670 - }, - { - "epoch": 2.6847296590039855, - "grad_norm": 0.0019531450234353542, - "learning_rate": 0.0001999964475949922, - "loss": 46.0, - "step": 16671 - }, - { - "epoch": 2.684890696082773, - "grad_norm": 0.0011201798915863037, - "learning_rate": 0.00019999644716854834, - "loss": 46.0, - "step": 16672 - }, - { - "epoch": 2.6850517331615604, - "grad_norm": 0.0028693575877696276, - "learning_rate": 0.00019999644674207888, - "loss": 46.0, - "step": 16673 - }, - { - "epoch": 2.685212770240348, - "grad_norm": 0.0028689310420304537, - "learning_rate": 0.00019999644631558386, - "loss": 46.0, - "step": 16674 - }, - { - "epoch": 2.6853738073191353, - "grad_norm": 0.011154502630233765, - "learning_rate": 0.00019999644588906322, - "loss": 46.0, - "step": 16675 - }, - { - "epoch": 2.685534844397923, - "grad_norm": 0.001409060787409544, - "learning_rate": 0.00019999644546251697, - "loss": 46.0, - "step": 16676 - }, - { - "epoch": 2.6856958814767102, - "grad_norm": 0.0055886381305754185, - "learning_rate": 0.00019999644503594514, - "loss": 46.0, - "step": 16677 - }, - { - "epoch": 2.6858569185554972, - "grad_norm": 0.001857449417002499, - "learning_rate": 0.0001999964446093477, - "loss": 46.0, - "step": 16678 - }, - { - "epoch": 2.6860179556342847, - "grad_norm": 0.003105325624346733, - "learning_rate": 0.0001999964441827247, - "loss": 46.0, - "step": 16679 - }, - { - "epoch": 2.686178992713072, - "grad_norm": 0.005410938523709774, - "learning_rate": 0.0001999964437560761, - "loss": 46.0, - "step": 16680 - }, - { - "epoch": 2.6863400297918596, - "grad_norm": 0.009219738654792309, - "learning_rate": 0.0001999964433294019, - "loss": 46.0, - "step": 16681 - }, - { - "epoch": 2.686501066870647, - "grad_norm": 0.0007834638236090541, - "learning_rate": 0.00019999644290270206, - "loss": 46.0, - "step": 16682 - }, - { - "epoch": 2.6866621039494345, - "grad_norm": 0.0034031688701361418, - "learning_rate": 0.00019999644247597668, - "loss": 46.0, - "step": 16683 - }, - { - "epoch": 2.6868231410282215, - "grad_norm": 0.002383209764957428, - "learning_rate": 0.0001999964420492257, - "loss": 46.0, - "step": 16684 - }, - { - "epoch": 2.686984178107009, - "grad_norm": 0.003262204583734274, - "learning_rate": 0.00019999644162244911, - "loss": 46.0, - "step": 16685 - }, - { - "epoch": 2.6871452151857964, - "grad_norm": 0.0019388990476727486, - "learning_rate": 0.00019999644119564694, - "loss": 46.0, - "step": 16686 - }, - { - "epoch": 2.687306252264584, - "grad_norm": 0.0013207612792029977, - "learning_rate": 0.00019999644076881915, - "loss": 46.0, - "step": 16687 - }, - { - "epoch": 2.6874672893433713, - "grad_norm": 0.0009569731773808599, - "learning_rate": 0.0001999964403419658, - "loss": 46.0, - "step": 16688 - }, - { - "epoch": 2.6876283264221588, - "grad_norm": 0.004633996170014143, - "learning_rate": 0.00019999643991508684, - "loss": 46.0, - "step": 16689 - }, - { - "epoch": 2.687789363500946, - "grad_norm": 0.004391348920762539, - "learning_rate": 0.00019999643948818228, - "loss": 46.0, - "step": 16690 - }, - { - "epoch": 2.6879504005797337, - "grad_norm": 0.005670290905982256, - "learning_rate": 0.00019999643906125215, - "loss": 46.0, - "step": 16691 - }, - { - "epoch": 2.688111437658521, - "grad_norm": 0.001460038824006915, - "learning_rate": 0.0001999964386342964, - "loss": 46.0, - "step": 16692 - }, - { - "epoch": 2.688272474737308, - "grad_norm": 0.005185308866202831, - "learning_rate": 0.00019999643820731508, - "loss": 46.0, - "step": 16693 - }, - { - "epoch": 2.6884335118160956, - "grad_norm": 0.003059952985495329, - "learning_rate": 0.00019999643778030815, - "loss": 46.0, - "step": 16694 - }, - { - "epoch": 2.688594548894883, - "grad_norm": 0.0008635777630843222, - "learning_rate": 0.0001999964373532756, - "loss": 46.0, - "step": 16695 - }, - { - "epoch": 2.6887555859736705, - "grad_norm": 0.005537484306842089, - "learning_rate": 0.0001999964369262175, - "loss": 46.0, - "step": 16696 - }, - { - "epoch": 2.688916623052458, - "grad_norm": 0.003225943073630333, - "learning_rate": 0.0001999964364991338, - "loss": 46.0, - "step": 16697 - }, - { - "epoch": 2.6890776601312454, - "grad_norm": 0.0032401406206190586, - "learning_rate": 0.0001999964360720245, - "loss": 46.0, - "step": 16698 - }, - { - "epoch": 2.6892386972100324, - "grad_norm": 0.0031407340429723263, - "learning_rate": 0.0001999964356448896, - "loss": 46.0, - "step": 16699 - }, - { - "epoch": 2.68939973428882, - "grad_norm": 0.001973002217710018, - "learning_rate": 0.0001999964352177291, - "loss": 46.0, - "step": 16700 - }, - { - "epoch": 2.6895607713676073, - "grad_norm": 0.005462112836539745, - "learning_rate": 0.000199996434790543, - "loss": 46.0, - "step": 16701 - }, - { - "epoch": 2.6897218084463947, - "grad_norm": 0.006081890780478716, - "learning_rate": 0.00019999643436333133, - "loss": 46.0, - "step": 16702 - }, - { - "epoch": 2.689882845525182, - "grad_norm": 0.003943631891161203, - "learning_rate": 0.00019999643393609406, - "loss": 46.0, - "step": 16703 - }, - { - "epoch": 2.6900438826039696, - "grad_norm": 0.006720670498907566, - "learning_rate": 0.00019999643350883118, - "loss": 46.0, - "step": 16704 - }, - { - "epoch": 2.690204919682757, - "grad_norm": 0.008004121482372284, - "learning_rate": 0.00019999643308154274, - "loss": 46.0, - "step": 16705 - }, - { - "epoch": 2.6903659567615446, - "grad_norm": 0.0008740870398469269, - "learning_rate": 0.0001999964326542287, - "loss": 46.0, - "step": 16706 - }, - { - "epoch": 2.690526993840332, - "grad_norm": 0.0026336382143199444, - "learning_rate": 0.00019999643222688902, - "loss": 46.0, - "step": 16707 - }, - { - "epoch": 2.690688030919119, - "grad_norm": 0.00536775030195713, - "learning_rate": 0.0001999964317995238, - "loss": 46.0, - "step": 16708 - }, - { - "epoch": 2.6908490679979065, - "grad_norm": 0.0021431914065033197, - "learning_rate": 0.00019999643137213295, - "loss": 46.0, - "step": 16709 - }, - { - "epoch": 2.691010105076694, - "grad_norm": 0.0003728725132532418, - "learning_rate": 0.00019999643094471652, - "loss": 46.0, - "step": 16710 - }, - { - "epoch": 2.6911711421554814, - "grad_norm": 0.0013024089857935905, - "learning_rate": 0.00019999643051727447, - "loss": 46.0, - "step": 16711 - }, - { - "epoch": 2.691332179234269, - "grad_norm": 0.0015731453895568848, - "learning_rate": 0.00019999643008980687, - "loss": 46.0, - "step": 16712 - }, - { - "epoch": 2.691493216313056, - "grad_norm": 0.0014754356816411018, - "learning_rate": 0.00019999642966231368, - "loss": 46.0, - "step": 16713 - }, - { - "epoch": 2.6916542533918433, - "grad_norm": 0.00304122781381011, - "learning_rate": 0.00019999642923479484, - "loss": 46.0, - "step": 16714 - }, - { - "epoch": 2.6918152904706307, - "grad_norm": 0.0015769213205203414, - "learning_rate": 0.00019999642880725045, - "loss": 46.0, - "step": 16715 - }, - { - "epoch": 2.691976327549418, - "grad_norm": 0.012477298267185688, - "learning_rate": 0.00019999642837968047, - "loss": 46.0, - "step": 16716 - }, - { - "epoch": 2.6921373646282056, - "grad_norm": 0.0024488577619194984, - "learning_rate": 0.00019999642795208488, - "loss": 46.0, - "step": 16717 - }, - { - "epoch": 2.692298401706993, - "grad_norm": 0.009438673965632915, - "learning_rate": 0.0001999964275244637, - "loss": 46.0, - "step": 16718 - }, - { - "epoch": 2.6924594387857805, - "grad_norm": 0.008150266483426094, - "learning_rate": 0.00019999642709681693, - "loss": 46.0, - "step": 16719 - }, - { - "epoch": 2.692620475864568, - "grad_norm": 0.0016168324509635568, - "learning_rate": 0.00019999642666914454, - "loss": 46.0, - "step": 16720 - }, - { - "epoch": 2.6927815129433554, - "grad_norm": 0.005180926527827978, - "learning_rate": 0.00019999642624144657, - "loss": 46.0, - "step": 16721 - }, - { - "epoch": 2.6929425500221424, - "grad_norm": 0.00736199552193284, - "learning_rate": 0.00019999642581372301, - "loss": 46.0, - "step": 16722 - }, - { - "epoch": 2.69310358710093, - "grad_norm": 0.0028334404341876507, - "learning_rate": 0.00019999642538597387, - "loss": 46.0, - "step": 16723 - }, - { - "epoch": 2.6932646241797173, - "grad_norm": 0.0014165194006636739, - "learning_rate": 0.00019999642495819914, - "loss": 46.0, - "step": 16724 - }, - { - "epoch": 2.693425661258505, - "grad_norm": 0.001712548197247088, - "learning_rate": 0.00019999642453039876, - "loss": 46.0, - "step": 16725 - }, - { - "epoch": 2.6935866983372923, - "grad_norm": 0.0011719324393197894, - "learning_rate": 0.00019999642410257283, - "loss": 46.0, - "step": 16726 - }, - { - "epoch": 2.6937477354160797, - "grad_norm": 0.0020610301289707422, - "learning_rate": 0.00019999642367472134, - "loss": 46.0, - "step": 16727 - }, - { - "epoch": 2.6939087724948667, - "grad_norm": 0.0011872589820995927, - "learning_rate": 0.0001999964232468442, - "loss": 46.0, - "step": 16728 - }, - { - "epoch": 2.694069809573654, - "grad_norm": 0.0010340037988498807, - "learning_rate": 0.00019999642281894148, - "loss": 46.0, - "step": 16729 - }, - { - "epoch": 2.6942308466524416, - "grad_norm": 0.0005314347217790782, - "learning_rate": 0.00019999642239101317, - "loss": 46.0, - "step": 16730 - }, - { - "epoch": 2.694391883731229, - "grad_norm": 0.001003214973025024, - "learning_rate": 0.00019999642196305928, - "loss": 46.0, - "step": 16731 - }, - { - "epoch": 2.6945529208100165, - "grad_norm": 0.005966759752482176, - "learning_rate": 0.00019999642153507976, - "loss": 46.0, - "step": 16732 - }, - { - "epoch": 2.694713957888804, - "grad_norm": 0.005754299461841583, - "learning_rate": 0.0001999964211070747, - "loss": 46.0, - "step": 16733 - }, - { - "epoch": 2.6948749949675914, - "grad_norm": 0.006493737455457449, - "learning_rate": 0.00019999642067904398, - "loss": 46.0, - "step": 16734 - }, - { - "epoch": 2.695036032046379, - "grad_norm": 0.0005120303831063211, - "learning_rate": 0.00019999642025098774, - "loss": 46.0, - "step": 16735 - }, - { - "epoch": 2.6951970691251663, - "grad_norm": 0.0069949328899383545, - "learning_rate": 0.00019999641982290585, - "loss": 46.0, - "step": 16736 - }, - { - "epoch": 2.6953581062039533, - "grad_norm": 0.0009330203756690025, - "learning_rate": 0.00019999641939479838, - "loss": 46.0, - "step": 16737 - }, - { - "epoch": 2.695519143282741, - "grad_norm": 0.0012559612514451146, - "learning_rate": 0.0001999964189666653, - "loss": 46.0, - "step": 16738 - }, - { - "epoch": 2.6956801803615282, - "grad_norm": 0.013318131677806377, - "learning_rate": 0.00019999641853850667, - "loss": 46.0, - "step": 16739 - }, - { - "epoch": 2.6958412174403157, - "grad_norm": 0.001347652287222445, - "learning_rate": 0.00019999641811032238, - "loss": 46.0, - "step": 16740 - }, - { - "epoch": 2.696002254519103, - "grad_norm": 0.015256439335644245, - "learning_rate": 0.00019999641768211256, - "loss": 46.0, - "step": 16741 - }, - { - "epoch": 2.69616329159789, - "grad_norm": 0.0045393602922558784, - "learning_rate": 0.00019999641725387712, - "loss": 46.0, - "step": 16742 - }, - { - "epoch": 2.6963243286766776, - "grad_norm": 0.013451792299747467, - "learning_rate": 0.0001999964168256161, - "loss": 46.0, - "step": 16743 - }, - { - "epoch": 2.696485365755465, - "grad_norm": 0.0029337380547076464, - "learning_rate": 0.00019999641639732946, - "loss": 46.0, - "step": 16744 - }, - { - "epoch": 2.6966464028342525, - "grad_norm": 0.0027077605482190847, - "learning_rate": 0.00019999641596901726, - "loss": 46.0, - "step": 16745 - }, - { - "epoch": 2.69680743991304, - "grad_norm": 0.010373102501034737, - "learning_rate": 0.00019999641554067945, - "loss": 46.0, - "step": 16746 - }, - { - "epoch": 2.6969684769918274, - "grad_norm": 0.0015750667080283165, - "learning_rate": 0.00019999641511231602, - "loss": 46.0, - "step": 16747 - }, - { - "epoch": 2.697129514070615, - "grad_norm": 0.00912637822329998, - "learning_rate": 0.00019999641468392704, - "loss": 46.0, - "step": 16748 - }, - { - "epoch": 2.6972905511494023, - "grad_norm": 0.0009323619306087494, - "learning_rate": 0.00019999641425551244, - "loss": 46.0, - "step": 16749 - }, - { - "epoch": 2.6974515882281898, - "grad_norm": 0.007917189970612526, - "learning_rate": 0.00019999641382707225, - "loss": 46.0, - "step": 16750 - }, - { - "epoch": 2.697612625306977, - "grad_norm": 0.006665208842605352, - "learning_rate": 0.00019999641339860647, - "loss": 46.0, - "step": 16751 - }, - { - "epoch": 2.697773662385764, - "grad_norm": 0.0041270493529737, - "learning_rate": 0.00019999641297011506, - "loss": 46.0, - "step": 16752 - }, - { - "epoch": 2.6979346994645517, - "grad_norm": 0.0009448397904634476, - "learning_rate": 0.0001999964125415981, - "loss": 46.0, - "step": 16753 - }, - { - "epoch": 2.698095736543339, - "grad_norm": 0.0011741402558982372, - "learning_rate": 0.00019999641211305554, - "loss": 46.0, - "step": 16754 - }, - { - "epoch": 2.6982567736221266, - "grad_norm": 0.0037936896551400423, - "learning_rate": 0.00019999641168448736, - "loss": 46.0, - "step": 16755 - }, - { - "epoch": 2.698417810700914, - "grad_norm": 0.003401179565116763, - "learning_rate": 0.00019999641125589363, - "loss": 46.0, - "step": 16756 - }, - { - "epoch": 2.698578847779701, - "grad_norm": 0.009221816435456276, - "learning_rate": 0.00019999641082727427, - "loss": 46.0, - "step": 16757 - }, - { - "epoch": 2.6987398848584885, - "grad_norm": 0.004651486407965422, - "learning_rate": 0.00019999641039862933, - "loss": 46.0, - "step": 16758 - }, - { - "epoch": 2.698900921937276, - "grad_norm": 0.0019382634200155735, - "learning_rate": 0.0001999964099699588, - "loss": 46.0, - "step": 16759 - }, - { - "epoch": 2.6990619590160634, - "grad_norm": 0.000692696135956794, - "learning_rate": 0.0001999964095412627, - "loss": 46.0, - "step": 16760 - }, - { - "epoch": 2.699222996094851, - "grad_norm": 0.001394860097207129, - "learning_rate": 0.00019999640911254096, - "loss": 46.0, - "step": 16761 - }, - { - "epoch": 2.6993840331736383, - "grad_norm": 0.0010718988487496972, - "learning_rate": 0.00019999640868379362, - "loss": 46.0, - "step": 16762 - }, - { - "epoch": 2.6995450702524257, - "grad_norm": 0.005175519268959761, - "learning_rate": 0.00019999640825502075, - "loss": 46.0, - "step": 16763 - }, - { - "epoch": 2.699706107331213, - "grad_norm": 0.002591567113995552, - "learning_rate": 0.00019999640782622223, - "loss": 46.0, - "step": 16764 - }, - { - "epoch": 2.6998671444100006, - "grad_norm": 0.002293346682563424, - "learning_rate": 0.00019999640739739813, - "loss": 46.0, - "step": 16765 - }, - { - "epoch": 2.7000281814887876, - "grad_norm": 0.0018217446049675345, - "learning_rate": 0.00019999640696854843, - "loss": 46.0, - "step": 16766 - }, - { - "epoch": 2.700189218567575, - "grad_norm": 0.004994076676666737, - "learning_rate": 0.00019999640653967316, - "loss": 46.0, - "step": 16767 - }, - { - "epoch": 2.7003502556463626, - "grad_norm": 0.0022861426696181297, - "learning_rate": 0.0001999964061107723, - "loss": 46.0, - "step": 16768 - }, - { - "epoch": 2.70051129272515, - "grad_norm": 0.0044463323429226875, - "learning_rate": 0.00019999640568184578, - "loss": 46.0, - "step": 16769 - }, - { - "epoch": 2.7006723298039375, - "grad_norm": 0.002989982021972537, - "learning_rate": 0.00019999640525289372, - "loss": 46.0, - "step": 16770 - }, - { - "epoch": 2.700833366882725, - "grad_norm": 0.0012834639055654407, - "learning_rate": 0.00019999640482391606, - "loss": 46.0, - "step": 16771 - }, - { - "epoch": 2.700994403961512, - "grad_norm": 0.005140871275216341, - "learning_rate": 0.00019999640439491282, - "loss": 46.0, - "step": 16772 - }, - { - "epoch": 2.7011554410402994, - "grad_norm": 0.0017137883696705103, - "learning_rate": 0.00019999640396588397, - "loss": 46.0, - "step": 16773 - }, - { - "epoch": 2.701316478119087, - "grad_norm": 0.006277171429246664, - "learning_rate": 0.00019999640353682952, - "loss": 46.0, - "step": 16774 - }, - { - "epoch": 2.7014775151978743, - "grad_norm": 0.007688910700380802, - "learning_rate": 0.00019999640310774947, - "loss": 46.0, - "step": 16775 - }, - { - "epoch": 2.7016385522766617, - "grad_norm": 0.0008759669144637883, - "learning_rate": 0.00019999640267864385, - "loss": 46.0, - "step": 16776 - }, - { - "epoch": 2.701799589355449, - "grad_norm": 0.001542112324386835, - "learning_rate": 0.00019999640224951262, - "loss": 46.0, - "step": 16777 - }, - { - "epoch": 2.7019606264342366, - "grad_norm": 0.0038784516509622335, - "learning_rate": 0.00019999640182035583, - "loss": 46.0, - "step": 16778 - }, - { - "epoch": 2.702121663513024, - "grad_norm": 0.000979863223619759, - "learning_rate": 0.0001999964013911734, - "loss": 46.0, - "step": 16779 - }, - { - "epoch": 2.7022827005918115, - "grad_norm": 0.010109331458806992, - "learning_rate": 0.0001999964009619654, - "loss": 46.0, - "step": 16780 - }, - { - "epoch": 2.7024437376705985, - "grad_norm": 0.0008103515137918293, - "learning_rate": 0.00019999640053273177, - "loss": 46.0, - "step": 16781 - }, - { - "epoch": 2.702604774749386, - "grad_norm": 0.004866925999522209, - "learning_rate": 0.00019999640010347257, - "loss": 46.0, - "step": 16782 - }, - { - "epoch": 2.7027658118281734, - "grad_norm": 0.0006917114951647818, - "learning_rate": 0.0001999963996741878, - "loss": 46.0, - "step": 16783 - }, - { - "epoch": 2.702926848906961, - "grad_norm": 0.0007846925291232765, - "learning_rate": 0.00019999639924487742, - "loss": 46.0, - "step": 16784 - }, - { - "epoch": 2.7030878859857483, - "grad_norm": 0.004074486438184977, - "learning_rate": 0.0001999963988155414, - "loss": 46.0, - "step": 16785 - }, - { - "epoch": 2.7032489230645353, - "grad_norm": 0.006787537597119808, - "learning_rate": 0.00019999639838617984, - "loss": 46.0, - "step": 16786 - }, - { - "epoch": 2.703409960143323, - "grad_norm": 0.004249176941812038, - "learning_rate": 0.0001999963979567927, - "loss": 46.0, - "step": 16787 - }, - { - "epoch": 2.7035709972221103, - "grad_norm": 0.001418575062416494, - "learning_rate": 0.00019999639752737994, - "loss": 46.0, - "step": 16788 - }, - { - "epoch": 2.7037320343008977, - "grad_norm": 0.01058497466146946, - "learning_rate": 0.00019999639709794158, - "loss": 46.0, - "step": 16789 - }, - { - "epoch": 2.703893071379685, - "grad_norm": 0.0012655415339395404, - "learning_rate": 0.00019999639666847766, - "loss": 46.0, - "step": 16790 - }, - { - "epoch": 2.7040541084584726, - "grad_norm": 0.003899478819221258, - "learning_rate": 0.0001999963962389881, - "loss": 46.0, - "step": 16791 - }, - { - "epoch": 2.70421514553726, - "grad_norm": 0.008875411935150623, - "learning_rate": 0.00019999639580947295, - "loss": 46.0, - "step": 16792 - }, - { - "epoch": 2.7043761826160475, - "grad_norm": 0.0038673330564051867, - "learning_rate": 0.00019999639537993225, - "loss": 46.0, - "step": 16793 - }, - { - "epoch": 2.704537219694835, - "grad_norm": 0.005630007945001125, - "learning_rate": 0.0001999963949503659, - "loss": 46.0, - "step": 16794 - }, - { - "epoch": 2.704698256773622, - "grad_norm": 0.001027962425723672, - "learning_rate": 0.000199996394520774, - "loss": 46.0, - "step": 16795 - }, - { - "epoch": 2.7048592938524094, - "grad_norm": 0.0008963792934082448, - "learning_rate": 0.0001999963940911565, - "loss": 46.0, - "step": 16796 - }, - { - "epoch": 2.705020330931197, - "grad_norm": 0.007647103630006313, - "learning_rate": 0.00019999639366151338, - "loss": 46.0, - "step": 16797 - }, - { - "epoch": 2.7051813680099843, - "grad_norm": 0.002506924793124199, - "learning_rate": 0.00019999639323184466, - "loss": 46.0, - "step": 16798 - }, - { - "epoch": 2.7053424050887718, - "grad_norm": 0.004357772413641214, - "learning_rate": 0.0001999963928021504, - "loss": 46.0, - "step": 16799 - }, - { - "epoch": 2.7055034421675592, - "grad_norm": 0.006169793661683798, - "learning_rate": 0.0001999963923724305, - "loss": 46.0, - "step": 16800 - }, - { - "epoch": 2.7056644792463462, - "grad_norm": 0.0024810058530420065, - "learning_rate": 0.00019999639194268501, - "loss": 46.0, - "step": 16801 - }, - { - "epoch": 2.7058255163251337, - "grad_norm": 0.0013228394091129303, - "learning_rate": 0.00019999639151291394, - "loss": 46.0, - "step": 16802 - }, - { - "epoch": 2.705986553403921, - "grad_norm": 0.0034430488012731075, - "learning_rate": 0.0001999963910831173, - "loss": 46.0, - "step": 16803 - }, - { - "epoch": 2.7061475904827086, - "grad_norm": 0.004819844849407673, - "learning_rate": 0.00019999639065329503, - "loss": 46.0, - "step": 16804 - }, - { - "epoch": 2.706308627561496, - "grad_norm": 0.0024054015520960093, - "learning_rate": 0.00019999639022344717, - "loss": 46.0, - "step": 16805 - }, - { - "epoch": 2.7064696646402835, - "grad_norm": 0.0015642436919733882, - "learning_rate": 0.00019999638979357372, - "loss": 46.0, - "step": 16806 - }, - { - "epoch": 2.706630701719071, - "grad_norm": 0.0018935829866677523, - "learning_rate": 0.00019999638936367466, - "loss": 46.0, - "step": 16807 - }, - { - "epoch": 2.7067917387978584, - "grad_norm": 0.0017554400255903602, - "learning_rate": 0.00019999638893375003, - "loss": 46.0, - "step": 16808 - }, - { - "epoch": 2.706952775876646, - "grad_norm": 0.0012979459716007113, - "learning_rate": 0.00019999638850379977, - "loss": 46.0, - "step": 16809 - }, - { - "epoch": 2.707113812955433, - "grad_norm": 0.005419580731540918, - "learning_rate": 0.00019999638807382397, - "loss": 46.0, - "step": 16810 - }, - { - "epoch": 2.7072748500342203, - "grad_norm": 0.0005155244725756347, - "learning_rate": 0.00019999638764382253, - "loss": 46.0, - "step": 16811 - }, - { - "epoch": 2.7074358871130078, - "grad_norm": 0.0033099977299571037, - "learning_rate": 0.00019999638721379553, - "loss": 46.0, - "step": 16812 - }, - { - "epoch": 2.707596924191795, - "grad_norm": 0.005884178448468447, - "learning_rate": 0.00019999638678374292, - "loss": 46.0, - "step": 16813 - }, - { - "epoch": 2.7077579612705827, - "grad_norm": 0.004039480816572905, - "learning_rate": 0.00019999638635366475, - "loss": 46.0, - "step": 16814 - }, - { - "epoch": 2.7079189983493697, - "grad_norm": 0.008092648349702358, - "learning_rate": 0.00019999638592356093, - "loss": 46.0, - "step": 16815 - }, - { - "epoch": 2.708080035428157, - "grad_norm": 0.006574242375791073, - "learning_rate": 0.00019999638549343153, - "loss": 46.0, - "step": 16816 - }, - { - "epoch": 2.7082410725069446, - "grad_norm": 0.0010245295707136393, - "learning_rate": 0.00019999638506327657, - "loss": 46.0, - "step": 16817 - }, - { - "epoch": 2.708402109585732, - "grad_norm": 0.00125090591609478, - "learning_rate": 0.000199996384633096, - "loss": 46.0, - "step": 16818 - }, - { - "epoch": 2.7085631466645195, - "grad_norm": 0.0008006748394109309, - "learning_rate": 0.0001999963842028898, - "loss": 46.0, - "step": 16819 - }, - { - "epoch": 2.708724183743307, - "grad_norm": 0.006743420381098986, - "learning_rate": 0.00019999638377265805, - "loss": 46.0, - "step": 16820 - }, - { - "epoch": 2.7088852208220944, - "grad_norm": 0.0009717715438455343, - "learning_rate": 0.00019999638334240068, - "loss": 46.0, - "step": 16821 - }, - { - "epoch": 2.709046257900882, - "grad_norm": 0.0025457418523728848, - "learning_rate": 0.00019999638291211773, - "loss": 46.0, - "step": 16822 - }, - { - "epoch": 2.7092072949796693, - "grad_norm": 0.00406483793631196, - "learning_rate": 0.0001999963824818092, - "loss": 46.0, - "step": 16823 - }, - { - "epoch": 2.7093683320584567, - "grad_norm": 0.005037358496338129, - "learning_rate": 0.00019999638205147504, - "loss": 46.0, - "step": 16824 - }, - { - "epoch": 2.7095293691372437, - "grad_norm": 0.0051717557944357395, - "learning_rate": 0.00019999638162111532, - "loss": 46.0, - "step": 16825 - }, - { - "epoch": 2.709690406216031, - "grad_norm": 0.018700547516345978, - "learning_rate": 0.00019999638119073, - "loss": 46.0, - "step": 16826 - }, - { - "epoch": 2.7098514432948186, - "grad_norm": 0.0010653989156708121, - "learning_rate": 0.00019999638076031905, - "loss": 46.0, - "step": 16827 - }, - { - "epoch": 2.710012480373606, - "grad_norm": 0.0027403435669839382, - "learning_rate": 0.00019999638032988255, - "loss": 46.0, - "step": 16828 - }, - { - "epoch": 2.7101735174523935, - "grad_norm": 0.004282352048903704, - "learning_rate": 0.00019999637989942043, - "loss": 46.0, - "step": 16829 - }, - { - "epoch": 2.7103345545311806, - "grad_norm": 0.0018186519155278802, - "learning_rate": 0.00019999637946893273, - "loss": 46.0, - "step": 16830 - }, - { - "epoch": 2.710495591609968, - "grad_norm": 0.00401414604857564, - "learning_rate": 0.00019999637903841944, - "loss": 46.0, - "step": 16831 - }, - { - "epoch": 2.7106566286887555, - "grad_norm": 0.0056733377277851105, - "learning_rate": 0.00019999637860788053, - "loss": 46.0, - "step": 16832 - }, - { - "epoch": 2.710817665767543, - "grad_norm": 0.007255855482071638, - "learning_rate": 0.00019999637817731604, - "loss": 46.0, - "step": 16833 - }, - { - "epoch": 2.7109787028463304, - "grad_norm": 0.002388984663411975, - "learning_rate": 0.00019999637774672596, - "loss": 46.0, - "step": 16834 - }, - { - "epoch": 2.711139739925118, - "grad_norm": 0.0017402877565473318, - "learning_rate": 0.0001999963773161103, - "loss": 46.0, - "step": 16835 - }, - { - "epoch": 2.7113007770039053, - "grad_norm": 0.0021618285682052374, - "learning_rate": 0.00019999637688546901, - "loss": 46.0, - "step": 16836 - }, - { - "epoch": 2.7114618140826927, - "grad_norm": 0.0017112528439611197, - "learning_rate": 0.00019999637645480215, - "loss": 46.0, - "step": 16837 - }, - { - "epoch": 2.71162285116148, - "grad_norm": 0.0033222208730876446, - "learning_rate": 0.0001999963760241097, - "loss": 46.0, - "step": 16838 - }, - { - "epoch": 2.711783888240267, - "grad_norm": 0.0048472392372787, - "learning_rate": 0.00019999637559339165, - "loss": 46.0, - "step": 16839 - }, - { - "epoch": 2.7119449253190546, - "grad_norm": 0.0017181599978357553, - "learning_rate": 0.000199996375162648, - "loss": 46.0, - "step": 16840 - }, - { - "epoch": 2.712105962397842, - "grad_norm": 0.004129703156650066, - "learning_rate": 0.00019999637473187875, - "loss": 46.0, - "step": 16841 - }, - { - "epoch": 2.7122669994766295, - "grad_norm": 0.000810042314697057, - "learning_rate": 0.00019999637430108394, - "loss": 46.0, - "step": 16842 - }, - { - "epoch": 2.712428036555417, - "grad_norm": 0.00144431262742728, - "learning_rate": 0.0001999963738702635, - "loss": 46.0, - "step": 16843 - }, - { - "epoch": 2.7125890736342044, - "grad_norm": 0.007915292866528034, - "learning_rate": 0.00019999637343941752, - "loss": 46.0, - "step": 16844 - }, - { - "epoch": 2.7127501107129914, - "grad_norm": 0.002444526879116893, - "learning_rate": 0.00019999637300854587, - "loss": 46.0, - "step": 16845 - }, - { - "epoch": 2.712911147791779, - "grad_norm": 0.0017184007447212934, - "learning_rate": 0.00019999637257764867, - "loss": 46.0, - "step": 16846 - }, - { - "epoch": 2.7130721848705663, - "grad_norm": 0.004984188359230757, - "learning_rate": 0.00019999637214672587, - "loss": 46.0, - "step": 16847 - }, - { - "epoch": 2.713233221949354, - "grad_norm": 0.003552086651325226, - "learning_rate": 0.00019999637171577746, - "loss": 46.0, - "step": 16848 - }, - { - "epoch": 2.7133942590281412, - "grad_norm": 0.0052740732207894325, - "learning_rate": 0.0001999963712848035, - "loss": 46.0, - "step": 16849 - }, - { - "epoch": 2.7135552961069287, - "grad_norm": 0.0037684133276343346, - "learning_rate": 0.0001999963708538039, - "loss": 46.0, - "step": 16850 - }, - { - "epoch": 2.713716333185716, - "grad_norm": 0.0009654142195358872, - "learning_rate": 0.00019999637042277871, - "loss": 46.0, - "step": 16851 - }, - { - "epoch": 2.7138773702645036, - "grad_norm": 0.0017676504794508219, - "learning_rate": 0.00019999636999172796, - "loss": 46.0, - "step": 16852 - }, - { - "epoch": 2.714038407343291, - "grad_norm": 0.003571895882487297, - "learning_rate": 0.00019999636956065159, - "loss": 46.0, - "step": 16853 - }, - { - "epoch": 2.714199444422078, - "grad_norm": 0.0020821099169552326, - "learning_rate": 0.00019999636912954963, - "loss": 46.0, - "step": 16854 - }, - { - "epoch": 2.7143604815008655, - "grad_norm": 0.002076129661872983, - "learning_rate": 0.00019999636869842208, - "loss": 46.0, - "step": 16855 - }, - { - "epoch": 2.714521518579653, - "grad_norm": 0.004761275369673967, - "learning_rate": 0.00019999636826726892, - "loss": 46.0, - "step": 16856 - }, - { - "epoch": 2.7146825556584404, - "grad_norm": 0.0009966637007892132, - "learning_rate": 0.0001999963678360902, - "loss": 46.0, - "step": 16857 - }, - { - "epoch": 2.714843592737228, - "grad_norm": 0.0015429897466674447, - "learning_rate": 0.00019999636740488587, - "loss": 46.0, - "step": 16858 - }, - { - "epoch": 2.715004629816015, - "grad_norm": 0.00360603304579854, - "learning_rate": 0.00019999636697365595, - "loss": 46.0, - "step": 16859 - }, - { - "epoch": 2.7151656668948023, - "grad_norm": 0.0017117158276960254, - "learning_rate": 0.00019999636654240038, - "loss": 46.0, - "step": 16860 - }, - { - "epoch": 2.7153267039735898, - "grad_norm": 0.002073318464681506, - "learning_rate": 0.00019999636611111926, - "loss": 46.0, - "step": 16861 - }, - { - "epoch": 2.7154877410523772, - "grad_norm": 0.001088133081793785, - "learning_rate": 0.00019999636567981258, - "loss": 46.0, - "step": 16862 - }, - { - "epoch": 2.7156487781311647, - "grad_norm": 0.003052320796996355, - "learning_rate": 0.00019999636524848028, - "loss": 46.0, - "step": 16863 - }, - { - "epoch": 2.715809815209952, - "grad_norm": 0.0020853327587246895, - "learning_rate": 0.00019999636481712237, - "loss": 46.0, - "step": 16864 - }, - { - "epoch": 2.7159708522887396, - "grad_norm": 0.015134351328015327, - "learning_rate": 0.00019999636438573887, - "loss": 46.0, - "step": 16865 - }, - { - "epoch": 2.716131889367527, - "grad_norm": 0.001310597755946219, - "learning_rate": 0.00019999636395432978, - "loss": 46.0, - "step": 16866 - }, - { - "epoch": 2.7162929264463145, - "grad_norm": 0.0006142769125290215, - "learning_rate": 0.0001999963635228951, - "loss": 46.0, - "step": 16867 - }, - { - "epoch": 2.7164539635251015, - "grad_norm": 0.0016236824449151754, - "learning_rate": 0.00019999636309143482, - "loss": 46.0, - "step": 16868 - }, - { - "epoch": 2.716615000603889, - "grad_norm": 0.004155067726969719, - "learning_rate": 0.00019999636265994897, - "loss": 46.0, - "step": 16869 - }, - { - "epoch": 2.7167760376826764, - "grad_norm": 0.0015501020243391395, - "learning_rate": 0.00019999636222843749, - "loss": 46.0, - "step": 16870 - }, - { - "epoch": 2.716937074761464, - "grad_norm": 0.0024400365073233843, - "learning_rate": 0.00019999636179690044, - "loss": 46.0, - "step": 16871 - }, - { - "epoch": 2.7170981118402513, - "grad_norm": 0.0010739787248894572, - "learning_rate": 0.0001999963613653378, - "loss": 46.0, - "step": 16872 - }, - { - "epoch": 2.7172591489190387, - "grad_norm": 0.0010813574772328138, - "learning_rate": 0.00019999636093374955, - "loss": 46.0, - "step": 16873 - }, - { - "epoch": 2.7174201859978258, - "grad_norm": 0.0032039438374340534, - "learning_rate": 0.00019999636050213574, - "loss": 46.0, - "step": 16874 - }, - { - "epoch": 2.717581223076613, - "grad_norm": 0.01437385380268097, - "learning_rate": 0.0001999963600704963, - "loss": 46.0, - "step": 16875 - }, - { - "epoch": 2.7177422601554007, - "grad_norm": 0.001206106855534017, - "learning_rate": 0.00019999635963883125, - "loss": 46.0, - "step": 16876 - }, - { - "epoch": 2.717903297234188, - "grad_norm": 0.0036650982219725847, - "learning_rate": 0.00019999635920714065, - "loss": 46.0, - "step": 16877 - }, - { - "epoch": 2.7180643343129756, - "grad_norm": 0.0030384594574570656, - "learning_rate": 0.0001999963587754244, - "loss": 46.0, - "step": 16878 - }, - { - "epoch": 2.718225371391763, - "grad_norm": 0.007299521472305059, - "learning_rate": 0.0001999963583436826, - "loss": 46.0, - "step": 16879 - }, - { - "epoch": 2.7183864084705505, - "grad_norm": 0.0033372414764016867, - "learning_rate": 0.0001999963579119152, - "loss": 46.0, - "step": 16880 - }, - { - "epoch": 2.718547445549338, - "grad_norm": 0.005126938223838806, - "learning_rate": 0.0001999963574801222, - "loss": 46.0, - "step": 16881 - }, - { - "epoch": 2.7187084826281254, - "grad_norm": 0.0023445086553692818, - "learning_rate": 0.00019999635704830363, - "loss": 46.0, - "step": 16882 - }, - { - "epoch": 2.7188695197069124, - "grad_norm": 0.0013366233324632049, - "learning_rate": 0.00019999635661645943, - "loss": 46.0, - "step": 16883 - }, - { - "epoch": 2.7190305567857, - "grad_norm": 0.0010768286883831024, - "learning_rate": 0.00019999635618458966, - "loss": 46.0, - "step": 16884 - }, - { - "epoch": 2.7191915938644873, - "grad_norm": 0.005694425664842129, - "learning_rate": 0.00019999635575269429, - "loss": 46.0, - "step": 16885 - }, - { - "epoch": 2.7193526309432747, - "grad_norm": 0.0034576845355331898, - "learning_rate": 0.00019999635532077332, - "loss": 46.0, - "step": 16886 - }, - { - "epoch": 2.719513668022062, - "grad_norm": 0.003664106596261263, - "learning_rate": 0.00019999635488882674, - "loss": 46.0, - "step": 16887 - }, - { - "epoch": 2.7196747051008496, - "grad_norm": 0.0022734389640390873, - "learning_rate": 0.0001999963544568546, - "loss": 46.0, - "step": 16888 - }, - { - "epoch": 2.7198357421796366, - "grad_norm": 0.0016641359543427825, - "learning_rate": 0.00019999635402485688, - "loss": 46.0, - "step": 16889 - }, - { - "epoch": 2.719996779258424, - "grad_norm": 0.00746966153383255, - "learning_rate": 0.00019999635359283353, - "loss": 46.0, - "step": 16890 - }, - { - "epoch": 2.7201578163372115, - "grad_norm": 0.0013379884185269475, - "learning_rate": 0.0001999963531607846, - "loss": 46.0, - "step": 16891 - }, - { - "epoch": 2.720318853415999, - "grad_norm": 0.005165590904653072, - "learning_rate": 0.00019999635272871004, - "loss": 46.0, - "step": 16892 - }, - { - "epoch": 2.7204798904947864, - "grad_norm": 0.004248811397701502, - "learning_rate": 0.00019999635229660994, - "loss": 46.0, - "step": 16893 - }, - { - "epoch": 2.720640927573574, - "grad_norm": 0.00433771125972271, - "learning_rate": 0.00019999635186448422, - "loss": 46.0, - "step": 16894 - }, - { - "epoch": 2.7208019646523613, - "grad_norm": 0.0009444046299904585, - "learning_rate": 0.00019999635143233292, - "loss": 46.0, - "step": 16895 - }, - { - "epoch": 2.720963001731149, - "grad_norm": 0.00263758422806859, - "learning_rate": 0.000199996351000156, - "loss": 46.0, - "step": 16896 - }, - { - "epoch": 2.7211240388099363, - "grad_norm": 0.0028068292886018753, - "learning_rate": 0.0001999963505679535, - "loss": 46.0, - "step": 16897 - }, - { - "epoch": 2.7212850758887233, - "grad_norm": 0.0009865900501608849, - "learning_rate": 0.00019999635013572543, - "loss": 46.0, - "step": 16898 - }, - { - "epoch": 2.7214461129675107, - "grad_norm": 0.0031842507887631655, - "learning_rate": 0.00019999634970347172, - "loss": 46.0, - "step": 16899 - }, - { - "epoch": 2.721607150046298, - "grad_norm": 0.006077928002923727, - "learning_rate": 0.00019999634927119243, - "loss": 46.0, - "step": 16900 - }, - { - "epoch": 2.7217681871250856, - "grad_norm": 0.0009807192254811525, - "learning_rate": 0.00019999634883888757, - "loss": 46.0, - "step": 16901 - }, - { - "epoch": 2.721929224203873, - "grad_norm": 0.0038750700186938047, - "learning_rate": 0.0001999963484065571, - "loss": 46.0, - "step": 16902 - }, - { - "epoch": 2.72209026128266, - "grad_norm": 0.0015949290245771408, - "learning_rate": 0.00019999634797420105, - "loss": 46.0, - "step": 16903 - }, - { - "epoch": 2.7222512983614475, - "grad_norm": 0.005834200419485569, - "learning_rate": 0.0001999963475418194, - "loss": 46.0, - "step": 16904 - }, - { - "epoch": 2.722412335440235, - "grad_norm": 0.004745587706565857, - "learning_rate": 0.00019999634710941212, - "loss": 46.0, - "step": 16905 - }, - { - "epoch": 2.7225733725190224, - "grad_norm": 0.002906380221247673, - "learning_rate": 0.00019999634667697928, - "loss": 46.0, - "step": 16906 - }, - { - "epoch": 2.72273440959781, - "grad_norm": 0.0029213724192231894, - "learning_rate": 0.00019999634624452085, - "loss": 46.0, - "step": 16907 - }, - { - "epoch": 2.7228954466765973, - "grad_norm": 0.0010784838814288378, - "learning_rate": 0.0001999963458120368, - "loss": 46.0, - "step": 16908 - }, - { - "epoch": 2.723056483755385, - "grad_norm": 0.008651919662952423, - "learning_rate": 0.00019999634537952717, - "loss": 46.0, - "step": 16909 - }, - { - "epoch": 2.7232175208341722, - "grad_norm": 0.0023041178938001394, - "learning_rate": 0.00019999634494699198, - "loss": 46.0, - "step": 16910 - }, - { - "epoch": 2.7233785579129597, - "grad_norm": 0.00258329208008945, - "learning_rate": 0.00019999634451443114, - "loss": 46.0, - "step": 16911 - }, - { - "epoch": 2.7235395949917467, - "grad_norm": 0.0018589961109682918, - "learning_rate": 0.00019999634408184475, - "loss": 46.0, - "step": 16912 - }, - { - "epoch": 2.723700632070534, - "grad_norm": 0.002541129244491458, - "learning_rate": 0.00019999634364923274, - "loss": 46.0, - "step": 16913 - }, - { - "epoch": 2.7238616691493216, - "grad_norm": 0.00421768007799983, - "learning_rate": 0.00019999634321659515, - "loss": 46.0, - "step": 16914 - }, - { - "epoch": 2.724022706228109, - "grad_norm": 0.0016531634610146284, - "learning_rate": 0.000199996342783932, - "loss": 46.0, - "step": 16915 - }, - { - "epoch": 2.7241837433068965, - "grad_norm": 0.0028021091129630804, - "learning_rate": 0.0001999963423512432, - "loss": 46.0, - "step": 16916 - }, - { - "epoch": 2.724344780385684, - "grad_norm": 0.0019305588211864233, - "learning_rate": 0.0001999963419185288, - "loss": 46.0, - "step": 16917 - }, - { - "epoch": 2.724505817464471, - "grad_norm": 0.0008829992148093879, - "learning_rate": 0.00019999634148578884, - "loss": 46.0, - "step": 16918 - }, - { - "epoch": 2.7246668545432584, - "grad_norm": 0.0004995696363039315, - "learning_rate": 0.00019999634105302328, - "loss": 46.0, - "step": 16919 - }, - { - "epoch": 2.724827891622046, - "grad_norm": 0.003270841436460614, - "learning_rate": 0.0001999963406202321, - "loss": 46.0, - "step": 16920 - }, - { - "epoch": 2.7249889287008333, - "grad_norm": 0.005590683314949274, - "learning_rate": 0.00019999634018741535, - "loss": 46.0, - "step": 16921 - }, - { - "epoch": 2.7251499657796208, - "grad_norm": 0.005640998482704163, - "learning_rate": 0.00019999633975457303, - "loss": 46.0, - "step": 16922 - }, - { - "epoch": 2.725311002858408, - "grad_norm": 0.003803322557359934, - "learning_rate": 0.00019999633932170507, - "loss": 46.0, - "step": 16923 - }, - { - "epoch": 2.7254720399371957, - "grad_norm": 0.002323080552741885, - "learning_rate": 0.00019999633888881152, - "loss": 46.0, - "step": 16924 - }, - { - "epoch": 2.725633077015983, - "grad_norm": 0.002194333355873823, - "learning_rate": 0.0001999963384558924, - "loss": 46.0, - "step": 16925 - }, - { - "epoch": 2.7257941140947706, - "grad_norm": 0.0053588952869176865, - "learning_rate": 0.0001999963380229477, - "loss": 46.0, - "step": 16926 - }, - { - "epoch": 2.7259551511735576, - "grad_norm": 0.00917847827076912, - "learning_rate": 0.00019999633758997738, - "loss": 46.0, - "step": 16927 - }, - { - "epoch": 2.726116188252345, - "grad_norm": 0.004154964815825224, - "learning_rate": 0.00019999633715698145, - "loss": 46.0, - "step": 16928 - }, - { - "epoch": 2.7262772253311325, - "grad_norm": 0.0015438022091984749, - "learning_rate": 0.00019999633672395994, - "loss": 46.0, - "step": 16929 - }, - { - "epoch": 2.72643826240992, - "grad_norm": 0.009665518067777157, - "learning_rate": 0.00019999633629091284, - "loss": 46.0, - "step": 16930 - }, - { - "epoch": 2.7265992994887074, - "grad_norm": 0.0051448955200612545, - "learning_rate": 0.00019999633585784016, - "loss": 46.0, - "step": 16931 - }, - { - "epoch": 2.7267603365674944, - "grad_norm": 0.0009643063531257212, - "learning_rate": 0.00019999633542474188, - "loss": 46.0, - "step": 16932 - }, - { - "epoch": 2.726921373646282, - "grad_norm": 0.009452329017221928, - "learning_rate": 0.000199996334991618, - "loss": 46.0, - "step": 16933 - }, - { - "epoch": 2.7270824107250693, - "grad_norm": 0.00345798977650702, - "learning_rate": 0.00019999633455846852, - "loss": 46.0, - "step": 16934 - }, - { - "epoch": 2.7272434478038567, - "grad_norm": 0.002070992486551404, - "learning_rate": 0.00019999633412529346, - "loss": 46.0, - "step": 16935 - }, - { - "epoch": 2.727404484882644, - "grad_norm": 0.008523769676685333, - "learning_rate": 0.00019999633369209278, - "loss": 46.0, - "step": 16936 - }, - { - "epoch": 2.7275655219614316, - "grad_norm": 0.001795751741155982, - "learning_rate": 0.00019999633325886652, - "loss": 46.0, - "step": 16937 - }, - { - "epoch": 2.727726559040219, - "grad_norm": 0.00038119166856631637, - "learning_rate": 0.0001999963328256147, - "loss": 46.0, - "step": 16938 - }, - { - "epoch": 2.7278875961190066, - "grad_norm": 0.0009577765013091266, - "learning_rate": 0.00019999633239233723, - "loss": 46.0, - "step": 16939 - }, - { - "epoch": 2.728048633197794, - "grad_norm": 0.005162945948541164, - "learning_rate": 0.0001999963319590342, - "loss": 46.0, - "step": 16940 - }, - { - "epoch": 2.7282096702765815, - "grad_norm": 0.005504915490746498, - "learning_rate": 0.00019999633152570557, - "loss": 46.0, - "step": 16941 - }, - { - "epoch": 2.7283707073553685, - "grad_norm": 0.0016659394605085254, - "learning_rate": 0.00019999633109235134, - "loss": 46.0, - "step": 16942 - }, - { - "epoch": 2.728531744434156, - "grad_norm": 0.0030824693385511637, - "learning_rate": 0.00019999633065897153, - "loss": 46.0, - "step": 16943 - }, - { - "epoch": 2.7286927815129434, - "grad_norm": 0.004563783295452595, - "learning_rate": 0.00019999633022556613, - "loss": 46.0, - "step": 16944 - }, - { - "epoch": 2.728853818591731, - "grad_norm": 0.011392472311854362, - "learning_rate": 0.0001999963297921351, - "loss": 46.0, - "step": 16945 - }, - { - "epoch": 2.7290148556705183, - "grad_norm": 0.00310645648278296, - "learning_rate": 0.0001999963293586785, - "loss": 46.0, - "step": 16946 - }, - { - "epoch": 2.7291758927493053, - "grad_norm": 0.0014629033394157887, - "learning_rate": 0.0001999963289251963, - "loss": 46.0, - "step": 16947 - }, - { - "epoch": 2.7293369298280927, - "grad_norm": 0.005917146801948547, - "learning_rate": 0.0001999963284916885, - "loss": 46.0, - "step": 16948 - }, - { - "epoch": 2.72949796690688, - "grad_norm": 0.0015914080431684852, - "learning_rate": 0.00019999632805815513, - "loss": 46.0, - "step": 16949 - }, - { - "epoch": 2.7296590039856676, - "grad_norm": 0.002734109992161393, - "learning_rate": 0.00019999632762459615, - "loss": 46.0, - "step": 16950 - }, - { - "epoch": 2.729820041064455, - "grad_norm": 0.00970515701919794, - "learning_rate": 0.0001999963271910116, - "loss": 46.0, - "step": 16951 - }, - { - "epoch": 2.7299810781432425, - "grad_norm": 0.006925088819116354, - "learning_rate": 0.00019999632675740144, - "loss": 46.0, - "step": 16952 - }, - { - "epoch": 2.73014211522203, - "grad_norm": 0.002014497760683298, - "learning_rate": 0.00019999632632376567, - "loss": 46.0, - "step": 16953 - }, - { - "epoch": 2.7303031523008174, - "grad_norm": 0.014175421558320522, - "learning_rate": 0.00019999632589010432, - "loss": 46.0, - "step": 16954 - }, - { - "epoch": 2.730464189379605, - "grad_norm": 0.0023810931015759706, - "learning_rate": 0.00019999632545641735, - "loss": 46.0, - "step": 16955 - }, - { - "epoch": 2.730625226458392, - "grad_norm": 0.0023386916145682335, - "learning_rate": 0.00019999632502270482, - "loss": 46.0, - "step": 16956 - }, - { - "epoch": 2.7307862635371793, - "grad_norm": 0.0014831279404461384, - "learning_rate": 0.00019999632458896668, - "loss": 46.0, - "step": 16957 - }, - { - "epoch": 2.730947300615967, - "grad_norm": 0.004147290717810392, - "learning_rate": 0.00019999632415520295, - "loss": 46.0, - "step": 16958 - }, - { - "epoch": 2.7311083376947543, - "grad_norm": 0.004907776601612568, - "learning_rate": 0.00019999632372141364, - "loss": 46.0, - "step": 16959 - }, - { - "epoch": 2.7312693747735417, - "grad_norm": 0.005365044344216585, - "learning_rate": 0.00019999632328759873, - "loss": 46.0, - "step": 16960 - }, - { - "epoch": 2.731430411852329, - "grad_norm": 0.00871161837130785, - "learning_rate": 0.0001999963228537582, - "loss": 46.0, - "step": 16961 - }, - { - "epoch": 2.731591448931116, - "grad_norm": 0.003037975635379553, - "learning_rate": 0.0001999963224198921, - "loss": 46.0, - "step": 16962 - }, - { - "epoch": 2.7317524860099036, - "grad_norm": 0.0018639479530975223, - "learning_rate": 0.0001999963219860004, - "loss": 46.0, - "step": 16963 - }, - { - "epoch": 2.731913523088691, - "grad_norm": 0.013850276358425617, - "learning_rate": 0.0001999963215520831, - "loss": 46.0, - "step": 16964 - }, - { - "epoch": 2.7320745601674785, - "grad_norm": 0.0006425424944609404, - "learning_rate": 0.00019999632111814024, - "loss": 46.0, - "step": 16965 - }, - { - "epoch": 2.732235597246266, - "grad_norm": 0.0076021356508135796, - "learning_rate": 0.00019999632068417174, - "loss": 46.0, - "step": 16966 - }, - { - "epoch": 2.7323966343250534, - "grad_norm": 0.002191032748669386, - "learning_rate": 0.00019999632025017767, - "loss": 46.0, - "step": 16967 - }, - { - "epoch": 2.732557671403841, - "grad_norm": 0.0030889525078237057, - "learning_rate": 0.00019999631981615801, - "loss": 46.0, - "step": 16968 - }, - { - "epoch": 2.7327187084826283, - "grad_norm": 0.0007897743489593267, - "learning_rate": 0.00019999631938211274, - "loss": 46.0, - "step": 16969 - }, - { - "epoch": 2.7328797455614158, - "grad_norm": 0.010255039669573307, - "learning_rate": 0.00019999631894804186, - "loss": 46.0, - "step": 16970 - }, - { - "epoch": 2.733040782640203, - "grad_norm": 0.0007939495262689888, - "learning_rate": 0.00019999631851394542, - "loss": 46.0, - "step": 16971 - }, - { - "epoch": 2.7332018197189902, - "grad_norm": 0.0021262671798467636, - "learning_rate": 0.00019999631807982339, - "loss": 46.0, - "step": 16972 - }, - { - "epoch": 2.7333628567977777, - "grad_norm": 0.0038738714065402746, - "learning_rate": 0.00019999631764567574, - "loss": 46.0, - "step": 16973 - }, - { - "epoch": 2.733523893876565, - "grad_norm": 0.009010964073240757, - "learning_rate": 0.0001999963172115025, - "loss": 46.0, - "step": 16974 - }, - { - "epoch": 2.7336849309553526, - "grad_norm": 0.0008244227501563728, - "learning_rate": 0.00019999631677730372, - "loss": 46.0, - "step": 16975 - }, - { - "epoch": 2.7338459680341396, - "grad_norm": 0.005699793342500925, - "learning_rate": 0.00019999631634307928, - "loss": 46.0, - "step": 16976 - }, - { - "epoch": 2.734007005112927, - "grad_norm": 0.011394317261874676, - "learning_rate": 0.00019999631590882926, - "loss": 46.0, - "step": 16977 - }, - { - "epoch": 2.7341680421917145, - "grad_norm": 0.0070154257118701935, - "learning_rate": 0.00019999631547455365, - "loss": 46.0, - "step": 16978 - }, - { - "epoch": 2.734329079270502, - "grad_norm": 0.0019240652909502387, - "learning_rate": 0.00019999631504025246, - "loss": 46.0, - "step": 16979 - }, - { - "epoch": 2.7344901163492894, - "grad_norm": 0.018269382417201996, - "learning_rate": 0.00019999631460592565, - "loss": 46.0, - "step": 16980 - }, - { - "epoch": 2.734651153428077, - "grad_norm": 0.0011777982581406832, - "learning_rate": 0.00019999631417157328, - "loss": 46.0, - "step": 16981 - }, - { - "epoch": 2.7348121905068643, - "grad_norm": 0.006356328725814819, - "learning_rate": 0.0001999963137371953, - "loss": 46.0, - "step": 16982 - }, - { - "epoch": 2.7349732275856518, - "grad_norm": 0.0010452274000272155, - "learning_rate": 0.00019999631330279172, - "loss": 46.0, - "step": 16983 - }, - { - "epoch": 2.735134264664439, - "grad_norm": 0.002229926409199834, - "learning_rate": 0.00019999631286836254, - "loss": 46.0, - "step": 16984 - }, - { - "epoch": 2.735295301743226, - "grad_norm": 0.0020133969374001026, - "learning_rate": 0.0001999963124339078, - "loss": 46.0, - "step": 16985 - }, - { - "epoch": 2.7354563388220137, - "grad_norm": 0.0017598966369405389, - "learning_rate": 0.00019999631199942743, - "loss": 46.0, - "step": 16986 - }, - { - "epoch": 2.735617375900801, - "grad_norm": 0.008132735267281532, - "learning_rate": 0.00019999631156492146, - "loss": 46.0, - "step": 16987 - }, - { - "epoch": 2.7357784129795886, - "grad_norm": 0.006711173802614212, - "learning_rate": 0.00019999631113038993, - "loss": 46.0, - "step": 16988 - }, - { - "epoch": 2.735939450058376, - "grad_norm": 0.0009318398078903556, - "learning_rate": 0.00019999631069583278, - "loss": 46.0, - "step": 16989 - }, - { - "epoch": 2.7361004871371635, - "grad_norm": 0.0020174412056803703, - "learning_rate": 0.00019999631026125004, - "loss": 46.0, - "step": 16990 - }, - { - "epoch": 2.7362615242159505, - "grad_norm": 0.007003118749707937, - "learning_rate": 0.00019999630982664172, - "loss": 46.0, - "step": 16991 - }, - { - "epoch": 2.736422561294738, - "grad_norm": 0.0023433587048202753, - "learning_rate": 0.0001999963093920078, - "loss": 46.0, - "step": 16992 - }, - { - "epoch": 2.7365835983735254, - "grad_norm": 0.0013160071102902293, - "learning_rate": 0.00019999630895734829, - "loss": 46.0, - "step": 16993 - }, - { - "epoch": 2.736744635452313, - "grad_norm": 0.0037288167513906956, - "learning_rate": 0.00019999630852266317, - "loss": 46.0, - "step": 16994 - }, - { - "epoch": 2.7369056725311003, - "grad_norm": 0.003628902602940798, - "learning_rate": 0.00019999630808795248, - "loss": 46.0, - "step": 16995 - }, - { - "epoch": 2.7370667096098877, - "grad_norm": 0.00916481763124466, - "learning_rate": 0.00019999630765321616, - "loss": 46.0, - "step": 16996 - }, - { - "epoch": 2.737227746688675, - "grad_norm": 0.0007951876032166183, - "learning_rate": 0.0001999963072184543, - "loss": 46.0, - "step": 16997 - }, - { - "epoch": 2.7373887837674626, - "grad_norm": 0.0014403134118765593, - "learning_rate": 0.0001999963067836668, - "loss": 46.0, - "step": 16998 - }, - { - "epoch": 2.73754982084625, - "grad_norm": 0.007754639256745577, - "learning_rate": 0.0001999963063488537, - "loss": 46.0, - "step": 16999 - }, - { - "epoch": 2.737710857925037, - "grad_norm": 0.004751814529299736, - "learning_rate": 0.00019999630591401504, - "loss": 46.0, - "step": 17000 - }, - { - "epoch": 2.7378718950038246, - "grad_norm": 0.019630469381809235, - "learning_rate": 0.00019999630547915076, - "loss": 46.0, - "step": 17001 - }, - { - "epoch": 2.738032932082612, - "grad_norm": 0.0025465500075370073, - "learning_rate": 0.0001999963050442609, - "loss": 46.0, - "step": 17002 - }, - { - "epoch": 2.7381939691613995, - "grad_norm": 0.00048430985771119595, - "learning_rate": 0.00019999630460934545, - "loss": 46.0, - "step": 17003 - }, - { - "epoch": 2.738355006240187, - "grad_norm": 0.0027142332401126623, - "learning_rate": 0.00019999630417440441, - "loss": 46.0, - "step": 17004 - }, - { - "epoch": 2.7385160433189744, - "grad_norm": 0.007888139225542545, - "learning_rate": 0.00019999630373943776, - "loss": 46.0, - "step": 17005 - }, - { - "epoch": 2.7386770803977614, - "grad_norm": 0.000852732453495264, - "learning_rate": 0.00019999630330444555, - "loss": 46.0, - "step": 17006 - }, - { - "epoch": 2.738838117476549, - "grad_norm": 0.000506145937833935, - "learning_rate": 0.0001999963028694277, - "loss": 46.0, - "step": 17007 - }, - { - "epoch": 2.7389991545553363, - "grad_norm": 0.0009367187740281224, - "learning_rate": 0.00019999630243438429, - "loss": 46.0, - "step": 17008 - }, - { - "epoch": 2.7391601916341237, - "grad_norm": 0.00575898727402091, - "learning_rate": 0.00019999630199931526, - "loss": 46.0, - "step": 17009 - }, - { - "epoch": 2.739321228712911, - "grad_norm": 0.0005599577561952174, - "learning_rate": 0.00019999630156422064, - "loss": 46.0, - "step": 17010 - }, - { - "epoch": 2.7394822657916986, - "grad_norm": 0.0029721814207732677, - "learning_rate": 0.00019999630112910044, - "loss": 46.0, - "step": 17011 - }, - { - "epoch": 2.739643302870486, - "grad_norm": 0.007054353132843971, - "learning_rate": 0.00019999630069395463, - "loss": 46.0, - "step": 17012 - }, - { - "epoch": 2.7398043399492735, - "grad_norm": 0.0020257323049008846, - "learning_rate": 0.00019999630025878325, - "loss": 46.0, - "step": 17013 - }, - { - "epoch": 2.739965377028061, - "grad_norm": 0.004529378842562437, - "learning_rate": 0.00019999629982358626, - "loss": 46.0, - "step": 17014 - }, - { - "epoch": 2.740126414106848, - "grad_norm": 0.0008891024626791477, - "learning_rate": 0.00019999629938836368, - "loss": 46.0, - "step": 17015 - }, - { - "epoch": 2.7402874511856354, - "grad_norm": 0.0015078552532941103, - "learning_rate": 0.0001999962989531155, - "loss": 46.0, - "step": 17016 - }, - { - "epoch": 2.740448488264423, - "grad_norm": 0.0031485690269619226, - "learning_rate": 0.0001999962985178417, - "loss": 46.0, - "step": 17017 - }, - { - "epoch": 2.7406095253432103, - "grad_norm": 0.0015434781089425087, - "learning_rate": 0.00019999629808254237, - "loss": 46.0, - "step": 17018 - }, - { - "epoch": 2.740770562421998, - "grad_norm": 0.0034655025228857994, - "learning_rate": 0.0001999962976472174, - "loss": 46.0, - "step": 17019 - }, - { - "epoch": 2.740931599500785, - "grad_norm": 0.0029644996393471956, - "learning_rate": 0.00019999629721186683, - "loss": 46.0, - "step": 17020 - }, - { - "epoch": 2.7410926365795723, - "grad_norm": 0.005331673193722963, - "learning_rate": 0.0001999962967764907, - "loss": 46.0, - "step": 17021 - }, - { - "epoch": 2.7412536736583597, - "grad_norm": 0.0036207479424774647, - "learning_rate": 0.00019999629634108896, - "loss": 46.0, - "step": 17022 - }, - { - "epoch": 2.741414710737147, - "grad_norm": 0.0013629754539579153, - "learning_rate": 0.00019999629590566166, - "loss": 46.0, - "step": 17023 - }, - { - "epoch": 2.7415757478159346, - "grad_norm": 0.004165869206190109, - "learning_rate": 0.0001999962954702087, - "loss": 46.0, - "step": 17024 - }, - { - "epoch": 2.741736784894722, - "grad_norm": 0.0029115048237144947, - "learning_rate": 0.00019999629503473018, - "loss": 46.0, - "step": 17025 - }, - { - "epoch": 2.7418978219735095, - "grad_norm": 0.0013682834105566144, - "learning_rate": 0.00019999629459922606, - "loss": 46.0, - "step": 17026 - }, - { - "epoch": 2.742058859052297, - "grad_norm": 0.0060378173366189, - "learning_rate": 0.00019999629416369636, - "loss": 46.0, - "step": 17027 - }, - { - "epoch": 2.7422198961310844, - "grad_norm": 0.002196847926825285, - "learning_rate": 0.00019999629372814104, - "loss": 46.0, - "step": 17028 - }, - { - "epoch": 2.7423809332098714, - "grad_norm": 0.0008671838440932333, - "learning_rate": 0.00019999629329256016, - "loss": 46.0, - "step": 17029 - }, - { - "epoch": 2.742541970288659, - "grad_norm": 0.0036622064653784037, - "learning_rate": 0.00019999629285695367, - "loss": 46.0, - "step": 17030 - }, - { - "epoch": 2.7427030073674463, - "grad_norm": 0.0011343695223331451, - "learning_rate": 0.0001999962924213216, - "loss": 46.0, - "step": 17031 - }, - { - "epoch": 2.7428640444462338, - "grad_norm": 0.0010958018247038126, - "learning_rate": 0.0001999962919856639, - "loss": 46.0, - "step": 17032 - }, - { - "epoch": 2.7430250815250212, - "grad_norm": 0.001842381781898439, - "learning_rate": 0.00019999629154998064, - "loss": 46.0, - "step": 17033 - }, - { - "epoch": 2.7431861186038087, - "grad_norm": 0.0020009654108434916, - "learning_rate": 0.00019999629111427177, - "loss": 46.0, - "step": 17034 - }, - { - "epoch": 2.7433471556825957, - "grad_norm": 0.0005218570586293936, - "learning_rate": 0.0001999962906785373, - "loss": 46.0, - "step": 17035 - }, - { - "epoch": 2.743508192761383, - "grad_norm": 0.002518236171454191, - "learning_rate": 0.00019999629024277727, - "loss": 46.0, - "step": 17036 - }, - { - "epoch": 2.7436692298401706, - "grad_norm": 0.007329927291721106, - "learning_rate": 0.00019999628980699158, - "loss": 46.0, - "step": 17037 - }, - { - "epoch": 2.743830266918958, - "grad_norm": 0.001647655968554318, - "learning_rate": 0.00019999628937118037, - "loss": 46.0, - "step": 17038 - }, - { - "epoch": 2.7439913039977455, - "grad_norm": 0.00754706934094429, - "learning_rate": 0.00019999628893534353, - "loss": 46.0, - "step": 17039 - }, - { - "epoch": 2.744152341076533, - "grad_norm": 0.002074816031381488, - "learning_rate": 0.0001999962884994811, - "loss": 46.0, - "step": 17040 - }, - { - "epoch": 2.7443133781553204, - "grad_norm": 0.006157160736620426, - "learning_rate": 0.00019999628806359305, - "loss": 46.0, - "step": 17041 - }, - { - "epoch": 2.744474415234108, - "grad_norm": 0.0017099973047152162, - "learning_rate": 0.00019999628762767943, - "loss": 46.0, - "step": 17042 - }, - { - "epoch": 2.7446354523128953, - "grad_norm": 0.002026587026193738, - "learning_rate": 0.00019999628719174025, - "loss": 46.0, - "step": 17043 - }, - { - "epoch": 2.7447964893916823, - "grad_norm": 0.0036466491874307394, - "learning_rate": 0.00019999628675577543, - "loss": 46.0, - "step": 17044 - }, - { - "epoch": 2.7449575264704698, - "grad_norm": 0.0018727057613432407, - "learning_rate": 0.00019999628631978502, - "loss": 46.0, - "step": 17045 - }, - { - "epoch": 2.745118563549257, - "grad_norm": 0.0014251800021156669, - "learning_rate": 0.00019999628588376905, - "loss": 46.0, - "step": 17046 - }, - { - "epoch": 2.7452796006280447, - "grad_norm": 0.0037118534091860056, - "learning_rate": 0.00019999628544772744, - "loss": 46.0, - "step": 17047 - }, - { - "epoch": 2.745440637706832, - "grad_norm": 0.007601283490657806, - "learning_rate": 0.00019999628501166027, - "loss": 46.0, - "step": 17048 - }, - { - "epoch": 2.745601674785619, - "grad_norm": 0.004464562050998211, - "learning_rate": 0.00019999628457556748, - "loss": 46.0, - "step": 17049 - }, - { - "epoch": 2.7457627118644066, - "grad_norm": 0.008064158260822296, - "learning_rate": 0.0001999962841394491, - "loss": 46.0, - "step": 17050 - }, - { - "epoch": 2.745923748943194, - "grad_norm": 0.0014315111329779029, - "learning_rate": 0.00019999628370330515, - "loss": 46.0, - "step": 17051 - }, - { - "epoch": 2.7460847860219815, - "grad_norm": 0.0015736041823402047, - "learning_rate": 0.0001999962832671356, - "loss": 46.0, - "step": 17052 - }, - { - "epoch": 2.746245823100769, - "grad_norm": 0.005847330670803785, - "learning_rate": 0.00019999628283094044, - "loss": 46.0, - "step": 17053 - }, - { - "epoch": 2.7464068601795564, - "grad_norm": 0.004249951336532831, - "learning_rate": 0.0001999962823947197, - "loss": 46.0, - "step": 17054 - }, - { - "epoch": 2.746567897258344, - "grad_norm": 0.0013741877628490329, - "learning_rate": 0.00019999628195847336, - "loss": 46.0, - "step": 17055 - }, - { - "epoch": 2.7467289343371313, - "grad_norm": 0.0016543520614504814, - "learning_rate": 0.00019999628152220144, - "loss": 46.0, - "step": 17056 - }, - { - "epoch": 2.7468899714159187, - "grad_norm": 0.005567203741520643, - "learning_rate": 0.0001999962810859039, - "loss": 46.0, - "step": 17057 - }, - { - "epoch": 2.747051008494706, - "grad_norm": 0.0024511455558240414, - "learning_rate": 0.00019999628064958078, - "loss": 46.0, - "step": 17058 - }, - { - "epoch": 2.747212045573493, - "grad_norm": 0.0012431987561285496, - "learning_rate": 0.00019999628021323207, - "loss": 46.0, - "step": 17059 - }, - { - "epoch": 2.7473730826522806, - "grad_norm": 0.001957609783858061, - "learning_rate": 0.00019999627977685774, - "loss": 46.0, - "step": 17060 - }, - { - "epoch": 2.747534119731068, - "grad_norm": 0.00042120725265704095, - "learning_rate": 0.00019999627934045786, - "loss": 46.0, - "step": 17061 - }, - { - "epoch": 2.7476951568098555, - "grad_norm": 0.007067305967211723, - "learning_rate": 0.00019999627890403236, - "loss": 46.0, - "step": 17062 - }, - { - "epoch": 2.747856193888643, - "grad_norm": 0.0051729436963796616, - "learning_rate": 0.00019999627846758127, - "loss": 46.0, - "step": 17063 - }, - { - "epoch": 2.74801723096743, - "grad_norm": 0.007195282261818647, - "learning_rate": 0.0001999962780311046, - "loss": 46.0, - "step": 17064 - }, - { - "epoch": 2.7481782680462175, - "grad_norm": 0.0033652957063168287, - "learning_rate": 0.0001999962775946023, - "loss": 46.0, - "step": 17065 - }, - { - "epoch": 2.748339305125005, - "grad_norm": 0.004628072027117014, - "learning_rate": 0.00019999627715807443, - "loss": 46.0, - "step": 17066 - }, - { - "epoch": 2.7485003422037924, - "grad_norm": 0.009264047257602215, - "learning_rate": 0.00019999627672152097, - "loss": 46.0, - "step": 17067 - }, - { - "epoch": 2.74866137928258, - "grad_norm": 0.005872600711882114, - "learning_rate": 0.00019999627628494192, - "loss": 46.0, - "step": 17068 - }, - { - "epoch": 2.7488224163613673, - "grad_norm": 0.0036728999111801386, - "learning_rate": 0.00019999627584833726, - "loss": 46.0, - "step": 17069 - }, - { - "epoch": 2.7489834534401547, - "grad_norm": 0.0038058655336499214, - "learning_rate": 0.00019999627541170703, - "loss": 46.0, - "step": 17070 - }, - { - "epoch": 2.749144490518942, - "grad_norm": 0.008635470643639565, - "learning_rate": 0.00019999627497505117, - "loss": 46.0, - "step": 17071 - }, - { - "epoch": 2.7493055275977296, - "grad_norm": 0.0016123350942507386, - "learning_rate": 0.00019999627453836974, - "loss": 46.0, - "step": 17072 - }, - { - "epoch": 2.7494665646765166, - "grad_norm": 0.0011007452849298716, - "learning_rate": 0.0001999962741016627, - "loss": 46.0, - "step": 17073 - }, - { - "epoch": 2.749627601755304, - "grad_norm": 0.0030108599457889795, - "learning_rate": 0.00019999627366493007, - "loss": 46.0, - "step": 17074 - }, - { - "epoch": 2.7497886388340915, - "grad_norm": 0.005482657812535763, - "learning_rate": 0.00019999627322817186, - "loss": 46.0, - "step": 17075 - }, - { - "epoch": 2.749949675912879, - "grad_norm": 0.00085295713506639, - "learning_rate": 0.00019999627279138803, - "loss": 46.0, - "step": 17076 - }, - { - "epoch": 2.7501107129916664, - "grad_norm": 0.01183785405009985, - "learning_rate": 0.00019999627235457864, - "loss": 46.0, - "step": 17077 - }, - { - "epoch": 2.750271750070454, - "grad_norm": 0.004167368169873953, - "learning_rate": 0.00019999627191774364, - "loss": 46.0, - "step": 17078 - }, - { - "epoch": 2.750432787149241, - "grad_norm": 0.0018456553807482123, - "learning_rate": 0.00019999627148088305, - "loss": 46.0, - "step": 17079 - }, - { - "epoch": 2.7505938242280283, - "grad_norm": 0.00039835498319007456, - "learning_rate": 0.00019999627104399687, - "loss": 46.0, - "step": 17080 - }, - { - "epoch": 2.750754861306816, - "grad_norm": 0.0021245498210191727, - "learning_rate": 0.00019999627060708508, - "loss": 46.0, - "step": 17081 - }, - { - "epoch": 2.7509158983856032, - "grad_norm": 0.0012092370307072997, - "learning_rate": 0.0001999962701701477, - "loss": 46.0, - "step": 17082 - }, - { - "epoch": 2.7510769354643907, - "grad_norm": 0.00638191681355238, - "learning_rate": 0.00019999626973318474, - "loss": 46.0, - "step": 17083 - }, - { - "epoch": 2.751237972543178, - "grad_norm": 0.005046867299824953, - "learning_rate": 0.00019999626929619618, - "loss": 46.0, - "step": 17084 - }, - { - "epoch": 2.7513990096219656, - "grad_norm": 0.003371955594047904, - "learning_rate": 0.00019999626885918202, - "loss": 46.0, - "step": 17085 - }, - { - "epoch": 2.751560046700753, - "grad_norm": 0.006602564360946417, - "learning_rate": 0.00019999626842214226, - "loss": 46.0, - "step": 17086 - }, - { - "epoch": 2.7517210837795405, - "grad_norm": 0.002260745968669653, - "learning_rate": 0.00019999626798507692, - "loss": 46.0, - "step": 17087 - }, - { - "epoch": 2.7518821208583275, - "grad_norm": 0.0013032500864937901, - "learning_rate": 0.000199996267547986, - "loss": 46.0, - "step": 17088 - }, - { - "epoch": 2.752043157937115, - "grad_norm": 0.0014441004022955894, - "learning_rate": 0.00019999626711086945, - "loss": 46.0, - "step": 17089 - }, - { - "epoch": 2.7522041950159024, - "grad_norm": 0.008795741014182568, - "learning_rate": 0.00019999626667372732, - "loss": 46.0, - "step": 17090 - }, - { - "epoch": 2.75236523209469, - "grad_norm": 0.0006345787551254034, - "learning_rate": 0.00019999626623655963, - "loss": 46.0, - "step": 17091 - }, - { - "epoch": 2.7525262691734773, - "grad_norm": 0.0006841893773525953, - "learning_rate": 0.0001999962657993663, - "loss": 46.0, - "step": 17092 - }, - { - "epoch": 2.7526873062522643, - "grad_norm": 0.00434846943244338, - "learning_rate": 0.0001999962653621474, - "loss": 46.0, - "step": 17093 - }, - { - "epoch": 2.7528483433310518, - "grad_norm": 0.001952580758370459, - "learning_rate": 0.0001999962649249029, - "loss": 46.0, - "step": 17094 - }, - { - "epoch": 2.7530093804098392, - "grad_norm": 0.0019157506758347154, - "learning_rate": 0.0001999962644876328, - "loss": 46.0, - "step": 17095 - }, - { - "epoch": 2.7531704174886267, - "grad_norm": 0.0026203191373497248, - "learning_rate": 0.0001999962640503371, - "loss": 46.0, - "step": 17096 - }, - { - "epoch": 2.753331454567414, - "grad_norm": 0.007479417137801647, - "learning_rate": 0.00019999626361301583, - "loss": 46.0, - "step": 17097 - }, - { - "epoch": 2.7534924916462016, - "grad_norm": 0.002465332392603159, - "learning_rate": 0.00019999626317566892, - "loss": 46.0, - "step": 17098 - }, - { - "epoch": 2.753653528724989, - "grad_norm": 0.0036086381878703833, - "learning_rate": 0.00019999626273829648, - "loss": 46.0, - "step": 17099 - }, - { - "epoch": 2.7538145658037765, - "grad_norm": 0.0011274574790149927, - "learning_rate": 0.0001999962623008984, - "loss": 46.0, - "step": 17100 - }, - { - "epoch": 2.753975602882564, - "grad_norm": 0.00920192338526249, - "learning_rate": 0.00019999626186347475, - "loss": 46.0, - "step": 17101 - }, - { - "epoch": 2.754136639961351, - "grad_norm": 0.0008029105956666172, - "learning_rate": 0.00019999626142602547, - "loss": 46.0, - "step": 17102 - }, - { - "epoch": 2.7542976770401384, - "grad_norm": 0.0016800423618406057, - "learning_rate": 0.00019999626098855065, - "loss": 46.0, - "step": 17103 - }, - { - "epoch": 2.754458714118926, - "grad_norm": 0.0028716272208839655, - "learning_rate": 0.00019999626055105022, - "loss": 46.0, - "step": 17104 - }, - { - "epoch": 2.7546197511977133, - "grad_norm": 0.0016534305177628994, - "learning_rate": 0.00019999626011352417, - "loss": 46.0, - "step": 17105 - }, - { - "epoch": 2.7547807882765007, - "grad_norm": 0.002045708242803812, - "learning_rate": 0.00019999625967597254, - "loss": 46.0, - "step": 17106 - }, - { - "epoch": 2.754941825355288, - "grad_norm": 0.008509145118296146, - "learning_rate": 0.00019999625923839532, - "loss": 46.0, - "step": 17107 - }, - { - "epoch": 2.755102862434075, - "grad_norm": 0.0014312396524474025, - "learning_rate": 0.0001999962588007925, - "loss": 46.0, - "step": 17108 - }, - { - "epoch": 2.7552638995128627, - "grad_norm": 0.0030597897712141275, - "learning_rate": 0.0001999962583631641, - "loss": 46.0, - "step": 17109 - }, - { - "epoch": 2.75542493659165, - "grad_norm": 0.0068954406306147575, - "learning_rate": 0.00019999625792551008, - "loss": 46.0, - "step": 17110 - }, - { - "epoch": 2.7555859736704376, - "grad_norm": 0.003485912922769785, - "learning_rate": 0.00019999625748783048, - "loss": 46.0, - "step": 17111 - }, - { - "epoch": 2.755747010749225, - "grad_norm": 0.011335739865899086, - "learning_rate": 0.0001999962570501253, - "loss": 46.0, - "step": 17112 - }, - { - "epoch": 2.7559080478280125, - "grad_norm": 0.004586663097143173, - "learning_rate": 0.00019999625661239453, - "loss": 46.0, - "step": 17113 - }, - { - "epoch": 2.7560690849068, - "grad_norm": 0.011939934454858303, - "learning_rate": 0.00019999625617463812, - "loss": 46.0, - "step": 17114 - }, - { - "epoch": 2.7562301219855874, - "grad_norm": 0.003280117642134428, - "learning_rate": 0.00019999625573685617, - "loss": 46.0, - "step": 17115 - }, - { - "epoch": 2.756391159064375, - "grad_norm": 0.02143050730228424, - "learning_rate": 0.00019999625529904859, - "loss": 46.0, - "step": 17116 - }, - { - "epoch": 2.756552196143162, - "grad_norm": 0.0033806515857577324, - "learning_rate": 0.00019999625486121544, - "loss": 46.0, - "step": 17117 - }, - { - "epoch": 2.7567132332219493, - "grad_norm": 0.002159483265131712, - "learning_rate": 0.00019999625442335668, - "loss": 46.0, - "step": 17118 - }, - { - "epoch": 2.7568742703007367, - "grad_norm": 0.0027408525347709656, - "learning_rate": 0.00019999625398547233, - "loss": 46.0, - "step": 17119 - }, - { - "epoch": 2.757035307379524, - "grad_norm": 0.005222344771027565, - "learning_rate": 0.00019999625354756237, - "loss": 46.0, - "step": 17120 - }, - { - "epoch": 2.7571963444583116, - "grad_norm": 0.0006948627415113151, - "learning_rate": 0.00019999625310962685, - "loss": 46.0, - "step": 17121 - }, - { - "epoch": 2.7573573815370986, - "grad_norm": 0.0007239735568873584, - "learning_rate": 0.0001999962526716657, - "loss": 46.0, - "step": 17122 - }, - { - "epoch": 2.757518418615886, - "grad_norm": 0.005654027685523033, - "learning_rate": 0.00019999625223367899, - "loss": 46.0, - "step": 17123 - }, - { - "epoch": 2.7576794556946735, - "grad_norm": 0.005968195386230946, - "learning_rate": 0.00019999625179566665, - "loss": 46.0, - "step": 17124 - }, - { - "epoch": 2.757840492773461, - "grad_norm": 0.0015129344537854195, - "learning_rate": 0.00019999625135762875, - "loss": 46.0, - "step": 17125 - }, - { - "epoch": 2.7580015298522484, - "grad_norm": 0.007854820229113102, - "learning_rate": 0.00019999625091956524, - "loss": 46.0, - "step": 17126 - }, - { - "epoch": 2.758162566931036, - "grad_norm": 0.0008547230972908437, - "learning_rate": 0.00019999625048147614, - "loss": 46.0, - "step": 17127 - }, - { - "epoch": 2.7583236040098233, - "grad_norm": 0.0034407672937959433, - "learning_rate": 0.00019999625004336145, - "loss": 46.0, - "step": 17128 - }, - { - "epoch": 2.758484641088611, - "grad_norm": 0.0030282388906925917, - "learning_rate": 0.00019999624960522118, - "loss": 46.0, - "step": 17129 - }, - { - "epoch": 2.7586456781673983, - "grad_norm": 0.006146104075014591, - "learning_rate": 0.0001999962491670553, - "loss": 46.0, - "step": 17130 - }, - { - "epoch": 2.7588067152461857, - "grad_norm": 0.0004937821649946272, - "learning_rate": 0.0001999962487288638, - "loss": 46.0, - "step": 17131 - }, - { - "epoch": 2.7589677523249727, - "grad_norm": 0.007552339229732752, - "learning_rate": 0.00019999624829064672, - "loss": 46.0, - "step": 17132 - }, - { - "epoch": 2.75912878940376, - "grad_norm": 0.005795442499220371, - "learning_rate": 0.00019999624785240405, - "loss": 46.0, - "step": 17133 - }, - { - "epoch": 2.7592898264825476, - "grad_norm": 0.00309941079467535, - "learning_rate": 0.0001999962474141358, - "loss": 46.0, - "step": 17134 - }, - { - "epoch": 2.759450863561335, - "grad_norm": 0.007975741289556026, - "learning_rate": 0.00019999624697584196, - "loss": 46.0, - "step": 17135 - }, - { - "epoch": 2.7596119006401225, - "grad_norm": 0.001728990231640637, - "learning_rate": 0.00019999624653752252, - "loss": 46.0, - "step": 17136 - }, - { - "epoch": 2.7597729377189095, - "grad_norm": 0.004385133273899555, - "learning_rate": 0.00019999624609917747, - "loss": 46.0, - "step": 17137 - }, - { - "epoch": 2.759933974797697, - "grad_norm": 0.0034701130352914333, - "learning_rate": 0.00019999624566080683, - "loss": 46.0, - "step": 17138 - }, - { - "epoch": 2.7600950118764844, - "grad_norm": 0.0009467012714594603, - "learning_rate": 0.0001999962452224106, - "loss": 46.0, - "step": 17139 - }, - { - "epoch": 2.760256048955272, - "grad_norm": 0.008176835253834724, - "learning_rate": 0.0001999962447839888, - "loss": 46.0, - "step": 17140 - }, - { - "epoch": 2.7604170860340593, - "grad_norm": 0.009744575247168541, - "learning_rate": 0.0001999962443455414, - "loss": 46.0, - "step": 17141 - }, - { - "epoch": 2.760578123112847, - "grad_norm": 0.0012876407708972692, - "learning_rate": 0.00019999624390706834, - "loss": 46.0, - "step": 17142 - }, - { - "epoch": 2.7607391601916342, - "grad_norm": 0.0009183231159113348, - "learning_rate": 0.00019999624346856974, - "loss": 46.0, - "step": 17143 - }, - { - "epoch": 2.7609001972704217, - "grad_norm": 0.002574438927695155, - "learning_rate": 0.00019999624303004555, - "loss": 46.0, - "step": 17144 - }, - { - "epoch": 2.761061234349209, - "grad_norm": 0.0023398450575768948, - "learning_rate": 0.00019999624259149577, - "loss": 46.0, - "step": 17145 - }, - { - "epoch": 2.761222271427996, - "grad_norm": 0.0059340642765164375, - "learning_rate": 0.00019999624215292038, - "loss": 46.0, - "step": 17146 - }, - { - "epoch": 2.7613833085067836, - "grad_norm": 0.0037918237503618, - "learning_rate": 0.0001999962417143194, - "loss": 46.0, - "step": 17147 - }, - { - "epoch": 2.761544345585571, - "grad_norm": 0.006784059107303619, - "learning_rate": 0.00019999624127569284, - "loss": 46.0, - "step": 17148 - }, - { - "epoch": 2.7617053826643585, - "grad_norm": 0.005706030875444412, - "learning_rate": 0.00019999624083704066, - "loss": 46.0, - "step": 17149 - }, - { - "epoch": 2.761866419743146, - "grad_norm": 0.0014201728627085686, - "learning_rate": 0.00019999624039836292, - "loss": 46.0, - "step": 17150 - }, - { - "epoch": 2.7620274568219334, - "grad_norm": 0.002819629618898034, - "learning_rate": 0.00019999623995965954, - "loss": 46.0, - "step": 17151 - }, - { - "epoch": 2.7621884939007204, - "grad_norm": 0.0010610180906951427, - "learning_rate": 0.0001999962395209306, - "loss": 46.0, - "step": 17152 - }, - { - "epoch": 2.762349530979508, - "grad_norm": 0.006625819485634565, - "learning_rate": 0.00019999623908217607, - "loss": 46.0, - "step": 17153 - }, - { - "epoch": 2.7625105680582953, - "grad_norm": 0.008228259161114693, - "learning_rate": 0.00019999623864339593, - "loss": 46.0, - "step": 17154 - }, - { - "epoch": 2.7626716051370828, - "grad_norm": 0.009319491684436798, - "learning_rate": 0.0001999962382045902, - "loss": 46.0, - "step": 17155 - }, - { - "epoch": 2.76283264221587, - "grad_norm": 0.0008523929864168167, - "learning_rate": 0.00019999623776575888, - "loss": 46.0, - "step": 17156 - }, - { - "epoch": 2.7629936792946577, - "grad_norm": 0.00168919179122895, - "learning_rate": 0.00019999623732690195, - "loss": 46.0, - "step": 17157 - }, - { - "epoch": 2.763154716373445, - "grad_norm": 0.010237525217235088, - "learning_rate": 0.00019999623688801946, - "loss": 46.0, - "step": 17158 - }, - { - "epoch": 2.7633157534522326, - "grad_norm": 0.008353172801434994, - "learning_rate": 0.00019999623644911135, - "loss": 46.0, - "step": 17159 - }, - { - "epoch": 2.76347679053102, - "grad_norm": 0.007083939854055643, - "learning_rate": 0.00019999623601017763, - "loss": 46.0, - "step": 17160 - }, - { - "epoch": 2.763637827609807, - "grad_norm": 0.012969098053872585, - "learning_rate": 0.00019999623557121835, - "loss": 46.0, - "step": 17161 - }, - { - "epoch": 2.7637988646885945, - "grad_norm": 0.00047954244655556977, - "learning_rate": 0.00019999623513223345, - "loss": 46.0, - "step": 17162 - }, - { - "epoch": 2.763959901767382, - "grad_norm": 0.007116248365491629, - "learning_rate": 0.00019999623469322297, - "loss": 46.0, - "step": 17163 - }, - { - "epoch": 2.7641209388461694, - "grad_norm": 0.003631382016465068, - "learning_rate": 0.0001999962342541869, - "loss": 46.0, - "step": 17164 - }, - { - "epoch": 2.764281975924957, - "grad_norm": 0.0019030936527997255, - "learning_rate": 0.00019999623381512522, - "loss": 46.0, - "step": 17165 - }, - { - "epoch": 2.764443013003744, - "grad_norm": 0.0013142320094630122, - "learning_rate": 0.00019999623337603798, - "loss": 46.0, - "step": 17166 - }, - { - "epoch": 2.7646040500825313, - "grad_norm": 0.0013567854184657335, - "learning_rate": 0.00019999623293692512, - "loss": 46.0, - "step": 17167 - }, - { - "epoch": 2.7647650871613187, - "grad_norm": 0.002399209188297391, - "learning_rate": 0.00019999623249778665, - "loss": 46.0, - "step": 17168 - }, - { - "epoch": 2.764926124240106, - "grad_norm": 0.007906840182840824, - "learning_rate": 0.00019999623205862262, - "loss": 46.0, - "step": 17169 - }, - { - "epoch": 2.7650871613188936, - "grad_norm": 0.002173451706767082, - "learning_rate": 0.00019999623161943297, - "loss": 46.0, - "step": 17170 - }, - { - "epoch": 2.765248198397681, - "grad_norm": 0.006524259690195322, - "learning_rate": 0.00019999623118021776, - "loss": 46.0, - "step": 17171 - }, - { - "epoch": 2.7654092354764686, - "grad_norm": 0.005930430721491575, - "learning_rate": 0.00019999623074097692, - "loss": 46.0, - "step": 17172 - }, - { - "epoch": 2.765570272555256, - "grad_norm": 0.010853881016373634, - "learning_rate": 0.00019999623030171048, - "loss": 46.0, - "step": 17173 - }, - { - "epoch": 2.7657313096340435, - "grad_norm": 0.0033220115583389997, - "learning_rate": 0.00019999622986241846, - "loss": 46.0, - "step": 17174 - }, - { - "epoch": 2.7658923467128305, - "grad_norm": 0.005506960209459066, - "learning_rate": 0.00019999622942310085, - "loss": 46.0, - "step": 17175 - }, - { - "epoch": 2.766053383791618, - "grad_norm": 0.004830505233258009, - "learning_rate": 0.00019999622898375768, - "loss": 46.0, - "step": 17176 - }, - { - "epoch": 2.7662144208704054, - "grad_norm": 0.004754712339490652, - "learning_rate": 0.00019999622854438887, - "loss": 46.0, - "step": 17177 - }, - { - "epoch": 2.766375457949193, - "grad_norm": 0.0052373409271240234, - "learning_rate": 0.00019999622810499447, - "loss": 46.0, - "step": 17178 - }, - { - "epoch": 2.7665364950279803, - "grad_norm": 0.011678526178002357, - "learning_rate": 0.00019999622766557452, - "loss": 46.0, - "step": 17179 - }, - { - "epoch": 2.7666975321067677, - "grad_norm": 0.002471431391313672, - "learning_rate": 0.00019999622722612892, - "loss": 46.0, - "step": 17180 - }, - { - "epoch": 2.7668585691855547, - "grad_norm": 0.0028403140604496002, - "learning_rate": 0.00019999622678665776, - "loss": 46.0, - "step": 17181 - }, - { - "epoch": 2.767019606264342, - "grad_norm": 0.0040451339446008205, - "learning_rate": 0.00019999622634716099, - "loss": 46.0, - "step": 17182 - }, - { - "epoch": 2.7671806433431296, - "grad_norm": 0.006811333354562521, - "learning_rate": 0.00019999622590763863, - "loss": 46.0, - "step": 17183 - }, - { - "epoch": 2.767341680421917, - "grad_norm": 0.0028553283773362637, - "learning_rate": 0.00019999622546809068, - "loss": 46.0, - "step": 17184 - }, - { - "epoch": 2.7675027175007045, - "grad_norm": 0.0010586761636659503, - "learning_rate": 0.00019999622502851712, - "loss": 46.0, - "step": 17185 - }, - { - "epoch": 2.767663754579492, - "grad_norm": 0.0017935532378032804, - "learning_rate": 0.00019999622458891797, - "loss": 46.0, - "step": 17186 - }, - { - "epoch": 2.7678247916582794, - "grad_norm": 0.001102588721551001, - "learning_rate": 0.00019999622414929323, - "loss": 46.0, - "step": 17187 - }, - { - "epoch": 2.767985828737067, - "grad_norm": 0.0004795800778083503, - "learning_rate": 0.00019999622370964293, - "loss": 46.0, - "step": 17188 - }, - { - "epoch": 2.7681468658158543, - "grad_norm": 0.005188523791730404, - "learning_rate": 0.000199996223269967, - "loss": 46.0, - "step": 17189 - }, - { - "epoch": 2.7683079028946413, - "grad_norm": 0.0007705677999183536, - "learning_rate": 0.00019999622283026547, - "loss": 46.0, - "step": 17190 - }, - { - "epoch": 2.768468939973429, - "grad_norm": 0.003192085074260831, - "learning_rate": 0.00019999622239053836, - "loss": 46.0, - "step": 17191 - }, - { - "epoch": 2.7686299770522163, - "grad_norm": 0.008505075238645077, - "learning_rate": 0.00019999622195078563, - "loss": 46.0, - "step": 17192 - }, - { - "epoch": 2.7687910141310037, - "grad_norm": 0.0007946674013510346, - "learning_rate": 0.00019999622151100737, - "loss": 46.0, - "step": 17193 - }, - { - "epoch": 2.768952051209791, - "grad_norm": 0.004044679459184408, - "learning_rate": 0.00019999622107120347, - "loss": 46.0, - "step": 17194 - }, - { - "epoch": 2.7691130882885786, - "grad_norm": 0.0008073249482549727, - "learning_rate": 0.00019999622063137398, - "loss": 46.0, - "step": 17195 - }, - { - "epoch": 2.7692741253673656, - "grad_norm": 0.0007710932986810803, - "learning_rate": 0.0001999962201915189, - "loss": 46.0, - "step": 17196 - }, - { - "epoch": 2.769435162446153, - "grad_norm": 0.002095641568303108, - "learning_rate": 0.00019999621975163822, - "loss": 46.0, - "step": 17197 - }, - { - "epoch": 2.7695961995249405, - "grad_norm": 0.006098724901676178, - "learning_rate": 0.00019999621931173197, - "loss": 46.0, - "step": 17198 - }, - { - "epoch": 2.769757236603728, - "grad_norm": 0.007688706740736961, - "learning_rate": 0.00019999621887180008, - "loss": 46.0, - "step": 17199 - }, - { - "epoch": 2.7699182736825154, - "grad_norm": 0.003622662043198943, - "learning_rate": 0.00019999621843184263, - "loss": 46.0, - "step": 17200 - }, - { - "epoch": 2.770079310761303, - "grad_norm": 0.0020627097692340612, - "learning_rate": 0.0001999962179918596, - "loss": 46.0, - "step": 17201 - }, - { - "epoch": 2.7702403478400903, - "grad_norm": 0.003132890211418271, - "learning_rate": 0.00019999621755185097, - "loss": 46.0, - "step": 17202 - }, - { - "epoch": 2.7704013849188778, - "grad_norm": 0.009673915803432465, - "learning_rate": 0.0001999962171118167, - "loss": 46.0, - "step": 17203 - }, - { - "epoch": 2.7705624219976652, - "grad_norm": 0.002972107380628586, - "learning_rate": 0.00019999621667175687, - "loss": 46.0, - "step": 17204 - }, - { - "epoch": 2.7707234590764522, - "grad_norm": 0.0029015387408435345, - "learning_rate": 0.00019999621623167143, - "loss": 46.0, - "step": 17205 - }, - { - "epoch": 2.7708844961552397, - "grad_norm": 0.005969700403511524, - "learning_rate": 0.00019999621579156043, - "loss": 46.0, - "step": 17206 - }, - { - "epoch": 2.771045533234027, - "grad_norm": 0.007375736255198717, - "learning_rate": 0.00019999621535142384, - "loss": 46.0, - "step": 17207 - }, - { - "epoch": 2.7712065703128146, - "grad_norm": 0.00248416094109416, - "learning_rate": 0.0001999962149112616, - "loss": 46.0, - "step": 17208 - }, - { - "epoch": 2.771367607391602, - "grad_norm": 0.0014966547023504972, - "learning_rate": 0.00019999621447107382, - "loss": 46.0, - "step": 17209 - }, - { - "epoch": 2.771528644470389, - "grad_norm": 0.009774049744009972, - "learning_rate": 0.00019999621403086042, - "loss": 46.0, - "step": 17210 - }, - { - "epoch": 2.7716896815491765, - "grad_norm": 0.00196552905254066, - "learning_rate": 0.00019999621359062143, - "loss": 46.0, - "step": 17211 - }, - { - "epoch": 2.771850718627964, - "grad_norm": 0.0007924105157144368, - "learning_rate": 0.00019999621315035685, - "loss": 46.0, - "step": 17212 - }, - { - "epoch": 2.7720117557067514, - "grad_norm": 0.002094122814014554, - "learning_rate": 0.00019999621271006666, - "loss": 46.0, - "step": 17213 - }, - { - "epoch": 2.772172792785539, - "grad_norm": 0.009498411789536476, - "learning_rate": 0.0001999962122697509, - "loss": 46.0, - "step": 17214 - }, - { - "epoch": 2.7723338298643263, - "grad_norm": 0.0010483688674867153, - "learning_rate": 0.00019999621182940954, - "loss": 46.0, - "step": 17215 - }, - { - "epoch": 2.7724948669431138, - "grad_norm": 0.0011623470345512033, - "learning_rate": 0.00019999621138904256, - "loss": 46.0, - "step": 17216 - }, - { - "epoch": 2.772655904021901, - "grad_norm": 0.01052230317145586, - "learning_rate": 0.00019999621094865001, - "loss": 46.0, - "step": 17217 - }, - { - "epoch": 2.7728169411006887, - "grad_norm": 0.00584650132805109, - "learning_rate": 0.00019999621050823189, - "loss": 46.0, - "step": 17218 - }, - { - "epoch": 2.7729779781794757, - "grad_norm": 0.0006701999227516353, - "learning_rate": 0.00019999621006778812, - "loss": 46.0, - "step": 17219 - }, - { - "epoch": 2.773139015258263, - "grad_norm": 0.00454955268651247, - "learning_rate": 0.00019999620962731881, - "loss": 46.0, - "step": 17220 - }, - { - "epoch": 2.7733000523370506, - "grad_norm": 0.0047552818432450294, - "learning_rate": 0.0001999962091868239, - "loss": 46.0, - "step": 17221 - }, - { - "epoch": 2.773461089415838, - "grad_norm": 0.004514632746577263, - "learning_rate": 0.00019999620874630334, - "loss": 46.0, - "step": 17222 - }, - { - "epoch": 2.7736221264946255, - "grad_norm": 0.005523323081433773, - "learning_rate": 0.00019999620830575725, - "loss": 46.0, - "step": 17223 - }, - { - "epoch": 2.773783163573413, - "grad_norm": 0.0011826539412140846, - "learning_rate": 0.00019999620786518554, - "loss": 46.0, - "step": 17224 - }, - { - "epoch": 2.7739442006522, - "grad_norm": 0.0007408465025946498, - "learning_rate": 0.00019999620742458822, - "loss": 46.0, - "step": 17225 - }, - { - "epoch": 2.7741052377309874, - "grad_norm": 0.0014174769166857004, - "learning_rate": 0.00019999620698396534, - "loss": 46.0, - "step": 17226 - }, - { - "epoch": 2.774266274809775, - "grad_norm": 0.0009421113645657897, - "learning_rate": 0.00019999620654331682, - "loss": 46.0, - "step": 17227 - }, - { - "epoch": 2.7744273118885623, - "grad_norm": 0.001157391001470387, - "learning_rate": 0.00019999620610264277, - "loss": 46.0, - "step": 17228 - }, - { - "epoch": 2.7745883489673497, - "grad_norm": 0.0023011311423033476, - "learning_rate": 0.00019999620566194307, - "loss": 46.0, - "step": 17229 - }, - { - "epoch": 2.774749386046137, - "grad_norm": 0.001446246518753469, - "learning_rate": 0.00019999620522121781, - "loss": 46.0, - "step": 17230 - }, - { - "epoch": 2.7749104231249246, - "grad_norm": 0.0018936104606837034, - "learning_rate": 0.00019999620478046692, - "loss": 46.0, - "step": 17231 - }, - { - "epoch": 2.775071460203712, - "grad_norm": 0.0025069648399949074, - "learning_rate": 0.0001999962043396905, - "loss": 46.0, - "step": 17232 - }, - { - "epoch": 2.7752324972824995, - "grad_norm": 0.003439794760197401, - "learning_rate": 0.00019999620389888842, - "loss": 46.0, - "step": 17233 - }, - { - "epoch": 2.7753935343612866, - "grad_norm": 0.0008250723476521671, - "learning_rate": 0.00019999620345806078, - "loss": 46.0, - "step": 17234 - }, - { - "epoch": 2.775554571440074, - "grad_norm": 0.003856923431158066, - "learning_rate": 0.0001999962030172075, - "loss": 46.0, - "step": 17235 - }, - { - "epoch": 2.7757156085188615, - "grad_norm": 0.004507602658122778, - "learning_rate": 0.0001999962025763287, - "loss": 46.0, - "step": 17236 - }, - { - "epoch": 2.775876645597649, - "grad_norm": 0.005155741702765226, - "learning_rate": 0.00019999620213542426, - "loss": 46.0, - "step": 17237 - }, - { - "epoch": 2.7760376826764364, - "grad_norm": 0.009643965400755405, - "learning_rate": 0.00019999620169449425, - "loss": 46.0, - "step": 17238 - }, - { - "epoch": 2.7761987197552234, - "grad_norm": 0.0009915520204231143, - "learning_rate": 0.00019999620125353863, - "loss": 46.0, - "step": 17239 - }, - { - "epoch": 2.776359756834011, - "grad_norm": 0.0011607661144807935, - "learning_rate": 0.00019999620081255742, - "loss": 46.0, - "step": 17240 - }, - { - "epoch": 2.7765207939127983, - "grad_norm": 0.001892865402624011, - "learning_rate": 0.0001999962003715506, - "loss": 46.0, - "step": 17241 - }, - { - "epoch": 2.7766818309915857, - "grad_norm": 0.012929723598062992, - "learning_rate": 0.00019999619993051821, - "loss": 46.0, - "step": 17242 - }, - { - "epoch": 2.776842868070373, - "grad_norm": 0.005594770889729261, - "learning_rate": 0.00019999619948946022, - "loss": 46.0, - "step": 17243 - }, - { - "epoch": 2.7770039051491606, - "grad_norm": 0.0034008945804089308, - "learning_rate": 0.0001999961990483766, - "loss": 46.0, - "step": 17244 - }, - { - "epoch": 2.777164942227948, - "grad_norm": 0.004892023745924234, - "learning_rate": 0.00019999619860726743, - "loss": 46.0, - "step": 17245 - }, - { - "epoch": 2.7773259793067355, - "grad_norm": 0.001985863083973527, - "learning_rate": 0.00019999619816613267, - "loss": 46.0, - "step": 17246 - }, - { - "epoch": 2.777487016385523, - "grad_norm": 0.0025620614178478718, - "learning_rate": 0.0001999961977249723, - "loss": 46.0, - "step": 17247 - }, - { - "epoch": 2.7776480534643104, - "grad_norm": 0.0015864807646721601, - "learning_rate": 0.00019999619728378634, - "loss": 46.0, - "step": 17248 - }, - { - "epoch": 2.7778090905430974, - "grad_norm": 0.0024884187150746584, - "learning_rate": 0.00019999619684257477, - "loss": 46.0, - "step": 17249 - }, - { - "epoch": 2.777970127621885, - "grad_norm": 0.0026782918721437454, - "learning_rate": 0.00019999619640133763, - "loss": 46.0, - "step": 17250 - }, - { - "epoch": 2.7781311647006723, - "grad_norm": 0.009245862253010273, - "learning_rate": 0.00019999619596007488, - "loss": 46.0, - "step": 17251 - }, - { - "epoch": 2.77829220177946, - "grad_norm": 0.0026006782427430153, - "learning_rate": 0.00019999619551878655, - "loss": 46.0, - "step": 17252 - }, - { - "epoch": 2.7784532388582472, - "grad_norm": 0.0018891041399911046, - "learning_rate": 0.0001999961950774726, - "loss": 46.0, - "step": 17253 - }, - { - "epoch": 2.7786142759370343, - "grad_norm": 0.001433408702723682, - "learning_rate": 0.00019999619463613308, - "loss": 46.0, - "step": 17254 - }, - { - "epoch": 2.7787753130158217, - "grad_norm": 0.0011654626578092575, - "learning_rate": 0.00019999619419476796, - "loss": 46.0, - "step": 17255 - }, - { - "epoch": 2.778936350094609, - "grad_norm": 0.00508570671081543, - "learning_rate": 0.00019999619375337725, - "loss": 46.0, - "step": 17256 - }, - { - "epoch": 2.7790973871733966, - "grad_norm": 0.0018112065736204386, - "learning_rate": 0.00019999619331196092, - "loss": 46.0, - "step": 17257 - }, - { - "epoch": 2.779258424252184, - "grad_norm": 0.0017384226666763425, - "learning_rate": 0.00019999619287051906, - "loss": 46.0, - "step": 17258 - }, - { - "epoch": 2.7794194613309715, - "grad_norm": 0.004295541439205408, - "learning_rate": 0.00019999619242905154, - "loss": 46.0, - "step": 17259 - }, - { - "epoch": 2.779580498409759, - "grad_norm": 0.000518948188982904, - "learning_rate": 0.00019999619198755845, - "loss": 46.0, - "step": 17260 - }, - { - "epoch": 2.7797415354885464, - "grad_norm": 0.005840987432748079, - "learning_rate": 0.00019999619154603977, - "loss": 46.0, - "step": 17261 - }, - { - "epoch": 2.779902572567334, - "grad_norm": 0.003383237635716796, - "learning_rate": 0.0001999961911044955, - "loss": 46.0, - "step": 17262 - }, - { - "epoch": 2.780063609646121, - "grad_norm": 0.012057020328938961, - "learning_rate": 0.0001999961906629256, - "loss": 46.0, - "step": 17263 - }, - { - "epoch": 2.7802246467249083, - "grad_norm": 0.017308978363871574, - "learning_rate": 0.00019999619022133014, - "loss": 46.0, - "step": 17264 - }, - { - "epoch": 2.7803856838036958, - "grad_norm": 0.006352408789098263, - "learning_rate": 0.0001999961897797091, - "loss": 46.0, - "step": 17265 - }, - { - "epoch": 2.7805467208824832, - "grad_norm": 0.0016968533163890243, - "learning_rate": 0.00019999618933806245, - "loss": 46.0, - "step": 17266 - }, - { - "epoch": 2.7807077579612707, - "grad_norm": 0.0031767133623361588, - "learning_rate": 0.0001999961888963902, - "loss": 46.0, - "step": 17267 - }, - { - "epoch": 2.780868795040058, - "grad_norm": 0.0020461815875023603, - "learning_rate": 0.00019999618845469236, - "loss": 46.0, - "step": 17268 - }, - { - "epoch": 2.781029832118845, - "grad_norm": 0.005839423276484013, - "learning_rate": 0.0001999961880129689, - "loss": 46.0, - "step": 17269 - }, - { - "epoch": 2.7811908691976326, - "grad_norm": 0.0022688517346978188, - "learning_rate": 0.00019999618757121987, - "loss": 46.0, - "step": 17270 - }, - { - "epoch": 2.78135190627642, - "grad_norm": 0.0011125602759420872, - "learning_rate": 0.00019999618712944527, - "loss": 46.0, - "step": 17271 - }, - { - "epoch": 2.7815129433552075, - "grad_norm": 0.0046051060780882835, - "learning_rate": 0.00019999618668764505, - "loss": 46.0, - "step": 17272 - }, - { - "epoch": 2.781673980433995, - "grad_norm": 0.0004647821479011327, - "learning_rate": 0.00019999618624581925, - "loss": 46.0, - "step": 17273 - }, - { - "epoch": 2.7818350175127824, - "grad_norm": 0.0032389399129897356, - "learning_rate": 0.00019999618580396783, - "loss": 46.0, - "step": 17274 - }, - { - "epoch": 2.78199605459157, - "grad_norm": 0.015031995251774788, - "learning_rate": 0.00019999618536209083, - "loss": 46.0, - "step": 17275 - }, - { - "epoch": 2.7821570916703573, - "grad_norm": 0.004603092558681965, - "learning_rate": 0.00019999618492018827, - "loss": 46.0, - "step": 17276 - }, - { - "epoch": 2.7823181287491447, - "grad_norm": 0.0019079707562923431, - "learning_rate": 0.00019999618447826006, - "loss": 46.0, - "step": 17277 - }, - { - "epoch": 2.7824791658279318, - "grad_norm": 0.002006322843953967, - "learning_rate": 0.00019999618403630627, - "loss": 46.0, - "step": 17278 - }, - { - "epoch": 2.782640202906719, - "grad_norm": 0.0019319868879392743, - "learning_rate": 0.0001999961835943269, - "loss": 46.0, - "step": 17279 - }, - { - "epoch": 2.7828012399855067, - "grad_norm": 0.0018949775258079171, - "learning_rate": 0.00019999618315232195, - "loss": 46.0, - "step": 17280 - }, - { - "epoch": 2.782962277064294, - "grad_norm": 0.005325635429471731, - "learning_rate": 0.00019999618271029137, - "loss": 46.0, - "step": 17281 - }, - { - "epoch": 2.7831233141430816, - "grad_norm": 0.0014188267523422837, - "learning_rate": 0.00019999618226823523, - "loss": 46.0, - "step": 17282 - }, - { - "epoch": 2.7832843512218686, - "grad_norm": 0.006055557169020176, - "learning_rate": 0.00019999618182615347, - "loss": 46.0, - "step": 17283 - }, - { - "epoch": 2.783445388300656, - "grad_norm": 0.0017244759947061539, - "learning_rate": 0.00019999618138404613, - "loss": 46.0, - "step": 17284 - }, - { - "epoch": 2.7836064253794435, - "grad_norm": 0.0049844346940517426, - "learning_rate": 0.0001999961809419132, - "loss": 46.0, - "step": 17285 - }, - { - "epoch": 2.783767462458231, - "grad_norm": 0.006922903936356306, - "learning_rate": 0.00019999618049975466, - "loss": 46.0, - "step": 17286 - }, - { - "epoch": 2.7839284995370184, - "grad_norm": 0.004829640034586191, - "learning_rate": 0.00019999618005757053, - "loss": 46.0, - "step": 17287 - }, - { - "epoch": 2.784089536615806, - "grad_norm": 0.0011554949451237917, - "learning_rate": 0.0001999961796153608, - "loss": 46.0, - "step": 17288 - }, - { - "epoch": 2.7842505736945933, - "grad_norm": 0.01126040518283844, - "learning_rate": 0.00019999617917312553, - "loss": 46.0, - "step": 17289 - }, - { - "epoch": 2.7844116107733807, - "grad_norm": 0.0038276687264442444, - "learning_rate": 0.00019999617873086458, - "loss": 46.0, - "step": 17290 - }, - { - "epoch": 2.784572647852168, - "grad_norm": 0.0012934472179040313, - "learning_rate": 0.0001999961782885781, - "loss": 46.0, - "step": 17291 - }, - { - "epoch": 2.784733684930955, - "grad_norm": 0.002570344367995858, - "learning_rate": 0.000199996177846266, - "loss": 46.0, - "step": 17292 - }, - { - "epoch": 2.7848947220097426, - "grad_norm": 0.0021267388947308064, - "learning_rate": 0.0001999961774039283, - "loss": 46.0, - "step": 17293 - }, - { - "epoch": 2.78505575908853, - "grad_norm": 0.0008051579934544861, - "learning_rate": 0.00019999617696156503, - "loss": 46.0, - "step": 17294 - }, - { - "epoch": 2.7852167961673175, - "grad_norm": 0.0015287662390619516, - "learning_rate": 0.00019999617651917615, - "loss": 46.0, - "step": 17295 - }, - { - "epoch": 2.785377833246105, - "grad_norm": 0.00878703873604536, - "learning_rate": 0.00019999617607676168, - "loss": 46.0, - "step": 17296 - }, - { - "epoch": 2.7855388703248924, - "grad_norm": 0.006900724023580551, - "learning_rate": 0.00019999617563432162, - "loss": 46.0, - "step": 17297 - }, - { - "epoch": 2.7856999074036795, - "grad_norm": 0.003436419880017638, - "learning_rate": 0.00019999617519185595, - "loss": 46.0, - "step": 17298 - }, - { - "epoch": 2.785860944482467, - "grad_norm": 0.002675537019968033, - "learning_rate": 0.00019999617474936472, - "loss": 46.0, - "step": 17299 - }, - { - "epoch": 2.7860219815612544, - "grad_norm": 0.0018416759558022022, - "learning_rate": 0.00019999617430684787, - "loss": 46.0, - "step": 17300 - }, - { - "epoch": 2.786183018640042, - "grad_norm": 0.005380876827985048, - "learning_rate": 0.0001999961738643054, - "loss": 46.0, - "step": 17301 - }, - { - "epoch": 2.7863440557188293, - "grad_norm": 0.0022907995153218508, - "learning_rate": 0.0001999961734217374, - "loss": 46.0, - "step": 17302 - }, - { - "epoch": 2.7865050927976167, - "grad_norm": 0.0019048057729378343, - "learning_rate": 0.00019999617297914376, - "loss": 46.0, - "step": 17303 - }, - { - "epoch": 2.786666129876404, - "grad_norm": 0.006211045663803816, - "learning_rate": 0.00019999617253652456, - "loss": 46.0, - "step": 17304 - }, - { - "epoch": 2.7868271669551916, - "grad_norm": 0.003978512715548277, - "learning_rate": 0.00019999617209387973, - "loss": 46.0, - "step": 17305 - }, - { - "epoch": 2.786988204033979, - "grad_norm": 0.010241970419883728, - "learning_rate": 0.0001999961716512093, - "loss": 46.0, - "step": 17306 - }, - { - "epoch": 2.787149241112766, - "grad_norm": 0.0029114349745213985, - "learning_rate": 0.0001999961712085133, - "loss": 46.0, - "step": 17307 - }, - { - "epoch": 2.7873102781915535, - "grad_norm": 0.0019352325471118093, - "learning_rate": 0.00019999617076579173, - "loss": 46.0, - "step": 17308 - }, - { - "epoch": 2.787471315270341, - "grad_norm": 0.005099796690046787, - "learning_rate": 0.00019999617032304454, - "loss": 46.0, - "step": 17309 - }, - { - "epoch": 2.7876323523491284, - "grad_norm": 0.0015000685816630721, - "learning_rate": 0.00019999616988027174, - "loss": 46.0, - "step": 17310 - }, - { - "epoch": 2.787793389427916, - "grad_norm": 0.00374378333799541, - "learning_rate": 0.00019999616943747336, - "loss": 46.0, - "step": 17311 - }, - { - "epoch": 2.7879544265067033, - "grad_norm": 0.005975484382361174, - "learning_rate": 0.0001999961689946494, - "loss": 46.0, - "step": 17312 - }, - { - "epoch": 2.7881154635854903, - "grad_norm": 0.0068528796546161175, - "learning_rate": 0.00019999616855179982, - "loss": 46.0, - "step": 17313 - }, - { - "epoch": 2.788276500664278, - "grad_norm": 0.0008332461002282798, - "learning_rate": 0.00019999616810892468, - "loss": 46.0, - "step": 17314 - }, - { - "epoch": 2.7884375377430652, - "grad_norm": 0.002808322198688984, - "learning_rate": 0.00019999616766602391, - "loss": 46.0, - "step": 17315 - }, - { - "epoch": 2.7885985748218527, - "grad_norm": 0.012756533920764923, - "learning_rate": 0.00019999616722309757, - "loss": 46.0, - "step": 17316 - }, - { - "epoch": 2.78875961190064, - "grad_norm": 0.0030364638660103083, - "learning_rate": 0.00019999616678014563, - "loss": 46.0, - "step": 17317 - }, - { - "epoch": 2.7889206489794276, - "grad_norm": 0.0013004435459151864, - "learning_rate": 0.00019999616633716808, - "loss": 46.0, - "step": 17318 - }, - { - "epoch": 2.789081686058215, - "grad_norm": 0.007769137620925903, - "learning_rate": 0.00019999616589416497, - "loss": 46.0, - "step": 17319 - }, - { - "epoch": 2.7892427231370025, - "grad_norm": 0.004966042470186949, - "learning_rate": 0.00019999616545113624, - "loss": 46.0, - "step": 17320 - }, - { - "epoch": 2.78940376021579, - "grad_norm": 0.003941311500966549, - "learning_rate": 0.00019999616500808193, - "loss": 46.0, - "step": 17321 - }, - { - "epoch": 2.789564797294577, - "grad_norm": 0.003876606235280633, - "learning_rate": 0.000199996164565002, - "loss": 46.0, - "step": 17322 - }, - { - "epoch": 2.7897258343733644, - "grad_norm": 0.0012116816360503435, - "learning_rate": 0.0001999961641218965, - "loss": 46.0, - "step": 17323 - }, - { - "epoch": 2.789886871452152, - "grad_norm": 0.0012563696363940835, - "learning_rate": 0.00019999616367876542, - "loss": 46.0, - "step": 17324 - }, - { - "epoch": 2.7900479085309393, - "grad_norm": 0.002336804987862706, - "learning_rate": 0.0001999961632356087, - "loss": 46.0, - "step": 17325 - }, - { - "epoch": 2.7902089456097268, - "grad_norm": 0.0017057409277185798, - "learning_rate": 0.00019999616279242643, - "loss": 46.0, - "step": 17326 - }, - { - "epoch": 2.7903699826885138, - "grad_norm": 0.0013661442790180445, - "learning_rate": 0.00019999616234921854, - "loss": 46.0, - "step": 17327 - }, - { - "epoch": 2.7905310197673012, - "grad_norm": 0.0021454968955367804, - "learning_rate": 0.0001999961619059851, - "loss": 46.0, - "step": 17328 - }, - { - "epoch": 2.7906920568460887, - "grad_norm": 0.002131877699866891, - "learning_rate": 0.00019999616146272603, - "loss": 46.0, - "step": 17329 - }, - { - "epoch": 2.790853093924876, - "grad_norm": 0.00490427203476429, - "learning_rate": 0.00019999616101944135, - "loss": 46.0, - "step": 17330 - }, - { - "epoch": 2.7910141310036636, - "grad_norm": 0.005995448678731918, - "learning_rate": 0.0001999961605761311, - "loss": 46.0, - "step": 17331 - }, - { - "epoch": 2.791175168082451, - "grad_norm": 0.013836621306836605, - "learning_rate": 0.00019999616013279524, - "loss": 46.0, - "step": 17332 - }, - { - "epoch": 2.7913362051612385, - "grad_norm": 0.0013461961643770337, - "learning_rate": 0.0001999961596894338, - "loss": 46.0, - "step": 17333 - }, - { - "epoch": 2.791497242240026, - "grad_norm": 0.004238417837768793, - "learning_rate": 0.00019999615924604674, - "loss": 46.0, - "step": 17334 - }, - { - "epoch": 2.7916582793188134, - "grad_norm": 0.008564460091292858, - "learning_rate": 0.00019999615880263413, - "loss": 46.0, - "step": 17335 - }, - { - "epoch": 2.7918193163976004, - "grad_norm": 0.0028342341538518667, - "learning_rate": 0.0001999961583591959, - "loss": 46.0, - "step": 17336 - }, - { - "epoch": 2.791980353476388, - "grad_norm": 0.001389666460454464, - "learning_rate": 0.0001999961579157321, - "loss": 46.0, - "step": 17337 - }, - { - "epoch": 2.7921413905551753, - "grad_norm": 0.003649804275482893, - "learning_rate": 0.00019999615747224266, - "loss": 46.0, - "step": 17338 - }, - { - "epoch": 2.7923024276339627, - "grad_norm": 0.02480003610253334, - "learning_rate": 0.00019999615702872764, - "loss": 46.0, - "step": 17339 - }, - { - "epoch": 2.79246346471275, - "grad_norm": 0.004762317985296249, - "learning_rate": 0.00019999615658518706, - "loss": 46.0, - "step": 17340 - }, - { - "epoch": 2.7926245017915377, - "grad_norm": 0.00366000784561038, - "learning_rate": 0.00019999615614162087, - "loss": 46.0, - "step": 17341 - }, - { - "epoch": 2.7927855388703247, - "grad_norm": 0.0016108189010992646, - "learning_rate": 0.00019999615569802907, - "loss": 46.0, - "step": 17342 - }, - { - "epoch": 2.792946575949112, - "grad_norm": 0.01248826552182436, - "learning_rate": 0.00019999615525441168, - "loss": 46.0, - "step": 17343 - }, - { - "epoch": 2.7931076130278996, - "grad_norm": 0.006517420988529921, - "learning_rate": 0.00019999615481076872, - "loss": 46.0, - "step": 17344 - }, - { - "epoch": 2.793268650106687, - "grad_norm": 0.0022465353831648827, - "learning_rate": 0.00019999615436710016, - "loss": 46.0, - "step": 17345 - }, - { - "epoch": 2.7934296871854745, - "grad_norm": 0.002218261593952775, - "learning_rate": 0.00019999615392340598, - "loss": 46.0, - "step": 17346 - }, - { - "epoch": 2.793590724264262, - "grad_norm": 0.006983265746384859, - "learning_rate": 0.00019999615347968624, - "loss": 46.0, - "step": 17347 - }, - { - "epoch": 2.7937517613430494, - "grad_norm": 0.0011270599206909537, - "learning_rate": 0.00019999615303594088, - "loss": 46.0, - "step": 17348 - }, - { - "epoch": 2.793912798421837, - "grad_norm": 0.0007337352144531906, - "learning_rate": 0.00019999615259216994, - "loss": 46.0, - "step": 17349 - }, - { - "epoch": 2.7940738355006243, - "grad_norm": 0.0015230899443849921, - "learning_rate": 0.00019999615214837338, - "loss": 46.0, - "step": 17350 - }, - { - "epoch": 2.7942348725794113, - "grad_norm": 0.003638229798525572, - "learning_rate": 0.00019999615170455127, - "loss": 46.0, - "step": 17351 - }, - { - "epoch": 2.7943959096581987, - "grad_norm": 0.00909519661217928, - "learning_rate": 0.00019999615126070353, - "loss": 46.0, - "step": 17352 - }, - { - "epoch": 2.794556946736986, - "grad_norm": 0.00896409247070551, - "learning_rate": 0.00019999615081683022, - "loss": 46.0, - "step": 17353 - }, - { - "epoch": 2.7947179838157736, - "grad_norm": 0.007621936034411192, - "learning_rate": 0.00019999615037293128, - "loss": 46.0, - "step": 17354 - }, - { - "epoch": 2.794879020894561, - "grad_norm": 0.00622302433475852, - "learning_rate": 0.0001999961499290068, - "loss": 46.0, - "step": 17355 - }, - { - "epoch": 2.795040057973348, - "grad_norm": 0.0024968821089714766, - "learning_rate": 0.00019999614948505669, - "loss": 46.0, - "step": 17356 - }, - { - "epoch": 2.7952010950521355, - "grad_norm": 0.0006029472569935024, - "learning_rate": 0.00019999614904108096, - "loss": 46.0, - "step": 17357 - }, - { - "epoch": 2.795362132130923, - "grad_norm": 0.005191711708903313, - "learning_rate": 0.00019999614859707968, - "loss": 46.0, - "step": 17358 - }, - { - "epoch": 2.7955231692097104, - "grad_norm": 0.0016551089938730001, - "learning_rate": 0.0001999961481530528, - "loss": 46.0, - "step": 17359 - }, - { - "epoch": 2.795684206288498, - "grad_norm": 0.004970055539160967, - "learning_rate": 0.00019999614770900033, - "loss": 46.0, - "step": 17360 - }, - { - "epoch": 2.7958452433672853, - "grad_norm": 0.0071669877506792545, - "learning_rate": 0.00019999614726492224, - "loss": 46.0, - "step": 17361 - }, - { - "epoch": 2.796006280446073, - "grad_norm": 0.017554905265569687, - "learning_rate": 0.0001999961468208186, - "loss": 46.0, - "step": 17362 - }, - { - "epoch": 2.7961673175248603, - "grad_norm": 0.007663843221962452, - "learning_rate": 0.00019999614637668933, - "loss": 46.0, - "step": 17363 - }, - { - "epoch": 2.7963283546036477, - "grad_norm": 0.0031698443926870823, - "learning_rate": 0.00019999614593253448, - "loss": 46.0, - "step": 17364 - }, - { - "epoch": 2.796489391682435, - "grad_norm": 0.004886985756456852, - "learning_rate": 0.000199996145488354, - "loss": 46.0, - "step": 17365 - }, - { - "epoch": 2.796650428761222, - "grad_norm": 0.0017400650540366769, - "learning_rate": 0.00019999614504414797, - "loss": 46.0, - "step": 17366 - }, - { - "epoch": 2.7968114658400096, - "grad_norm": 0.0028208396397531033, - "learning_rate": 0.00019999614459991632, - "loss": 46.0, - "step": 17367 - }, - { - "epoch": 2.796972502918797, - "grad_norm": 0.0038511583115905523, - "learning_rate": 0.00019999614415565912, - "loss": 46.0, - "step": 17368 - }, - { - "epoch": 2.7971335399975845, - "grad_norm": 0.003789086826145649, - "learning_rate": 0.0001999961437113763, - "loss": 46.0, - "step": 17369 - }, - { - "epoch": 2.797294577076372, - "grad_norm": 0.0020728325471282005, - "learning_rate": 0.0001999961432670679, - "loss": 46.0, - "step": 17370 - }, - { - "epoch": 2.797455614155159, - "grad_norm": 0.0005276558804325759, - "learning_rate": 0.00019999614282273386, - "loss": 46.0, - "step": 17371 - }, - { - "epoch": 2.7976166512339464, - "grad_norm": 0.0015684147365391254, - "learning_rate": 0.00019999614237837426, - "loss": 46.0, - "step": 17372 - }, - { - "epoch": 2.797777688312734, - "grad_norm": 0.001548346015624702, - "learning_rate": 0.00019999614193398906, - "loss": 46.0, - "step": 17373 - }, - { - "epoch": 2.7979387253915213, - "grad_norm": 0.014401966705918312, - "learning_rate": 0.00019999614148957825, - "loss": 46.0, - "step": 17374 - }, - { - "epoch": 2.798099762470309, - "grad_norm": 0.004531376529484987, - "learning_rate": 0.00019999614104514188, - "loss": 46.0, - "step": 17375 - }, - { - "epoch": 2.7982607995490962, - "grad_norm": 0.001251931069418788, - "learning_rate": 0.0001999961406006799, - "loss": 46.0, - "step": 17376 - }, - { - "epoch": 2.7984218366278837, - "grad_norm": 0.008024302311241627, - "learning_rate": 0.00019999614015619232, - "loss": 46.0, - "step": 17377 - }, - { - "epoch": 2.798582873706671, - "grad_norm": 0.0005931921768933535, - "learning_rate": 0.00019999613971167913, - "loss": 46.0, - "step": 17378 - }, - { - "epoch": 2.7987439107854586, - "grad_norm": 0.0016716797836124897, - "learning_rate": 0.00019999613926714038, - "loss": 46.0, - "step": 17379 - }, - { - "epoch": 2.7989049478642456, - "grad_norm": 0.0008711446425877512, - "learning_rate": 0.00019999613882257605, - "loss": 46.0, - "step": 17380 - }, - { - "epoch": 2.799065984943033, - "grad_norm": 0.00184560043271631, - "learning_rate": 0.00019999613837798607, - "loss": 46.0, - "step": 17381 - }, - { - "epoch": 2.7992270220218205, - "grad_norm": 0.008441537618637085, - "learning_rate": 0.00019999613793337054, - "loss": 46.0, - "step": 17382 - }, - { - "epoch": 2.799388059100608, - "grad_norm": 0.004199116490781307, - "learning_rate": 0.00019999613748872941, - "loss": 46.0, - "step": 17383 - }, - { - "epoch": 2.7995490961793954, - "grad_norm": 0.005086650140583515, - "learning_rate": 0.00019999613704406265, - "loss": 46.0, - "step": 17384 - }, - { - "epoch": 2.799710133258183, - "grad_norm": 0.0013988835271447897, - "learning_rate": 0.00019999613659937032, - "loss": 46.0, - "step": 17385 - }, - { - "epoch": 2.79987117033697, - "grad_norm": 0.0007828865782357752, - "learning_rate": 0.0001999961361546524, - "loss": 46.0, - "step": 17386 - }, - { - "epoch": 2.8000322074157573, - "grad_norm": 0.006819195579737425, - "learning_rate": 0.00019999613570990891, - "loss": 46.0, - "step": 17387 - }, - { - "epoch": 2.8001932444945448, - "grad_norm": 0.0010173274204134941, - "learning_rate": 0.0001999961352651398, - "loss": 46.0, - "step": 17388 - }, - { - "epoch": 2.800354281573332, - "grad_norm": 0.00251241447404027, - "learning_rate": 0.0001999961348203451, - "loss": 46.0, - "step": 17389 - }, - { - "epoch": 2.8005153186521197, - "grad_norm": 0.0016244055004790425, - "learning_rate": 0.00019999613437552481, - "loss": 46.0, - "step": 17390 - }, - { - "epoch": 2.800676355730907, - "grad_norm": 0.0047190189361572266, - "learning_rate": 0.00019999613393067894, - "loss": 46.0, - "step": 17391 - }, - { - "epoch": 2.8008373928096946, - "grad_norm": 0.0026683418545871973, - "learning_rate": 0.00019999613348580742, - "loss": 46.0, - "step": 17392 - }, - { - "epoch": 2.800998429888482, - "grad_norm": 0.005497898440808058, - "learning_rate": 0.00019999613304091035, - "loss": 46.0, - "step": 17393 - }, - { - "epoch": 2.8011594669672695, - "grad_norm": 0.0013796283164992929, - "learning_rate": 0.00019999613259598768, - "loss": 46.0, - "step": 17394 - }, - { - "epoch": 2.8013205040460565, - "grad_norm": 0.001371831982396543, - "learning_rate": 0.00019999613215103943, - "loss": 46.0, - "step": 17395 - }, - { - "epoch": 2.801481541124844, - "grad_norm": 0.002239825204014778, - "learning_rate": 0.00019999613170606557, - "loss": 46.0, - "step": 17396 - }, - { - "epoch": 2.8016425782036314, - "grad_norm": 0.0028003628831356764, - "learning_rate": 0.00019999613126106612, - "loss": 46.0, - "step": 17397 - }, - { - "epoch": 2.801803615282419, - "grad_norm": 0.0028887924272567034, - "learning_rate": 0.00019999613081604108, - "loss": 46.0, - "step": 17398 - }, - { - "epoch": 2.8019646523612063, - "grad_norm": 0.007571985945105553, - "learning_rate": 0.00019999613037099043, - "loss": 46.0, - "step": 17399 - }, - { - "epoch": 2.8021256894399933, - "grad_norm": 0.007642506621778011, - "learning_rate": 0.00019999612992591419, - "loss": 46.0, - "step": 17400 - }, - { - "epoch": 2.8022867265187807, - "grad_norm": 0.014479024335741997, - "learning_rate": 0.00019999612948081236, - "loss": 46.0, - "step": 17401 - }, - { - "epoch": 2.802447763597568, - "grad_norm": 0.0010544572724029422, - "learning_rate": 0.00019999612903568494, - "loss": 46.0, - "step": 17402 - }, - { - "epoch": 2.8026088006763556, - "grad_norm": 0.0021329568699002266, - "learning_rate": 0.00019999612859053194, - "loss": 46.0, - "step": 17403 - }, - { - "epoch": 2.802769837755143, - "grad_norm": 0.0023929246235638857, - "learning_rate": 0.00019999612814535332, - "loss": 46.0, - "step": 17404 - }, - { - "epoch": 2.8029308748339306, - "grad_norm": 0.0013512979494407773, - "learning_rate": 0.0001999961277001491, - "loss": 46.0, - "step": 17405 - }, - { - "epoch": 2.803091911912718, - "grad_norm": 0.003351834136992693, - "learning_rate": 0.00019999612725491933, - "loss": 46.0, - "step": 17406 - }, - { - "epoch": 2.8032529489915055, - "grad_norm": 0.00213625468313694, - "learning_rate": 0.00019999612680966393, - "loss": 46.0, - "step": 17407 - }, - { - "epoch": 2.803413986070293, - "grad_norm": 0.0011837321799248457, - "learning_rate": 0.00019999612636438293, - "loss": 46.0, - "step": 17408 - }, - { - "epoch": 2.80357502314908, - "grad_norm": 0.0022808159701526165, - "learning_rate": 0.00019999612591907635, - "loss": 46.0, - "step": 17409 - }, - { - "epoch": 2.8037360602278674, - "grad_norm": 0.006732139736413956, - "learning_rate": 0.0001999961254737442, - "loss": 46.0, - "step": 17410 - }, - { - "epoch": 2.803897097306655, - "grad_norm": 0.000943644205108285, - "learning_rate": 0.00019999612502838643, - "loss": 46.0, - "step": 17411 - }, - { - "epoch": 2.8040581343854423, - "grad_norm": 0.0026921082753688097, - "learning_rate": 0.00019999612458300304, - "loss": 46.0, - "step": 17412 - }, - { - "epoch": 2.8042191714642297, - "grad_norm": 0.0054645235650241375, - "learning_rate": 0.00019999612413759409, - "loss": 46.0, - "step": 17413 - }, - { - "epoch": 2.804380208543017, - "grad_norm": 0.009223084896802902, - "learning_rate": 0.00019999612369215957, - "loss": 46.0, - "step": 17414 - }, - { - "epoch": 2.804541245621804, - "grad_norm": 0.0013247141614556313, - "learning_rate": 0.00019999612324669941, - "loss": 46.0, - "step": 17415 - }, - { - "epoch": 2.8047022827005916, - "grad_norm": 0.0039679259061813354, - "learning_rate": 0.00019999612280121367, - "loss": 46.0, - "step": 17416 - }, - { - "epoch": 2.804863319779379, - "grad_norm": 0.010341739282011986, - "learning_rate": 0.00019999612235570237, - "loss": 46.0, - "step": 17417 - }, - { - "epoch": 2.8050243568581665, - "grad_norm": 0.00178498774766922, - "learning_rate": 0.00019999612191016542, - "loss": 46.0, - "step": 17418 - }, - { - "epoch": 2.805185393936954, - "grad_norm": 0.002988457912579179, - "learning_rate": 0.00019999612146460292, - "loss": 46.0, - "step": 17419 - }, - { - "epoch": 2.8053464310157414, - "grad_norm": 0.014675927348434925, - "learning_rate": 0.0001999961210190148, - "loss": 46.0, - "step": 17420 - }, - { - "epoch": 2.805507468094529, - "grad_norm": 0.0009310987661592662, - "learning_rate": 0.0001999961205734011, - "loss": 46.0, - "step": 17421 - }, - { - "epoch": 2.8056685051733163, - "grad_norm": 0.0076582725159823895, - "learning_rate": 0.0001999961201277618, - "loss": 46.0, - "step": 17422 - }, - { - "epoch": 2.805829542252104, - "grad_norm": 0.0009604257065802813, - "learning_rate": 0.00019999611968209692, - "loss": 46.0, - "step": 17423 - }, - { - "epoch": 2.805990579330891, - "grad_norm": 0.0016388790681958199, - "learning_rate": 0.00019999611923640642, - "loss": 46.0, - "step": 17424 - }, - { - "epoch": 2.8061516164096783, - "grad_norm": 0.009259761311113834, - "learning_rate": 0.00019999611879069034, - "loss": 46.0, - "step": 17425 - }, - { - "epoch": 2.8063126534884657, - "grad_norm": 0.009581991471350193, - "learning_rate": 0.00019999611834494867, - "loss": 46.0, - "step": 17426 - }, - { - "epoch": 2.806473690567253, - "grad_norm": 0.0010605021379888058, - "learning_rate": 0.00019999611789918139, - "loss": 46.0, - "step": 17427 - }, - { - "epoch": 2.8066347276460406, - "grad_norm": 0.01231616735458374, - "learning_rate": 0.00019999611745338854, - "loss": 46.0, - "step": 17428 - }, - { - "epoch": 2.806795764724828, - "grad_norm": 0.004481412470340729, - "learning_rate": 0.00019999611700757006, - "loss": 46.0, - "step": 17429 - }, - { - "epoch": 2.806956801803615, - "grad_norm": 0.001579686300829053, - "learning_rate": 0.00019999611656172604, - "loss": 46.0, - "step": 17430 - }, - { - "epoch": 2.8071178388824025, - "grad_norm": 0.0026307052467018366, - "learning_rate": 0.00019999611611585638, - "loss": 46.0, - "step": 17431 - }, - { - "epoch": 2.80727887596119, - "grad_norm": 0.0011595413088798523, - "learning_rate": 0.00019999611566996116, - "loss": 46.0, - "step": 17432 - }, - { - "epoch": 2.8074399130399774, - "grad_norm": 0.004979744087904692, - "learning_rate": 0.00019999611522404033, - "loss": 46.0, - "step": 17433 - }, - { - "epoch": 2.807600950118765, - "grad_norm": 0.0010374952107667923, - "learning_rate": 0.00019999611477809388, - "loss": 46.0, - "step": 17434 - }, - { - "epoch": 2.8077619871975523, - "grad_norm": 0.0011462147813290358, - "learning_rate": 0.00019999611433212187, - "loss": 46.0, - "step": 17435 - }, - { - "epoch": 2.8079230242763398, - "grad_norm": 0.0030980510637164116, - "learning_rate": 0.00019999611388612425, - "loss": 46.0, - "step": 17436 - }, - { - "epoch": 2.8080840613551272, - "grad_norm": 0.0029788825195282698, - "learning_rate": 0.00019999611344010104, - "loss": 46.0, - "step": 17437 - }, - { - "epoch": 2.8082450984339147, - "grad_norm": 0.0008766724495217204, - "learning_rate": 0.00019999611299405224, - "loss": 46.0, - "step": 17438 - }, - { - "epoch": 2.8084061355127017, - "grad_norm": 0.006660845596343279, - "learning_rate": 0.00019999611254797786, - "loss": 46.0, - "step": 17439 - }, - { - "epoch": 2.808567172591489, - "grad_norm": 0.002069348469376564, - "learning_rate": 0.00019999611210187786, - "loss": 46.0, - "step": 17440 - }, - { - "epoch": 2.8087282096702766, - "grad_norm": 0.0055584535002708435, - "learning_rate": 0.00019999611165575227, - "loss": 46.0, - "step": 17441 - }, - { - "epoch": 2.808889246749064, - "grad_norm": 0.0033744191750884056, - "learning_rate": 0.0001999961112096011, - "loss": 46.0, - "step": 17442 - }, - { - "epoch": 2.8090502838278515, - "grad_norm": 0.002229581121355295, - "learning_rate": 0.00019999611076342434, - "loss": 46.0, - "step": 17443 - }, - { - "epoch": 2.8092113209066385, - "grad_norm": 0.0027567625511437654, - "learning_rate": 0.00019999611031722197, - "loss": 46.0, - "step": 17444 - }, - { - "epoch": 2.809372357985426, - "grad_norm": 0.007182674016803503, - "learning_rate": 0.00019999610987099403, - "loss": 46.0, - "step": 17445 - }, - { - "epoch": 2.8095333950642134, - "grad_norm": 0.0012006674660369754, - "learning_rate": 0.00019999610942474046, - "loss": 46.0, - "step": 17446 - }, - { - "epoch": 2.809694432143001, - "grad_norm": 0.000705803744494915, - "learning_rate": 0.0001999961089784613, - "loss": 46.0, - "step": 17447 - }, - { - "epoch": 2.8098554692217883, - "grad_norm": 0.0032836601603776217, - "learning_rate": 0.00019999610853215657, - "loss": 46.0, - "step": 17448 - }, - { - "epoch": 2.8100165063005758, - "grad_norm": 0.0024060728028416634, - "learning_rate": 0.00019999610808582623, - "loss": 46.0, - "step": 17449 - }, - { - "epoch": 2.810177543379363, - "grad_norm": 0.0015321644023060799, - "learning_rate": 0.0001999961076394703, - "loss": 46.0, - "step": 17450 - }, - { - "epoch": 2.8103385804581507, - "grad_norm": 0.0024709589779376984, - "learning_rate": 0.0001999961071930888, - "loss": 46.0, - "step": 17451 - }, - { - "epoch": 2.810499617536938, - "grad_norm": 0.0026140264235436916, - "learning_rate": 0.00019999610674668164, - "loss": 46.0, - "step": 17452 - }, - { - "epoch": 2.810660654615725, - "grad_norm": 0.0013854526914656162, - "learning_rate": 0.00019999610630024896, - "loss": 46.0, - "step": 17453 - }, - { - "epoch": 2.8108216916945126, - "grad_norm": 0.002409999957308173, - "learning_rate": 0.00019999610585379066, - "loss": 46.0, - "step": 17454 - }, - { - "epoch": 2.8109827287733, - "grad_norm": 0.000760366499889642, - "learning_rate": 0.00019999610540730674, - "loss": 46.0, - "step": 17455 - }, - { - "epoch": 2.8111437658520875, - "grad_norm": 0.003204125678166747, - "learning_rate": 0.00019999610496079727, - "loss": 46.0, - "step": 17456 - }, - { - "epoch": 2.811304802930875, - "grad_norm": 0.002333524404093623, - "learning_rate": 0.00019999610451426218, - "loss": 46.0, - "step": 17457 - }, - { - "epoch": 2.8114658400096624, - "grad_norm": 0.006015934515744448, - "learning_rate": 0.0001999961040677015, - "loss": 46.0, - "step": 17458 - }, - { - "epoch": 2.8116268770884494, - "grad_norm": 0.005155366845428944, - "learning_rate": 0.00019999610362111522, - "loss": 46.0, - "step": 17459 - }, - { - "epoch": 2.811787914167237, - "grad_norm": 0.0031221392564475536, - "learning_rate": 0.00019999610317450337, - "loss": 46.0, - "step": 17460 - }, - { - "epoch": 2.8119489512460243, - "grad_norm": 0.008233534172177315, - "learning_rate": 0.0001999961027278659, - "loss": 46.0, - "step": 17461 - }, - { - "epoch": 2.8121099883248117, - "grad_norm": 0.0010821737814694643, - "learning_rate": 0.00019999610228120288, - "loss": 46.0, - "step": 17462 - }, - { - "epoch": 2.812271025403599, - "grad_norm": 0.003293958492577076, - "learning_rate": 0.0001999961018345142, - "loss": 46.0, - "step": 17463 - }, - { - "epoch": 2.8124320624823866, - "grad_norm": 0.0023541077971458435, - "learning_rate": 0.00019999610138779996, - "loss": 46.0, - "step": 17464 - }, - { - "epoch": 2.812593099561174, - "grad_norm": 0.0018221113132312894, - "learning_rate": 0.00019999610094106012, - "loss": 46.0, - "step": 17465 - }, - { - "epoch": 2.8127541366399615, - "grad_norm": 0.0005643051699735224, - "learning_rate": 0.0001999961004942947, - "loss": 46.0, - "step": 17466 - }, - { - "epoch": 2.812915173718749, - "grad_norm": 0.0006361603154800832, - "learning_rate": 0.00019999610004750368, - "loss": 46.0, - "step": 17467 - }, - { - "epoch": 2.813076210797536, - "grad_norm": 0.012806505896151066, - "learning_rate": 0.00019999609960068705, - "loss": 46.0, - "step": 17468 - }, - { - "epoch": 2.8132372478763235, - "grad_norm": 0.005844034720212221, - "learning_rate": 0.00019999609915384486, - "loss": 46.0, - "step": 17469 - }, - { - "epoch": 2.813398284955111, - "grad_norm": 0.0005421608802862465, - "learning_rate": 0.00019999609870697705, - "loss": 46.0, - "step": 17470 - }, - { - "epoch": 2.8135593220338984, - "grad_norm": 0.004232186358422041, - "learning_rate": 0.00019999609826008366, - "loss": 46.0, - "step": 17471 - }, - { - "epoch": 2.813720359112686, - "grad_norm": 0.0008606420014984906, - "learning_rate": 0.00019999609781316466, - "loss": 46.0, - "step": 17472 - }, - { - "epoch": 2.813881396191473, - "grad_norm": 0.0009822994470596313, - "learning_rate": 0.00019999609736622004, - "loss": 46.0, - "step": 17473 - }, - { - "epoch": 2.8140424332702603, - "grad_norm": 0.002020971616730094, - "learning_rate": 0.0001999960969192499, - "loss": 46.0, - "step": 17474 - }, - { - "epoch": 2.8142034703490477, - "grad_norm": 0.0010751087684184313, - "learning_rate": 0.0001999960964722541, - "loss": 46.0, - "step": 17475 - }, - { - "epoch": 2.814364507427835, - "grad_norm": 0.006463815923780203, - "learning_rate": 0.00019999609602523272, - "loss": 46.0, - "step": 17476 - }, - { - "epoch": 2.8145255445066226, - "grad_norm": 0.0031545457895845175, - "learning_rate": 0.00019999609557818577, - "loss": 46.0, - "step": 17477 - }, - { - "epoch": 2.81468658158541, - "grad_norm": 0.001236637239344418, - "learning_rate": 0.00019999609513111322, - "loss": 46.0, - "step": 17478 - }, - { - "epoch": 2.8148476186641975, - "grad_norm": 0.004620371852070093, - "learning_rate": 0.00019999609468401505, - "loss": 46.0, - "step": 17479 - }, - { - "epoch": 2.815008655742985, - "grad_norm": 0.00397595064714551, - "learning_rate": 0.00019999609423689135, - "loss": 46.0, - "step": 17480 - }, - { - "epoch": 2.8151696928217724, - "grad_norm": 0.004133298061788082, - "learning_rate": 0.000199996093789742, - "loss": 46.0, - "step": 17481 - }, - { - "epoch": 2.81533072990056, - "grad_norm": 0.0071642352268099785, - "learning_rate": 0.00019999609334256705, - "loss": 46.0, - "step": 17482 - }, - { - "epoch": 2.815491766979347, - "grad_norm": 0.0074384515173733234, - "learning_rate": 0.00019999609289536653, - "loss": 46.0, - "step": 17483 - }, - { - "epoch": 2.8156528040581343, - "grad_norm": 0.0032049394212663174, - "learning_rate": 0.00019999609244814043, - "loss": 46.0, - "step": 17484 - }, - { - "epoch": 2.815813841136922, - "grad_norm": 0.0021838336251676083, - "learning_rate": 0.0001999960920008887, - "loss": 46.0, - "step": 17485 - }, - { - "epoch": 2.8159748782157092, - "grad_norm": 0.008074485696852207, - "learning_rate": 0.0001999960915536114, - "loss": 46.0, - "step": 17486 - }, - { - "epoch": 2.8161359152944967, - "grad_norm": 0.006901143118739128, - "learning_rate": 0.00019999609110630848, - "loss": 46.0, - "step": 17487 - }, - { - "epoch": 2.8162969523732837, - "grad_norm": 0.0017682092729955912, - "learning_rate": 0.00019999609065898, - "loss": 46.0, - "step": 17488 - }, - { - "epoch": 2.816457989452071, - "grad_norm": 0.00814884528517723, - "learning_rate": 0.0001999960902116259, - "loss": 46.0, - "step": 17489 - }, - { - "epoch": 2.8166190265308586, - "grad_norm": 0.0044253915548324585, - "learning_rate": 0.00019999608976424622, - "loss": 46.0, - "step": 17490 - }, - { - "epoch": 2.816780063609646, - "grad_norm": 0.006440346594899893, - "learning_rate": 0.00019999608931684095, - "loss": 46.0, - "step": 17491 - }, - { - "epoch": 2.8169411006884335, - "grad_norm": 0.0031027065124362707, - "learning_rate": 0.00019999608886941007, - "loss": 46.0, - "step": 17492 - }, - { - "epoch": 2.817102137767221, - "grad_norm": 0.00102567533031106, - "learning_rate": 0.0001999960884219536, - "loss": 46.0, - "step": 17493 - }, - { - "epoch": 2.8172631748460084, - "grad_norm": 0.012003621086478233, - "learning_rate": 0.00019999608797447157, - "loss": 46.0, - "step": 17494 - }, - { - "epoch": 2.817424211924796, - "grad_norm": 0.003172435099259019, - "learning_rate": 0.0001999960875269639, - "loss": 46.0, - "step": 17495 - }, - { - "epoch": 2.8175852490035833, - "grad_norm": 0.0009293970069848001, - "learning_rate": 0.00019999608707943066, - "loss": 46.0, - "step": 17496 - }, - { - "epoch": 2.8177462860823703, - "grad_norm": 0.001602035015821457, - "learning_rate": 0.00019999608663187182, - "loss": 46.0, - "step": 17497 - }, - { - "epoch": 2.8179073231611578, - "grad_norm": 0.0015408919425681233, - "learning_rate": 0.00019999608618428738, - "loss": 46.0, - "step": 17498 - }, - { - "epoch": 2.8180683602399452, - "grad_norm": 0.0017288177041336894, - "learning_rate": 0.00019999608573667736, - "loss": 46.0, - "step": 17499 - }, - { - "epoch": 2.8182293973187327, - "grad_norm": 0.006491424515843391, - "learning_rate": 0.00019999608528904173, - "loss": 46.0, - "step": 17500 - }, - { - "epoch": 2.81839043439752, - "grad_norm": 0.005350694991648197, - "learning_rate": 0.00019999608484138053, - "loss": 46.0, - "step": 17501 - }, - { - "epoch": 2.8185514714763076, - "grad_norm": 0.0028305589221417904, - "learning_rate": 0.0001999960843936937, - "loss": 46.0, - "step": 17502 - }, - { - "epoch": 2.8187125085550946, - "grad_norm": 0.0009118483867496252, - "learning_rate": 0.0001999960839459813, - "loss": 46.0, - "step": 17503 - }, - { - "epoch": 2.818873545633882, - "grad_norm": 0.0023907534778118134, - "learning_rate": 0.00019999608349824331, - "loss": 46.0, - "step": 17504 - }, - { - "epoch": 2.8190345827126695, - "grad_norm": 0.002552842488512397, - "learning_rate": 0.00019999608305047972, - "loss": 46.0, - "step": 17505 - }, - { - "epoch": 2.819195619791457, - "grad_norm": 0.011367013677954674, - "learning_rate": 0.00019999608260269053, - "loss": 46.0, - "step": 17506 - }, - { - "epoch": 2.8193566568702444, - "grad_norm": 0.000869512849021703, - "learning_rate": 0.00019999608215487579, - "loss": 46.0, - "step": 17507 - }, - { - "epoch": 2.819517693949032, - "grad_norm": 0.01703316532075405, - "learning_rate": 0.0001999960817070354, - "loss": 46.0, - "step": 17508 - }, - { - "epoch": 2.8196787310278193, - "grad_norm": 0.0046666469424963, - "learning_rate": 0.00019999608125916942, - "loss": 46.0, - "step": 17509 - }, - { - "epoch": 2.8198397681066067, - "grad_norm": 0.0012241367949172854, - "learning_rate": 0.00019999608081127786, - "loss": 46.0, - "step": 17510 - }, - { - "epoch": 2.820000805185394, - "grad_norm": 0.004727122373878956, - "learning_rate": 0.00019999608036336072, - "loss": 46.0, - "step": 17511 - }, - { - "epoch": 2.820161842264181, - "grad_norm": 0.0017181276343762875, - "learning_rate": 0.00019999607991541795, - "loss": 46.0, - "step": 17512 - }, - { - "epoch": 2.8203228793429687, - "grad_norm": 0.0038001309148967266, - "learning_rate": 0.00019999607946744963, - "loss": 46.0, - "step": 17513 - }, - { - "epoch": 2.820483916421756, - "grad_norm": 0.0056593157351017, - "learning_rate": 0.00019999607901945567, - "loss": 46.0, - "step": 17514 - }, - { - "epoch": 2.8206449535005436, - "grad_norm": 0.005102424882352352, - "learning_rate": 0.00019999607857143614, - "loss": 46.0, - "step": 17515 - }, - { - "epoch": 2.820805990579331, - "grad_norm": 0.00345412059687078, - "learning_rate": 0.00019999607812339103, - "loss": 46.0, - "step": 17516 - }, - { - "epoch": 2.820967027658118, - "grad_norm": 0.0009473130339756608, - "learning_rate": 0.0001999960776753203, - "loss": 46.0, - "step": 17517 - }, - { - "epoch": 2.8211280647369055, - "grad_norm": 0.0008056455408222973, - "learning_rate": 0.000199996077227224, - "loss": 46.0, - "step": 17518 - }, - { - "epoch": 2.821289101815693, - "grad_norm": 0.0019042566418647766, - "learning_rate": 0.0001999960767791021, - "loss": 46.0, - "step": 17519 - }, - { - "epoch": 2.8214501388944804, - "grad_norm": 0.0017730704275891185, - "learning_rate": 0.0001999960763309546, - "loss": 46.0, - "step": 17520 - }, - { - "epoch": 2.821611175973268, - "grad_norm": 0.001036213943734765, - "learning_rate": 0.0001999960758827815, - "loss": 46.0, - "step": 17521 - }, - { - "epoch": 2.8217722130520553, - "grad_norm": 0.016822606325149536, - "learning_rate": 0.00019999607543458284, - "loss": 46.0, - "step": 17522 - }, - { - "epoch": 2.8219332501308427, - "grad_norm": 0.0023057369980961084, - "learning_rate": 0.00019999607498635854, - "loss": 46.0, - "step": 17523 - }, - { - "epoch": 2.82209428720963, - "grad_norm": 0.00485507445409894, - "learning_rate": 0.00019999607453810868, - "loss": 46.0, - "step": 17524 - }, - { - "epoch": 2.8222553242884176, - "grad_norm": 0.004225747659802437, - "learning_rate": 0.00019999607408983317, - "loss": 46.0, - "step": 17525 - }, - { - "epoch": 2.8224163613672046, - "grad_norm": 0.010278395377099514, - "learning_rate": 0.00019999607364153214, - "loss": 46.0, - "step": 17526 - }, - { - "epoch": 2.822577398445992, - "grad_norm": 0.0015109482919797301, - "learning_rate": 0.00019999607319320548, - "loss": 46.0, - "step": 17527 - }, - { - "epoch": 2.8227384355247795, - "grad_norm": 0.007660284638404846, - "learning_rate": 0.00019999607274485325, - "loss": 46.0, - "step": 17528 - }, - { - "epoch": 2.822899472603567, - "grad_norm": 0.00685298815369606, - "learning_rate": 0.0001999960722964754, - "loss": 46.0, - "step": 17529 - }, - { - "epoch": 2.8230605096823544, - "grad_norm": 0.0014923186972737312, - "learning_rate": 0.00019999607184807195, - "loss": 46.0, - "step": 17530 - }, - { - "epoch": 2.823221546761142, - "grad_norm": 0.0020321037154644728, - "learning_rate": 0.00019999607139964293, - "loss": 46.0, - "step": 17531 - }, - { - "epoch": 2.823382583839929, - "grad_norm": 0.0030750269070267677, - "learning_rate": 0.00019999607095118828, - "loss": 46.0, - "step": 17532 - }, - { - "epoch": 2.8235436209187164, - "grad_norm": 0.006575407925993204, - "learning_rate": 0.00019999607050270808, - "loss": 46.0, - "step": 17533 - }, - { - "epoch": 2.823704657997504, - "grad_norm": 0.0012792469933629036, - "learning_rate": 0.00019999607005420227, - "loss": 46.0, - "step": 17534 - }, - { - "epoch": 2.8238656950762913, - "grad_norm": 0.0046748509630560875, - "learning_rate": 0.00019999606960567086, - "loss": 46.0, - "step": 17535 - }, - { - "epoch": 2.8240267321550787, - "grad_norm": 0.0030210623517632484, - "learning_rate": 0.00019999606915711387, - "loss": 46.0, - "step": 17536 - }, - { - "epoch": 2.824187769233866, - "grad_norm": 0.003818291937932372, - "learning_rate": 0.00019999606870853127, - "loss": 46.0, - "step": 17537 - }, - { - "epoch": 2.8243488063126536, - "grad_norm": 0.0031532743014395237, - "learning_rate": 0.00019999606825992308, - "loss": 46.0, - "step": 17538 - }, - { - "epoch": 2.824509843391441, - "grad_norm": 0.0021734449546784163, - "learning_rate": 0.0001999960678112893, - "loss": 46.0, - "step": 17539 - }, - { - "epoch": 2.8246708804702285, - "grad_norm": 0.0036230776458978653, - "learning_rate": 0.00019999606736262993, - "loss": 46.0, - "step": 17540 - }, - { - "epoch": 2.8248319175490155, - "grad_norm": 0.0009043588652275503, - "learning_rate": 0.00019999606691394495, - "loss": 46.0, - "step": 17541 - }, - { - "epoch": 2.824992954627803, - "grad_norm": 0.0036086265463382006, - "learning_rate": 0.00019999606646523439, - "loss": 46.0, - "step": 17542 - }, - { - "epoch": 2.8251539917065904, - "grad_norm": 0.0015189432306215167, - "learning_rate": 0.00019999606601649823, - "loss": 46.0, - "step": 17543 - }, - { - "epoch": 2.825315028785378, - "grad_norm": 0.0013452733401209116, - "learning_rate": 0.00019999606556773646, - "loss": 46.0, - "step": 17544 - }, - { - "epoch": 2.8254760658641653, - "grad_norm": 0.011437585577368736, - "learning_rate": 0.00019999606511894913, - "loss": 46.0, - "step": 17545 - }, - { - "epoch": 2.8256371029429523, - "grad_norm": 0.005635588895529509, - "learning_rate": 0.00019999606467013622, - "loss": 46.0, - "step": 17546 - }, - { - "epoch": 2.82579814002174, - "grad_norm": 0.004393447656184435, - "learning_rate": 0.00019999606422129766, - "loss": 46.0, - "step": 17547 - }, - { - "epoch": 2.8259591771005272, - "grad_norm": 0.00277325720526278, - "learning_rate": 0.00019999606377243354, - "loss": 46.0, - "step": 17548 - }, - { - "epoch": 2.8261202141793147, - "grad_norm": 0.004375135991722345, - "learning_rate": 0.0001999960633235438, - "loss": 46.0, - "step": 17549 - }, - { - "epoch": 2.826281251258102, - "grad_norm": 0.006475480273365974, - "learning_rate": 0.00019999606287462852, - "loss": 46.0, - "step": 17550 - }, - { - "epoch": 2.8264422883368896, - "grad_norm": 0.011471494100987911, - "learning_rate": 0.00019999606242568759, - "loss": 46.0, - "step": 17551 - }, - { - "epoch": 2.826603325415677, - "grad_norm": 0.0025683369021862745, - "learning_rate": 0.0001999960619767211, - "loss": 46.0, - "step": 17552 - }, - { - "epoch": 2.8267643624944645, - "grad_norm": 0.0005862105754204094, - "learning_rate": 0.00019999606152772898, - "loss": 46.0, - "step": 17553 - }, - { - "epoch": 2.826925399573252, - "grad_norm": 0.0005172690143808722, - "learning_rate": 0.00019999606107871132, - "loss": 46.0, - "step": 17554 - }, - { - "epoch": 2.8270864366520394, - "grad_norm": 0.007394160609692335, - "learning_rate": 0.00019999606062966803, - "loss": 46.0, - "step": 17555 - }, - { - "epoch": 2.8272474737308264, - "grad_norm": 0.0019491255516186357, - "learning_rate": 0.00019999606018059917, - "loss": 46.0, - "step": 17556 - }, - { - "epoch": 2.827408510809614, - "grad_norm": 0.001105102594010532, - "learning_rate": 0.00019999605973150466, - "loss": 46.0, - "step": 17557 - }, - { - "epoch": 2.8275695478884013, - "grad_norm": 0.001430126721970737, - "learning_rate": 0.0001999960592823846, - "loss": 46.0, - "step": 17558 - }, - { - "epoch": 2.8277305849671888, - "grad_norm": 0.0014700109604746103, - "learning_rate": 0.00019999605883323893, - "loss": 46.0, - "step": 17559 - }, - { - "epoch": 2.827891622045976, - "grad_norm": 0.004617826547473669, - "learning_rate": 0.0001999960583840677, - "loss": 46.0, - "step": 17560 - }, - { - "epoch": 2.8280526591247632, - "grad_norm": 0.001922737923450768, - "learning_rate": 0.00019999605793487085, - "loss": 46.0, - "step": 17561 - }, - { - "epoch": 2.8282136962035507, - "grad_norm": 0.0019020611653104424, - "learning_rate": 0.0001999960574856484, - "loss": 46.0, - "step": 17562 - }, - { - "epoch": 2.828374733282338, - "grad_norm": 0.0010919132037088275, - "learning_rate": 0.0001999960570364004, - "loss": 46.0, - "step": 17563 - }, - { - "epoch": 2.8285357703611256, - "grad_norm": 0.0024385685101151466, - "learning_rate": 0.00019999605658712675, - "loss": 46.0, - "step": 17564 - }, - { - "epoch": 2.828696807439913, - "grad_norm": 0.0008681168546900153, - "learning_rate": 0.00019999605613782751, - "loss": 46.0, - "step": 17565 - }, - { - "epoch": 2.8288578445187005, - "grad_norm": 0.00507500721141696, - "learning_rate": 0.0001999960556885027, - "loss": 46.0, - "step": 17566 - }, - { - "epoch": 2.829018881597488, - "grad_norm": 0.0032564294524490833, - "learning_rate": 0.0001999960552391523, - "loss": 46.0, - "step": 17567 - }, - { - "epoch": 2.8291799186762754, - "grad_norm": 0.001755494624376297, - "learning_rate": 0.0001999960547897763, - "loss": 46.0, - "step": 17568 - }, - { - "epoch": 2.829340955755063, - "grad_norm": 0.007341737858951092, - "learning_rate": 0.0001999960543403747, - "loss": 46.0, - "step": 17569 - }, - { - "epoch": 2.82950199283385, - "grad_norm": 0.014459128491580486, - "learning_rate": 0.0001999960538909475, - "loss": 46.0, - "step": 17570 - }, - { - "epoch": 2.8296630299126373, - "grad_norm": 0.005415915511548519, - "learning_rate": 0.00019999605344149472, - "loss": 46.0, - "step": 17571 - }, - { - "epoch": 2.8298240669914247, - "grad_norm": 0.0010363005567342043, - "learning_rate": 0.00019999605299201635, - "loss": 46.0, - "step": 17572 - }, - { - "epoch": 2.829985104070212, - "grad_norm": 0.0017835585167631507, - "learning_rate": 0.00019999605254251236, - "loss": 46.0, - "step": 17573 - }, - { - "epoch": 2.8301461411489997, - "grad_norm": 0.004828247707337141, - "learning_rate": 0.0001999960520929828, - "loss": 46.0, - "step": 17574 - }, - { - "epoch": 2.830307178227787, - "grad_norm": 0.005219201557338238, - "learning_rate": 0.00019999605164342765, - "loss": 46.0, - "step": 17575 - }, - { - "epoch": 2.830468215306574, - "grad_norm": 0.005991618148982525, - "learning_rate": 0.00019999605119384688, - "loss": 46.0, - "step": 17576 - }, - { - "epoch": 2.8306292523853616, - "grad_norm": 0.005289238411933184, - "learning_rate": 0.00019999605074424052, - "loss": 46.0, - "step": 17577 - }, - { - "epoch": 2.830790289464149, - "grad_norm": 0.0008138579432852566, - "learning_rate": 0.0001999960502946086, - "loss": 46.0, - "step": 17578 - }, - { - "epoch": 2.8309513265429365, - "grad_norm": 0.005250999238342047, - "learning_rate": 0.00019999604984495103, - "loss": 46.0, - "step": 17579 - }, - { - "epoch": 2.831112363621724, - "grad_norm": 0.0007484708330594003, - "learning_rate": 0.00019999604939526793, - "loss": 46.0, - "step": 17580 - }, - { - "epoch": 2.8312734007005114, - "grad_norm": 0.01723730005323887, - "learning_rate": 0.0001999960489455592, - "loss": 46.0, - "step": 17581 - }, - { - "epoch": 2.831434437779299, - "grad_norm": 0.0007146120187826455, - "learning_rate": 0.0001999960484958249, - "loss": 46.0, - "step": 17582 - }, - { - "epoch": 2.8315954748580863, - "grad_norm": 0.009761675260961056, - "learning_rate": 0.00019999604804606498, - "loss": 46.0, - "step": 17583 - }, - { - "epoch": 2.8317565119368737, - "grad_norm": 0.000902310770470649, - "learning_rate": 0.00019999604759627948, - "loss": 46.0, - "step": 17584 - }, - { - "epoch": 2.8319175490156607, - "grad_norm": 0.0028070963453501463, - "learning_rate": 0.00019999604714646834, - "loss": 46.0, - "step": 17585 - }, - { - "epoch": 2.832078586094448, - "grad_norm": 0.0011297531891614199, - "learning_rate": 0.00019999604669663164, - "loss": 46.0, - "step": 17586 - }, - { - "epoch": 2.8322396231732356, - "grad_norm": 0.0006591191049665213, - "learning_rate": 0.00019999604624676938, - "loss": 46.0, - "step": 17587 - }, - { - "epoch": 2.832400660252023, - "grad_norm": 0.005738638807088137, - "learning_rate": 0.0001999960457968815, - "loss": 46.0, - "step": 17588 - }, - { - "epoch": 2.8325616973308105, - "grad_norm": 0.0013909791596233845, - "learning_rate": 0.00019999604534696804, - "loss": 46.0, - "step": 17589 - }, - { - "epoch": 2.8327227344095975, - "grad_norm": 0.005785842891782522, - "learning_rate": 0.00019999604489702897, - "loss": 46.0, - "step": 17590 - }, - { - "epoch": 2.832883771488385, - "grad_norm": 0.0012799163814634085, - "learning_rate": 0.00019999604444706428, - "loss": 46.0, - "step": 17591 - }, - { - "epoch": 2.8330448085671724, - "grad_norm": 0.0015748272417113185, - "learning_rate": 0.00019999604399707402, - "loss": 46.0, - "step": 17592 - }, - { - "epoch": 2.83320584564596, - "grad_norm": 0.01842391863465309, - "learning_rate": 0.00019999604354705819, - "loss": 46.0, - "step": 17593 - }, - { - "epoch": 2.8333668827247473, - "grad_norm": 0.005611111875623465, - "learning_rate": 0.00019999604309701673, - "loss": 46.0, - "step": 17594 - }, - { - "epoch": 2.833527919803535, - "grad_norm": 0.003330002073198557, - "learning_rate": 0.00019999604264694972, - "loss": 46.0, - "step": 17595 - }, - { - "epoch": 2.8336889568823223, - "grad_norm": 0.0019941949285566807, - "learning_rate": 0.00019999604219685707, - "loss": 46.0, - "step": 17596 - }, - { - "epoch": 2.8338499939611097, - "grad_norm": 0.0060850828886032104, - "learning_rate": 0.00019999604174673885, - "loss": 46.0, - "step": 17597 - }, - { - "epoch": 2.834011031039897, - "grad_norm": 0.0014871172606945038, - "learning_rate": 0.00019999604129659502, - "loss": 46.0, - "step": 17598 - }, - { - "epoch": 2.834172068118684, - "grad_norm": 0.0020752577111124992, - "learning_rate": 0.0001999960408464256, - "loss": 46.0, - "step": 17599 - }, - { - "epoch": 2.8343331051974716, - "grad_norm": 0.0017564443405717611, - "learning_rate": 0.0001999960403962306, - "loss": 46.0, - "step": 17600 - }, - { - "epoch": 2.834494142276259, - "grad_norm": 0.002166620222851634, - "learning_rate": 0.00019999603994601, - "loss": 46.0, - "step": 17601 - }, - { - "epoch": 2.8346551793550465, - "grad_norm": 0.002695689210668206, - "learning_rate": 0.0001999960394957638, - "loss": 46.0, - "step": 17602 - }, - { - "epoch": 2.834816216433834, - "grad_norm": 0.0049211750738322735, - "learning_rate": 0.00019999603904549202, - "loss": 46.0, - "step": 17603 - }, - { - "epoch": 2.8349772535126214, - "grad_norm": 0.001928769052028656, - "learning_rate": 0.00019999603859519464, - "loss": 46.0, - "step": 17604 - }, - { - "epoch": 2.8351382905914084, - "grad_norm": 0.0035950620658695698, - "learning_rate": 0.00019999603814487168, - "loss": 46.0, - "step": 17605 - }, - { - "epoch": 2.835299327670196, - "grad_norm": 0.002129684202373028, - "learning_rate": 0.0001999960376945231, - "loss": 46.0, - "step": 17606 - }, - { - "epoch": 2.8354603647489833, - "grad_norm": 0.0011342416983097792, - "learning_rate": 0.00019999603724414893, - "loss": 46.0, - "step": 17607 - }, - { - "epoch": 2.835621401827771, - "grad_norm": 0.00642255088314414, - "learning_rate": 0.00019999603679374917, - "loss": 46.0, - "step": 17608 - }, - { - "epoch": 2.8357824389065582, - "grad_norm": 0.001449377159588039, - "learning_rate": 0.00019999603634332383, - "loss": 46.0, - "step": 17609 - }, - { - "epoch": 2.8359434759853457, - "grad_norm": 0.0029043208342045546, - "learning_rate": 0.00019999603589287288, - "loss": 46.0, - "step": 17610 - }, - { - "epoch": 2.836104513064133, - "grad_norm": 0.0011060646502301097, - "learning_rate": 0.00019999603544239633, - "loss": 46.0, - "step": 17611 - }, - { - "epoch": 2.8362655501429206, - "grad_norm": 0.0008838083012960851, - "learning_rate": 0.0001999960349918942, - "loss": 46.0, - "step": 17612 - }, - { - "epoch": 2.836426587221708, - "grad_norm": 0.013734808191657066, - "learning_rate": 0.00019999603454136646, - "loss": 46.0, - "step": 17613 - }, - { - "epoch": 2.836587624300495, - "grad_norm": 0.0023814975284039974, - "learning_rate": 0.00019999603409081315, - "loss": 46.0, - "step": 17614 - }, - { - "epoch": 2.8367486613792825, - "grad_norm": 0.0037989169359207153, - "learning_rate": 0.00019999603364023424, - "loss": 46.0, - "step": 17615 - }, - { - "epoch": 2.83690969845807, - "grad_norm": 0.010734109207987785, - "learning_rate": 0.00019999603318962973, - "loss": 46.0, - "step": 17616 - }, - { - "epoch": 2.8370707355368574, - "grad_norm": 0.003938655834645033, - "learning_rate": 0.00019999603273899964, - "loss": 46.0, - "step": 17617 - }, - { - "epoch": 2.837231772615645, - "grad_norm": 0.00327532016672194, - "learning_rate": 0.00019999603228834393, - "loss": 46.0, - "step": 17618 - }, - { - "epoch": 2.8373928096944323, - "grad_norm": 0.004189345054328442, - "learning_rate": 0.00019999603183766263, - "loss": 46.0, - "step": 17619 - }, - { - "epoch": 2.8375538467732193, - "grad_norm": 0.001494970521889627, - "learning_rate": 0.00019999603138695575, - "loss": 46.0, - "step": 17620 - }, - { - "epoch": 2.8377148838520068, - "grad_norm": 0.002382122678682208, - "learning_rate": 0.00019999603093622328, - "loss": 46.0, - "step": 17621 - }, - { - "epoch": 2.837875920930794, - "grad_norm": 0.0014835756737738848, - "learning_rate": 0.0001999960304854652, - "loss": 46.0, - "step": 17622 - }, - { - "epoch": 2.8380369580095817, - "grad_norm": 0.007294792681932449, - "learning_rate": 0.00019999603003468153, - "loss": 46.0, - "step": 17623 - }, - { - "epoch": 2.838197995088369, - "grad_norm": 0.0007438446045853198, - "learning_rate": 0.00019999602958387227, - "loss": 46.0, - "step": 17624 - }, - { - "epoch": 2.8383590321671566, - "grad_norm": 0.0023958845995366573, - "learning_rate": 0.00019999602913303743, - "loss": 46.0, - "step": 17625 - }, - { - "epoch": 2.838520069245944, - "grad_norm": 0.0011832868913188577, - "learning_rate": 0.00019999602868217697, - "loss": 46.0, - "step": 17626 - }, - { - "epoch": 2.8386811063247315, - "grad_norm": 0.002492837607860565, - "learning_rate": 0.00019999602823129092, - "loss": 46.0, - "step": 17627 - }, - { - "epoch": 2.838842143403519, - "grad_norm": 0.0073603675700724125, - "learning_rate": 0.00019999602778037932, - "loss": 46.0, - "step": 17628 - }, - { - "epoch": 2.839003180482306, - "grad_norm": 0.0014479657402262092, - "learning_rate": 0.00019999602732944207, - "loss": 46.0, - "step": 17629 - }, - { - "epoch": 2.8391642175610934, - "grad_norm": 0.0027166493237018585, - "learning_rate": 0.00019999602687847926, - "loss": 46.0, - "step": 17630 - }, - { - "epoch": 2.839325254639881, - "grad_norm": 0.001018046517856419, - "learning_rate": 0.00019999602642749084, - "loss": 46.0, - "step": 17631 - }, - { - "epoch": 2.8394862917186683, - "grad_norm": 0.0034496565349400043, - "learning_rate": 0.00019999602597647683, - "loss": 46.0, - "step": 17632 - }, - { - "epoch": 2.8396473287974557, - "grad_norm": 0.0017235990380868316, - "learning_rate": 0.0001999960255254372, - "loss": 46.0, - "step": 17633 - }, - { - "epoch": 2.8398083658762427, - "grad_norm": 0.005632879212498665, - "learning_rate": 0.00019999602507437202, - "loss": 46.0, - "step": 17634 - }, - { - "epoch": 2.83996940295503, - "grad_norm": 0.00149669055826962, - "learning_rate": 0.00019999602462328123, - "loss": 46.0, - "step": 17635 - }, - { - "epoch": 2.8401304400338176, - "grad_norm": 0.005096688400954008, - "learning_rate": 0.00019999602417216484, - "loss": 46.0, - "step": 17636 - }, - { - "epoch": 2.840291477112605, - "grad_norm": 0.005132357124239206, - "learning_rate": 0.00019999602372102287, - "loss": 46.0, - "step": 17637 - }, - { - "epoch": 2.8404525141913926, - "grad_norm": 0.006723702419549227, - "learning_rate": 0.00019999602326985528, - "loss": 46.0, - "step": 17638 - }, - { - "epoch": 2.84061355127018, - "grad_norm": 0.0012959045125171542, - "learning_rate": 0.0001999960228186621, - "loss": 46.0, - "step": 17639 - }, - { - "epoch": 2.8407745883489675, - "grad_norm": 0.001360506284981966, - "learning_rate": 0.00019999602236744335, - "loss": 46.0, - "step": 17640 - }, - { - "epoch": 2.840935625427755, - "grad_norm": 0.006803812924772501, - "learning_rate": 0.000199996021916199, - "loss": 46.0, - "step": 17641 - }, - { - "epoch": 2.8410966625065424, - "grad_norm": 0.004713117145001888, - "learning_rate": 0.00019999602146492906, - "loss": 46.0, - "step": 17642 - }, - { - "epoch": 2.8412576995853294, - "grad_norm": 0.00247765495441854, - "learning_rate": 0.00019999602101363352, - "loss": 46.0, - "step": 17643 - }, - { - "epoch": 2.841418736664117, - "grad_norm": 0.0069712125696241856, - "learning_rate": 0.00019999602056231238, - "loss": 46.0, - "step": 17644 - }, - { - "epoch": 2.8415797737429043, - "grad_norm": 0.003584443125873804, - "learning_rate": 0.00019999602011096563, - "loss": 46.0, - "step": 17645 - }, - { - "epoch": 2.8417408108216917, - "grad_norm": 0.0021740689408034086, - "learning_rate": 0.00019999601965959332, - "loss": 46.0, - "step": 17646 - }, - { - "epoch": 2.841901847900479, - "grad_norm": 0.0029067890718579292, - "learning_rate": 0.0001999960192081954, - "loss": 46.0, - "step": 17647 - }, - { - "epoch": 2.8420628849792666, - "grad_norm": 0.0024205760564655066, - "learning_rate": 0.0001999960187567719, - "loss": 46.0, - "step": 17648 - }, - { - "epoch": 2.8422239220580536, - "grad_norm": 0.005348925478756428, - "learning_rate": 0.00019999601830532278, - "loss": 46.0, - "step": 17649 - }, - { - "epoch": 2.842384959136841, - "grad_norm": 0.0009694330510683358, - "learning_rate": 0.00019999601785384807, - "loss": 46.0, - "step": 17650 - }, - { - "epoch": 2.8425459962156285, - "grad_norm": 0.0021761669777333736, - "learning_rate": 0.00019999601740234777, - "loss": 46.0, - "step": 17651 - }, - { - "epoch": 2.842707033294416, - "grad_norm": 0.00867735780775547, - "learning_rate": 0.00019999601695082188, - "loss": 46.0, - "step": 17652 - }, - { - "epoch": 2.8428680703732034, - "grad_norm": 0.004964365158230066, - "learning_rate": 0.00019999601649927043, - "loss": 46.0, - "step": 17653 - }, - { - "epoch": 2.843029107451991, - "grad_norm": 0.0013917895266786218, - "learning_rate": 0.00019999601604769334, - "loss": 46.0, - "step": 17654 - }, - { - "epoch": 2.8431901445307783, - "grad_norm": 0.006648889742791653, - "learning_rate": 0.00019999601559609066, - "loss": 46.0, - "step": 17655 - }, - { - "epoch": 2.843351181609566, - "grad_norm": 0.0009410177008248866, - "learning_rate": 0.00019999601514446243, - "loss": 46.0, - "step": 17656 - }, - { - "epoch": 2.8435122186883532, - "grad_norm": 0.00480504659935832, - "learning_rate": 0.00019999601469280855, - "loss": 46.0, - "step": 17657 - }, - { - "epoch": 2.8436732557671403, - "grad_norm": 0.002117637312039733, - "learning_rate": 0.0001999960142411291, - "loss": 46.0, - "step": 17658 - }, - { - "epoch": 2.8438342928459277, - "grad_norm": 0.005389770492911339, - "learning_rate": 0.00019999601378942406, - "loss": 46.0, - "step": 17659 - }, - { - "epoch": 2.843995329924715, - "grad_norm": 0.003491622395813465, - "learning_rate": 0.00019999601333769342, - "loss": 46.0, - "step": 17660 - }, - { - "epoch": 2.8441563670035026, - "grad_norm": 0.004554687067866325, - "learning_rate": 0.0001999960128859372, - "loss": 46.0, - "step": 17661 - }, - { - "epoch": 2.84431740408229, - "grad_norm": 0.004166826140135527, - "learning_rate": 0.00019999601243415535, - "loss": 46.0, - "step": 17662 - }, - { - "epoch": 2.844478441161077, - "grad_norm": 0.0027111927047371864, - "learning_rate": 0.00019999601198234795, - "loss": 46.0, - "step": 17663 - }, - { - "epoch": 2.8446394782398645, - "grad_norm": 0.005219809710979462, - "learning_rate": 0.00019999601153051493, - "loss": 46.0, - "step": 17664 - }, - { - "epoch": 2.844800515318652, - "grad_norm": 0.0071383328177034855, - "learning_rate": 0.00019999601107865633, - "loss": 46.0, - "step": 17665 - }, - { - "epoch": 2.8449615523974394, - "grad_norm": 0.002584136324003339, - "learning_rate": 0.00019999601062677211, - "loss": 46.0, - "step": 17666 - }, - { - "epoch": 2.845122589476227, - "grad_norm": 0.004258283879607916, - "learning_rate": 0.0001999960101748623, - "loss": 46.0, - "step": 17667 - }, - { - "epoch": 2.8452836265550143, - "grad_norm": 0.001980163622647524, - "learning_rate": 0.00019999600972292695, - "loss": 46.0, - "step": 17668 - }, - { - "epoch": 2.8454446636338018, - "grad_norm": 0.003112790873274207, - "learning_rate": 0.00019999600927096594, - "loss": 46.0, - "step": 17669 - }, - { - "epoch": 2.8456057007125892, - "grad_norm": 0.0022935864981263876, - "learning_rate": 0.00019999600881897937, - "loss": 46.0, - "step": 17670 - }, - { - "epoch": 2.8457667377913767, - "grad_norm": 0.004193443804979324, - "learning_rate": 0.0001999960083669672, - "loss": 46.0, - "step": 17671 - }, - { - "epoch": 2.845927774870164, - "grad_norm": 0.00351294269785285, - "learning_rate": 0.00019999600791492945, - "loss": 46.0, - "step": 17672 - }, - { - "epoch": 2.846088811948951, - "grad_norm": 0.008579579181969166, - "learning_rate": 0.00019999600746286607, - "loss": 46.0, - "step": 17673 - }, - { - "epoch": 2.8462498490277386, - "grad_norm": 0.003046198282390833, - "learning_rate": 0.00019999600701077713, - "loss": 46.0, - "step": 17674 - }, - { - "epoch": 2.846410886106526, - "grad_norm": 0.0005978193948976696, - "learning_rate": 0.00019999600655866258, - "loss": 46.0, - "step": 17675 - }, - { - "epoch": 2.8465719231853135, - "grad_norm": 0.001720809144899249, - "learning_rate": 0.00019999600610652243, - "loss": 46.0, - "step": 17676 - }, - { - "epoch": 2.846732960264101, - "grad_norm": 0.005646899808198214, - "learning_rate": 0.0001999960056543567, - "loss": 46.0, - "step": 17677 - }, - { - "epoch": 2.846893997342888, - "grad_norm": 0.014304318465292454, - "learning_rate": 0.0001999960052021654, - "loss": 46.0, - "step": 17678 - }, - { - "epoch": 2.8470550344216754, - "grad_norm": 0.0007857764721848071, - "learning_rate": 0.00019999600474994846, - "loss": 46.0, - "step": 17679 - }, - { - "epoch": 2.847216071500463, - "grad_norm": 0.0018906394252553582, - "learning_rate": 0.00019999600429770596, - "loss": 46.0, - "step": 17680 - }, - { - "epoch": 2.8473771085792503, - "grad_norm": 0.0013708010083064437, - "learning_rate": 0.00019999600384543783, - "loss": 46.0, - "step": 17681 - }, - { - "epoch": 2.8475381456580378, - "grad_norm": 0.001256629591807723, - "learning_rate": 0.00019999600339314414, - "loss": 46.0, - "step": 17682 - }, - { - "epoch": 2.847699182736825, - "grad_norm": 0.0033032239880412817, - "learning_rate": 0.00019999600294082483, - "loss": 46.0, - "step": 17683 - }, - { - "epoch": 2.8478602198156127, - "grad_norm": 0.0008098266553133726, - "learning_rate": 0.00019999600248847994, - "loss": 46.0, - "step": 17684 - }, - { - "epoch": 2.8480212568944, - "grad_norm": 0.0014712949050590396, - "learning_rate": 0.00019999600203610945, - "loss": 46.0, - "step": 17685 - }, - { - "epoch": 2.8481822939731876, - "grad_norm": 0.01069496851414442, - "learning_rate": 0.00019999600158371336, - "loss": 46.0, - "step": 17686 - }, - { - "epoch": 2.8483433310519746, - "grad_norm": 0.001078604138456285, - "learning_rate": 0.0001999960011312917, - "loss": 46.0, - "step": 17687 - }, - { - "epoch": 2.848504368130762, - "grad_norm": 0.0028587363194674253, - "learning_rate": 0.00019999600067884443, - "loss": 46.0, - "step": 17688 - }, - { - "epoch": 2.8486654052095495, - "grad_norm": 0.004137709736824036, - "learning_rate": 0.00019999600022637158, - "loss": 46.0, - "step": 17689 - }, - { - "epoch": 2.848826442288337, - "grad_norm": 0.0066100540570914745, - "learning_rate": 0.00019999599977387313, - "loss": 46.0, - "step": 17690 - }, - { - "epoch": 2.8489874793671244, - "grad_norm": 0.003406495088711381, - "learning_rate": 0.00019999599932134907, - "loss": 46.0, - "step": 17691 - }, - { - "epoch": 2.849148516445912, - "grad_norm": 0.0011324897641316056, - "learning_rate": 0.00019999599886879943, - "loss": 46.0, - "step": 17692 - }, - { - "epoch": 2.849309553524699, - "grad_norm": 0.0024638455361127853, - "learning_rate": 0.0001999959984162242, - "loss": 46.0, - "step": 17693 - }, - { - "epoch": 2.8494705906034863, - "grad_norm": 0.006360653322190046, - "learning_rate": 0.00019999599796362337, - "loss": 46.0, - "step": 17694 - }, - { - "epoch": 2.8496316276822737, - "grad_norm": 0.0026383153162896633, - "learning_rate": 0.00019999599751099694, - "loss": 46.0, - "step": 17695 - }, - { - "epoch": 2.849792664761061, - "grad_norm": 0.0013061851495876908, - "learning_rate": 0.00019999599705834494, - "loss": 46.0, - "step": 17696 - }, - { - "epoch": 2.8499537018398486, - "grad_norm": 0.0036800026427954435, - "learning_rate": 0.00019999599660566733, - "loss": 46.0, - "step": 17697 - }, - { - "epoch": 2.850114738918636, - "grad_norm": 0.0013708402402698994, - "learning_rate": 0.0001999959961529641, - "loss": 46.0, - "step": 17698 - }, - { - "epoch": 2.8502757759974235, - "grad_norm": 0.0006732487236149609, - "learning_rate": 0.0001999959957002353, - "loss": 46.0, - "step": 17699 - }, - { - "epoch": 2.850436813076211, - "grad_norm": 0.0023737475275993347, - "learning_rate": 0.00019999599524748093, - "loss": 46.0, - "step": 17700 - }, - { - "epoch": 2.8505978501549984, - "grad_norm": 0.008403660729527473, - "learning_rate": 0.00019999599479470095, - "loss": 46.0, - "step": 17701 - }, - { - "epoch": 2.8507588872337855, - "grad_norm": 0.0007045620586723089, - "learning_rate": 0.00019999599434189535, - "loss": 46.0, - "step": 17702 - }, - { - "epoch": 2.850919924312573, - "grad_norm": 0.0028128752019256353, - "learning_rate": 0.0001999959938890642, - "loss": 46.0, - "step": 17703 - }, - { - "epoch": 2.8510809613913604, - "grad_norm": 0.001996441511437297, - "learning_rate": 0.00019999599343620741, - "loss": 46.0, - "step": 17704 - }, - { - "epoch": 2.851241998470148, - "grad_norm": 0.010239379480481148, - "learning_rate": 0.00019999599298332505, - "loss": 46.0, - "step": 17705 - }, - { - "epoch": 2.8514030355489353, - "grad_norm": 0.0018866312457248569, - "learning_rate": 0.0001999959925304171, - "loss": 46.0, - "step": 17706 - }, - { - "epoch": 2.8515640726277223, - "grad_norm": 0.003044862300157547, - "learning_rate": 0.00019999599207748357, - "loss": 46.0, - "step": 17707 - }, - { - "epoch": 2.8517251097065097, - "grad_norm": 0.0015488950302824378, - "learning_rate": 0.0001999959916245244, - "loss": 46.0, - "step": 17708 - }, - { - "epoch": 2.851886146785297, - "grad_norm": 0.0015393532812595367, - "learning_rate": 0.00019999599117153966, - "loss": 46.0, - "step": 17709 - }, - { - "epoch": 2.8520471838640846, - "grad_norm": 0.0030128757935017347, - "learning_rate": 0.00019999599071852933, - "loss": 46.0, - "step": 17710 - }, - { - "epoch": 2.852208220942872, - "grad_norm": 0.004676430951803923, - "learning_rate": 0.0001999959902654934, - "loss": 46.0, - "step": 17711 - }, - { - "epoch": 2.8523692580216595, - "grad_norm": 0.0019623960833996534, - "learning_rate": 0.0001999959898124319, - "loss": 46.0, - "step": 17712 - }, - { - "epoch": 2.852530295100447, - "grad_norm": 0.013692514970898628, - "learning_rate": 0.0001999959893593448, - "loss": 46.0, - "step": 17713 - }, - { - "epoch": 2.8526913321792344, - "grad_norm": 0.0005461315158754587, - "learning_rate": 0.0001999959889062321, - "loss": 46.0, - "step": 17714 - }, - { - "epoch": 2.852852369258022, - "grad_norm": 0.0011717305751517415, - "learning_rate": 0.00019999598845309377, - "loss": 46.0, - "step": 17715 - }, - { - "epoch": 2.853013406336809, - "grad_norm": 0.008190694265067577, - "learning_rate": 0.0001999959879999299, - "loss": 46.0, - "step": 17716 - }, - { - "epoch": 2.8531744434155963, - "grad_norm": 0.00229228800162673, - "learning_rate": 0.0001999959875467404, - "loss": 46.0, - "step": 17717 - }, - { - "epoch": 2.853335480494384, - "grad_norm": 0.0023601411376148462, - "learning_rate": 0.0001999959870935253, - "loss": 46.0, - "step": 17718 - }, - { - "epoch": 2.8534965175731712, - "grad_norm": 0.0032715650741010904, - "learning_rate": 0.00019999598664028465, - "loss": 46.0, - "step": 17719 - }, - { - "epoch": 2.8536575546519587, - "grad_norm": 0.001448446186259389, - "learning_rate": 0.00019999598618701837, - "loss": 46.0, - "step": 17720 - }, - { - "epoch": 2.853818591730746, - "grad_norm": 0.0018138595623895526, - "learning_rate": 0.0001999959857337265, - "loss": 46.0, - "step": 17721 - }, - { - "epoch": 2.853979628809533, - "grad_norm": 0.0012017472181469202, - "learning_rate": 0.00019999598528040906, - "loss": 46.0, - "step": 17722 - }, - { - "epoch": 2.8541406658883206, - "grad_norm": 0.0039217215962708, - "learning_rate": 0.00019999598482706602, - "loss": 46.0, - "step": 17723 - }, - { - "epoch": 2.854301702967108, - "grad_norm": 0.0018078441498801112, - "learning_rate": 0.00019999598437369737, - "loss": 46.0, - "step": 17724 - }, - { - "epoch": 2.8544627400458955, - "grad_norm": 0.006035876926034689, - "learning_rate": 0.00019999598392030313, - "loss": 46.0, - "step": 17725 - }, - { - "epoch": 2.854623777124683, - "grad_norm": 0.0025969508569687605, - "learning_rate": 0.00019999598346688327, - "loss": 46.0, - "step": 17726 - }, - { - "epoch": 2.8547848142034704, - "grad_norm": 0.0037794699892401695, - "learning_rate": 0.00019999598301343786, - "loss": 46.0, - "step": 17727 - }, - { - "epoch": 2.854945851282258, - "grad_norm": 0.00448583485558629, - "learning_rate": 0.00019999598255996683, - "loss": 46.0, - "step": 17728 - }, - { - "epoch": 2.8551068883610453, - "grad_norm": 0.020436882972717285, - "learning_rate": 0.00019999598210647022, - "loss": 46.0, - "step": 17729 - }, - { - "epoch": 2.8552679254398328, - "grad_norm": 0.0007145946146920323, - "learning_rate": 0.00019999598165294801, - "loss": 46.0, - "step": 17730 - }, - { - "epoch": 2.8554289625186198, - "grad_norm": 0.004013553261756897, - "learning_rate": 0.00019999598119940022, - "loss": 46.0, - "step": 17731 - }, - { - "epoch": 2.8555899995974072, - "grad_norm": 0.0005296072922646999, - "learning_rate": 0.00019999598074582682, - "loss": 46.0, - "step": 17732 - }, - { - "epoch": 2.8557510366761947, - "grad_norm": 0.0019515667809173465, - "learning_rate": 0.00019999598029222783, - "loss": 46.0, - "step": 17733 - }, - { - "epoch": 2.855912073754982, - "grad_norm": 0.001788418390788138, - "learning_rate": 0.00019999597983860325, - "loss": 46.0, - "step": 17734 - }, - { - "epoch": 2.8560731108337696, - "grad_norm": 0.0023962599225342274, - "learning_rate": 0.00019999597938495306, - "loss": 46.0, - "step": 17735 - }, - { - "epoch": 2.856234147912557, - "grad_norm": 0.0008774466696195304, - "learning_rate": 0.0001999959789312773, - "loss": 46.0, - "step": 17736 - }, - { - "epoch": 2.856395184991344, - "grad_norm": 0.00515154330059886, - "learning_rate": 0.00019999597847757594, - "loss": 46.0, - "step": 17737 - }, - { - "epoch": 2.8565562220701315, - "grad_norm": 0.004023326560854912, - "learning_rate": 0.00019999597802384899, - "loss": 46.0, - "step": 17738 - }, - { - "epoch": 2.856717259148919, - "grad_norm": 0.008450892753899097, - "learning_rate": 0.00019999597757009642, - "loss": 46.0, - "step": 17739 - }, - { - "epoch": 2.8568782962277064, - "grad_norm": 0.00282103568315506, - "learning_rate": 0.0001999959771163183, - "loss": 46.0, - "step": 17740 - }, - { - "epoch": 2.857039333306494, - "grad_norm": 0.0014612689847126603, - "learning_rate": 0.00019999597666251452, - "loss": 46.0, - "step": 17741 - }, - { - "epoch": 2.8572003703852813, - "grad_norm": 0.009119594469666481, - "learning_rate": 0.0001999959762086852, - "loss": 46.0, - "step": 17742 - }, - { - "epoch": 2.8573614074640687, - "grad_norm": 0.007086241617798805, - "learning_rate": 0.0001999959757548303, - "loss": 46.0, - "step": 17743 - }, - { - "epoch": 2.857522444542856, - "grad_norm": 0.006481421645730734, - "learning_rate": 0.00019999597530094977, - "loss": 46.0, - "step": 17744 - }, - { - "epoch": 2.8576834816216437, - "grad_norm": 0.004874011967331171, - "learning_rate": 0.00019999597484704365, - "loss": 46.0, - "step": 17745 - }, - { - "epoch": 2.8578445187004307, - "grad_norm": 0.0023945835418999195, - "learning_rate": 0.00019999597439311192, - "loss": 46.0, - "step": 17746 - }, - { - "epoch": 2.858005555779218, - "grad_norm": 0.001118845073506236, - "learning_rate": 0.00019999597393915462, - "loss": 46.0, - "step": 17747 - }, - { - "epoch": 2.8581665928580056, - "grad_norm": 0.0018878093687817454, - "learning_rate": 0.00019999597348517174, - "loss": 46.0, - "step": 17748 - }, - { - "epoch": 2.858327629936793, - "grad_norm": 0.0008037827792577446, - "learning_rate": 0.00019999597303116325, - "loss": 46.0, - "step": 17749 - }, - { - "epoch": 2.8584886670155805, - "grad_norm": 0.0031560324132442474, - "learning_rate": 0.00019999597257712917, - "loss": 46.0, - "step": 17750 - }, - { - "epoch": 2.8586497040943675, - "grad_norm": 0.0038127838633954525, - "learning_rate": 0.00019999597212306947, - "loss": 46.0, - "step": 17751 - }, - { - "epoch": 2.858810741173155, - "grad_norm": 0.002726686652749777, - "learning_rate": 0.00019999597166898422, - "loss": 46.0, - "step": 17752 - }, - { - "epoch": 2.8589717782519424, - "grad_norm": 0.0007126725977286696, - "learning_rate": 0.00019999597121487335, - "loss": 46.0, - "step": 17753 - }, - { - "epoch": 2.85913281533073, - "grad_norm": 0.0019691763445734978, - "learning_rate": 0.0001999959707607369, - "loss": 46.0, - "step": 17754 - }, - { - "epoch": 2.8592938524095173, - "grad_norm": 0.0029765560757368803, - "learning_rate": 0.00019999597030657485, - "loss": 46.0, - "step": 17755 - }, - { - "epoch": 2.8594548894883047, - "grad_norm": 0.005691281985491514, - "learning_rate": 0.0001999959698523872, - "loss": 46.0, - "step": 17756 - }, - { - "epoch": 2.859615926567092, - "grad_norm": 0.00326905376277864, - "learning_rate": 0.00019999596939817394, - "loss": 46.0, - "step": 17757 - }, - { - "epoch": 2.8597769636458796, - "grad_norm": 0.0003961396578233689, - "learning_rate": 0.0001999959689439351, - "loss": 46.0, - "step": 17758 - }, - { - "epoch": 2.859938000724667, - "grad_norm": 0.001628469442948699, - "learning_rate": 0.0001999959684896707, - "loss": 46.0, - "step": 17759 - }, - { - "epoch": 2.860099037803454, - "grad_norm": 0.0012904424220323563, - "learning_rate": 0.00019999596803538065, - "loss": 46.0, - "step": 17760 - }, - { - "epoch": 2.8602600748822415, - "grad_norm": 0.0023224635515362024, - "learning_rate": 0.00019999596758106503, - "loss": 46.0, - "step": 17761 - }, - { - "epoch": 2.860421111961029, - "grad_norm": 0.0023614896927028894, - "learning_rate": 0.00019999596712672385, - "loss": 46.0, - "step": 17762 - }, - { - "epoch": 2.8605821490398164, - "grad_norm": 0.0032101981341838837, - "learning_rate": 0.00019999596667235703, - "loss": 46.0, - "step": 17763 - }, - { - "epoch": 2.860743186118604, - "grad_norm": 0.0038493045140057802, - "learning_rate": 0.00019999596621796464, - "loss": 46.0, - "step": 17764 - }, - { - "epoch": 2.8609042231973913, - "grad_norm": 0.002698685275390744, - "learning_rate": 0.00019999596576354662, - "loss": 46.0, - "step": 17765 - }, - { - "epoch": 2.8610652602761784, - "grad_norm": 0.0008765386301092803, - "learning_rate": 0.00019999596530910306, - "loss": 46.0, - "step": 17766 - }, - { - "epoch": 2.861226297354966, - "grad_norm": 0.0070189377292990685, - "learning_rate": 0.00019999596485463386, - "loss": 46.0, - "step": 17767 - }, - { - "epoch": 2.8613873344337533, - "grad_norm": 0.0061255996115505695, - "learning_rate": 0.0001999959644001391, - "loss": 46.0, - "step": 17768 - }, - { - "epoch": 2.8615483715125407, - "grad_norm": 0.0007536065531894565, - "learning_rate": 0.00019999596394561873, - "loss": 46.0, - "step": 17769 - }, - { - "epoch": 2.861709408591328, - "grad_norm": 0.002301283646374941, - "learning_rate": 0.00019999596349107277, - "loss": 46.0, - "step": 17770 - }, - { - "epoch": 2.8618704456701156, - "grad_norm": 0.002608307171612978, - "learning_rate": 0.0001999959630365012, - "loss": 46.0, - "step": 17771 - }, - { - "epoch": 2.862031482748903, - "grad_norm": 0.0010824125492945313, - "learning_rate": 0.00019999596258190406, - "loss": 46.0, - "step": 17772 - }, - { - "epoch": 2.8621925198276905, - "grad_norm": 0.0028726982418447733, - "learning_rate": 0.00019999596212728134, - "loss": 46.0, - "step": 17773 - }, - { - "epoch": 2.862353556906478, - "grad_norm": 0.0016900365008041263, - "learning_rate": 0.00019999596167263298, - "loss": 46.0, - "step": 17774 - }, - { - "epoch": 2.862514593985265, - "grad_norm": 0.004488042555749416, - "learning_rate": 0.00019999596121795905, - "loss": 46.0, - "step": 17775 - }, - { - "epoch": 2.8626756310640524, - "grad_norm": 0.006179673131555319, - "learning_rate": 0.00019999596076325952, - "loss": 46.0, - "step": 17776 - }, - { - "epoch": 2.86283666814284, - "grad_norm": 0.00732179032638669, - "learning_rate": 0.0001999959603085344, - "loss": 46.0, - "step": 17777 - }, - { - "epoch": 2.8629977052216273, - "grad_norm": 0.0023710872046649456, - "learning_rate": 0.0001999959598537837, - "loss": 46.0, - "step": 17778 - }, - { - "epoch": 2.863158742300415, - "grad_norm": 0.004059616010636091, - "learning_rate": 0.00019999595939900738, - "loss": 46.0, - "step": 17779 - }, - { - "epoch": 2.863319779379202, - "grad_norm": 0.0026830388233065605, - "learning_rate": 0.0001999959589442055, - "loss": 46.0, - "step": 17780 - }, - { - "epoch": 2.8634808164579892, - "grad_norm": 0.011139930225908756, - "learning_rate": 0.000199995958489378, - "loss": 46.0, - "step": 17781 - }, - { - "epoch": 2.8636418535367767, - "grad_norm": 0.00361109827645123, - "learning_rate": 0.0001999959580345249, - "loss": 46.0, - "step": 17782 - }, - { - "epoch": 2.863802890615564, - "grad_norm": 0.0017445327248424292, - "learning_rate": 0.00019999595757964623, - "loss": 46.0, - "step": 17783 - }, - { - "epoch": 2.8639639276943516, - "grad_norm": 0.0037039124872535467, - "learning_rate": 0.00019999595712474197, - "loss": 46.0, - "step": 17784 - }, - { - "epoch": 2.864124964773139, - "grad_norm": 0.0007199071696959436, - "learning_rate": 0.0001999959566698121, - "loss": 46.0, - "step": 17785 - }, - { - "epoch": 2.8642860018519265, - "grad_norm": 0.0014654486440122128, - "learning_rate": 0.00019999595621485663, - "loss": 46.0, - "step": 17786 - }, - { - "epoch": 2.864447038930714, - "grad_norm": 0.0020696097053587437, - "learning_rate": 0.00019999595575987558, - "loss": 46.0, - "step": 17787 - }, - { - "epoch": 2.8646080760095014, - "grad_norm": 0.0011353419395163655, - "learning_rate": 0.00019999595530486891, - "loss": 46.0, - "step": 17788 - }, - { - "epoch": 2.864769113088289, - "grad_norm": 0.0013259720290079713, - "learning_rate": 0.0001999959548498367, - "loss": 46.0, - "step": 17789 - }, - { - "epoch": 2.864930150167076, - "grad_norm": 0.0018845100421458483, - "learning_rate": 0.00019999595439477882, - "loss": 46.0, - "step": 17790 - }, - { - "epoch": 2.8650911872458633, - "grad_norm": 0.0032751886174082756, - "learning_rate": 0.0001999959539396954, - "loss": 46.0, - "step": 17791 - }, - { - "epoch": 2.8652522243246508, - "grad_norm": 0.002355837030336261, - "learning_rate": 0.00019999595348458638, - "loss": 46.0, - "step": 17792 - }, - { - "epoch": 2.865413261403438, - "grad_norm": 0.0010306062176823616, - "learning_rate": 0.00019999595302945178, - "loss": 46.0, - "step": 17793 - }, - { - "epoch": 2.8655742984822257, - "grad_norm": 0.005004832521080971, - "learning_rate": 0.00019999595257429157, - "loss": 46.0, - "step": 17794 - }, - { - "epoch": 2.8657353355610127, - "grad_norm": 0.004597513936460018, - "learning_rate": 0.00019999595211910576, - "loss": 46.0, - "step": 17795 - }, - { - "epoch": 2.8658963726398, - "grad_norm": 0.001008680323138833, - "learning_rate": 0.00019999595166389432, - "loss": 46.0, - "step": 17796 - }, - { - "epoch": 2.8660574097185876, - "grad_norm": 0.0038172886706888676, - "learning_rate": 0.00019999595120865734, - "loss": 46.0, - "step": 17797 - }, - { - "epoch": 2.866218446797375, - "grad_norm": 0.005964105483144522, - "learning_rate": 0.00019999595075339475, - "loss": 46.0, - "step": 17798 - }, - { - "epoch": 2.8663794838761625, - "grad_norm": 0.0015238994965329766, - "learning_rate": 0.00019999595029810658, - "loss": 46.0, - "step": 17799 - }, - { - "epoch": 2.86654052095495, - "grad_norm": 0.004664433654397726, - "learning_rate": 0.00019999594984279278, - "loss": 46.0, - "step": 17800 - }, - { - "epoch": 2.8667015580337374, - "grad_norm": 0.003416199702769518, - "learning_rate": 0.00019999594938745343, - "loss": 46.0, - "step": 17801 - }, - { - "epoch": 2.866862595112525, - "grad_norm": 0.00044466060353443027, - "learning_rate": 0.00019999594893208846, - "loss": 46.0, - "step": 17802 - }, - { - "epoch": 2.8670236321913123, - "grad_norm": 0.003705657320097089, - "learning_rate": 0.0001999959484766979, - "loss": 46.0, - "step": 17803 - }, - { - "epoch": 2.8671846692700993, - "grad_norm": 0.004809023346751928, - "learning_rate": 0.00019999594802128177, - "loss": 46.0, - "step": 17804 - }, - { - "epoch": 2.8673457063488867, - "grad_norm": 0.0016219865065068007, - "learning_rate": 0.00019999594756584001, - "loss": 46.0, - "step": 17805 - }, - { - "epoch": 2.867506743427674, - "grad_norm": 0.009811586700379848, - "learning_rate": 0.00019999594711037267, - "loss": 46.0, - "step": 17806 - }, - { - "epoch": 2.8676677805064616, - "grad_norm": 0.0015374321956187487, - "learning_rate": 0.00019999594665487974, - "loss": 46.0, - "step": 17807 - }, - { - "epoch": 2.867828817585249, - "grad_norm": 0.0043531060218811035, - "learning_rate": 0.00019999594619936122, - "loss": 46.0, - "step": 17808 - }, - { - "epoch": 2.8679898546640366, - "grad_norm": 0.001166231231763959, - "learning_rate": 0.0001999959457438171, - "loss": 46.0, - "step": 17809 - }, - { - "epoch": 2.8681508917428236, - "grad_norm": 0.0009849559282884002, - "learning_rate": 0.00019999594528824738, - "loss": 46.0, - "step": 17810 - }, - { - "epoch": 2.868311928821611, - "grad_norm": 0.010321375913918018, - "learning_rate": 0.00019999594483265207, - "loss": 46.0, - "step": 17811 - }, - { - "epoch": 2.8684729659003985, - "grad_norm": 0.001669671619310975, - "learning_rate": 0.00019999594437703118, - "loss": 46.0, - "step": 17812 - }, - { - "epoch": 2.868634002979186, - "grad_norm": 0.004847492557018995, - "learning_rate": 0.00019999594392138467, - "loss": 46.0, - "step": 17813 - }, - { - "epoch": 2.8687950400579734, - "grad_norm": 0.004640201572328806, - "learning_rate": 0.00019999594346571258, - "loss": 46.0, - "step": 17814 - }, - { - "epoch": 2.868956077136761, - "grad_norm": 0.005793978925794363, - "learning_rate": 0.0001999959430100149, - "loss": 46.0, - "step": 17815 - }, - { - "epoch": 2.8691171142155483, - "grad_norm": 0.003426800947636366, - "learning_rate": 0.00019999594255429163, - "loss": 46.0, - "step": 17816 - }, - { - "epoch": 2.8692781512943357, - "grad_norm": 0.0007502498920075595, - "learning_rate": 0.00019999594209854274, - "loss": 46.0, - "step": 17817 - }, - { - "epoch": 2.869439188373123, - "grad_norm": 0.0014112351927906275, - "learning_rate": 0.00019999594164276827, - "loss": 46.0, - "step": 17818 - }, - { - "epoch": 2.86960022545191, - "grad_norm": 0.0016646521398797631, - "learning_rate": 0.00019999594118696822, - "loss": 46.0, - "step": 17819 - }, - { - "epoch": 2.8697612625306976, - "grad_norm": 0.0032315412536263466, - "learning_rate": 0.00019999594073114257, - "loss": 46.0, - "step": 17820 - }, - { - "epoch": 2.869922299609485, - "grad_norm": 0.0027037032414227724, - "learning_rate": 0.00019999594027529134, - "loss": 46.0, - "step": 17821 - }, - { - "epoch": 2.8700833366882725, - "grad_norm": 0.007093028165400028, - "learning_rate": 0.0001999959398194145, - "loss": 46.0, - "step": 17822 - }, - { - "epoch": 2.87024437376706, - "grad_norm": 0.004408692009747028, - "learning_rate": 0.00019999593936351206, - "loss": 46.0, - "step": 17823 - }, - { - "epoch": 2.870405410845847, - "grad_norm": 0.0010976019548252225, - "learning_rate": 0.00019999593890758402, - "loss": 46.0, - "step": 17824 - }, - { - "epoch": 2.8705664479246344, - "grad_norm": 0.006968142930418253, - "learning_rate": 0.0001999959384516304, - "loss": 46.0, - "step": 17825 - }, - { - "epoch": 2.870727485003422, - "grad_norm": 0.0027809094171971083, - "learning_rate": 0.00019999593799565116, - "loss": 46.0, - "step": 17826 - }, - { - "epoch": 2.8708885220822093, - "grad_norm": 0.0034778385888785124, - "learning_rate": 0.00019999593753964638, - "loss": 46.0, - "step": 17827 - }, - { - "epoch": 2.871049559160997, - "grad_norm": 0.0013363156467676163, - "learning_rate": 0.00019999593708361596, - "loss": 46.0, - "step": 17828 - }, - { - "epoch": 2.8712105962397843, - "grad_norm": 0.002102689351886511, - "learning_rate": 0.00019999593662755997, - "loss": 46.0, - "step": 17829 - }, - { - "epoch": 2.8713716333185717, - "grad_norm": 0.009751196019351482, - "learning_rate": 0.00019999593617147838, - "loss": 46.0, - "step": 17830 - }, - { - "epoch": 2.871532670397359, - "grad_norm": 0.0028450442478060722, - "learning_rate": 0.00019999593571537122, - "loss": 46.0, - "step": 17831 - }, - { - "epoch": 2.8716937074761466, - "grad_norm": 0.004424213897436857, - "learning_rate": 0.00019999593525923842, - "loss": 46.0, - "step": 17832 - }, - { - "epoch": 2.8718547445549336, - "grad_norm": 0.0027277308981865644, - "learning_rate": 0.00019999593480308004, - "loss": 46.0, - "step": 17833 - }, - { - "epoch": 2.872015781633721, - "grad_norm": 0.0025158394128084183, - "learning_rate": 0.0001999959343468961, - "loss": 46.0, - "step": 17834 - }, - { - "epoch": 2.8721768187125085, - "grad_norm": 0.004669091664254665, - "learning_rate": 0.0001999959338906865, - "loss": 46.0, - "step": 17835 - }, - { - "epoch": 2.872337855791296, - "grad_norm": 0.003903195494785905, - "learning_rate": 0.00019999593343445135, - "loss": 46.0, - "step": 17836 - }, - { - "epoch": 2.8724988928700834, - "grad_norm": 0.00392779428511858, - "learning_rate": 0.00019999593297819062, - "loss": 46.0, - "step": 17837 - }, - { - "epoch": 2.872659929948871, - "grad_norm": 0.0007379152229987085, - "learning_rate": 0.00019999593252190427, - "loss": 46.0, - "step": 17838 - }, - { - "epoch": 2.872820967027658, - "grad_norm": 0.0028645016718655825, - "learning_rate": 0.00019999593206559234, - "loss": 46.0, - "step": 17839 - }, - { - "epoch": 2.8729820041064453, - "grad_norm": 0.0012807894963771105, - "learning_rate": 0.00019999593160925484, - "loss": 46.0, - "step": 17840 - }, - { - "epoch": 2.873143041185233, - "grad_norm": 0.01279291883111, - "learning_rate": 0.0001999959311528917, - "loss": 46.0, - "step": 17841 - }, - { - "epoch": 2.8733040782640202, - "grad_norm": 0.0027771827299147844, - "learning_rate": 0.00019999593069650298, - "loss": 46.0, - "step": 17842 - }, - { - "epoch": 2.8734651153428077, - "grad_norm": 0.001404869370162487, - "learning_rate": 0.00019999593024008867, - "loss": 46.0, - "step": 17843 - }, - { - "epoch": 2.873626152421595, - "grad_norm": 0.0039648148231208324, - "learning_rate": 0.00019999592978364877, - "loss": 46.0, - "step": 17844 - }, - { - "epoch": 2.8737871895003826, - "grad_norm": 0.004427431151270866, - "learning_rate": 0.00019999592932718328, - "loss": 46.0, - "step": 17845 - }, - { - "epoch": 2.87394822657917, - "grad_norm": 0.0030001287814229727, - "learning_rate": 0.00019999592887069215, - "loss": 46.0, - "step": 17846 - }, - { - "epoch": 2.8741092636579575, - "grad_norm": 0.0028888951055705547, - "learning_rate": 0.00019999592841417547, - "loss": 46.0, - "step": 17847 - }, - { - "epoch": 2.8742703007367445, - "grad_norm": 0.007748141419142485, - "learning_rate": 0.0001999959279576332, - "loss": 46.0, - "step": 17848 - }, - { - "epoch": 2.874431337815532, - "grad_norm": 0.003926098812371492, - "learning_rate": 0.00019999592750106533, - "loss": 46.0, - "step": 17849 - }, - { - "epoch": 2.8745923748943194, - "grad_norm": 0.010174036026000977, - "learning_rate": 0.00019999592704447185, - "loss": 46.0, - "step": 17850 - }, - { - "epoch": 2.874753411973107, - "grad_norm": 0.007152326870709658, - "learning_rate": 0.0001999959265878528, - "loss": 46.0, - "step": 17851 - }, - { - "epoch": 2.8749144490518943, - "grad_norm": 0.0013236486120149493, - "learning_rate": 0.00019999592613120814, - "loss": 46.0, - "step": 17852 - }, - { - "epoch": 2.8750754861306813, - "grad_norm": 0.00904405303299427, - "learning_rate": 0.00019999592567453787, - "loss": 46.0, - "step": 17853 - }, - { - "epoch": 2.8752365232094688, - "grad_norm": 0.0025345038156956434, - "learning_rate": 0.00019999592521784205, - "loss": 46.0, - "step": 17854 - }, - { - "epoch": 2.875397560288256, - "grad_norm": 0.00425531854853034, - "learning_rate": 0.0001999959247611206, - "loss": 46.0, - "step": 17855 - }, - { - "epoch": 2.8755585973670437, - "grad_norm": 0.0020722730550915003, - "learning_rate": 0.00019999592430437358, - "loss": 46.0, - "step": 17856 - }, - { - "epoch": 2.875719634445831, - "grad_norm": 0.005723000969737768, - "learning_rate": 0.00019999592384760097, - "loss": 46.0, - "step": 17857 - }, - { - "epoch": 2.8758806715246186, - "grad_norm": 0.0029709741938859224, - "learning_rate": 0.00019999592339080274, - "loss": 46.0, - "step": 17858 - }, - { - "epoch": 2.876041708603406, - "grad_norm": 0.003711389610543847, - "learning_rate": 0.00019999592293397893, - "loss": 46.0, - "step": 17859 - }, - { - "epoch": 2.8762027456821935, - "grad_norm": 0.003403382608667016, - "learning_rate": 0.00019999592247712953, - "loss": 46.0, - "step": 17860 - }, - { - "epoch": 2.876363782760981, - "grad_norm": 0.004919823259115219, - "learning_rate": 0.0001999959220202545, - "loss": 46.0, - "step": 17861 - }, - { - "epoch": 2.8765248198397684, - "grad_norm": 0.010195001028478146, - "learning_rate": 0.0001999959215633539, - "loss": 46.0, - "step": 17862 - }, - { - "epoch": 2.8766858569185554, - "grad_norm": 0.028367798775434494, - "learning_rate": 0.00019999592110642774, - "loss": 46.0, - "step": 17863 - }, - { - "epoch": 2.876846893997343, - "grad_norm": 0.0027255869936197996, - "learning_rate": 0.00019999592064947597, - "loss": 46.0, - "step": 17864 - }, - { - "epoch": 2.8770079310761303, - "grad_norm": 0.0018845719750970602, - "learning_rate": 0.00019999592019249857, - "loss": 46.0, - "step": 17865 - }, - { - "epoch": 2.8771689681549177, - "grad_norm": 0.00484988559037447, - "learning_rate": 0.0001999959197354956, - "loss": 46.0, - "step": 17866 - }, - { - "epoch": 2.877330005233705, - "grad_norm": 0.004739455413073301, - "learning_rate": 0.00019999591927846705, - "loss": 46.0, - "step": 17867 - }, - { - "epoch": 2.877491042312492, - "grad_norm": 0.003980754408985376, - "learning_rate": 0.00019999591882141287, - "loss": 46.0, - "step": 17868 - }, - { - "epoch": 2.8776520793912796, - "grad_norm": 0.003942277282476425, - "learning_rate": 0.00019999591836433313, - "loss": 46.0, - "step": 17869 - }, - { - "epoch": 2.877813116470067, - "grad_norm": 0.0025070379488170147, - "learning_rate": 0.0001999959179072278, - "loss": 46.0, - "step": 17870 - }, - { - "epoch": 2.8779741535488546, - "grad_norm": 0.0045952266082167625, - "learning_rate": 0.00019999591745009684, - "loss": 46.0, - "step": 17871 - }, - { - "epoch": 2.878135190627642, - "grad_norm": 0.0027326405979692936, - "learning_rate": 0.00019999591699294033, - "loss": 46.0, - "step": 17872 - }, - { - "epoch": 2.8782962277064295, - "grad_norm": 0.0009066511411219835, - "learning_rate": 0.0001999959165357582, - "loss": 46.0, - "step": 17873 - }, - { - "epoch": 2.878457264785217, - "grad_norm": 0.01229983102530241, - "learning_rate": 0.00019999591607855049, - "loss": 46.0, - "step": 17874 - }, - { - "epoch": 2.8786183018640044, - "grad_norm": 0.0006595427985303104, - "learning_rate": 0.0001999959156213172, - "loss": 46.0, - "step": 17875 - }, - { - "epoch": 2.878779338942792, - "grad_norm": 0.006917824503034353, - "learning_rate": 0.00019999591516405826, - "loss": 46.0, - "step": 17876 - }, - { - "epoch": 2.878940376021579, - "grad_norm": 0.0011825596448034048, - "learning_rate": 0.00019999591470677377, - "loss": 46.0, - "step": 17877 - }, - { - "epoch": 2.8791014131003663, - "grad_norm": 0.0011919609969481826, - "learning_rate": 0.00019999591424946366, - "loss": 46.0, - "step": 17878 - }, - { - "epoch": 2.8792624501791537, - "grad_norm": 0.003728542709723115, - "learning_rate": 0.000199995913792128, - "loss": 46.0, - "step": 17879 - }, - { - "epoch": 2.879423487257941, - "grad_norm": 0.005177161656320095, - "learning_rate": 0.00019999591333476672, - "loss": 46.0, - "step": 17880 - }, - { - "epoch": 2.8795845243367286, - "grad_norm": 0.00561678409576416, - "learning_rate": 0.00019999591287737982, - "loss": 46.0, - "step": 17881 - }, - { - "epoch": 2.879745561415516, - "grad_norm": 0.005106620490550995, - "learning_rate": 0.00019999591241996736, - "loss": 46.0, - "step": 17882 - }, - { - "epoch": 2.879906598494303, - "grad_norm": 0.015724681317806244, - "learning_rate": 0.0001999959119625293, - "loss": 46.0, - "step": 17883 - }, - { - "epoch": 2.8800676355730905, - "grad_norm": 0.00477852625772357, - "learning_rate": 0.00019999591150506564, - "loss": 46.0, - "step": 17884 - }, - { - "epoch": 2.880228672651878, - "grad_norm": 0.0018896848196163774, - "learning_rate": 0.00019999591104757637, - "loss": 46.0, - "step": 17885 - }, - { - "epoch": 2.8803897097306654, - "grad_norm": 0.002053465461358428, - "learning_rate": 0.00019999591059006156, - "loss": 46.0, - "step": 17886 - }, - { - "epoch": 2.880550746809453, - "grad_norm": 0.0014558566035702825, - "learning_rate": 0.0001999959101325211, - "loss": 46.0, - "step": 17887 - }, - { - "epoch": 2.8807117838882403, - "grad_norm": 0.008170108310878277, - "learning_rate": 0.00019999590967495509, - "loss": 46.0, - "step": 17888 - }, - { - "epoch": 2.880872820967028, - "grad_norm": 0.003906745929270983, - "learning_rate": 0.00019999590921736344, - "loss": 46.0, - "step": 17889 - }, - { - "epoch": 2.8810338580458152, - "grad_norm": 0.003990508615970612, - "learning_rate": 0.00019999590875974623, - "loss": 46.0, - "step": 17890 - }, - { - "epoch": 2.8811948951246027, - "grad_norm": 0.001963198184967041, - "learning_rate": 0.0001999959083021034, - "loss": 46.0, - "step": 17891 - }, - { - "epoch": 2.8813559322033897, - "grad_norm": 0.003254386829212308, - "learning_rate": 0.00019999590784443497, - "loss": 46.0, - "step": 17892 - }, - { - "epoch": 2.881516969282177, - "grad_norm": 0.0008436123025603592, - "learning_rate": 0.00019999590738674098, - "loss": 46.0, - "step": 17893 - }, - { - "epoch": 2.8816780063609646, - "grad_norm": 0.0027702623046934605, - "learning_rate": 0.0001999959069290214, - "loss": 46.0, - "step": 17894 - }, - { - "epoch": 2.881839043439752, - "grad_norm": 0.002942032413557172, - "learning_rate": 0.0001999959064712762, - "loss": 46.0, - "step": 17895 - }, - { - "epoch": 2.8820000805185395, - "grad_norm": 0.0005536332610063255, - "learning_rate": 0.00019999590601350542, - "loss": 46.0, - "step": 17896 - }, - { - "epoch": 2.8821611175973265, - "grad_norm": 0.0010056201135739684, - "learning_rate": 0.00019999590555570907, - "loss": 46.0, - "step": 17897 - }, - { - "epoch": 2.882322154676114, - "grad_norm": 0.007605133578181267, - "learning_rate": 0.00019999590509788706, - "loss": 46.0, - "step": 17898 - }, - { - "epoch": 2.8824831917549014, - "grad_norm": 0.010303414426743984, - "learning_rate": 0.0001999959046400395, - "loss": 46.0, - "step": 17899 - }, - { - "epoch": 2.882644228833689, - "grad_norm": 0.00073988240910694, - "learning_rate": 0.00019999590418216635, - "loss": 46.0, - "step": 17900 - }, - { - "epoch": 2.8828052659124763, - "grad_norm": 0.0015515421982854605, - "learning_rate": 0.0001999959037242676, - "loss": 46.0, - "step": 17901 - }, - { - "epoch": 2.8829663029912638, - "grad_norm": 0.0033378845546394587, - "learning_rate": 0.00019999590326634324, - "loss": 46.0, - "step": 17902 - }, - { - "epoch": 2.8831273400700512, - "grad_norm": 0.002852408913895488, - "learning_rate": 0.00019999590280839332, - "loss": 46.0, - "step": 17903 - }, - { - "epoch": 2.8832883771488387, - "grad_norm": 0.0057228063233196735, - "learning_rate": 0.0001999959023504178, - "loss": 46.0, - "step": 17904 - }, - { - "epoch": 2.883449414227626, - "grad_norm": 0.0023409484419971704, - "learning_rate": 0.00019999590189241664, - "loss": 46.0, - "step": 17905 - }, - { - "epoch": 2.883610451306413, - "grad_norm": 0.00564181013032794, - "learning_rate": 0.00019999590143438993, - "loss": 46.0, - "step": 17906 - }, - { - "epoch": 2.8837714883852006, - "grad_norm": 0.009571834467351437, - "learning_rate": 0.0001999959009763376, - "loss": 46.0, - "step": 17907 - }, - { - "epoch": 2.883932525463988, - "grad_norm": 0.002726753009483218, - "learning_rate": 0.00019999590051825972, - "loss": 46.0, - "step": 17908 - }, - { - "epoch": 2.8840935625427755, - "grad_norm": 0.003085640724748373, - "learning_rate": 0.0001999959000601562, - "loss": 46.0, - "step": 17909 - }, - { - "epoch": 2.884254599621563, - "grad_norm": 0.007829601876437664, - "learning_rate": 0.0001999958996020271, - "loss": 46.0, - "step": 17910 - }, - { - "epoch": 2.8844156367003504, - "grad_norm": 0.0019419188611209393, - "learning_rate": 0.0001999958991438724, - "loss": 46.0, - "step": 17911 - }, - { - "epoch": 2.8845766737791374, - "grad_norm": 0.015280799008905888, - "learning_rate": 0.00019999589868569212, - "loss": 46.0, - "step": 17912 - }, - { - "epoch": 2.884737710857925, - "grad_norm": 0.009347494691610336, - "learning_rate": 0.00019999589822748624, - "loss": 46.0, - "step": 17913 - }, - { - "epoch": 2.8848987479367123, - "grad_norm": 0.004690299276262522, - "learning_rate": 0.00019999589776925478, - "loss": 46.0, - "step": 17914 - }, - { - "epoch": 2.8850597850154998, - "grad_norm": 0.00586359528824687, - "learning_rate": 0.0001999958973109977, - "loss": 46.0, - "step": 17915 - }, - { - "epoch": 2.885220822094287, - "grad_norm": 0.004095832817256451, - "learning_rate": 0.00019999589685271504, - "loss": 46.0, - "step": 17916 - }, - { - "epoch": 2.8853818591730747, - "grad_norm": 0.018502598628401756, - "learning_rate": 0.00019999589639440682, - "loss": 46.0, - "step": 17917 - }, - { - "epoch": 2.885542896251862, - "grad_norm": 0.001328574144281447, - "learning_rate": 0.00019999589593607296, - "loss": 46.0, - "step": 17918 - }, - { - "epoch": 2.8857039333306496, - "grad_norm": 0.01616380736231804, - "learning_rate": 0.0001999958954777135, - "loss": 46.0, - "step": 17919 - }, - { - "epoch": 2.885864970409437, - "grad_norm": 0.0033234127331525087, - "learning_rate": 0.00019999589501932847, - "loss": 46.0, - "step": 17920 - }, - { - "epoch": 2.886026007488224, - "grad_norm": 0.0006904685287736356, - "learning_rate": 0.00019999589456091784, - "loss": 46.0, - "step": 17921 - }, - { - "epoch": 2.8861870445670115, - "grad_norm": 0.0013475097948685288, - "learning_rate": 0.00019999589410248163, - "loss": 46.0, - "step": 17922 - }, - { - "epoch": 2.886348081645799, - "grad_norm": 0.003214400727301836, - "learning_rate": 0.0001999958936440198, - "loss": 46.0, - "step": 17923 - }, - { - "epoch": 2.8865091187245864, - "grad_norm": 0.009359680116176605, - "learning_rate": 0.0001999958931855324, - "loss": 46.0, - "step": 17924 - }, - { - "epoch": 2.886670155803374, - "grad_norm": 0.0005583578022196889, - "learning_rate": 0.0001999958927270194, - "loss": 46.0, - "step": 17925 - }, - { - "epoch": 2.8868311928821613, - "grad_norm": 0.0026180287823081017, - "learning_rate": 0.0001999958922684808, - "loss": 46.0, - "step": 17926 - }, - { - "epoch": 2.8869922299609483, - "grad_norm": 0.001509267371147871, - "learning_rate": 0.0001999958918099166, - "loss": 46.0, - "step": 17927 - }, - { - "epoch": 2.8871532670397357, - "grad_norm": 0.0015041035367175937, - "learning_rate": 0.00019999589135132683, - "loss": 46.0, - "step": 17928 - }, - { - "epoch": 2.887314304118523, - "grad_norm": 0.004807292949408293, - "learning_rate": 0.00019999589089271145, - "loss": 46.0, - "step": 17929 - }, - { - "epoch": 2.8874753411973106, - "grad_norm": 0.006815982982516289, - "learning_rate": 0.00019999589043407046, - "loss": 46.0, - "step": 17930 - }, - { - "epoch": 2.887636378276098, - "grad_norm": 0.0037054154090583324, - "learning_rate": 0.00019999588997540388, - "loss": 46.0, - "step": 17931 - }, - { - "epoch": 2.8877974153548855, - "grad_norm": 0.002724493620917201, - "learning_rate": 0.00019999588951671174, - "loss": 46.0, - "step": 17932 - }, - { - "epoch": 2.887958452433673, - "grad_norm": 0.0028292504139244556, - "learning_rate": 0.00019999588905799398, - "loss": 46.0, - "step": 17933 - }, - { - "epoch": 2.8881194895124604, - "grad_norm": 0.002641974249854684, - "learning_rate": 0.00019999588859925062, - "loss": 46.0, - "step": 17934 - }, - { - "epoch": 2.888280526591248, - "grad_norm": 0.0010813666740432382, - "learning_rate": 0.0001999958881404817, - "loss": 46.0, - "step": 17935 - }, - { - "epoch": 2.888441563670035, - "grad_norm": 0.003969601821154356, - "learning_rate": 0.00019999588768168715, - "loss": 46.0, - "step": 17936 - }, - { - "epoch": 2.8886026007488224, - "grad_norm": 0.0051483758725225925, - "learning_rate": 0.00019999588722286702, - "loss": 46.0, - "step": 17937 - }, - { - "epoch": 2.88876363782761, - "grad_norm": 0.0006969957030378282, - "learning_rate": 0.0001999958867640213, - "loss": 46.0, - "step": 17938 - }, - { - "epoch": 2.8889246749063973, - "grad_norm": 0.0020348557736724615, - "learning_rate": 0.00019999588630514997, - "loss": 46.0, - "step": 17939 - }, - { - "epoch": 2.8890857119851847, - "grad_norm": 0.005832456983625889, - "learning_rate": 0.00019999588584625305, - "loss": 46.0, - "step": 17940 - }, - { - "epoch": 2.8892467490639717, - "grad_norm": 0.008833184838294983, - "learning_rate": 0.00019999588538733055, - "loss": 46.0, - "step": 17941 - }, - { - "epoch": 2.889407786142759, - "grad_norm": 0.003829443361610174, - "learning_rate": 0.00019999588492838246, - "loss": 46.0, - "step": 17942 - }, - { - "epoch": 2.8895688232215466, - "grad_norm": 0.0023214484099298716, - "learning_rate": 0.00019999588446940875, - "loss": 46.0, - "step": 17943 - }, - { - "epoch": 2.889729860300334, - "grad_norm": 0.006406680215150118, - "learning_rate": 0.00019999588401040948, - "loss": 46.0, - "step": 17944 - }, - { - "epoch": 2.8898908973791215, - "grad_norm": 0.0021372276823967695, - "learning_rate": 0.0001999958835513846, - "loss": 46.0, - "step": 17945 - }, - { - "epoch": 2.890051934457909, - "grad_norm": 0.004949562717229128, - "learning_rate": 0.00019999588309233413, - "loss": 46.0, - "step": 17946 - }, - { - "epoch": 2.8902129715366964, - "grad_norm": 0.005079805850982666, - "learning_rate": 0.00019999588263325805, - "loss": 46.0, - "step": 17947 - }, - { - "epoch": 2.890374008615484, - "grad_norm": 0.0028896895237267017, - "learning_rate": 0.0001999958821741564, - "loss": 46.0, - "step": 17948 - }, - { - "epoch": 2.8905350456942713, - "grad_norm": 0.0009584093349985778, - "learning_rate": 0.00019999588171502915, - "loss": 46.0, - "step": 17949 - }, - { - "epoch": 2.8906960827730583, - "grad_norm": 0.0053268177434802055, - "learning_rate": 0.00019999588125587628, - "loss": 46.0, - "step": 17950 - }, - { - "epoch": 2.890857119851846, - "grad_norm": 0.001871272106654942, - "learning_rate": 0.00019999588079669785, - "loss": 46.0, - "step": 17951 - }, - { - "epoch": 2.8910181569306332, - "grad_norm": 0.005613320041447878, - "learning_rate": 0.0001999958803374938, - "loss": 46.0, - "step": 17952 - }, - { - "epoch": 2.8911791940094207, - "grad_norm": 0.006312674377113581, - "learning_rate": 0.00019999587987826417, - "loss": 46.0, - "step": 17953 - }, - { - "epoch": 2.891340231088208, - "grad_norm": 0.008954005315899849, - "learning_rate": 0.00019999587941900898, - "loss": 46.0, - "step": 17954 - }, - { - "epoch": 2.8915012681669956, - "grad_norm": 0.0011943741701543331, - "learning_rate": 0.00019999587895972815, - "loss": 46.0, - "step": 17955 - }, - { - "epoch": 2.8916623052457826, - "grad_norm": 0.008451980538666248, - "learning_rate": 0.00019999587850042173, - "loss": 46.0, - "step": 17956 - }, - { - "epoch": 2.89182334232457, - "grad_norm": 0.0007649046019650996, - "learning_rate": 0.00019999587804108972, - "loss": 46.0, - "step": 17957 - }, - { - "epoch": 2.8919843794033575, - "grad_norm": 0.0014025458367541432, - "learning_rate": 0.0001999958775817321, - "loss": 46.0, - "step": 17958 - }, - { - "epoch": 2.892145416482145, - "grad_norm": 0.0024657694157212973, - "learning_rate": 0.0001999958771223489, - "loss": 46.0, - "step": 17959 - }, - { - "epoch": 2.8923064535609324, - "grad_norm": 0.002833641367033124, - "learning_rate": 0.00019999587666294012, - "loss": 46.0, - "step": 17960 - }, - { - "epoch": 2.89246749063972, - "grad_norm": 0.002567199757322669, - "learning_rate": 0.00019999587620350576, - "loss": 46.0, - "step": 17961 - }, - { - "epoch": 2.8926285277185073, - "grad_norm": 0.004578740336000919, - "learning_rate": 0.0001999958757440458, - "loss": 46.0, - "step": 17962 - }, - { - "epoch": 2.8927895647972948, - "grad_norm": 0.0018670371500775218, - "learning_rate": 0.0001999958752845602, - "loss": 46.0, - "step": 17963 - }, - { - "epoch": 2.892950601876082, - "grad_norm": 0.001153697376139462, - "learning_rate": 0.00019999587482504906, - "loss": 46.0, - "step": 17964 - }, - { - "epoch": 2.8931116389548692, - "grad_norm": 0.001426900620572269, - "learning_rate": 0.00019999587436551227, - "loss": 46.0, - "step": 17965 - }, - { - "epoch": 2.8932726760336567, - "grad_norm": 0.008064872585237026, - "learning_rate": 0.00019999587390594995, - "loss": 46.0, - "step": 17966 - }, - { - "epoch": 2.893433713112444, - "grad_norm": 0.0007149272714741528, - "learning_rate": 0.000199995873446362, - "loss": 46.0, - "step": 17967 - }, - { - "epoch": 2.8935947501912316, - "grad_norm": 0.007296116556972265, - "learning_rate": 0.00019999587298674847, - "loss": 46.0, - "step": 17968 - }, - { - "epoch": 2.893755787270019, - "grad_norm": 0.009523754008114338, - "learning_rate": 0.0001999958725271093, - "loss": 46.0, - "step": 17969 - }, - { - "epoch": 2.893916824348806, - "grad_norm": 0.0011613063979893923, - "learning_rate": 0.00019999587206744458, - "loss": 46.0, - "step": 17970 - }, - { - "epoch": 2.8940778614275935, - "grad_norm": 0.004892035387456417, - "learning_rate": 0.00019999587160775427, - "loss": 46.0, - "step": 17971 - }, - { - "epoch": 2.894238898506381, - "grad_norm": 0.001188953290693462, - "learning_rate": 0.00019999587114803835, - "loss": 46.0, - "step": 17972 - }, - { - "epoch": 2.8943999355851684, - "grad_norm": 0.006213276647031307, - "learning_rate": 0.00019999587068829686, - "loss": 46.0, - "step": 17973 - }, - { - "epoch": 2.894560972663956, - "grad_norm": 0.005128875840455294, - "learning_rate": 0.00019999587022852974, - "loss": 46.0, - "step": 17974 - }, - { - "epoch": 2.8947220097427433, - "grad_norm": 0.0022443195339292288, - "learning_rate": 0.00019999586976873705, - "loss": 46.0, - "step": 17975 - }, - { - "epoch": 2.8948830468215307, - "grad_norm": 0.005779961124062538, - "learning_rate": 0.00019999586930891878, - "loss": 46.0, - "step": 17976 - }, - { - "epoch": 2.895044083900318, - "grad_norm": 0.002377884928137064, - "learning_rate": 0.0001999958688490749, - "loss": 46.0, - "step": 17977 - }, - { - "epoch": 2.8952051209791057, - "grad_norm": 0.0014512325869873166, - "learning_rate": 0.00019999586838920542, - "loss": 46.0, - "step": 17978 - }, - { - "epoch": 2.895366158057893, - "grad_norm": 0.0014586142497137189, - "learning_rate": 0.00019999586792931036, - "loss": 46.0, - "step": 17979 - }, - { - "epoch": 2.89552719513668, - "grad_norm": 0.002845063805580139, - "learning_rate": 0.00019999586746938968, - "loss": 46.0, - "step": 17980 - }, - { - "epoch": 2.8956882322154676, - "grad_norm": 0.005690551362931728, - "learning_rate": 0.00019999586700944342, - "loss": 46.0, - "step": 17981 - }, - { - "epoch": 2.895849269294255, - "grad_norm": 0.004403562750667334, - "learning_rate": 0.00019999586654947157, - "loss": 46.0, - "step": 17982 - }, - { - "epoch": 2.8960103063730425, - "grad_norm": 0.00412313686683774, - "learning_rate": 0.0001999958660894741, - "loss": 46.0, - "step": 17983 - }, - { - "epoch": 2.89617134345183, - "grad_norm": 0.0013536461628973484, - "learning_rate": 0.00019999586562945108, - "loss": 46.0, - "step": 17984 - }, - { - "epoch": 2.896332380530617, - "grad_norm": 0.003979323897510767, - "learning_rate": 0.00019999586516940244, - "loss": 46.0, - "step": 17985 - }, - { - "epoch": 2.8964934176094044, - "grad_norm": 0.002008540090173483, - "learning_rate": 0.00019999586470932821, - "loss": 46.0, - "step": 17986 - }, - { - "epoch": 2.896654454688192, - "grad_norm": 0.0048518432304263115, - "learning_rate": 0.0001999958642492284, - "loss": 46.0, - "step": 17987 - }, - { - "epoch": 2.8968154917669793, - "grad_norm": 0.003518218407407403, - "learning_rate": 0.00019999586378910297, - "loss": 46.0, - "step": 17988 - }, - { - "epoch": 2.8969765288457667, - "grad_norm": 0.0016916838940232992, - "learning_rate": 0.000199995863328952, - "loss": 46.0, - "step": 17989 - }, - { - "epoch": 2.897137565924554, - "grad_norm": 0.002047501038759947, - "learning_rate": 0.00019999586286877539, - "loss": 46.0, - "step": 17990 - }, - { - "epoch": 2.8972986030033416, - "grad_norm": 0.012943160720169544, - "learning_rate": 0.00019999586240857317, - "loss": 46.0, - "step": 17991 - }, - { - "epoch": 2.897459640082129, - "grad_norm": 0.013662423938512802, - "learning_rate": 0.0001999958619483454, - "loss": 46.0, - "step": 17992 - }, - { - "epoch": 2.8976206771609165, - "grad_norm": 0.0072461869567632675, - "learning_rate": 0.000199995861488092, - "loss": 46.0, - "step": 17993 - }, - { - "epoch": 2.8977817142397035, - "grad_norm": 0.002726959530264139, - "learning_rate": 0.00019999586102781303, - "loss": 46.0, - "step": 17994 - }, - { - "epoch": 2.897942751318491, - "grad_norm": 0.0014070110628381371, - "learning_rate": 0.0001999958605675085, - "loss": 46.0, - "step": 17995 - }, - { - "epoch": 2.8981037883972784, - "grad_norm": 0.006584594491869211, - "learning_rate": 0.0001999958601071783, - "loss": 46.0, - "step": 17996 - }, - { - "epoch": 2.898264825476066, - "grad_norm": 0.004790824837982655, - "learning_rate": 0.00019999585964682254, - "loss": 46.0, - "step": 17997 - }, - { - "epoch": 2.8984258625548533, - "grad_norm": 0.0011857866775244474, - "learning_rate": 0.00019999585918644116, - "loss": 46.0, - "step": 17998 - }, - { - "epoch": 2.898586899633641, - "grad_norm": 0.002769236220046878, - "learning_rate": 0.00019999585872603422, - "loss": 46.0, - "step": 17999 - }, - { - "epoch": 2.898747936712428, - "grad_norm": 0.0023509215097874403, - "learning_rate": 0.0001999958582656017, - "loss": 46.0, - "step": 18000 - }, - { - "epoch": 2.8989089737912153, - "grad_norm": 0.008861015550792217, - "learning_rate": 0.00019999585780514355, - "loss": 46.0, - "step": 18001 - }, - { - "epoch": 2.8990700108700027, - "grad_norm": 0.002614819910377264, - "learning_rate": 0.00019999585734465982, - "loss": 46.0, - "step": 18002 - }, - { - "epoch": 2.89923104794879, - "grad_norm": 0.0011969548650085926, - "learning_rate": 0.0001999958568841505, - "loss": 46.0, - "step": 18003 - }, - { - "epoch": 2.8993920850275776, - "grad_norm": 0.0031611144077032804, - "learning_rate": 0.0001999958564236156, - "loss": 46.0, - "step": 18004 - }, - { - "epoch": 2.899553122106365, - "grad_norm": 0.003020546166226268, - "learning_rate": 0.00019999585596305509, - "loss": 46.0, - "step": 18005 - }, - { - "epoch": 2.8997141591851525, - "grad_norm": 0.0036904688458889723, - "learning_rate": 0.00019999585550246898, - "loss": 46.0, - "step": 18006 - }, - { - "epoch": 2.89987519626394, - "grad_norm": 0.001113614533096552, - "learning_rate": 0.0001999958550418573, - "loss": 46.0, - "step": 18007 - }, - { - "epoch": 2.9000362333427274, - "grad_norm": 0.003312904154881835, - "learning_rate": 0.00019999585458121998, - "loss": 46.0, - "step": 18008 - }, - { - "epoch": 2.9001972704215144, - "grad_norm": 0.004726111423224211, - "learning_rate": 0.0001999958541205571, - "loss": 46.0, - "step": 18009 - }, - { - "epoch": 2.900358307500302, - "grad_norm": 0.0022232099436223507, - "learning_rate": 0.00019999585365986864, - "loss": 46.0, - "step": 18010 - }, - { - "epoch": 2.9005193445790893, - "grad_norm": 0.008369456976652145, - "learning_rate": 0.00019999585319915454, - "loss": 46.0, - "step": 18011 - }, - { - "epoch": 2.900680381657877, - "grad_norm": 0.006913413759320974, - "learning_rate": 0.00019999585273841489, - "loss": 46.0, - "step": 18012 - }, - { - "epoch": 2.9008414187366642, - "grad_norm": 0.005468557123094797, - "learning_rate": 0.00019999585227764964, - "loss": 46.0, - "step": 18013 - }, - { - "epoch": 2.9010024558154512, - "grad_norm": 0.000931234797462821, - "learning_rate": 0.0001999958518168588, - "loss": 46.0, - "step": 18014 - }, - { - "epoch": 2.9011634928942387, - "grad_norm": 0.0005673962878063321, - "learning_rate": 0.00019999585135604232, - "loss": 46.0, - "step": 18015 - }, - { - "epoch": 2.901324529973026, - "grad_norm": 0.0006478444556705654, - "learning_rate": 0.00019999585089520029, - "loss": 46.0, - "step": 18016 - }, - { - "epoch": 2.9014855670518136, - "grad_norm": 0.0009890447836369276, - "learning_rate": 0.00019999585043433267, - "loss": 46.0, - "step": 18017 - }, - { - "epoch": 2.901646604130601, - "grad_norm": 0.004521854687482119, - "learning_rate": 0.00019999584997343944, - "loss": 46.0, - "step": 18018 - }, - { - "epoch": 2.9018076412093885, - "grad_norm": 0.0035858110059052706, - "learning_rate": 0.00019999584951252062, - "loss": 46.0, - "step": 18019 - }, - { - "epoch": 2.901968678288176, - "grad_norm": 0.005323626101016998, - "learning_rate": 0.00019999584905157618, - "loss": 46.0, - "step": 18020 - }, - { - "epoch": 2.9021297153669634, - "grad_norm": 0.003042892087250948, - "learning_rate": 0.0001999958485906062, - "loss": 46.0, - "step": 18021 - }, - { - "epoch": 2.902290752445751, - "grad_norm": 0.001452945638448, - "learning_rate": 0.00019999584812961055, - "loss": 46.0, - "step": 18022 - }, - { - "epoch": 2.902451789524538, - "grad_norm": 0.0007284888415597379, - "learning_rate": 0.00019999584766858939, - "loss": 46.0, - "step": 18023 - }, - { - "epoch": 2.9026128266033253, - "grad_norm": 0.007866005413234234, - "learning_rate": 0.00019999584720754258, - "loss": 46.0, - "step": 18024 - }, - { - "epoch": 2.9027738636821128, - "grad_norm": 0.0077062686905264854, - "learning_rate": 0.0001999958467464702, - "loss": 46.0, - "step": 18025 - }, - { - "epoch": 2.9029349007609, - "grad_norm": 0.0011744254734367132, - "learning_rate": 0.00019999584628537222, - "loss": 46.0, - "step": 18026 - }, - { - "epoch": 2.9030959378396877, - "grad_norm": 0.002215759828686714, - "learning_rate": 0.00019999584582424865, - "loss": 46.0, - "step": 18027 - }, - { - "epoch": 2.903256974918475, - "grad_norm": 0.004121643025428057, - "learning_rate": 0.00019999584536309947, - "loss": 46.0, - "step": 18028 - }, - { - "epoch": 2.903418011997262, - "grad_norm": 0.0020589560735970736, - "learning_rate": 0.00019999584490192472, - "loss": 46.0, - "step": 18029 - }, - { - "epoch": 2.9035790490760496, - "grad_norm": 0.002641829429194331, - "learning_rate": 0.00019999584444072436, - "loss": 46.0, - "step": 18030 - }, - { - "epoch": 2.903740086154837, - "grad_norm": 0.0031198933720588684, - "learning_rate": 0.00019999584397949842, - "loss": 46.0, - "step": 18031 - }, - { - "epoch": 2.9039011232336245, - "grad_norm": 0.00571768032386899, - "learning_rate": 0.00019999584351824688, - "loss": 46.0, - "step": 18032 - }, - { - "epoch": 2.904062160312412, - "grad_norm": 0.004765548277646303, - "learning_rate": 0.00019999584305696973, - "loss": 46.0, - "step": 18033 - }, - { - "epoch": 2.9042231973911994, - "grad_norm": 0.0016701009590178728, - "learning_rate": 0.000199995842595667, - "loss": 46.0, - "step": 18034 - }, - { - "epoch": 2.904384234469987, - "grad_norm": 0.0026695849373936653, - "learning_rate": 0.00019999584213433868, - "loss": 46.0, - "step": 18035 - }, - { - "epoch": 2.9045452715487743, - "grad_norm": 0.0013127813581377268, - "learning_rate": 0.00019999584167298477, - "loss": 46.0, - "step": 18036 - }, - { - "epoch": 2.9047063086275617, - "grad_norm": 0.008485066704452038, - "learning_rate": 0.00019999584121160527, - "loss": 46.0, - "step": 18037 - }, - { - "epoch": 2.9048673457063487, - "grad_norm": 0.0013082843506708741, - "learning_rate": 0.00019999584075020016, - "loss": 46.0, - "step": 18038 - }, - { - "epoch": 2.905028382785136, - "grad_norm": 0.0007086473633535206, - "learning_rate": 0.00019999584028876946, - "loss": 46.0, - "step": 18039 - }, - { - "epoch": 2.9051894198639236, - "grad_norm": 0.0026978799141943455, - "learning_rate": 0.00019999583982731315, - "loss": 46.0, - "step": 18040 - }, - { - "epoch": 2.905350456942711, - "grad_norm": 0.0029192203655838966, - "learning_rate": 0.00019999583936583127, - "loss": 46.0, - "step": 18041 - }, - { - "epoch": 2.9055114940214986, - "grad_norm": 0.0014519410906359553, - "learning_rate": 0.00019999583890432381, - "loss": 46.0, - "step": 18042 - }, - { - "epoch": 2.905672531100286, - "grad_norm": 0.0009246626868844032, - "learning_rate": 0.00019999583844279074, - "loss": 46.0, - "step": 18043 - }, - { - "epoch": 2.905833568179073, - "grad_norm": 0.002123977057635784, - "learning_rate": 0.00019999583798123205, - "loss": 46.0, - "step": 18044 - }, - { - "epoch": 2.9059946052578605, - "grad_norm": 0.014459672383964062, - "learning_rate": 0.00019999583751964778, - "loss": 46.0, - "step": 18045 - }, - { - "epoch": 2.906155642336648, - "grad_norm": 0.0011551082134246826, - "learning_rate": 0.00019999583705803794, - "loss": 46.0, - "step": 18046 - }, - { - "epoch": 2.9063166794154354, - "grad_norm": 0.0029360789339989424, - "learning_rate": 0.0001999958365964025, - "loss": 46.0, - "step": 18047 - }, - { - "epoch": 2.906477716494223, - "grad_norm": 0.0042358869686722755, - "learning_rate": 0.00019999583613474145, - "loss": 46.0, - "step": 18048 - }, - { - "epoch": 2.9066387535730103, - "grad_norm": 0.001116848667152226, - "learning_rate": 0.00019999583567305483, - "loss": 46.0, - "step": 18049 - }, - { - "epoch": 2.9067997906517977, - "grad_norm": 0.001232419046573341, - "learning_rate": 0.00019999583521134256, - "loss": 46.0, - "step": 18050 - }, - { - "epoch": 2.906960827730585, - "grad_norm": 0.010546743869781494, - "learning_rate": 0.00019999583474960474, - "loss": 46.0, - "step": 18051 - }, - { - "epoch": 2.9071218648093726, - "grad_norm": 0.0016749532660469413, - "learning_rate": 0.00019999583428784132, - "loss": 46.0, - "step": 18052 - }, - { - "epoch": 2.9072829018881596, - "grad_norm": 0.003740352811291814, - "learning_rate": 0.00019999583382605232, - "loss": 46.0, - "step": 18053 - }, - { - "epoch": 2.907443938966947, - "grad_norm": 0.0039942823350429535, - "learning_rate": 0.00019999583336423774, - "loss": 46.0, - "step": 18054 - }, - { - "epoch": 2.9076049760457345, - "grad_norm": 0.0011449663434177637, - "learning_rate": 0.0001999958329023975, - "loss": 46.0, - "step": 18055 - }, - { - "epoch": 2.907766013124522, - "grad_norm": 0.0022023271303623915, - "learning_rate": 0.00019999583244053172, - "loss": 46.0, - "step": 18056 - }, - { - "epoch": 2.9079270502033094, - "grad_norm": 0.0025111560244113207, - "learning_rate": 0.00019999583197864034, - "loss": 46.0, - "step": 18057 - }, - { - "epoch": 2.9080880872820964, - "grad_norm": 0.0006832068902440369, - "learning_rate": 0.00019999583151672335, - "loss": 46.0, - "step": 18058 - }, - { - "epoch": 2.908249124360884, - "grad_norm": 0.004509684629738331, - "learning_rate": 0.0001999958310547808, - "loss": 46.0, - "step": 18059 - }, - { - "epoch": 2.9084101614396713, - "grad_norm": 0.004035685211420059, - "learning_rate": 0.0001999958305928126, - "loss": 46.0, - "step": 18060 - }, - { - "epoch": 2.908571198518459, - "grad_norm": 0.0012917224084958434, - "learning_rate": 0.00019999583013081886, - "loss": 46.0, - "step": 18061 - }, - { - "epoch": 2.9087322355972463, - "grad_norm": 0.012897004373371601, - "learning_rate": 0.0001999958296687995, - "loss": 46.0, - "step": 18062 - }, - { - "epoch": 2.9088932726760337, - "grad_norm": 0.0030188795644789934, - "learning_rate": 0.00019999582920675451, - "loss": 46.0, - "step": 18063 - }, - { - "epoch": 2.909054309754821, - "grad_norm": 0.0007855913718231022, - "learning_rate": 0.000199995828744684, - "loss": 46.0, - "step": 18064 - }, - { - "epoch": 2.9092153468336086, - "grad_norm": 0.005138868000358343, - "learning_rate": 0.00019999582828258787, - "loss": 46.0, - "step": 18065 - }, - { - "epoch": 2.909376383912396, - "grad_norm": 0.00433430215343833, - "learning_rate": 0.00019999582782046613, - "loss": 46.0, - "step": 18066 - }, - { - "epoch": 2.909537420991183, - "grad_norm": 0.005203371401876211, - "learning_rate": 0.0001999958273583188, - "loss": 46.0, - "step": 18067 - }, - { - "epoch": 2.9096984580699705, - "grad_norm": 0.005903835408389568, - "learning_rate": 0.0001999958268961459, - "loss": 46.0, - "step": 18068 - }, - { - "epoch": 2.909859495148758, - "grad_norm": 0.0006122100166976452, - "learning_rate": 0.00019999582643394738, - "loss": 46.0, - "step": 18069 - }, - { - "epoch": 2.9100205322275454, - "grad_norm": 0.003275372087955475, - "learning_rate": 0.00019999582597172327, - "loss": 46.0, - "step": 18070 - }, - { - "epoch": 2.910181569306333, - "grad_norm": 0.0025616525672376156, - "learning_rate": 0.00019999582550947356, - "loss": 46.0, - "step": 18071 - }, - { - "epoch": 2.9103426063851203, - "grad_norm": 0.0010053514270111918, - "learning_rate": 0.00019999582504719827, - "loss": 46.0, - "step": 18072 - }, - { - "epoch": 2.9105036434639073, - "grad_norm": 0.006247455719858408, - "learning_rate": 0.0001999958245848974, - "loss": 46.0, - "step": 18073 - }, - { - "epoch": 2.910664680542695, - "grad_norm": 0.01034959964454174, - "learning_rate": 0.0001999958241225709, - "loss": 46.0, - "step": 18074 - }, - { - "epoch": 2.9108257176214822, - "grad_norm": 0.0024148093070834875, - "learning_rate": 0.00019999582366021882, - "loss": 46.0, - "step": 18075 - }, - { - "epoch": 2.9109867547002697, - "grad_norm": 0.0012868557823821902, - "learning_rate": 0.00019999582319784115, - "loss": 46.0, - "step": 18076 - }, - { - "epoch": 2.911147791779057, - "grad_norm": 0.009551554918289185, - "learning_rate": 0.0001999958227354379, - "loss": 46.0, - "step": 18077 - }, - { - "epoch": 2.9113088288578446, - "grad_norm": 0.012105186469852924, - "learning_rate": 0.00019999582227300903, - "loss": 46.0, - "step": 18078 - }, - { - "epoch": 2.911469865936632, - "grad_norm": 0.0015006529865786433, - "learning_rate": 0.0001999958218105546, - "loss": 46.0, - "step": 18079 - }, - { - "epoch": 2.9116309030154195, - "grad_norm": 0.0014158220728859305, - "learning_rate": 0.00019999582134807455, - "loss": 46.0, - "step": 18080 - }, - { - "epoch": 2.911791940094207, - "grad_norm": 0.0015673128655180335, - "learning_rate": 0.0001999958208855689, - "loss": 46.0, - "step": 18081 - }, - { - "epoch": 2.911952977172994, - "grad_norm": 0.002433421555906534, - "learning_rate": 0.00019999582042303765, - "loss": 46.0, - "step": 18082 - }, - { - "epoch": 2.9121140142517814, - "grad_norm": 0.00192752736620605, - "learning_rate": 0.00019999581996048085, - "loss": 46.0, - "step": 18083 - }, - { - "epoch": 2.912275051330569, - "grad_norm": 0.0029742789920419455, - "learning_rate": 0.00019999581949789843, - "loss": 46.0, - "step": 18084 - }, - { - "epoch": 2.9124360884093563, - "grad_norm": 0.006985408719629049, - "learning_rate": 0.00019999581903529042, - "loss": 46.0, - "step": 18085 - }, - { - "epoch": 2.9125971254881438, - "grad_norm": 0.0010164888808503747, - "learning_rate": 0.0001999958185726568, - "loss": 46.0, - "step": 18086 - }, - { - "epoch": 2.9127581625669308, - "grad_norm": 0.009255086071789265, - "learning_rate": 0.0001999958181099976, - "loss": 46.0, - "step": 18087 - }, - { - "epoch": 2.912919199645718, - "grad_norm": 0.004060061648488045, - "learning_rate": 0.00019999581764731282, - "loss": 46.0, - "step": 18088 - }, - { - "epoch": 2.9130802367245057, - "grad_norm": 0.0021101615857332945, - "learning_rate": 0.00019999581718460241, - "loss": 46.0, - "step": 18089 - }, - { - "epoch": 2.913241273803293, - "grad_norm": 0.0030538870487362146, - "learning_rate": 0.00019999581672186642, - "loss": 46.0, - "step": 18090 - }, - { - "epoch": 2.9134023108820806, - "grad_norm": 0.002641197992488742, - "learning_rate": 0.00019999581625910486, - "loss": 46.0, - "step": 18091 - }, - { - "epoch": 2.913563347960868, - "grad_norm": 0.002696691546589136, - "learning_rate": 0.00019999581579631766, - "loss": 46.0, - "step": 18092 - }, - { - "epoch": 2.9137243850396555, - "grad_norm": 0.0015544277848675847, - "learning_rate": 0.0001999958153335049, - "loss": 46.0, - "step": 18093 - }, - { - "epoch": 2.913885422118443, - "grad_norm": 0.0009993682615458965, - "learning_rate": 0.00019999581487066656, - "loss": 46.0, - "step": 18094 - }, - { - "epoch": 2.9140464591972304, - "grad_norm": 0.005122416187077761, - "learning_rate": 0.00019999581440780263, - "loss": 46.0, - "step": 18095 - }, - { - "epoch": 2.914207496276018, - "grad_norm": 0.0014448229921981692, - "learning_rate": 0.00019999581394491306, - "loss": 46.0, - "step": 18096 - }, - { - "epoch": 2.914368533354805, - "grad_norm": 0.0014858562499284744, - "learning_rate": 0.00019999581348199792, - "loss": 46.0, - "step": 18097 - }, - { - "epoch": 2.9145295704335923, - "grad_norm": 0.002398568671196699, - "learning_rate": 0.0001999958130190572, - "loss": 46.0, - "step": 18098 - }, - { - "epoch": 2.9146906075123797, - "grad_norm": 0.0005177139537408948, - "learning_rate": 0.00019999581255609087, - "loss": 46.0, - "step": 18099 - }, - { - "epoch": 2.914851644591167, - "grad_norm": 0.010388285852968693, - "learning_rate": 0.00019999581209309894, - "loss": 46.0, - "step": 18100 - }, - { - "epoch": 2.9150126816699546, - "grad_norm": 0.012333924882113934, - "learning_rate": 0.00019999581163008143, - "loss": 46.0, - "step": 18101 - }, - { - "epoch": 2.9151737187487416, - "grad_norm": 0.005881312303245068, - "learning_rate": 0.0001999958111670383, - "loss": 46.0, - "step": 18102 - }, - { - "epoch": 2.915334755827529, - "grad_norm": 0.004993600770831108, - "learning_rate": 0.00019999581070396963, - "loss": 46.0, - "step": 18103 - }, - { - "epoch": 2.9154957929063166, - "grad_norm": 0.0024717652704566717, - "learning_rate": 0.00019999581024087533, - "loss": 46.0, - "step": 18104 - }, - { - "epoch": 2.915656829985104, - "grad_norm": 0.0033058105036616325, - "learning_rate": 0.00019999580977775544, - "loss": 46.0, - "step": 18105 - }, - { - "epoch": 2.9158178670638915, - "grad_norm": 0.002527159173041582, - "learning_rate": 0.00019999580931460997, - "loss": 46.0, - "step": 18106 - }, - { - "epoch": 2.915978904142679, - "grad_norm": 0.006367997732013464, - "learning_rate": 0.00019999580885143888, - "loss": 46.0, - "step": 18107 - }, - { - "epoch": 2.9161399412214664, - "grad_norm": 0.00848577730357647, - "learning_rate": 0.0001999958083882422, - "loss": 46.0, - "step": 18108 - }, - { - "epoch": 2.916300978300254, - "grad_norm": 0.0037274009082466364, - "learning_rate": 0.00019999580792501992, - "loss": 46.0, - "step": 18109 - }, - { - "epoch": 2.9164620153790413, - "grad_norm": 0.003142056055366993, - "learning_rate": 0.0001999958074617721, - "loss": 46.0, - "step": 18110 - }, - { - "epoch": 2.9166230524578283, - "grad_norm": 0.0006899714935570955, - "learning_rate": 0.00019999580699849864, - "loss": 46.0, - "step": 18111 - }, - { - "epoch": 2.9167840895366157, - "grad_norm": 0.0008837699424475431, - "learning_rate": 0.0001999958065351996, - "loss": 46.0, - "step": 18112 - }, - { - "epoch": 2.916945126615403, - "grad_norm": 0.009626165963709354, - "learning_rate": 0.00019999580607187495, - "loss": 46.0, - "step": 18113 - }, - { - "epoch": 2.9171061636941906, - "grad_norm": 0.014297491870820522, - "learning_rate": 0.0001999958056085247, - "loss": 46.0, - "step": 18114 - }, - { - "epoch": 2.917267200772978, - "grad_norm": 0.0028548925183713436, - "learning_rate": 0.0001999958051451489, - "loss": 46.0, - "step": 18115 - }, - { - "epoch": 2.9174282378517655, - "grad_norm": 0.006810382008552551, - "learning_rate": 0.0001999958046817475, - "loss": 46.0, - "step": 18116 - }, - { - "epoch": 2.9175892749305525, - "grad_norm": 0.005385892000049353, - "learning_rate": 0.00019999580421832045, - "loss": 46.0, - "step": 18117 - }, - { - "epoch": 2.91775031200934, - "grad_norm": 0.0019443599740043283, - "learning_rate": 0.00019999580375486785, - "loss": 46.0, - "step": 18118 - }, - { - "epoch": 2.9179113490881274, - "grad_norm": 0.003477794351056218, - "learning_rate": 0.00019999580329138966, - "loss": 46.0, - "step": 18119 - }, - { - "epoch": 2.918072386166915, - "grad_norm": 0.0009519299492239952, - "learning_rate": 0.00019999580282788583, - "loss": 46.0, - "step": 18120 - }, - { - "epoch": 2.9182334232457023, - "grad_norm": 0.005061762407422066, - "learning_rate": 0.00019999580236435645, - "loss": 46.0, - "step": 18121 - }, - { - "epoch": 2.91839446032449, - "grad_norm": 0.001759837963618338, - "learning_rate": 0.00019999580190080147, - "loss": 46.0, - "step": 18122 - }, - { - "epoch": 2.9185554974032772, - "grad_norm": 0.003119898959994316, - "learning_rate": 0.00019999580143722088, - "loss": 46.0, - "step": 18123 - }, - { - "epoch": 2.9187165344820647, - "grad_norm": 0.0008854255429469049, - "learning_rate": 0.0001999958009736147, - "loss": 46.0, - "step": 18124 - }, - { - "epoch": 2.918877571560852, - "grad_norm": 0.0008090348565019667, - "learning_rate": 0.00019999580050998294, - "loss": 46.0, - "step": 18125 - }, - { - "epoch": 2.919038608639639, - "grad_norm": 0.001469541690312326, - "learning_rate": 0.0001999958000463256, - "loss": 46.0, - "step": 18126 - }, - { - "epoch": 2.9191996457184266, - "grad_norm": 0.0010692086070775986, - "learning_rate": 0.00019999579958264265, - "loss": 46.0, - "step": 18127 - }, - { - "epoch": 2.919360682797214, - "grad_norm": 0.0030428345780819654, - "learning_rate": 0.0001999957991189341, - "loss": 46.0, - "step": 18128 - }, - { - "epoch": 2.9195217198760015, - "grad_norm": 0.0013786237686872482, - "learning_rate": 0.00019999579865519996, - "loss": 46.0, - "step": 18129 - }, - { - "epoch": 2.919682756954789, - "grad_norm": 0.00037617204361595213, - "learning_rate": 0.0001999957981914402, - "loss": 46.0, - "step": 18130 - }, - { - "epoch": 2.919843794033576, - "grad_norm": 0.0031854051630944014, - "learning_rate": 0.0001999957977276549, - "loss": 46.0, - "step": 18131 - }, - { - "epoch": 2.9200048311123634, - "grad_norm": 0.005881123710423708, - "learning_rate": 0.00019999579726384396, - "loss": 46.0, - "step": 18132 - }, - { - "epoch": 2.920165868191151, - "grad_norm": 0.010901257395744324, - "learning_rate": 0.00019999579680000747, - "loss": 46.0, - "step": 18133 - }, - { - "epoch": 2.9203269052699383, - "grad_norm": 0.0049719843082129955, - "learning_rate": 0.00019999579633614534, - "loss": 46.0, - "step": 18134 - }, - { - "epoch": 2.9204879423487258, - "grad_norm": 0.0030031991191208363, - "learning_rate": 0.00019999579587225762, - "loss": 46.0, - "step": 18135 - }, - { - "epoch": 2.9206489794275132, - "grad_norm": 0.0007713107625022531, - "learning_rate": 0.00019999579540834434, - "loss": 46.0, - "step": 18136 - }, - { - "epoch": 2.9208100165063007, - "grad_norm": 0.0035574648063629866, - "learning_rate": 0.00019999579494440543, - "loss": 46.0, - "step": 18137 - }, - { - "epoch": 2.920971053585088, - "grad_norm": 0.0011207188945263624, - "learning_rate": 0.00019999579448044097, - "loss": 46.0, - "step": 18138 - }, - { - "epoch": 2.9211320906638756, - "grad_norm": 0.004560600034892559, - "learning_rate": 0.00019999579401645088, - "loss": 46.0, - "step": 18139 - }, - { - "epoch": 2.9212931277426626, - "grad_norm": 0.001656700624153018, - "learning_rate": 0.0001999957935524352, - "loss": 46.0, - "step": 18140 - }, - { - "epoch": 2.92145416482145, - "grad_norm": 0.0032139443792402744, - "learning_rate": 0.00019999579308839396, - "loss": 46.0, - "step": 18141 - }, - { - "epoch": 2.9216152019002375, - "grad_norm": 0.001980012282729149, - "learning_rate": 0.00019999579262432708, - "loss": 46.0, - "step": 18142 - }, - { - "epoch": 2.921776238979025, - "grad_norm": 0.014282112009823322, - "learning_rate": 0.00019999579216023464, - "loss": 46.0, - "step": 18143 - }, - { - "epoch": 2.9219372760578124, - "grad_norm": 0.00427097175270319, - "learning_rate": 0.0001999957916961166, - "loss": 46.0, - "step": 18144 - }, - { - "epoch": 2.9220983131366, - "grad_norm": 0.006991812027990818, - "learning_rate": 0.00019999579123197294, - "loss": 46.0, - "step": 18145 - }, - { - "epoch": 2.922259350215387, - "grad_norm": 0.004101167898625135, - "learning_rate": 0.0001999957907678037, - "loss": 46.0, - "step": 18146 - }, - { - "epoch": 2.9224203872941743, - "grad_norm": 0.005737594328820705, - "learning_rate": 0.0001999957903036089, - "loss": 46.0, - "step": 18147 - }, - { - "epoch": 2.9225814243729618, - "grad_norm": 0.002010551281273365, - "learning_rate": 0.00019999578983938846, - "loss": 46.0, - "step": 18148 - }, - { - "epoch": 2.922742461451749, - "grad_norm": 0.0009590897825546563, - "learning_rate": 0.00019999578937514244, - "loss": 46.0, - "step": 18149 - }, - { - "epoch": 2.9229034985305367, - "grad_norm": 0.004255289677530527, - "learning_rate": 0.00019999578891087083, - "loss": 46.0, - "step": 18150 - }, - { - "epoch": 2.923064535609324, - "grad_norm": 0.005566461477428675, - "learning_rate": 0.00019999578844657364, - "loss": 46.0, - "step": 18151 - }, - { - "epoch": 2.9232255726881116, - "grad_norm": 0.00228495872579515, - "learning_rate": 0.00019999578798225083, - "loss": 46.0, - "step": 18152 - }, - { - "epoch": 2.923386609766899, - "grad_norm": 0.0022519587073475122, - "learning_rate": 0.0001999957875179024, - "loss": 46.0, - "step": 18153 - }, - { - "epoch": 2.9235476468456865, - "grad_norm": 0.0016503919614478946, - "learning_rate": 0.00019999578705352846, - "loss": 46.0, - "step": 18154 - }, - { - "epoch": 2.9237086839244735, - "grad_norm": 0.00110530573874712, - "learning_rate": 0.00019999578658912889, - "loss": 46.0, - "step": 18155 - }, - { - "epoch": 2.923869721003261, - "grad_norm": 0.002533811377361417, - "learning_rate": 0.0001999957861247037, - "loss": 46.0, - "step": 18156 - }, - { - "epoch": 2.9240307580820484, - "grad_norm": 0.0009799996623769403, - "learning_rate": 0.00019999578566025293, - "loss": 46.0, - "step": 18157 - }, - { - "epoch": 2.924191795160836, - "grad_norm": 0.001082068309187889, - "learning_rate": 0.00019999578519577657, - "loss": 46.0, - "step": 18158 - }, - { - "epoch": 2.9243528322396233, - "grad_norm": 0.005331976804882288, - "learning_rate": 0.0001999957847312746, - "loss": 46.0, - "step": 18159 - }, - { - "epoch": 2.9245138693184107, - "grad_norm": 0.002563070273026824, - "learning_rate": 0.00019999578426674707, - "loss": 46.0, - "step": 18160 - }, - { - "epoch": 2.9246749063971977, - "grad_norm": 0.0009754052152857184, - "learning_rate": 0.00019999578380219392, - "loss": 46.0, - "step": 18161 - }, - { - "epoch": 2.924835943475985, - "grad_norm": 0.002539311069995165, - "learning_rate": 0.0001999957833376152, - "loss": 46.0, - "step": 18162 - }, - { - "epoch": 2.9249969805547726, - "grad_norm": 0.0021239614579826593, - "learning_rate": 0.00019999578287301087, - "loss": 46.0, - "step": 18163 - }, - { - "epoch": 2.92515801763356, - "grad_norm": 0.0005640267627313733, - "learning_rate": 0.00019999578240838096, - "loss": 46.0, - "step": 18164 - }, - { - "epoch": 2.9253190547123475, - "grad_norm": 0.0017377551412209868, - "learning_rate": 0.00019999578194372544, - "loss": 46.0, - "step": 18165 - }, - { - "epoch": 2.925480091791135, - "grad_norm": 0.0019512108992785215, - "learning_rate": 0.0001999957814790443, - "loss": 46.0, - "step": 18166 - }, - { - "epoch": 2.9256411288699224, - "grad_norm": 0.00739320321008563, - "learning_rate": 0.0001999957810143376, - "loss": 46.0, - "step": 18167 - }, - { - "epoch": 2.92580216594871, - "grad_norm": 0.0009076702990569174, - "learning_rate": 0.0001999957805496053, - "loss": 46.0, - "step": 18168 - }, - { - "epoch": 2.9259632030274974, - "grad_norm": 0.002244069706648588, - "learning_rate": 0.0001999957800848474, - "loss": 46.0, - "step": 18169 - }, - { - "epoch": 2.9261242401062844, - "grad_norm": 0.0006175077287480235, - "learning_rate": 0.00019999577962006394, - "loss": 46.0, - "step": 18170 - }, - { - "epoch": 2.926285277185072, - "grad_norm": 0.0016929727280512452, - "learning_rate": 0.00019999577915525484, - "loss": 46.0, - "step": 18171 - }, - { - "epoch": 2.9264463142638593, - "grad_norm": 0.00583892734721303, - "learning_rate": 0.00019999577869042015, - "loss": 46.0, - "step": 18172 - }, - { - "epoch": 2.9266073513426467, - "grad_norm": 0.005278992000967264, - "learning_rate": 0.0001999957782255599, - "loss": 46.0, - "step": 18173 - }, - { - "epoch": 2.926768388421434, - "grad_norm": 0.00039456624654121697, - "learning_rate": 0.00019999577776067405, - "loss": 46.0, - "step": 18174 - }, - { - "epoch": 2.926929425500221, - "grad_norm": 0.0060783615335822105, - "learning_rate": 0.00019999577729576257, - "loss": 46.0, - "step": 18175 - }, - { - "epoch": 2.9270904625790086, - "grad_norm": 0.002739834599196911, - "learning_rate": 0.00019999577683082556, - "loss": 46.0, - "step": 18176 - }, - { - "epoch": 2.927251499657796, - "grad_norm": 0.0010865024523809552, - "learning_rate": 0.0001999957763658629, - "loss": 46.0, - "step": 18177 - }, - { - "epoch": 2.9274125367365835, - "grad_norm": 0.012886730954051018, - "learning_rate": 0.00019999577590087465, - "loss": 46.0, - "step": 18178 - }, - { - "epoch": 2.927573573815371, - "grad_norm": 0.0036216166336089373, - "learning_rate": 0.00019999577543586083, - "loss": 46.0, - "step": 18179 - }, - { - "epoch": 2.9277346108941584, - "grad_norm": 0.0008295559673570096, - "learning_rate": 0.00019999577497082141, - "loss": 46.0, - "step": 18180 - }, - { - "epoch": 2.927895647972946, - "grad_norm": 0.0019190424354746938, - "learning_rate": 0.0001999957745057564, - "loss": 46.0, - "step": 18181 - }, - { - "epoch": 2.9280566850517333, - "grad_norm": 0.00409006979316473, - "learning_rate": 0.00019999577404066578, - "loss": 46.0, - "step": 18182 - }, - { - "epoch": 2.928217722130521, - "grad_norm": 0.0017831475706771016, - "learning_rate": 0.00019999577357554958, - "loss": 46.0, - "step": 18183 - }, - { - "epoch": 2.928378759209308, - "grad_norm": 0.0017056305659934878, - "learning_rate": 0.00019999577311040776, - "loss": 46.0, - "step": 18184 - }, - { - "epoch": 2.9285397962880952, - "grad_norm": 0.001114071230404079, - "learning_rate": 0.0001999957726452404, - "loss": 46.0, - "step": 18185 - }, - { - "epoch": 2.9287008333668827, - "grad_norm": 0.002630940405651927, - "learning_rate": 0.0001999957721800474, - "loss": 46.0, - "step": 18186 - }, - { - "epoch": 2.92886187044567, - "grad_norm": 0.0017845238326117396, - "learning_rate": 0.00019999577171482883, - "loss": 46.0, - "step": 18187 - }, - { - "epoch": 2.9290229075244576, - "grad_norm": 0.0072834608145058155, - "learning_rate": 0.00019999577124958464, - "loss": 46.0, - "step": 18188 - }, - { - "epoch": 2.929183944603245, - "grad_norm": 0.0021217649336904287, - "learning_rate": 0.00019999577078431489, - "loss": 46.0, - "step": 18189 - }, - { - "epoch": 2.929344981682032, - "grad_norm": 0.0017602955922484398, - "learning_rate": 0.0001999957703190195, - "loss": 46.0, - "step": 18190 - }, - { - "epoch": 2.9295060187608195, - "grad_norm": 0.008133293129503727, - "learning_rate": 0.00019999576985369854, - "loss": 46.0, - "step": 18191 - }, - { - "epoch": 2.929667055839607, - "grad_norm": 0.0015090502565726638, - "learning_rate": 0.000199995769388352, - "loss": 46.0, - "step": 18192 - }, - { - "epoch": 2.9298280929183944, - "grad_norm": 0.006169312167912722, - "learning_rate": 0.00019999576892297985, - "loss": 46.0, - "step": 18193 - }, - { - "epoch": 2.929989129997182, - "grad_norm": 0.0006887398776598275, - "learning_rate": 0.0001999957684575821, - "loss": 46.0, - "step": 18194 - }, - { - "epoch": 2.9301501670759693, - "grad_norm": 0.007141907699406147, - "learning_rate": 0.00019999576799215879, - "loss": 46.0, - "step": 18195 - }, - { - "epoch": 2.9303112041547568, - "grad_norm": 0.002672359114512801, - "learning_rate": 0.00019999576752670984, - "loss": 46.0, - "step": 18196 - }, - { - "epoch": 2.930472241233544, - "grad_norm": 0.004060429520905018, - "learning_rate": 0.00019999576706123534, - "loss": 46.0, - "step": 18197 - }, - { - "epoch": 2.9306332783123317, - "grad_norm": 0.0029106049332767725, - "learning_rate": 0.00019999576659573523, - "loss": 46.0, - "step": 18198 - }, - { - "epoch": 2.9307943153911187, - "grad_norm": 0.0019907609093934298, - "learning_rate": 0.00019999576613020952, - "loss": 46.0, - "step": 18199 - }, - { - "epoch": 2.930955352469906, - "grad_norm": 0.0011841485975310206, - "learning_rate": 0.00019999576566465823, - "loss": 46.0, - "step": 18200 - }, - { - "epoch": 2.9311163895486936, - "grad_norm": 0.0009769797325134277, - "learning_rate": 0.00019999576519908133, - "loss": 46.0, - "step": 18201 - }, - { - "epoch": 2.931277426627481, - "grad_norm": 0.00409746216610074, - "learning_rate": 0.00019999576473347887, - "loss": 46.0, - "step": 18202 - }, - { - "epoch": 2.9314384637062685, - "grad_norm": 0.004400520119816065, - "learning_rate": 0.00019999576426785076, - "loss": 46.0, - "step": 18203 - }, - { - "epoch": 2.9315995007850555, - "grad_norm": 0.0016638454981148243, - "learning_rate": 0.00019999576380219712, - "loss": 46.0, - "step": 18204 - }, - { - "epoch": 2.931760537863843, - "grad_norm": 0.0017848439747467637, - "learning_rate": 0.00019999576333651784, - "loss": 46.0, - "step": 18205 - }, - { - "epoch": 2.9319215749426304, - "grad_norm": 0.001913099898956716, - "learning_rate": 0.00019999576287081297, - "loss": 46.0, - "step": 18206 - }, - { - "epoch": 2.932082612021418, - "grad_norm": 0.001097127329558134, - "learning_rate": 0.00019999576240508252, - "loss": 46.0, - "step": 18207 - }, - { - "epoch": 2.9322436491002053, - "grad_norm": 0.0012283776886761189, - "learning_rate": 0.00019999576193932645, - "loss": 46.0, - "step": 18208 - }, - { - "epoch": 2.9324046861789927, - "grad_norm": 0.0013275261735543609, - "learning_rate": 0.00019999576147354482, - "loss": 46.0, - "step": 18209 - }, - { - "epoch": 2.93256572325778, - "grad_norm": 0.0025266394950449467, - "learning_rate": 0.00019999576100773758, - "loss": 46.0, - "step": 18210 - }, - { - "epoch": 2.9327267603365677, - "grad_norm": 0.0018194651929661632, - "learning_rate": 0.00019999576054190475, - "loss": 46.0, - "step": 18211 - }, - { - "epoch": 2.932887797415355, - "grad_norm": 0.00701428996399045, - "learning_rate": 0.00019999576007604633, - "loss": 46.0, - "step": 18212 - }, - { - "epoch": 2.9330488344941426, - "grad_norm": 0.0025370344519615173, - "learning_rate": 0.0001999957596101623, - "loss": 46.0, - "step": 18213 - }, - { - "epoch": 2.9332098715729296, - "grad_norm": 0.004439545329660177, - "learning_rate": 0.00019999575914425268, - "loss": 46.0, - "step": 18214 - }, - { - "epoch": 2.933370908651717, - "grad_norm": 0.006848150864243507, - "learning_rate": 0.00019999575867831747, - "loss": 46.0, - "step": 18215 - }, - { - "epoch": 2.9335319457305045, - "grad_norm": 0.0038663074374198914, - "learning_rate": 0.0001999957582123567, - "loss": 46.0, - "step": 18216 - }, - { - "epoch": 2.933692982809292, - "grad_norm": 0.002190664177760482, - "learning_rate": 0.0001999957577463703, - "loss": 46.0, - "step": 18217 - }, - { - "epoch": 2.9338540198880794, - "grad_norm": 0.0013622419210150838, - "learning_rate": 0.0001999957572803583, - "loss": 46.0, - "step": 18218 - }, - { - "epoch": 2.9340150569668664, - "grad_norm": 0.0006758912932127714, - "learning_rate": 0.00019999575681432072, - "loss": 46.0, - "step": 18219 - }, - { - "epoch": 2.934176094045654, - "grad_norm": 0.0020648129284381866, - "learning_rate": 0.00019999575634825756, - "loss": 46.0, - "step": 18220 - }, - { - "epoch": 2.9343371311244413, - "grad_norm": 0.0015850800555199385, - "learning_rate": 0.0001999957558821688, - "loss": 46.0, - "step": 18221 - }, - { - "epoch": 2.9344981682032287, - "grad_norm": 0.006224527955055237, - "learning_rate": 0.0001999957554160544, - "loss": 46.0, - "step": 18222 - }, - { - "epoch": 2.934659205282016, - "grad_norm": 0.003197029232978821, - "learning_rate": 0.00019999575494991444, - "loss": 46.0, - "step": 18223 - }, - { - "epoch": 2.9348202423608036, - "grad_norm": 0.0011069177417084575, - "learning_rate": 0.0001999957544837489, - "loss": 46.0, - "step": 18224 - }, - { - "epoch": 2.934981279439591, - "grad_norm": 0.004962557926774025, - "learning_rate": 0.00019999575401755774, - "loss": 46.0, - "step": 18225 - }, - { - "epoch": 2.9351423165183785, - "grad_norm": 0.0017906212015077472, - "learning_rate": 0.00019999575355134105, - "loss": 46.0, - "step": 18226 - }, - { - "epoch": 2.935303353597166, - "grad_norm": 0.005940783303231001, - "learning_rate": 0.00019999575308509872, - "loss": 46.0, - "step": 18227 - }, - { - "epoch": 2.935464390675953, - "grad_norm": 0.0029781844932585955, - "learning_rate": 0.00019999575261883077, - "loss": 46.0, - "step": 18228 - }, - { - "epoch": 2.9356254277547404, - "grad_norm": 0.0019347805064171553, - "learning_rate": 0.00019999575215253726, - "loss": 46.0, - "step": 18229 - }, - { - "epoch": 2.935786464833528, - "grad_norm": 0.0028998963534832, - "learning_rate": 0.00019999575168621814, - "loss": 46.0, - "step": 18230 - }, - { - "epoch": 2.9359475019123153, - "grad_norm": 0.0012944411719217896, - "learning_rate": 0.00019999575121987343, - "loss": 46.0, - "step": 18231 - }, - { - "epoch": 2.936108538991103, - "grad_norm": 0.0021356791257858276, - "learning_rate": 0.00019999575075350313, - "loss": 46.0, - "step": 18232 - }, - { - "epoch": 2.9362695760698903, - "grad_norm": 0.001533346250653267, - "learning_rate": 0.00019999575028710722, - "loss": 46.0, - "step": 18233 - }, - { - "epoch": 2.9364306131486773, - "grad_norm": 0.0021803805138915777, - "learning_rate": 0.00019999574982068575, - "loss": 46.0, - "step": 18234 - }, - { - "epoch": 2.9365916502274647, - "grad_norm": 0.004516003653407097, - "learning_rate": 0.00019999574935423867, - "loss": 46.0, - "step": 18235 - }, - { - "epoch": 2.936752687306252, - "grad_norm": 0.0015579540049657226, - "learning_rate": 0.00019999574888776597, - "loss": 46.0, - "step": 18236 - }, - { - "epoch": 2.9369137243850396, - "grad_norm": 0.005141932051628828, - "learning_rate": 0.0001999957484212677, - "loss": 46.0, - "step": 18237 - }, - { - "epoch": 2.937074761463827, - "grad_norm": 0.0009286055574193597, - "learning_rate": 0.00019999574795474386, - "loss": 46.0, - "step": 18238 - }, - { - "epoch": 2.9372357985426145, - "grad_norm": 0.00913744792342186, - "learning_rate": 0.00019999574748819437, - "loss": 46.0, - "step": 18239 - }, - { - "epoch": 2.937396835621402, - "grad_norm": 0.0016055910382419825, - "learning_rate": 0.00019999574702161935, - "loss": 46.0, - "step": 18240 - }, - { - "epoch": 2.9375578727001894, - "grad_norm": 0.004685543943196535, - "learning_rate": 0.0001999957465550187, - "loss": 46.0, - "step": 18241 - }, - { - "epoch": 2.937718909778977, - "grad_norm": 0.004667813424021006, - "learning_rate": 0.00019999574608839247, - "loss": 46.0, - "step": 18242 - }, - { - "epoch": 2.937879946857764, - "grad_norm": 0.0025856574065983295, - "learning_rate": 0.00019999574562174063, - "loss": 46.0, - "step": 18243 - }, - { - "epoch": 2.9380409839365513, - "grad_norm": 0.005429683718830347, - "learning_rate": 0.00019999574515506318, - "loss": 46.0, - "step": 18244 - }, - { - "epoch": 2.938202021015339, - "grad_norm": 0.004247988574206829, - "learning_rate": 0.00019999574468836017, - "loss": 46.0, - "step": 18245 - }, - { - "epoch": 2.9383630580941262, - "grad_norm": 0.0015145476208999753, - "learning_rate": 0.00019999574422163157, - "loss": 46.0, - "step": 18246 - }, - { - "epoch": 2.9385240951729137, - "grad_norm": 0.004039260093122721, - "learning_rate": 0.00019999574375487736, - "loss": 46.0, - "step": 18247 - }, - { - "epoch": 2.9386851322517007, - "grad_norm": 0.012637422420084476, - "learning_rate": 0.00019999574328809756, - "loss": 46.0, - "step": 18248 - }, - { - "epoch": 2.938846169330488, - "grad_norm": 0.0026153794024139643, - "learning_rate": 0.00019999574282129217, - "loss": 46.0, - "step": 18249 - }, - { - "epoch": 2.9390072064092756, - "grad_norm": 0.0010796021670103073, - "learning_rate": 0.00019999574235446117, - "loss": 46.0, - "step": 18250 - }, - { - "epoch": 2.939168243488063, - "grad_norm": 0.0024613006971776485, - "learning_rate": 0.00019999574188760458, - "loss": 46.0, - "step": 18251 - }, - { - "epoch": 2.9393292805668505, - "grad_norm": 0.003761656815186143, - "learning_rate": 0.0001999957414207224, - "loss": 46.0, - "step": 18252 - }, - { - "epoch": 2.939490317645638, - "grad_norm": 0.0019356246339157224, - "learning_rate": 0.00019999574095381465, - "loss": 46.0, - "step": 18253 - }, - { - "epoch": 2.9396513547244254, - "grad_norm": 0.0005578165291808546, - "learning_rate": 0.00019999574048688127, - "loss": 46.0, - "step": 18254 - }, - { - "epoch": 2.939812391803213, - "grad_norm": 0.0009123950148932636, - "learning_rate": 0.0001999957400199223, - "loss": 46.0, - "step": 18255 - }, - { - "epoch": 2.9399734288820003, - "grad_norm": 0.0008915271027944982, - "learning_rate": 0.00019999573955293775, - "loss": 46.0, - "step": 18256 - }, - { - "epoch": 2.9401344659607873, - "grad_norm": 0.0008778031333349645, - "learning_rate": 0.00019999573908592764, - "loss": 46.0, - "step": 18257 - }, - { - "epoch": 2.9402955030395748, - "grad_norm": 0.004145601764321327, - "learning_rate": 0.0001999957386188919, - "loss": 46.0, - "step": 18258 - }, - { - "epoch": 2.940456540118362, - "grad_norm": 0.003352200146764517, - "learning_rate": 0.00019999573815183055, - "loss": 46.0, - "step": 18259 - }, - { - "epoch": 2.9406175771971497, - "grad_norm": 0.004106913693249226, - "learning_rate": 0.00019999573768474362, - "loss": 46.0, - "step": 18260 - }, - { - "epoch": 2.940778614275937, - "grad_norm": 0.00277924002148211, - "learning_rate": 0.00019999573721763108, - "loss": 46.0, - "step": 18261 - }, - { - "epoch": 2.9409396513547246, - "grad_norm": 0.003632959211245179, - "learning_rate": 0.00019999573675049298, - "loss": 46.0, - "step": 18262 - }, - { - "epoch": 2.9411006884335116, - "grad_norm": 0.011196434497833252, - "learning_rate": 0.0001999957362833293, - "loss": 46.0, - "step": 18263 - }, - { - "epoch": 2.941261725512299, - "grad_norm": 0.005271113943308592, - "learning_rate": 0.00019999573581614, - "loss": 46.0, - "step": 18264 - }, - { - "epoch": 2.9414227625910865, - "grad_norm": 0.002945388900116086, - "learning_rate": 0.00019999573534892507, - "loss": 46.0, - "step": 18265 - }, - { - "epoch": 2.941583799669874, - "grad_norm": 0.003602365031838417, - "learning_rate": 0.0001999957348816846, - "loss": 46.0, - "step": 18266 - }, - { - "epoch": 2.9417448367486614, - "grad_norm": 0.0059127239510416985, - "learning_rate": 0.0001999957344144185, - "loss": 46.0, - "step": 18267 - }, - { - "epoch": 2.941905873827449, - "grad_norm": 0.0019192382460460067, - "learning_rate": 0.00019999573394712683, - "loss": 46.0, - "step": 18268 - }, - { - "epoch": 2.9420669109062363, - "grad_norm": 0.010361652821302414, - "learning_rate": 0.0001999957334798096, - "loss": 46.0, - "step": 18269 - }, - { - "epoch": 2.9422279479850237, - "grad_norm": 0.0008517851820215583, - "learning_rate": 0.0001999957330124667, - "loss": 46.0, - "step": 18270 - }, - { - "epoch": 2.942388985063811, - "grad_norm": 0.003357992274686694, - "learning_rate": 0.00019999573254509824, - "loss": 46.0, - "step": 18271 - }, - { - "epoch": 2.942550022142598, - "grad_norm": 0.003572401124984026, - "learning_rate": 0.0001999957320777042, - "loss": 46.0, - "step": 18272 - }, - { - "epoch": 2.9427110592213856, - "grad_norm": 0.0007625145954079926, - "learning_rate": 0.00019999573161028455, - "loss": 46.0, - "step": 18273 - }, - { - "epoch": 2.942872096300173, - "grad_norm": 0.0054258918389678, - "learning_rate": 0.00019999573114283932, - "loss": 46.0, - "step": 18274 - }, - { - "epoch": 2.9430331333789606, - "grad_norm": 0.0019069387344643474, - "learning_rate": 0.00019999573067536848, - "loss": 46.0, - "step": 18275 - }, - { - "epoch": 2.943194170457748, - "grad_norm": 0.0009239456849172711, - "learning_rate": 0.00019999573020787205, - "loss": 46.0, - "step": 18276 - }, - { - "epoch": 2.943355207536535, - "grad_norm": 0.0055352505296468735, - "learning_rate": 0.00019999572974035003, - "loss": 46.0, - "step": 18277 - }, - { - "epoch": 2.9435162446153225, - "grad_norm": 0.004443612415343523, - "learning_rate": 0.00019999572927280242, - "loss": 46.0, - "step": 18278 - }, - { - "epoch": 2.94367728169411, - "grad_norm": 0.0023634915705770254, - "learning_rate": 0.0001999957288052292, - "loss": 46.0, - "step": 18279 - }, - { - "epoch": 2.9438383187728974, - "grad_norm": 0.0011116719106212258, - "learning_rate": 0.0001999957283376304, - "loss": 46.0, - "step": 18280 - }, - { - "epoch": 2.943999355851685, - "grad_norm": 0.00572171388193965, - "learning_rate": 0.000199995727870006, - "loss": 46.0, - "step": 18281 - }, - { - "epoch": 2.9441603929304723, - "grad_norm": 0.005229401867836714, - "learning_rate": 0.00019999572740235603, - "loss": 46.0, - "step": 18282 - }, - { - "epoch": 2.9443214300092597, - "grad_norm": 0.001199902268126607, - "learning_rate": 0.00019999572693468043, - "loss": 46.0, - "step": 18283 - }, - { - "epoch": 2.944482467088047, - "grad_norm": 0.004327108152210712, - "learning_rate": 0.00019999572646697925, - "loss": 46.0, - "step": 18284 - }, - { - "epoch": 2.9446435041668346, - "grad_norm": 0.0029401604551821947, - "learning_rate": 0.00019999572599925248, - "loss": 46.0, - "step": 18285 - }, - { - "epoch": 2.944804541245622, - "grad_norm": 0.0028358884155750275, - "learning_rate": 0.00019999572553150013, - "loss": 46.0, - "step": 18286 - }, - { - "epoch": 2.944965578324409, - "grad_norm": 0.0019006096990779042, - "learning_rate": 0.00019999572506372216, - "loss": 46.0, - "step": 18287 - }, - { - "epoch": 2.9451266154031965, - "grad_norm": 0.001222077989950776, - "learning_rate": 0.00019999572459591862, - "loss": 46.0, - "step": 18288 - }, - { - "epoch": 2.945287652481984, - "grad_norm": 0.008274531923234463, - "learning_rate": 0.00019999572412808948, - "loss": 46.0, - "step": 18289 - }, - { - "epoch": 2.9454486895607714, - "grad_norm": 0.0011886481661349535, - "learning_rate": 0.00019999572366023475, - "loss": 46.0, - "step": 18290 - }, - { - "epoch": 2.945609726639559, - "grad_norm": 0.00539096724241972, - "learning_rate": 0.00019999572319235438, - "loss": 46.0, - "step": 18291 - }, - { - "epoch": 2.945770763718346, - "grad_norm": 0.0031763585284352303, - "learning_rate": 0.00019999572272444847, - "loss": 46.0, - "step": 18292 - }, - { - "epoch": 2.9459318007971333, - "grad_norm": 0.0030268875416368246, - "learning_rate": 0.00019999572225651692, - "loss": 46.0, - "step": 18293 - }, - { - "epoch": 2.946092837875921, - "grad_norm": 0.006517102010548115, - "learning_rate": 0.00019999572178855984, - "loss": 46.0, - "step": 18294 - }, - { - "epoch": 2.9462538749547083, - "grad_norm": 0.004996228031814098, - "learning_rate": 0.00019999572132057712, - "loss": 46.0, - "step": 18295 - }, - { - "epoch": 2.9464149120334957, - "grad_norm": 0.0035531383473426104, - "learning_rate": 0.0001999957208525688, - "loss": 46.0, - "step": 18296 - }, - { - "epoch": 2.946575949112283, - "grad_norm": 0.008447499945759773, - "learning_rate": 0.00019999572038453491, - "loss": 46.0, - "step": 18297 - }, - { - "epoch": 2.9467369861910706, - "grad_norm": 0.0010228337487205863, - "learning_rate": 0.00019999571991647543, - "loss": 46.0, - "step": 18298 - }, - { - "epoch": 2.946898023269858, - "grad_norm": 0.0023939877282828093, - "learning_rate": 0.00019999571944839036, - "loss": 46.0, - "step": 18299 - }, - { - "epoch": 2.9470590603486455, - "grad_norm": 0.0032466899137943983, - "learning_rate": 0.00019999571898027968, - "loss": 46.0, - "step": 18300 - }, - { - "epoch": 2.9472200974274325, - "grad_norm": 0.002225608332082629, - "learning_rate": 0.0001999957185121434, - "loss": 46.0, - "step": 18301 - }, - { - "epoch": 2.94738113450622, - "grad_norm": 0.005308377090841532, - "learning_rate": 0.00019999571804398154, - "loss": 46.0, - "step": 18302 - }, - { - "epoch": 2.9475421715850074, - "grad_norm": 0.0020556533709168434, - "learning_rate": 0.00019999571757579407, - "loss": 46.0, - "step": 18303 - }, - { - "epoch": 2.947703208663795, - "grad_norm": 0.004077361896634102, - "learning_rate": 0.00019999571710758104, - "loss": 46.0, - "step": 18304 - }, - { - "epoch": 2.9478642457425823, - "grad_norm": 0.002882294822484255, - "learning_rate": 0.0001999957166393424, - "loss": 46.0, - "step": 18305 - }, - { - "epoch": 2.9480252828213698, - "grad_norm": 0.004241040907800198, - "learning_rate": 0.00019999571617107813, - "loss": 46.0, - "step": 18306 - }, - { - "epoch": 2.948186319900157, - "grad_norm": 0.0017040788661688566, - "learning_rate": 0.0001999957157027883, - "loss": 46.0, - "step": 18307 - }, - { - "epoch": 2.9483473569789442, - "grad_norm": 0.0016345313051715493, - "learning_rate": 0.00019999571523447287, - "loss": 46.0, - "step": 18308 - }, - { - "epoch": 2.9485083940577317, - "grad_norm": 0.0007096792687661946, - "learning_rate": 0.00019999571476613185, - "loss": 46.0, - "step": 18309 - }, - { - "epoch": 2.948669431136519, - "grad_norm": 0.0031859949231147766, - "learning_rate": 0.00019999571429776524, - "loss": 46.0, - "step": 18310 - }, - { - "epoch": 2.9488304682153066, - "grad_norm": 0.0014084330759942532, - "learning_rate": 0.000199995713829373, - "loss": 46.0, - "step": 18311 - }, - { - "epoch": 2.948991505294094, - "grad_norm": 0.006162330042570829, - "learning_rate": 0.00019999571336095523, - "loss": 46.0, - "step": 18312 - }, - { - "epoch": 2.9491525423728815, - "grad_norm": 0.003467687638476491, - "learning_rate": 0.00019999571289251183, - "loss": 46.0, - "step": 18313 - }, - { - "epoch": 2.949313579451669, - "grad_norm": 0.004696844145655632, - "learning_rate": 0.0001999957124240428, - "loss": 46.0, - "step": 18314 - }, - { - "epoch": 2.9494746165304564, - "grad_norm": 0.0028436086140573025, - "learning_rate": 0.00019999571195554824, - "loss": 46.0, - "step": 18315 - }, - { - "epoch": 2.9496356536092434, - "grad_norm": 0.0010815279092639685, - "learning_rate": 0.00019999571148702808, - "loss": 46.0, - "step": 18316 - }, - { - "epoch": 2.949796690688031, - "grad_norm": 0.0009222500957548618, - "learning_rate": 0.0001999957110184823, - "loss": 46.0, - "step": 18317 - }, - { - "epoch": 2.9499577277668183, - "grad_norm": 0.0007939033093862236, - "learning_rate": 0.00019999571054991094, - "loss": 46.0, - "step": 18318 - }, - { - "epoch": 2.9501187648456058, - "grad_norm": 0.0018598157912492752, - "learning_rate": 0.00019999571008131396, - "loss": 46.0, - "step": 18319 - }, - { - "epoch": 2.950279801924393, - "grad_norm": 0.0037508851382881403, - "learning_rate": 0.00019999570961269143, - "loss": 46.0, - "step": 18320 - }, - { - "epoch": 2.95044083900318, - "grad_norm": 0.0066314502619206905, - "learning_rate": 0.00019999570914404328, - "loss": 46.0, - "step": 18321 - }, - { - "epoch": 2.9506018760819677, - "grad_norm": 0.005467196926474571, - "learning_rate": 0.0001999957086753695, - "loss": 46.0, - "step": 18322 - }, - { - "epoch": 2.950762913160755, - "grad_norm": 0.0018607608508318663, - "learning_rate": 0.0001999957082066702, - "loss": 46.0, - "step": 18323 - }, - { - "epoch": 2.9509239502395426, - "grad_norm": 0.003991422709077597, - "learning_rate": 0.00019999570773794527, - "loss": 46.0, - "step": 18324 - }, - { - "epoch": 2.95108498731833, - "grad_norm": 0.0021336579229682684, - "learning_rate": 0.00019999570726919475, - "loss": 46.0, - "step": 18325 - }, - { - "epoch": 2.9512460243971175, - "grad_norm": 0.004980088211596012, - "learning_rate": 0.0001999957068004186, - "loss": 46.0, - "step": 18326 - }, - { - "epoch": 2.951407061475905, - "grad_norm": 0.01494060829281807, - "learning_rate": 0.0001999957063316169, - "loss": 46.0, - "step": 18327 - }, - { - "epoch": 2.9515680985546924, - "grad_norm": 0.003776193130761385, - "learning_rate": 0.00019999570586278962, - "loss": 46.0, - "step": 18328 - }, - { - "epoch": 2.95172913563348, - "grad_norm": 0.0026578884571790695, - "learning_rate": 0.00019999570539393671, - "loss": 46.0, - "step": 18329 - }, - { - "epoch": 2.951890172712267, - "grad_norm": 0.003074975684285164, - "learning_rate": 0.00019999570492505823, - "loss": 46.0, - "step": 18330 - }, - { - "epoch": 2.9520512097910543, - "grad_norm": 0.0071158758364617825, - "learning_rate": 0.00019999570445615415, - "loss": 46.0, - "step": 18331 - }, - { - "epoch": 2.9522122468698417, - "grad_norm": 0.004280238412320614, - "learning_rate": 0.00019999570398722446, - "loss": 46.0, - "step": 18332 - }, - { - "epoch": 2.952373283948629, - "grad_norm": 0.0025755814276635647, - "learning_rate": 0.0001999957035182692, - "loss": 46.0, - "step": 18333 - }, - { - "epoch": 2.9525343210274166, - "grad_norm": 0.00290506100282073, - "learning_rate": 0.00019999570304928831, - "loss": 46.0, - "step": 18334 - }, - { - "epoch": 2.952695358106204, - "grad_norm": 0.006528573576360941, - "learning_rate": 0.00019999570258028186, - "loss": 46.0, - "step": 18335 - }, - { - "epoch": 2.952856395184991, - "grad_norm": 0.0032045214902609587, - "learning_rate": 0.00019999570211124982, - "loss": 46.0, - "step": 18336 - }, - { - "epoch": 2.9530174322637786, - "grad_norm": 0.001571933040395379, - "learning_rate": 0.00019999570164219217, - "loss": 46.0, - "step": 18337 - }, - { - "epoch": 2.953178469342566, - "grad_norm": 0.001303008641116321, - "learning_rate": 0.00019999570117310893, - "loss": 46.0, - "step": 18338 - }, - { - "epoch": 2.9533395064213535, - "grad_norm": 0.0017148804618045688, - "learning_rate": 0.0001999957007040001, - "loss": 46.0, - "step": 18339 - }, - { - "epoch": 2.953500543500141, - "grad_norm": 0.0008733664290048182, - "learning_rate": 0.00019999570023486565, - "loss": 46.0, - "step": 18340 - }, - { - "epoch": 2.9536615805789284, - "grad_norm": 0.001065896824002266, - "learning_rate": 0.00019999569976570565, - "loss": 46.0, - "step": 18341 - }, - { - "epoch": 2.953822617657716, - "grad_norm": 0.0012946195201948285, - "learning_rate": 0.00019999569929652004, - "loss": 46.0, - "step": 18342 - }, - { - "epoch": 2.9539836547365033, - "grad_norm": 0.009168010205030441, - "learning_rate": 0.00019999569882730883, - "loss": 46.0, - "step": 18343 - }, - { - "epoch": 2.9541446918152907, - "grad_norm": 0.008732241578400135, - "learning_rate": 0.000199995698358072, - "loss": 46.0, - "step": 18344 - }, - { - "epoch": 2.9543057288940777, - "grad_norm": 0.0012411995558068156, - "learning_rate": 0.0001999956978888096, - "loss": 46.0, - "step": 18345 - }, - { - "epoch": 2.954466765972865, - "grad_norm": 0.0030234933365136385, - "learning_rate": 0.00019999569741952161, - "loss": 46.0, - "step": 18346 - }, - { - "epoch": 2.9546278030516526, - "grad_norm": 0.0009080435265786946, - "learning_rate": 0.00019999569695020803, - "loss": 46.0, - "step": 18347 - }, - { - "epoch": 2.95478884013044, - "grad_norm": 0.001072784187272191, - "learning_rate": 0.00019999569648086887, - "loss": 46.0, - "step": 18348 - }, - { - "epoch": 2.9549498772092275, - "grad_norm": 0.0020690930541604757, - "learning_rate": 0.00019999569601150409, - "loss": 46.0, - "step": 18349 - }, - { - "epoch": 2.955110914288015, - "grad_norm": 0.0021843689028173685, - "learning_rate": 0.00019999569554211372, - "loss": 46.0, - "step": 18350 - }, - { - "epoch": 2.955271951366802, - "grad_norm": 0.004255227744579315, - "learning_rate": 0.0001999956950726978, - "loss": 46.0, - "step": 18351 - }, - { - "epoch": 2.9554329884455894, - "grad_norm": 0.002831714926287532, - "learning_rate": 0.00019999569460325622, - "loss": 46.0, - "step": 18352 - }, - { - "epoch": 2.955594025524377, - "grad_norm": 0.0016571101732552052, - "learning_rate": 0.00019999569413378903, - "loss": 46.0, - "step": 18353 - }, - { - "epoch": 2.9557550626031643, - "grad_norm": 0.004907633177936077, - "learning_rate": 0.00019999569366429632, - "loss": 46.0, - "step": 18354 - }, - { - "epoch": 2.955916099681952, - "grad_norm": 0.0016868987586349249, - "learning_rate": 0.000199995693194778, - "loss": 46.0, - "step": 18355 - }, - { - "epoch": 2.9560771367607392, - "grad_norm": 0.003417165484279394, - "learning_rate": 0.00019999569272523404, - "loss": 46.0, - "step": 18356 - }, - { - "epoch": 2.9562381738395267, - "grad_norm": 0.006100281607359648, - "learning_rate": 0.00019999569225566453, - "loss": 46.0, - "step": 18357 - }, - { - "epoch": 2.956399210918314, - "grad_norm": 0.0016772671369835734, - "learning_rate": 0.00019999569178606944, - "loss": 46.0, - "step": 18358 - }, - { - "epoch": 2.9565602479971016, - "grad_norm": 0.0018922797171398997, - "learning_rate": 0.0001999956913164487, - "loss": 46.0, - "step": 18359 - }, - { - "epoch": 2.9567212850758886, - "grad_norm": 0.015465022064745426, - "learning_rate": 0.0001999956908468024, - "loss": 46.0, - "step": 18360 - }, - { - "epoch": 2.956882322154676, - "grad_norm": 0.0033949699718505144, - "learning_rate": 0.0001999956903771305, - "loss": 46.0, - "step": 18361 - }, - { - "epoch": 2.9570433592334635, - "grad_norm": 0.008320840075612068, - "learning_rate": 0.00019999568990743303, - "loss": 46.0, - "step": 18362 - }, - { - "epoch": 2.957204396312251, - "grad_norm": 0.0036002597771584988, - "learning_rate": 0.00019999568943770992, - "loss": 46.0, - "step": 18363 - }, - { - "epoch": 2.9573654333910384, - "grad_norm": 0.0005141058354638517, - "learning_rate": 0.00019999568896796123, - "loss": 46.0, - "step": 18364 - }, - { - "epoch": 2.9575264704698254, - "grad_norm": 0.006049633491784334, - "learning_rate": 0.00019999568849818697, - "loss": 46.0, - "step": 18365 - }, - { - "epoch": 2.957687507548613, - "grad_norm": 0.0035453508608043194, - "learning_rate": 0.0001999956880283871, - "loss": 46.0, - "step": 18366 - }, - { - "epoch": 2.9578485446274003, - "grad_norm": 0.003871844382956624, - "learning_rate": 0.00019999568755856167, - "loss": 46.0, - "step": 18367 - }, - { - "epoch": 2.9580095817061878, - "grad_norm": 0.004047180525958538, - "learning_rate": 0.0001999956870887106, - "loss": 46.0, - "step": 18368 - }, - { - "epoch": 2.9581706187849752, - "grad_norm": 0.0045349509455263615, - "learning_rate": 0.00019999568661883393, - "loss": 46.0, - "step": 18369 - }, - { - "epoch": 2.9583316558637627, - "grad_norm": 0.0023616482503712177, - "learning_rate": 0.00019999568614893168, - "loss": 46.0, - "step": 18370 - }, - { - "epoch": 2.95849269294255, - "grad_norm": 0.00441835867241025, - "learning_rate": 0.00019999568567900388, - "loss": 46.0, - "step": 18371 - }, - { - "epoch": 2.9586537300213376, - "grad_norm": 0.007372252177447081, - "learning_rate": 0.00019999568520905045, - "loss": 46.0, - "step": 18372 - }, - { - "epoch": 2.958814767100125, - "grad_norm": 0.004499177914112806, - "learning_rate": 0.00019999568473907142, - "loss": 46.0, - "step": 18373 - }, - { - "epoch": 2.958975804178912, - "grad_norm": 0.0015675516333431005, - "learning_rate": 0.0001999956842690668, - "loss": 46.0, - "step": 18374 - }, - { - "epoch": 2.9591368412576995, - "grad_norm": 0.0010757470736280084, - "learning_rate": 0.00019999568379903658, - "loss": 46.0, - "step": 18375 - }, - { - "epoch": 2.959297878336487, - "grad_norm": 0.001580989919602871, - "learning_rate": 0.0001999956833289808, - "loss": 46.0, - "step": 18376 - }, - { - "epoch": 2.9594589154152744, - "grad_norm": 0.0026291089598089457, - "learning_rate": 0.0001999956828588994, - "loss": 46.0, - "step": 18377 - }, - { - "epoch": 2.959619952494062, - "grad_norm": 0.001443927874788642, - "learning_rate": 0.00019999568238879238, - "loss": 46.0, - "step": 18378 - }, - { - "epoch": 2.9597809895728493, - "grad_norm": 0.0013021137565374374, - "learning_rate": 0.00019999568191865982, - "loss": 46.0, - "step": 18379 - }, - { - "epoch": 2.9599420266516363, - "grad_norm": 0.001310904510319233, - "learning_rate": 0.00019999568144850164, - "loss": 46.0, - "step": 18380 - }, - { - "epoch": 2.9601030637304238, - "grad_norm": 0.0023698126897215843, - "learning_rate": 0.00019999568097831786, - "loss": 46.0, - "step": 18381 - }, - { - "epoch": 2.960264100809211, - "grad_norm": 0.0039572035893797874, - "learning_rate": 0.0001999956805081085, - "loss": 46.0, - "step": 18382 - }, - { - "epoch": 2.9604251378879987, - "grad_norm": 0.007814143784344196, - "learning_rate": 0.00019999568003787355, - "loss": 46.0, - "step": 18383 - }, - { - "epoch": 2.960586174966786, - "grad_norm": 0.002072358736768365, - "learning_rate": 0.00019999567956761297, - "loss": 46.0, - "step": 18384 - }, - { - "epoch": 2.9607472120455736, - "grad_norm": 0.0022338759154081345, - "learning_rate": 0.00019999567909732683, - "loss": 46.0, - "step": 18385 - }, - { - "epoch": 2.960908249124361, - "grad_norm": 0.0014239565934985876, - "learning_rate": 0.00019999567862701508, - "loss": 46.0, - "step": 18386 - }, - { - "epoch": 2.9610692862031485, - "grad_norm": 0.00490310974419117, - "learning_rate": 0.00019999567815667777, - "loss": 46.0, - "step": 18387 - }, - { - "epoch": 2.961230323281936, - "grad_norm": 0.0010968658607453108, - "learning_rate": 0.00019999567768631482, - "loss": 46.0, - "step": 18388 - }, - { - "epoch": 2.961391360360723, - "grad_norm": 0.005245309788733721, - "learning_rate": 0.00019999567721592628, - "loss": 46.0, - "step": 18389 - }, - { - "epoch": 2.9615523974395104, - "grad_norm": 0.0024718816857784986, - "learning_rate": 0.00019999567674551218, - "loss": 46.0, - "step": 18390 - }, - { - "epoch": 2.961713434518298, - "grad_norm": 0.00124309619423002, - "learning_rate": 0.00019999567627507247, - "loss": 46.0, - "step": 18391 - }, - { - "epoch": 2.9618744715970853, - "grad_norm": 0.009283908642828465, - "learning_rate": 0.0001999956758046072, - "loss": 46.0, - "step": 18392 - }, - { - "epoch": 2.9620355086758727, - "grad_norm": 0.0023621288128197193, - "learning_rate": 0.00019999567533411628, - "loss": 46.0, - "step": 18393 - }, - { - "epoch": 2.9621965457546597, - "grad_norm": 0.005116816144436598, - "learning_rate": 0.00019999567486359978, - "loss": 46.0, - "step": 18394 - }, - { - "epoch": 2.962357582833447, - "grad_norm": 0.001094233593903482, - "learning_rate": 0.00019999567439305769, - "loss": 46.0, - "step": 18395 - }, - { - "epoch": 2.9625186199122346, - "grad_norm": 0.0023934142664074898, - "learning_rate": 0.00019999567392249, - "loss": 46.0, - "step": 18396 - }, - { - "epoch": 2.962679656991022, - "grad_norm": 0.0014208616921678185, - "learning_rate": 0.00019999567345189675, - "loss": 46.0, - "step": 18397 - }, - { - "epoch": 2.9628406940698095, - "grad_norm": 0.002365308813750744, - "learning_rate": 0.0001999956729812779, - "loss": 46.0, - "step": 18398 - }, - { - "epoch": 2.963001731148597, - "grad_norm": 0.0033565261401236057, - "learning_rate": 0.00019999567251063343, - "loss": 46.0, - "step": 18399 - }, - { - "epoch": 2.9631627682273844, - "grad_norm": 0.00140491989441216, - "learning_rate": 0.00019999567203996335, - "loss": 46.0, - "step": 18400 - }, - { - "epoch": 2.963323805306172, - "grad_norm": 0.0060475068166852, - "learning_rate": 0.00019999567156926774, - "loss": 46.0, - "step": 18401 - }, - { - "epoch": 2.9634848423849594, - "grad_norm": 0.0017807814292609692, - "learning_rate": 0.00019999567109854646, - "loss": 46.0, - "step": 18402 - }, - { - "epoch": 2.963645879463747, - "grad_norm": 0.0020399540662765503, - "learning_rate": 0.00019999567062779962, - "loss": 46.0, - "step": 18403 - }, - { - "epoch": 2.963806916542534, - "grad_norm": 0.0048352801240980625, - "learning_rate": 0.00019999567015702721, - "loss": 46.0, - "step": 18404 - }, - { - "epoch": 2.9639679536213213, - "grad_norm": 0.0077485013753175735, - "learning_rate": 0.00019999566968622917, - "loss": 46.0, - "step": 18405 - }, - { - "epoch": 2.9641289907001087, - "grad_norm": 0.005939498543739319, - "learning_rate": 0.00019999566921540557, - "loss": 46.0, - "step": 18406 - }, - { - "epoch": 2.964290027778896, - "grad_norm": 0.0010195238282904029, - "learning_rate": 0.00019999566874455635, - "loss": 46.0, - "step": 18407 - }, - { - "epoch": 2.9644510648576836, - "grad_norm": 0.0034312172792851925, - "learning_rate": 0.00019999566827368155, - "loss": 46.0, - "step": 18408 - }, - { - "epoch": 2.9646121019364706, - "grad_norm": 0.0017127074534073472, - "learning_rate": 0.00019999566780278113, - "loss": 46.0, - "step": 18409 - }, - { - "epoch": 2.964773139015258, - "grad_norm": 0.004184579476714134, - "learning_rate": 0.00019999566733185518, - "loss": 46.0, - "step": 18410 - }, - { - "epoch": 2.9649341760940455, - "grad_norm": 0.00745483348146081, - "learning_rate": 0.0001999956668609036, - "loss": 46.0, - "step": 18411 - }, - { - "epoch": 2.965095213172833, - "grad_norm": 0.0038978401571512222, - "learning_rate": 0.0001999956663899264, - "loss": 46.0, - "step": 18412 - }, - { - "epoch": 2.9652562502516204, - "grad_norm": 0.0011870965827256441, - "learning_rate": 0.0001999956659189236, - "loss": 46.0, - "step": 18413 - }, - { - "epoch": 2.965417287330408, - "grad_norm": 0.0010393648408353329, - "learning_rate": 0.00019999566544789526, - "loss": 46.0, - "step": 18414 - }, - { - "epoch": 2.9655783244091953, - "grad_norm": 0.0019145701080560684, - "learning_rate": 0.0001999956649768413, - "loss": 46.0, - "step": 18415 - }, - { - "epoch": 2.965739361487983, - "grad_norm": 0.0017798240296542645, - "learning_rate": 0.00019999566450576173, - "loss": 46.0, - "step": 18416 - }, - { - "epoch": 2.9659003985667702, - "grad_norm": 0.0005918376846238971, - "learning_rate": 0.0001999956640346566, - "loss": 46.0, - "step": 18417 - }, - { - "epoch": 2.9660614356455572, - "grad_norm": 0.0017097140662372112, - "learning_rate": 0.00019999566356352583, - "loss": 46.0, - "step": 18418 - }, - { - "epoch": 2.9662224727243447, - "grad_norm": 0.0015214142622426152, - "learning_rate": 0.00019999566309236952, - "loss": 46.0, - "step": 18419 - }, - { - "epoch": 2.966383509803132, - "grad_norm": 0.004899428226053715, - "learning_rate": 0.00019999566262118759, - "loss": 46.0, - "step": 18420 - }, - { - "epoch": 2.9665445468819196, - "grad_norm": 0.005743523593991995, - "learning_rate": 0.00019999566214998007, - "loss": 46.0, - "step": 18421 - }, - { - "epoch": 2.966705583960707, - "grad_norm": 0.0011276621371507645, - "learning_rate": 0.00019999566167874694, - "loss": 46.0, - "step": 18422 - }, - { - "epoch": 2.9668666210394945, - "grad_norm": 0.0027939225547015667, - "learning_rate": 0.00019999566120748822, - "loss": 46.0, - "step": 18423 - }, - { - "epoch": 2.9670276581182815, - "grad_norm": 0.008693045936524868, - "learning_rate": 0.00019999566073620394, - "loss": 46.0, - "step": 18424 - }, - { - "epoch": 2.967188695197069, - "grad_norm": 0.001919653033837676, - "learning_rate": 0.00019999566026489401, - "loss": 46.0, - "step": 18425 - }, - { - "epoch": 2.9673497322758564, - "grad_norm": 0.00215740199200809, - "learning_rate": 0.00019999565979355853, - "loss": 46.0, - "step": 18426 - }, - { - "epoch": 2.967510769354644, - "grad_norm": 0.0043504973873496056, - "learning_rate": 0.00019999565932219744, - "loss": 46.0, - "step": 18427 - }, - { - "epoch": 2.9676718064334313, - "grad_norm": 0.002733775181695819, - "learning_rate": 0.00019999565885081078, - "loss": 46.0, - "step": 18428 - }, - { - "epoch": 2.9678328435122188, - "grad_norm": 0.0008774618036113679, - "learning_rate": 0.00019999565837939848, - "loss": 46.0, - "step": 18429 - }, - { - "epoch": 2.967993880591006, - "grad_norm": 0.003374746534973383, - "learning_rate": 0.00019999565790796063, - "loss": 46.0, - "step": 18430 - }, - { - "epoch": 2.9681549176697937, - "grad_norm": 0.001083322917111218, - "learning_rate": 0.00019999565743649718, - "loss": 46.0, - "step": 18431 - }, - { - "epoch": 2.968315954748581, - "grad_norm": 0.0014616484986618161, - "learning_rate": 0.0001999956569650081, - "loss": 46.0, - "step": 18432 - }, - { - "epoch": 2.968476991827368, - "grad_norm": 0.004131474066525698, - "learning_rate": 0.00019999565649349345, - "loss": 46.0, - "step": 18433 - }, - { - "epoch": 2.9686380289061556, - "grad_norm": 0.0019515649182721972, - "learning_rate": 0.00019999565602195322, - "loss": 46.0, - "step": 18434 - }, - { - "epoch": 2.968799065984943, - "grad_norm": 0.0015557692386209965, - "learning_rate": 0.00019999565555038737, - "loss": 46.0, - "step": 18435 - }, - { - "epoch": 2.9689601030637305, - "grad_norm": 0.002823358168825507, - "learning_rate": 0.00019999565507879596, - "loss": 46.0, - "step": 18436 - }, - { - "epoch": 2.969121140142518, - "grad_norm": 0.014139360748231411, - "learning_rate": 0.00019999565460717894, - "loss": 46.0, - "step": 18437 - }, - { - "epoch": 2.969282177221305, - "grad_norm": 0.0016486848471686244, - "learning_rate": 0.0001999956541355363, - "loss": 46.0, - "step": 18438 - }, - { - "epoch": 2.9694432143000924, - "grad_norm": 0.01684490032494068, - "learning_rate": 0.0001999956536638681, - "loss": 46.0, - "step": 18439 - }, - { - "epoch": 2.96960425137888, - "grad_norm": 0.00124447257257998, - "learning_rate": 0.0001999956531921743, - "loss": 46.0, - "step": 18440 - }, - { - "epoch": 2.9697652884576673, - "grad_norm": 0.004933250602334738, - "learning_rate": 0.0001999956527204549, - "loss": 46.0, - "step": 18441 - }, - { - "epoch": 2.9699263255364547, - "grad_norm": 0.002164689125493169, - "learning_rate": 0.00019999565224870992, - "loss": 46.0, - "step": 18442 - }, - { - "epoch": 2.970087362615242, - "grad_norm": 0.0036833875346928835, - "learning_rate": 0.00019999565177693932, - "loss": 46.0, - "step": 18443 - }, - { - "epoch": 2.9702483996940297, - "grad_norm": 0.0016168163856491446, - "learning_rate": 0.00019999565130514313, - "loss": 46.0, - "step": 18444 - }, - { - "epoch": 2.970409436772817, - "grad_norm": 0.00426136888563633, - "learning_rate": 0.00019999565083332136, - "loss": 46.0, - "step": 18445 - }, - { - "epoch": 2.9705704738516046, - "grad_norm": 0.0007692842627875507, - "learning_rate": 0.000199995650361474, - "loss": 46.0, - "step": 18446 - }, - { - "epoch": 2.9707315109303916, - "grad_norm": 0.004225170239806175, - "learning_rate": 0.00019999564988960105, - "loss": 46.0, - "step": 18447 - }, - { - "epoch": 2.970892548009179, - "grad_norm": 0.005309720989316702, - "learning_rate": 0.0001999956494177025, - "loss": 46.0, - "step": 18448 - }, - { - "epoch": 2.9710535850879665, - "grad_norm": 0.001555133960209787, - "learning_rate": 0.00019999564894577834, - "loss": 46.0, - "step": 18449 - }, - { - "epoch": 2.971214622166754, - "grad_norm": 0.004254933912307024, - "learning_rate": 0.0001999956484738286, - "loss": 46.0, - "step": 18450 - }, - { - "epoch": 2.9713756592455414, - "grad_norm": 0.0007019075565040112, - "learning_rate": 0.00019999564800185325, - "loss": 46.0, - "step": 18451 - }, - { - "epoch": 2.971536696324329, - "grad_norm": 0.0023639341816306114, - "learning_rate": 0.00019999564752985234, - "loss": 46.0, - "step": 18452 - }, - { - "epoch": 2.971697733403116, - "grad_norm": 0.00486363610252738, - "learning_rate": 0.00019999564705782582, - "loss": 46.0, - "step": 18453 - }, - { - "epoch": 2.9718587704819033, - "grad_norm": 0.004407297819852829, - "learning_rate": 0.0001999956465857737, - "loss": 46.0, - "step": 18454 - }, - { - "epoch": 2.9720198075606907, - "grad_norm": 0.0015118003357201815, - "learning_rate": 0.00019999564611369598, - "loss": 46.0, - "step": 18455 - }, - { - "epoch": 2.972180844639478, - "grad_norm": 0.006015649531036615, - "learning_rate": 0.00019999564564159266, - "loss": 46.0, - "step": 18456 - }, - { - "epoch": 2.9723418817182656, - "grad_norm": 0.002204635413363576, - "learning_rate": 0.0001999956451694638, - "loss": 46.0, - "step": 18457 - }, - { - "epoch": 2.972502918797053, - "grad_norm": 0.003703678958117962, - "learning_rate": 0.0001999956446973093, - "loss": 46.0, - "step": 18458 - }, - { - "epoch": 2.9726639558758405, - "grad_norm": 0.003325363388285041, - "learning_rate": 0.0001999956442251292, - "loss": 46.0, - "step": 18459 - }, - { - "epoch": 2.972824992954628, - "grad_norm": 0.004380201920866966, - "learning_rate": 0.0001999956437529235, - "loss": 46.0, - "step": 18460 - }, - { - "epoch": 2.9729860300334154, - "grad_norm": 0.002804793184623122, - "learning_rate": 0.00019999564328069224, - "loss": 46.0, - "step": 18461 - }, - { - "epoch": 2.9731470671122024, - "grad_norm": 0.0028695680666714907, - "learning_rate": 0.0001999956428084354, - "loss": 46.0, - "step": 18462 - }, - { - "epoch": 2.97330810419099, - "grad_norm": 0.00822277832776308, - "learning_rate": 0.00019999564233615292, - "loss": 46.0, - "step": 18463 - }, - { - "epoch": 2.9734691412697773, - "grad_norm": 0.002469172002747655, - "learning_rate": 0.00019999564186384486, - "loss": 46.0, - "step": 18464 - }, - { - "epoch": 2.973630178348565, - "grad_norm": 0.0005723289796151221, - "learning_rate": 0.0001999956413915112, - "loss": 46.0, - "step": 18465 - }, - { - "epoch": 2.9737912154273523, - "grad_norm": 0.0018965598428621888, - "learning_rate": 0.00019999564091915196, - "loss": 46.0, - "step": 18466 - }, - { - "epoch": 2.9739522525061397, - "grad_norm": 0.00133640191052109, - "learning_rate": 0.00019999564044676714, - "loss": 46.0, - "step": 18467 - }, - { - "epoch": 2.9741132895849267, - "grad_norm": 0.00894093606621027, - "learning_rate": 0.0001999956399743567, - "loss": 46.0, - "step": 18468 - }, - { - "epoch": 2.974274326663714, - "grad_norm": 0.002226758748292923, - "learning_rate": 0.0001999956395019207, - "loss": 46.0, - "step": 18469 - }, - { - "epoch": 2.9744353637425016, - "grad_norm": 0.004566085524857044, - "learning_rate": 0.00019999563902945905, - "loss": 46.0, - "step": 18470 - }, - { - "epoch": 2.974596400821289, - "grad_norm": 0.006014668848365545, - "learning_rate": 0.00019999563855697185, - "loss": 46.0, - "step": 18471 - }, - { - "epoch": 2.9747574379000765, - "grad_norm": 0.002742860233411193, - "learning_rate": 0.00019999563808445903, - "loss": 46.0, - "step": 18472 - }, - { - "epoch": 2.974918474978864, - "grad_norm": 0.0037918067537248135, - "learning_rate": 0.00019999563761192066, - "loss": 46.0, - "step": 18473 - }, - { - "epoch": 2.9750795120576514, - "grad_norm": 0.004271207842975855, - "learning_rate": 0.00019999563713935664, - "loss": 46.0, - "step": 18474 - }, - { - "epoch": 2.975240549136439, - "grad_norm": 0.0038061838131397963, - "learning_rate": 0.00019999563666676706, - "loss": 46.0, - "step": 18475 - }, - { - "epoch": 2.9754015862152263, - "grad_norm": 0.001159599982202053, - "learning_rate": 0.0001999956361941519, - "loss": 46.0, - "step": 18476 - }, - { - "epoch": 2.9755626232940133, - "grad_norm": 0.0021892578806728125, - "learning_rate": 0.0001999956357215111, - "loss": 46.0, - "step": 18477 - }, - { - "epoch": 2.975723660372801, - "grad_norm": 0.0023154402151703835, - "learning_rate": 0.00019999563524884475, - "loss": 46.0, - "step": 18478 - }, - { - "epoch": 2.9758846974515882, - "grad_norm": 0.009547240100800991, - "learning_rate": 0.00019999563477615277, - "loss": 46.0, - "step": 18479 - }, - { - "epoch": 2.9760457345303757, - "grad_norm": 0.000793353421613574, - "learning_rate": 0.00019999563430343523, - "loss": 46.0, - "step": 18480 - }, - { - "epoch": 2.976206771609163, - "grad_norm": 0.006900258362293243, - "learning_rate": 0.0001999956338306921, - "loss": 46.0, - "step": 18481 - }, - { - "epoch": 2.97636780868795, - "grad_norm": 0.0014947090530768037, - "learning_rate": 0.00019999563335792333, - "loss": 46.0, - "step": 18482 - }, - { - "epoch": 2.9765288457667376, - "grad_norm": 0.004521749448031187, - "learning_rate": 0.000199995632885129, - "loss": 46.0, - "step": 18483 - }, - { - "epoch": 2.976689882845525, - "grad_norm": 0.001985024195164442, - "learning_rate": 0.00019999563241230908, - "loss": 46.0, - "step": 18484 - }, - { - "epoch": 2.9768509199243125, - "grad_norm": 0.001415968406945467, - "learning_rate": 0.00019999563193946353, - "loss": 46.0, - "step": 18485 - }, - { - "epoch": 2.9770119570031, - "grad_norm": 0.0012037853011861444, - "learning_rate": 0.00019999563146659243, - "loss": 46.0, - "step": 18486 - }, - { - "epoch": 2.9771729940818874, - "grad_norm": 0.0011512440396472812, - "learning_rate": 0.00019999563099369573, - "loss": 46.0, - "step": 18487 - }, - { - "epoch": 2.977334031160675, - "grad_norm": 0.0021884555462747812, - "learning_rate": 0.0001999956305207734, - "loss": 46.0, - "step": 18488 - }, - { - "epoch": 2.9774950682394623, - "grad_norm": 0.005605579353868961, - "learning_rate": 0.00019999563004782553, - "loss": 46.0, - "step": 18489 - }, - { - "epoch": 2.9776561053182498, - "grad_norm": 0.003936262335628271, - "learning_rate": 0.00019999562957485203, - "loss": 46.0, - "step": 18490 - }, - { - "epoch": 2.9778171423970368, - "grad_norm": 0.0010099461069330573, - "learning_rate": 0.00019999562910185295, - "loss": 46.0, - "step": 18491 - }, - { - "epoch": 2.977978179475824, - "grad_norm": 0.0025921945925801992, - "learning_rate": 0.00019999562862882826, - "loss": 46.0, - "step": 18492 - }, - { - "epoch": 2.9781392165546117, - "grad_norm": 0.0014950225595384836, - "learning_rate": 0.000199995628155778, - "loss": 46.0, - "step": 18493 - }, - { - "epoch": 2.978300253633399, - "grad_norm": 0.0087662935256958, - "learning_rate": 0.0001999956276827021, - "loss": 46.0, - "step": 18494 - }, - { - "epoch": 2.9784612907121866, - "grad_norm": 0.0038464716635644436, - "learning_rate": 0.00019999562720960065, - "loss": 46.0, - "step": 18495 - }, - { - "epoch": 2.978622327790974, - "grad_norm": 0.0013468189863488078, - "learning_rate": 0.0001999956267364736, - "loss": 46.0, - "step": 18496 - }, - { - "epoch": 2.978783364869761, - "grad_norm": 0.014001449570059776, - "learning_rate": 0.00019999562626332095, - "loss": 46.0, - "step": 18497 - }, - { - "epoch": 2.9789444019485485, - "grad_norm": 0.004450570326298475, - "learning_rate": 0.0001999956257901427, - "loss": 46.0, - "step": 18498 - }, - { - "epoch": 2.979105439027336, - "grad_norm": 0.0011002302635461092, - "learning_rate": 0.00019999562531693887, - "loss": 46.0, - "step": 18499 - }, - { - "epoch": 2.9792664761061234, - "grad_norm": 0.0022788571659475565, - "learning_rate": 0.00019999562484370945, - "loss": 46.0, - "step": 18500 - }, - { - "epoch": 2.979427513184911, - "grad_norm": 0.003244733205065131, - "learning_rate": 0.00019999562437045444, - "loss": 46.0, - "step": 18501 - }, - { - "epoch": 2.9795885502636983, - "grad_norm": 0.004255375359207392, - "learning_rate": 0.00019999562389717382, - "loss": 46.0, - "step": 18502 - }, - { - "epoch": 2.9797495873424857, - "grad_norm": 0.006406597327440977, - "learning_rate": 0.00019999562342386758, - "loss": 46.0, - "step": 18503 - }, - { - "epoch": 2.979910624421273, - "grad_norm": 0.0012827736791223288, - "learning_rate": 0.00019999562295053579, - "loss": 46.0, - "step": 18504 - }, - { - "epoch": 2.9800716615000606, - "grad_norm": 0.00554022379219532, - "learning_rate": 0.0001999956224771784, - "loss": 46.0, - "step": 18505 - }, - { - "epoch": 2.9802326985788476, - "grad_norm": 0.0045968820340931416, - "learning_rate": 0.00019999562200379538, - "loss": 46.0, - "step": 18506 - }, - { - "epoch": 2.980393735657635, - "grad_norm": 0.0027091035153716803, - "learning_rate": 0.00019999562153038682, - "loss": 46.0, - "step": 18507 - }, - { - "epoch": 2.9805547727364226, - "grad_norm": 0.014366013929247856, - "learning_rate": 0.00019999562105695265, - "loss": 46.0, - "step": 18508 - }, - { - "epoch": 2.98071580981521, - "grad_norm": 0.004478765185922384, - "learning_rate": 0.00019999562058349286, - "loss": 46.0, - "step": 18509 - }, - { - "epoch": 2.9808768468939975, - "grad_norm": 0.0019709656480699778, - "learning_rate": 0.0001999956201100075, - "loss": 46.0, - "step": 18510 - }, - { - "epoch": 2.9810378839727845, - "grad_norm": 0.001509210211224854, - "learning_rate": 0.00019999561963649653, - "loss": 46.0, - "step": 18511 - }, - { - "epoch": 2.981198921051572, - "grad_norm": 0.003124014940112829, - "learning_rate": 0.00019999561916295996, - "loss": 46.0, - "step": 18512 - }, - { - "epoch": 2.9813599581303594, - "grad_norm": 0.012680255807936192, - "learning_rate": 0.00019999561868939782, - "loss": 46.0, - "step": 18513 - }, - { - "epoch": 2.981520995209147, - "grad_norm": 0.004033701494336128, - "learning_rate": 0.00019999561821581007, - "loss": 46.0, - "step": 18514 - }, - { - "epoch": 2.9816820322879343, - "grad_norm": 0.00432217214256525, - "learning_rate": 0.00019999561774219676, - "loss": 46.0, - "step": 18515 - }, - { - "epoch": 2.9818430693667217, - "grad_norm": 0.00091934745432809, - "learning_rate": 0.0001999956172685578, - "loss": 46.0, - "step": 18516 - }, - { - "epoch": 2.982004106445509, - "grad_norm": 0.0025655035860836506, - "learning_rate": 0.0001999956167948933, - "loss": 46.0, - "step": 18517 - }, - { - "epoch": 2.9821651435242966, - "grad_norm": 0.0009710592566989362, - "learning_rate": 0.00019999561632120315, - "loss": 46.0, - "step": 18518 - }, - { - "epoch": 2.982326180603084, - "grad_norm": 0.002257666317746043, - "learning_rate": 0.00019999561584748747, - "loss": 46.0, - "step": 18519 - }, - { - "epoch": 2.9824872176818715, - "grad_norm": 0.003338546259328723, - "learning_rate": 0.00019999561537374617, - "loss": 46.0, - "step": 18520 - }, - { - "epoch": 2.9826482547606585, - "grad_norm": 0.003992764744907618, - "learning_rate": 0.00019999561489997925, - "loss": 46.0, - "step": 18521 - }, - { - "epoch": 2.982809291839446, - "grad_norm": 0.002327491994947195, - "learning_rate": 0.00019999561442618678, - "loss": 46.0, - "step": 18522 - }, - { - "epoch": 2.9829703289182334, - "grad_norm": 0.0035936960484832525, - "learning_rate": 0.00019999561395236867, - "loss": 46.0, - "step": 18523 - }, - { - "epoch": 2.983131365997021, - "grad_norm": 0.0011990476632490754, - "learning_rate": 0.000199995613478525, - "loss": 46.0, - "step": 18524 - }, - { - "epoch": 2.9832924030758083, - "grad_norm": 0.008181135170161724, - "learning_rate": 0.00019999561300465573, - "loss": 46.0, - "step": 18525 - }, - { - "epoch": 2.9834534401545953, - "grad_norm": 0.005735564976930618, - "learning_rate": 0.00019999561253076085, - "loss": 46.0, - "step": 18526 - }, - { - "epoch": 2.983614477233383, - "grad_norm": 0.0009439793648198247, - "learning_rate": 0.0001999956120568404, - "loss": 46.0, - "step": 18527 - }, - { - "epoch": 2.9837755143121703, - "grad_norm": 0.0027761005330830812, - "learning_rate": 0.00019999561158289434, - "loss": 46.0, - "step": 18528 - }, - { - "epoch": 2.9839365513909577, - "grad_norm": 0.00563826272264123, - "learning_rate": 0.00019999561110892267, - "loss": 46.0, - "step": 18529 - }, - { - "epoch": 2.984097588469745, - "grad_norm": 0.0022981413640081882, - "learning_rate": 0.00019999561063492545, - "loss": 46.0, - "step": 18530 - }, - { - "epoch": 2.9842586255485326, - "grad_norm": 0.0004431944980751723, - "learning_rate": 0.00019999561016090264, - "loss": 46.0, - "step": 18531 - }, - { - "epoch": 2.98441966262732, - "grad_norm": 0.002274968195706606, - "learning_rate": 0.00019999560968685418, - "loss": 46.0, - "step": 18532 - }, - { - "epoch": 2.9845806997061075, - "grad_norm": 0.0027848570607602596, - "learning_rate": 0.00019999560921278014, - "loss": 46.0, - "step": 18533 - }, - { - "epoch": 2.984741736784895, - "grad_norm": 0.0008386837434954941, - "learning_rate": 0.00019999560873868054, - "loss": 46.0, - "step": 18534 - }, - { - "epoch": 2.984902773863682, - "grad_norm": 0.006353685632348061, - "learning_rate": 0.00019999560826455533, - "loss": 46.0, - "step": 18535 - }, - { - "epoch": 2.9850638109424694, - "grad_norm": 0.0015298710204660892, - "learning_rate": 0.00019999560779040452, - "loss": 46.0, - "step": 18536 - }, - { - "epoch": 2.985224848021257, - "grad_norm": 0.010095451027154922, - "learning_rate": 0.00019999560731622814, - "loss": 46.0, - "step": 18537 - }, - { - "epoch": 2.9853858851000443, - "grad_norm": 0.0013281877618283033, - "learning_rate": 0.00019999560684202613, - "loss": 46.0, - "step": 18538 - }, - { - "epoch": 2.9855469221788318, - "grad_norm": 0.001293028355576098, - "learning_rate": 0.00019999560636779854, - "loss": 46.0, - "step": 18539 - }, - { - "epoch": 2.9857079592576192, - "grad_norm": 0.005644380114972591, - "learning_rate": 0.00019999560589354536, - "loss": 46.0, - "step": 18540 - }, - { - "epoch": 2.9858689963364062, - "grad_norm": 0.002164635807275772, - "learning_rate": 0.0001999956054192666, - "loss": 46.0, - "step": 18541 - }, - { - "epoch": 2.9860300334151937, - "grad_norm": 0.0010207355953752995, - "learning_rate": 0.00019999560494496225, - "loss": 46.0, - "step": 18542 - }, - { - "epoch": 2.986191070493981, - "grad_norm": 0.0008625152986496687, - "learning_rate": 0.00019999560447063228, - "loss": 46.0, - "step": 18543 - }, - { - "epoch": 2.9863521075727686, - "grad_norm": 0.0008602470043115318, - "learning_rate": 0.0001999956039962767, - "loss": 46.0, - "step": 18544 - }, - { - "epoch": 2.986513144651556, - "grad_norm": 0.0026290961541235447, - "learning_rate": 0.00019999560352189556, - "loss": 46.0, - "step": 18545 - }, - { - "epoch": 2.9866741817303435, - "grad_norm": 0.003948190715163946, - "learning_rate": 0.0001999956030474888, - "loss": 46.0, - "step": 18546 - }, - { - "epoch": 2.986835218809131, - "grad_norm": 0.0006668227142654359, - "learning_rate": 0.0001999956025730565, - "loss": 46.0, - "step": 18547 - }, - { - "epoch": 2.9869962558879184, - "grad_norm": 0.00663685193285346, - "learning_rate": 0.00019999560209859856, - "loss": 46.0, - "step": 18548 - }, - { - "epoch": 2.987157292966706, - "grad_norm": 0.006620586384087801, - "learning_rate": 0.00019999560162411504, - "loss": 46.0, - "step": 18549 - }, - { - "epoch": 2.987318330045493, - "grad_norm": 0.0052564614452421665, - "learning_rate": 0.0001999956011496059, - "loss": 46.0, - "step": 18550 - }, - { - "epoch": 2.9874793671242803, - "grad_norm": 0.0028195157647132874, - "learning_rate": 0.00019999560067507122, - "loss": 46.0, - "step": 18551 - }, - { - "epoch": 2.9876404042030678, - "grad_norm": 0.004455295391380787, - "learning_rate": 0.0001999956002005109, - "loss": 46.0, - "step": 18552 - }, - { - "epoch": 2.987801441281855, - "grad_norm": 0.007713985163718462, - "learning_rate": 0.00019999559972592502, - "loss": 46.0, - "step": 18553 - }, - { - "epoch": 2.9879624783606427, - "grad_norm": 0.002368438523262739, - "learning_rate": 0.00019999559925131351, - "loss": 46.0, - "step": 18554 - }, - { - "epoch": 2.9881235154394297, - "grad_norm": 0.0034312766510993242, - "learning_rate": 0.00019999559877667645, - "loss": 46.0, - "step": 18555 - }, - { - "epoch": 2.988284552518217, - "grad_norm": 0.0013110170839354396, - "learning_rate": 0.00019999559830201374, - "loss": 46.0, - "step": 18556 - }, - { - "epoch": 2.9884455895970046, - "grad_norm": 0.00045992841478437185, - "learning_rate": 0.0001999955978273255, - "loss": 46.0, - "step": 18557 - }, - { - "epoch": 2.988606626675792, - "grad_norm": 0.002845642389729619, - "learning_rate": 0.00019999559735261162, - "loss": 46.0, - "step": 18558 - }, - { - "epoch": 2.9887676637545795, - "grad_norm": 0.008416905999183655, - "learning_rate": 0.00019999559687787215, - "loss": 46.0, - "step": 18559 - }, - { - "epoch": 2.988928700833367, - "grad_norm": 0.0017323879292234778, - "learning_rate": 0.0001999955964031071, - "loss": 46.0, - "step": 18560 - }, - { - "epoch": 2.9890897379121544, - "grad_norm": 0.00040783046279102564, - "learning_rate": 0.00019999559592831644, - "loss": 46.0, - "step": 18561 - }, - { - "epoch": 2.989250774990942, - "grad_norm": 0.005353429354727268, - "learning_rate": 0.00019999559545350019, - "loss": 46.0, - "step": 18562 - }, - { - "epoch": 2.9894118120697293, - "grad_norm": 0.00431845523416996, - "learning_rate": 0.00019999559497865837, - "loss": 46.0, - "step": 18563 - }, - { - "epoch": 2.9895728491485163, - "grad_norm": 0.0011112710926681757, - "learning_rate": 0.00019999559450379093, - "loss": 46.0, - "step": 18564 - }, - { - "epoch": 2.9897338862273037, - "grad_norm": 0.009182791225612164, - "learning_rate": 0.00019999559402889791, - "loss": 46.0, - "step": 18565 - }, - { - "epoch": 2.989894923306091, - "grad_norm": 0.0023359821643680334, - "learning_rate": 0.0001999955935539793, - "loss": 46.0, - "step": 18566 - }, - { - "epoch": 2.9900559603848786, - "grad_norm": 0.0009438145789317787, - "learning_rate": 0.0001999955930790351, - "loss": 46.0, - "step": 18567 - }, - { - "epoch": 2.990216997463666, - "grad_norm": 0.005053383763879538, - "learning_rate": 0.00019999559260406525, - "loss": 46.0, - "step": 18568 - }, - { - "epoch": 2.9903780345424535, - "grad_norm": 0.004160483833402395, - "learning_rate": 0.00019999559212906988, - "loss": 46.0, - "step": 18569 - }, - { - "epoch": 2.9905390716212406, - "grad_norm": 0.0017807877156883478, - "learning_rate": 0.0001999955916540489, - "loss": 46.0, - "step": 18570 - }, - { - "epoch": 2.990700108700028, - "grad_norm": 0.0013404268538579345, - "learning_rate": 0.0001999955911790023, - "loss": 46.0, - "step": 18571 - }, - { - "epoch": 2.9908611457788155, - "grad_norm": 0.0020173233933746815, - "learning_rate": 0.00019999559070393012, - "loss": 46.0, - "step": 18572 - }, - { - "epoch": 2.991022182857603, - "grad_norm": 0.006248829886317253, - "learning_rate": 0.00019999559022883237, - "loss": 46.0, - "step": 18573 - }, - { - "epoch": 2.9911832199363904, - "grad_norm": 0.008519444614648819, - "learning_rate": 0.000199995589753709, - "loss": 46.0, - "step": 18574 - }, - { - "epoch": 2.991344257015178, - "grad_norm": 0.001395870465785265, - "learning_rate": 0.00019999558927856002, - "loss": 46.0, - "step": 18575 - }, - { - "epoch": 2.9915052940939653, - "grad_norm": 0.0026189512573182583, - "learning_rate": 0.00019999558880338548, - "loss": 46.0, - "step": 18576 - }, - { - "epoch": 2.9916663311727527, - "grad_norm": 0.005446328781545162, - "learning_rate": 0.00019999558832818533, - "loss": 46.0, - "step": 18577 - }, - { - "epoch": 2.99182736825154, - "grad_norm": 0.0017989056650549173, - "learning_rate": 0.00019999558785295957, - "loss": 46.0, - "step": 18578 - }, - { - "epoch": 2.991988405330327, - "grad_norm": 0.003046183381229639, - "learning_rate": 0.00019999558737770822, - "loss": 46.0, - "step": 18579 - }, - { - "epoch": 2.9921494424091146, - "grad_norm": 0.0068048653192818165, - "learning_rate": 0.0001999955869024313, - "loss": 46.0, - "step": 18580 - }, - { - "epoch": 2.992310479487902, - "grad_norm": 0.0014451493043452501, - "learning_rate": 0.0001999955864271288, - "loss": 46.0, - "step": 18581 - }, - { - "epoch": 2.9924715165666895, - "grad_norm": 0.006651455070823431, - "learning_rate": 0.00019999558595180066, - "loss": 46.0, - "step": 18582 - }, - { - "epoch": 2.992632553645477, - "grad_norm": 0.005244353320449591, - "learning_rate": 0.00019999558547644696, - "loss": 46.0, - "step": 18583 - }, - { - "epoch": 2.992793590724264, - "grad_norm": 0.0014879959635436535, - "learning_rate": 0.00019999558500106765, - "loss": 46.0, - "step": 18584 - }, - { - "epoch": 2.9929546278030514, - "grad_norm": 0.0012022884329780936, - "learning_rate": 0.00019999558452566275, - "loss": 46.0, - "step": 18585 - }, - { - "epoch": 2.993115664881839, - "grad_norm": 0.0038021320942789316, - "learning_rate": 0.00019999558405023226, - "loss": 46.0, - "step": 18586 - }, - { - "epoch": 2.9932767019606263, - "grad_norm": 0.0038193119689822197, - "learning_rate": 0.00019999558357477616, - "loss": 46.0, - "step": 18587 - }, - { - "epoch": 2.993437739039414, - "grad_norm": 0.002903102897107601, - "learning_rate": 0.0001999955830992945, - "loss": 46.0, - "step": 18588 - }, - { - "epoch": 2.9935987761182012, - "grad_norm": 0.004516868386417627, - "learning_rate": 0.00019999558262378723, - "loss": 46.0, - "step": 18589 - }, - { - "epoch": 2.9937598131969887, - "grad_norm": 0.0005320445052348077, - "learning_rate": 0.00019999558214825436, - "loss": 46.0, - "step": 18590 - }, - { - "epoch": 2.993920850275776, - "grad_norm": 0.002455269917845726, - "learning_rate": 0.0001999955816726959, - "loss": 46.0, - "step": 18591 - }, - { - "epoch": 2.9940818873545636, - "grad_norm": 0.0030678401235491037, - "learning_rate": 0.00019999558119711183, - "loss": 46.0, - "step": 18592 - }, - { - "epoch": 2.994242924433351, - "grad_norm": 0.0007584202685393393, - "learning_rate": 0.0001999955807215022, - "loss": 46.0, - "step": 18593 - }, - { - "epoch": 2.994403961512138, - "grad_norm": 0.0019652366172522306, - "learning_rate": 0.00019999558024586696, - "loss": 46.0, - "step": 18594 - }, - { - "epoch": 2.9945649985909255, - "grad_norm": 0.002631678245961666, - "learning_rate": 0.00019999557977020614, - "loss": 46.0, - "step": 18595 - }, - { - "epoch": 2.994726035669713, - "grad_norm": 0.0021489951759576797, - "learning_rate": 0.00019999557929451967, - "loss": 46.0, - "step": 18596 - }, - { - "epoch": 2.9948870727485004, - "grad_norm": 0.0026354810688644648, - "learning_rate": 0.00019999557881880765, - "loss": 46.0, - "step": 18597 - }, - { - "epoch": 2.995048109827288, - "grad_norm": 0.0016480355989187956, - "learning_rate": 0.00019999557834307006, - "loss": 46.0, - "step": 18598 - }, - { - "epoch": 2.995209146906075, - "grad_norm": 0.0025088803377002478, - "learning_rate": 0.00019999557786730683, - "loss": 46.0, - "step": 18599 - }, - { - "epoch": 2.9953701839848623, - "grad_norm": 0.0015875728568062186, - "learning_rate": 0.00019999557739151805, - "loss": 46.0, - "step": 18600 - }, - { - "epoch": 2.9955312210636498, - "grad_norm": 0.002014116384088993, - "learning_rate": 0.00019999557691570364, - "loss": 46.0, - "step": 18601 - }, - { - "epoch": 2.9956922581424372, - "grad_norm": 0.002914705779403448, - "learning_rate": 0.00019999557643986363, - "loss": 46.0, - "step": 18602 - }, - { - "epoch": 2.9958532952212247, - "grad_norm": 0.005247557535767555, - "learning_rate": 0.00019999557596399808, - "loss": 46.0, - "step": 18603 - }, - { - "epoch": 2.996014332300012, - "grad_norm": 0.0063356030732393265, - "learning_rate": 0.0001999955754881069, - "loss": 46.0, - "step": 18604 - }, - { - "epoch": 2.9961753693787996, - "grad_norm": 0.0030614477582275867, - "learning_rate": 0.0001999955750121901, - "loss": 46.0, - "step": 18605 - }, - { - "epoch": 2.996336406457587, - "grad_norm": 0.015649404376745224, - "learning_rate": 0.00019999557453624775, - "loss": 46.0, - "step": 18606 - }, - { - "epoch": 2.9964974435363745, - "grad_norm": 0.00719917006790638, - "learning_rate": 0.00019999557406027977, - "loss": 46.0, - "step": 18607 - }, - { - "epoch": 2.9966584806151615, - "grad_norm": 0.012719051912426949, - "learning_rate": 0.00019999557358428623, - "loss": 46.0, - "step": 18608 - }, - { - "epoch": 2.996819517693949, - "grad_norm": 0.0011661925818771124, - "learning_rate": 0.0001999955731082671, - "loss": 46.0, - "step": 18609 - }, - { - "epoch": 2.9969805547727364, - "grad_norm": 0.001103631337173283, - "learning_rate": 0.00019999557263222233, - "loss": 46.0, - "step": 18610 - }, - { - "epoch": 2.997141591851524, - "grad_norm": 0.0011683221673592925, - "learning_rate": 0.00019999557215615198, - "loss": 46.0, - "step": 18611 - }, - { - "epoch": 2.9973026289303113, - "grad_norm": 0.0024710465222597122, - "learning_rate": 0.00019999557168005606, - "loss": 46.0, - "step": 18612 - }, - { - "epoch": 2.9974636660090987, - "grad_norm": 0.013376552611589432, - "learning_rate": 0.00019999557120393456, - "loss": 46.0, - "step": 18613 - }, - { - "epoch": 2.9976247030878858, - "grad_norm": 0.0036130868829786777, - "learning_rate": 0.00019999557072778742, - "loss": 46.0, - "step": 18614 - }, - { - "epoch": 2.997785740166673, - "grad_norm": 0.009003588929772377, - "learning_rate": 0.00019999557025161474, - "loss": 46.0, - "step": 18615 - }, - { - "epoch": 2.9979467772454607, - "grad_norm": 0.007207829505205154, - "learning_rate": 0.00019999556977541642, - "loss": 46.0, - "step": 18616 - }, - { - "epoch": 2.998107814324248, - "grad_norm": 0.008652038872241974, - "learning_rate": 0.00019999556929919252, - "loss": 46.0, - "step": 18617 - }, - { - "epoch": 2.9982688514030356, - "grad_norm": 0.004082380793988705, - "learning_rate": 0.00019999556882294302, - "loss": 46.0, - "step": 18618 - }, - { - "epoch": 2.998429888481823, - "grad_norm": 0.0042772721499204636, - "learning_rate": 0.00019999556834666794, - "loss": 46.0, - "step": 18619 - }, - { - "epoch": 2.9985909255606105, - "grad_norm": 0.006474708206951618, - "learning_rate": 0.00019999556787036725, - "loss": 46.0, - "step": 18620 - }, - { - "epoch": 2.998751962639398, - "grad_norm": 0.014925863593816757, - "learning_rate": 0.000199995567394041, - "loss": 46.0, - "step": 18621 - }, - { - "epoch": 2.9989129997181854, - "grad_norm": 0.011634370312094688, - "learning_rate": 0.00019999556691768913, - "loss": 46.0, - "step": 18622 - }, - { - "epoch": 2.9990740367969724, - "grad_norm": 0.0018507307395339012, - "learning_rate": 0.00019999556644131165, - "loss": 46.0, - "step": 18623 - }, - { - "epoch": 2.99923507387576, - "grad_norm": 0.0019304245943203568, - "learning_rate": 0.0001999955659649086, - "loss": 46.0, - "step": 18624 - }, - { - "epoch": 2.9993961109545473, - "grad_norm": 0.0014087522868067026, - "learning_rate": 0.00019999556548847997, - "loss": 46.0, - "step": 18625 - }, - { - "epoch": 2.9995571480333347, - "grad_norm": 0.01107286848127842, - "learning_rate": 0.00019999556501202573, - "loss": 46.0, - "step": 18626 - }, - { - "epoch": 2.999718185112122, - "grad_norm": 0.0028081524651497602, - "learning_rate": 0.0001999955645355459, - "loss": 46.0, - "step": 18627 - }, - { - "epoch": 2.999879222190909, - "grad_norm": 0.005611950531601906, - "learning_rate": 0.00019999556405904048, - "loss": 46.0, - "step": 18628 - }, - { - "epoch": 2.999879222190909, - "eval_loss": 11.5, - "eval_runtime": 14.5744, - "eval_samples_per_second": 179.424, - "eval_steps_per_second": 89.746, - "step": 18628 - }, - { - "epoch": 3.0000805185393937, - "grad_norm": 0.001445271191187203, - "learning_rate": 0.00019999556358250945, - "loss": 46.0, - "step": 18629 - }, - { - "epoch": 3.000241555618181, - "grad_norm": 0.015089699998497963, - "learning_rate": 0.00019999556310595283, - "loss": 46.0, - "step": 18630 - }, - { - "epoch": 3.0004025926969686, - "grad_norm": 0.008645368739962578, - "learning_rate": 0.00019999556262937062, - "loss": 46.0, - "step": 18631 - }, - { - "epoch": 3.000563629775756, - "grad_norm": 0.009409420192241669, - "learning_rate": 0.0001999955621527628, - "loss": 46.0, - "step": 18632 - }, - { - "epoch": 3.000724666854543, - "grad_norm": 0.0022755623795092106, - "learning_rate": 0.0001999955616761294, - "loss": 46.0, - "step": 18633 - }, - { - "epoch": 3.0008857039333305, - "grad_norm": 0.003756490768864751, - "learning_rate": 0.00019999556119947042, - "loss": 46.0, - "step": 18634 - }, - { - "epoch": 3.001046741012118, - "grad_norm": 0.00836783368140459, - "learning_rate": 0.00019999556072278581, - "loss": 46.0, - "step": 18635 - }, - { - "epoch": 3.0012077780909054, - "grad_norm": 0.008819560520350933, - "learning_rate": 0.00019999556024607565, - "loss": 46.0, - "step": 18636 - }, - { - "epoch": 3.001368815169693, - "grad_norm": 0.0006767732556909323, - "learning_rate": 0.0001999955597693399, - "loss": 46.0, - "step": 18637 - }, - { - "epoch": 3.0015298522484803, - "grad_norm": 0.008921225555241108, - "learning_rate": 0.00019999555929257852, - "loss": 46.0, - "step": 18638 - }, - { - "epoch": 3.001690889327268, - "grad_norm": 0.003402446396648884, - "learning_rate": 0.00019999555881579159, - "loss": 46.0, - "step": 18639 - }, - { - "epoch": 3.001851926406055, - "grad_norm": 0.0027959682047367096, - "learning_rate": 0.000199995558338979, - "loss": 46.0, - "step": 18640 - }, - { - "epoch": 3.0020129634848423, - "grad_norm": 0.0032038178760558367, - "learning_rate": 0.00019999555786214085, - "loss": 46.0, - "step": 18641 - }, - { - "epoch": 3.0021740005636297, - "grad_norm": 0.001509590889327228, - "learning_rate": 0.0001999955573852771, - "loss": 46.0, - "step": 18642 - }, - { - "epoch": 3.002335037642417, - "grad_norm": 0.0013796513667330146, - "learning_rate": 0.00019999555690838777, - "loss": 46.0, - "step": 18643 - }, - { - "epoch": 3.0024960747212046, - "grad_norm": 0.0012693741591647267, - "learning_rate": 0.00019999555643147285, - "loss": 46.0, - "step": 18644 - }, - { - "epoch": 3.002657111799992, - "grad_norm": 0.0009839271660894156, - "learning_rate": 0.00019999555595453234, - "loss": 46.0, - "step": 18645 - }, - { - "epoch": 3.0028181488787795, - "grad_norm": 0.00999511405825615, - "learning_rate": 0.00019999555547756622, - "loss": 46.0, - "step": 18646 - }, - { - "epoch": 3.0029791859575665, - "grad_norm": 0.00944233126938343, - "learning_rate": 0.0001999955550005745, - "loss": 46.0, - "step": 18647 - }, - { - "epoch": 3.003140223036354, - "grad_norm": 0.002734616631641984, - "learning_rate": 0.00019999555452355718, - "loss": 46.0, - "step": 18648 - }, - { - "epoch": 3.0033012601151414, - "grad_norm": 0.024181615561246872, - "learning_rate": 0.0001999955540465143, - "loss": 46.0, - "step": 18649 - }, - { - "epoch": 3.003462297193929, - "grad_norm": 0.0066743940114974976, - "learning_rate": 0.0001999955535694458, - "loss": 46.0, - "step": 18650 - }, - { - "epoch": 3.0036233342727163, - "grad_norm": 0.0005242424085736275, - "learning_rate": 0.00019999555309235172, - "loss": 46.0, - "step": 18651 - }, - { - "epoch": 3.003784371351504, - "grad_norm": 0.007995721884071827, - "learning_rate": 0.00019999555261523204, - "loss": 46.0, - "step": 18652 - }, - { - "epoch": 3.0039454084302912, - "grad_norm": 0.0018966497154906392, - "learning_rate": 0.00019999555213808678, - "loss": 46.0, - "step": 18653 - }, - { - "epoch": 3.0041064455090787, - "grad_norm": 0.0017775587039068341, - "learning_rate": 0.0001999955516609159, - "loss": 46.0, - "step": 18654 - }, - { - "epoch": 3.0042674825878657, - "grad_norm": 0.005550839938223362, - "learning_rate": 0.00019999555118371945, - "loss": 46.0, - "step": 18655 - }, - { - "epoch": 3.004428519666653, - "grad_norm": 0.0010232527274638414, - "learning_rate": 0.0001999955507064974, - "loss": 46.0, - "step": 18656 - }, - { - "epoch": 3.0045895567454406, - "grad_norm": 0.0020085815340280533, - "learning_rate": 0.00019999555022924973, - "loss": 46.0, - "step": 18657 - }, - { - "epoch": 3.004750593824228, - "grad_norm": 0.003025443758815527, - "learning_rate": 0.00019999554975197648, - "loss": 46.0, - "step": 18658 - }, - { - "epoch": 3.0049116309030155, - "grad_norm": 0.004842628259211779, - "learning_rate": 0.00019999554927467767, - "loss": 46.0, - "step": 18659 - }, - { - "epoch": 3.005072667981803, - "grad_norm": 0.005136552266776562, - "learning_rate": 0.00019999554879735325, - "loss": 46.0, - "step": 18660 - }, - { - "epoch": 3.0052337050605904, - "grad_norm": 0.002330041490495205, - "learning_rate": 0.0001999955483200032, - "loss": 46.0, - "step": 18661 - }, - { - "epoch": 3.0053947421393774, - "grad_norm": 0.0011494910577312112, - "learning_rate": 0.00019999554784262758, - "loss": 46.0, - "step": 18662 - }, - { - "epoch": 3.005555779218165, - "grad_norm": 0.0015137909213081002, - "learning_rate": 0.0001999955473652264, - "loss": 46.0, - "step": 18663 - }, - { - "epoch": 3.0057168162969523, - "grad_norm": 0.0007970886072143912, - "learning_rate": 0.0001999955468877996, - "loss": 46.0, - "step": 18664 - }, - { - "epoch": 3.0058778533757398, - "grad_norm": 0.006009059492498636, - "learning_rate": 0.0001999955464103472, - "loss": 46.0, - "step": 18665 - }, - { - "epoch": 3.006038890454527, - "grad_norm": 0.0021226785611361265, - "learning_rate": 0.0001999955459328692, - "loss": 46.0, - "step": 18666 - }, - { - "epoch": 3.0061999275333147, - "grad_norm": 0.0033282616641372442, - "learning_rate": 0.00019999554545536562, - "loss": 46.0, - "step": 18667 - }, - { - "epoch": 3.006360964612102, - "grad_norm": 0.00737522728741169, - "learning_rate": 0.00019999554497783644, - "loss": 46.0, - "step": 18668 - }, - { - "epoch": 3.006522001690889, - "grad_norm": 0.0076141394674777985, - "learning_rate": 0.00019999554450028168, - "loss": 46.0, - "step": 18669 - }, - { - "epoch": 3.0066830387696766, - "grad_norm": 0.003813562449067831, - "learning_rate": 0.0001999955440227013, - "loss": 46.0, - "step": 18670 - }, - { - "epoch": 3.006844075848464, - "grad_norm": 0.006963587366044521, - "learning_rate": 0.00019999554354509537, - "loss": 46.0, - "step": 18671 - }, - { - "epoch": 3.0070051129272515, - "grad_norm": 0.010456070303916931, - "learning_rate": 0.00019999554306746379, - "loss": 46.0, - "step": 18672 - }, - { - "epoch": 3.007166150006039, - "grad_norm": 0.0037042913027107716, - "learning_rate": 0.00019999554258980667, - "loss": 46.0, - "step": 18673 - }, - { - "epoch": 3.0073271870848264, - "grad_norm": 0.0019664722494781017, - "learning_rate": 0.00019999554211212392, - "loss": 46.0, - "step": 18674 - }, - { - "epoch": 3.007488224163614, - "grad_norm": 0.0022088796831667423, - "learning_rate": 0.00019999554163441555, - "loss": 46.0, - "step": 18675 - }, - { - "epoch": 3.0076492612424013, - "grad_norm": 0.00705556757748127, - "learning_rate": 0.00019999554115668165, - "loss": 46.0, - "step": 18676 - }, - { - "epoch": 3.0078102983211883, - "grad_norm": 0.001226063002832234, - "learning_rate": 0.0001999955406789221, - "loss": 46.0, - "step": 18677 - }, - { - "epoch": 3.0079713353999757, - "grad_norm": 0.0009407844627276063, - "learning_rate": 0.000199995540201137, - "loss": 46.0, - "step": 18678 - }, - { - "epoch": 3.008132372478763, - "grad_norm": 0.0007975497283041477, - "learning_rate": 0.0001999955397233263, - "loss": 46.0, - "step": 18679 - }, - { - "epoch": 3.0082934095575506, - "grad_norm": 0.004013837780803442, - "learning_rate": 0.00019999553924549, - "loss": 46.0, - "step": 18680 - }, - { - "epoch": 3.008454446636338, - "grad_norm": 0.0006178237963467836, - "learning_rate": 0.0001999955387676281, - "loss": 46.0, - "step": 18681 - }, - { - "epoch": 3.0086154837151255, - "grad_norm": 0.0010368465445935726, - "learning_rate": 0.00019999553828974062, - "loss": 46.0, - "step": 18682 - }, - { - "epoch": 3.008776520793913, - "grad_norm": 0.00988208968192339, - "learning_rate": 0.00019999553781182753, - "loss": 46.0, - "step": 18683 - }, - { - "epoch": 3.0089375578727, - "grad_norm": 0.005435602739453316, - "learning_rate": 0.00019999553733388885, - "loss": 46.0, - "step": 18684 - }, - { - "epoch": 3.0090985949514875, - "grad_norm": 0.005490770097821951, - "learning_rate": 0.00019999553685592458, - "loss": 46.0, - "step": 18685 - }, - { - "epoch": 3.009259632030275, - "grad_norm": 0.009492075070738792, - "learning_rate": 0.0001999955363779347, - "loss": 46.0, - "step": 18686 - }, - { - "epoch": 3.0094206691090624, - "grad_norm": 0.0025694286450743675, - "learning_rate": 0.00019999553589991926, - "loss": 46.0, - "step": 18687 - }, - { - "epoch": 3.00958170618785, - "grad_norm": 0.001257500029169023, - "learning_rate": 0.00019999553542187818, - "loss": 46.0, - "step": 18688 - }, - { - "epoch": 3.0097427432666373, - "grad_norm": 0.020698247477412224, - "learning_rate": 0.00019999553494381156, - "loss": 46.0, - "step": 18689 - }, - { - "epoch": 3.0099037803454247, - "grad_norm": 0.002571235643699765, - "learning_rate": 0.0001999955344657193, - "loss": 46.0, - "step": 18690 - }, - { - "epoch": 3.0100648174242117, - "grad_norm": 0.001914593973197043, - "learning_rate": 0.0001999955339876015, - "loss": 46.0, - "step": 18691 - }, - { - "epoch": 3.010225854502999, - "grad_norm": 0.007552071940153837, - "learning_rate": 0.00019999553350945806, - "loss": 46.0, - "step": 18692 - }, - { - "epoch": 3.0103868915817866, - "grad_norm": 0.002395815448835492, - "learning_rate": 0.00019999553303128904, - "loss": 46.0, - "step": 18693 - }, - { - "epoch": 3.010547928660574, - "grad_norm": 0.002316900063306093, - "learning_rate": 0.0001999955325530944, - "loss": 46.0, - "step": 18694 - }, - { - "epoch": 3.0107089657393615, - "grad_norm": 0.000937719305511564, - "learning_rate": 0.00019999553207487422, - "loss": 46.0, - "step": 18695 - }, - { - "epoch": 3.010870002818149, - "grad_norm": 0.003263594349846244, - "learning_rate": 0.0001999955315966284, - "loss": 46.0, - "step": 18696 - }, - { - "epoch": 3.0110310398969364, - "grad_norm": 0.0007541762897744775, - "learning_rate": 0.00019999553111835702, - "loss": 46.0, - "step": 18697 - }, - { - "epoch": 3.0111920769757234, - "grad_norm": 0.002288545947521925, - "learning_rate": 0.00019999553064006, - "loss": 46.0, - "step": 18698 - }, - { - "epoch": 3.011353114054511, - "grad_norm": 0.00357170682400465, - "learning_rate": 0.0001999955301617374, - "loss": 46.0, - "step": 18699 - }, - { - "epoch": 3.0115141511332983, - "grad_norm": 0.0012144313659518957, - "learning_rate": 0.00019999552968338926, - "loss": 46.0, - "step": 18700 - }, - { - "epoch": 3.011675188212086, - "grad_norm": 0.001376515137962997, - "learning_rate": 0.0001999955292050155, - "loss": 46.0, - "step": 18701 - }, - { - "epoch": 3.0118362252908732, - "grad_norm": 0.001987120136618614, - "learning_rate": 0.0001999955287266161, - "loss": 46.0, - "step": 18702 - }, - { - "epoch": 3.0119972623696607, - "grad_norm": 0.0013515574391931295, - "learning_rate": 0.00019999552824819116, - "loss": 46.0, - "step": 18703 - }, - { - "epoch": 3.012158299448448, - "grad_norm": 0.005974846426397562, - "learning_rate": 0.0001999955277697406, - "loss": 46.0, - "step": 18704 - }, - { - "epoch": 3.0123193365272356, - "grad_norm": 0.0016517681069672108, - "learning_rate": 0.00019999552729126446, - "loss": 46.0, - "step": 18705 - }, - { - "epoch": 3.0124803736060226, - "grad_norm": 0.0008199273725040257, - "learning_rate": 0.00019999552681276272, - "loss": 46.0, - "step": 18706 - }, - { - "epoch": 3.01264141068481, - "grad_norm": 0.0004188912862446159, - "learning_rate": 0.0001999955263342354, - "loss": 46.0, - "step": 18707 - }, - { - "epoch": 3.0128024477635975, - "grad_norm": 0.0023702443577349186, - "learning_rate": 0.00019999552585568247, - "loss": 46.0, - "step": 18708 - }, - { - "epoch": 3.012963484842385, - "grad_norm": 0.004834124818444252, - "learning_rate": 0.00019999552537710392, - "loss": 46.0, - "step": 18709 - }, - { - "epoch": 3.0131245219211724, - "grad_norm": 0.0008324910886585712, - "learning_rate": 0.00019999552489849984, - "loss": 46.0, - "step": 18710 - }, - { - "epoch": 3.01328555899996, - "grad_norm": 0.0036300348583608866, - "learning_rate": 0.00019999552441987012, - "loss": 46.0, - "step": 18711 - }, - { - "epoch": 3.0134465960787473, - "grad_norm": 0.0016632035840302706, - "learning_rate": 0.0001999955239412148, - "loss": 46.0, - "step": 18712 - }, - { - "epoch": 3.0136076331575343, - "grad_norm": 0.0021418659016489983, - "learning_rate": 0.0001999955234625339, - "loss": 46.0, - "step": 18713 - }, - { - "epoch": 3.013768670236322, - "grad_norm": 0.0009746663854457438, - "learning_rate": 0.00019999552298382743, - "loss": 46.0, - "step": 18714 - }, - { - "epoch": 3.0139297073151092, - "grad_norm": 0.001135788974352181, - "learning_rate": 0.00019999552250509536, - "loss": 46.0, - "step": 18715 - }, - { - "epoch": 3.0140907443938967, - "grad_norm": 0.0006664918037131429, - "learning_rate": 0.0001999955220263377, - "loss": 46.0, - "step": 18716 - }, - { - "epoch": 3.014251781472684, - "grad_norm": 0.00045535352546721697, - "learning_rate": 0.0001999955215475544, - "loss": 46.0, - "step": 18717 - }, - { - "epoch": 3.0144128185514716, - "grad_norm": 0.005772685632109642, - "learning_rate": 0.00019999552106874556, - "loss": 46.0, - "step": 18718 - }, - { - "epoch": 3.014573855630259, - "grad_norm": 0.0056907483376562595, - "learning_rate": 0.00019999552058991106, - "loss": 46.0, - "step": 18719 - }, - { - "epoch": 3.014734892709046, - "grad_norm": 0.0018697796622291207, - "learning_rate": 0.00019999552011105103, - "loss": 46.0, - "step": 18720 - }, - { - "epoch": 3.0148959297878335, - "grad_norm": 0.0012618767796084285, - "learning_rate": 0.00019999551963216538, - "loss": 46.0, - "step": 18721 - }, - { - "epoch": 3.015056966866621, - "grad_norm": 0.004601877648383379, - "learning_rate": 0.00019999551915325414, - "loss": 46.0, - "step": 18722 - }, - { - "epoch": 3.0152180039454084, - "grad_norm": 0.0019418438896536827, - "learning_rate": 0.0001999955186743173, - "loss": 46.0, - "step": 18723 - }, - { - "epoch": 3.015379041024196, - "grad_norm": 0.002044722205027938, - "learning_rate": 0.00019999551819535486, - "loss": 46.0, - "step": 18724 - }, - { - "epoch": 3.0155400781029833, - "grad_norm": 0.0032433385495096445, - "learning_rate": 0.00019999551771636686, - "loss": 46.0, - "step": 18725 - }, - { - "epoch": 3.0157011151817708, - "grad_norm": 0.004019223619252443, - "learning_rate": 0.00019999551723735325, - "loss": 46.0, - "step": 18726 - }, - { - "epoch": 3.015862152260558, - "grad_norm": 0.0028463841881603003, - "learning_rate": 0.00019999551675831402, - "loss": 46.0, - "step": 18727 - }, - { - "epoch": 3.016023189339345, - "grad_norm": 0.0025055555161088705, - "learning_rate": 0.00019999551627924924, - "loss": 46.0, - "step": 18728 - }, - { - "epoch": 3.0161842264181327, - "grad_norm": 0.0006456353585235775, - "learning_rate": 0.00019999551580015884, - "loss": 46.0, - "step": 18729 - }, - { - "epoch": 3.01634526349692, - "grad_norm": 0.0033867210149765015, - "learning_rate": 0.00019999551532104285, - "loss": 46.0, - "step": 18730 - }, - { - "epoch": 3.0165063005757076, - "grad_norm": 0.0036401154939085245, - "learning_rate": 0.00019999551484190127, - "loss": 46.0, - "step": 18731 - }, - { - "epoch": 3.016667337654495, - "grad_norm": 0.006417823024094105, - "learning_rate": 0.00019999551436273409, - "loss": 46.0, - "step": 18732 - }, - { - "epoch": 3.0168283747332825, - "grad_norm": 0.002617353107780218, - "learning_rate": 0.00019999551388354134, - "loss": 46.0, - "step": 18733 - }, - { - "epoch": 3.01698941181207, - "grad_norm": 0.002480825176462531, - "learning_rate": 0.00019999551340432297, - "loss": 46.0, - "step": 18734 - }, - { - "epoch": 3.017150448890857, - "grad_norm": 0.0038271714001893997, - "learning_rate": 0.000199995512925079, - "loss": 46.0, - "step": 18735 - }, - { - "epoch": 3.0173114859696444, - "grad_norm": 0.019938984885811806, - "learning_rate": 0.00019999551244580946, - "loss": 46.0, - "step": 18736 - }, - { - "epoch": 3.017472523048432, - "grad_norm": 0.00265054265037179, - "learning_rate": 0.0001999955119665143, - "loss": 46.0, - "step": 18737 - }, - { - "epoch": 3.0176335601272193, - "grad_norm": 0.007168011274188757, - "learning_rate": 0.00019999551148719357, - "loss": 46.0, - "step": 18738 - }, - { - "epoch": 3.0177945972060067, - "grad_norm": 0.0014649019576609135, - "learning_rate": 0.00019999551100784724, - "loss": 46.0, - "step": 18739 - }, - { - "epoch": 3.017955634284794, - "grad_norm": 0.003237906377762556, - "learning_rate": 0.0001999955105284753, - "loss": 46.0, - "step": 18740 - }, - { - "epoch": 3.0181166713635816, - "grad_norm": 0.0016598334768787026, - "learning_rate": 0.0001999955100490778, - "loss": 46.0, - "step": 18741 - }, - { - "epoch": 3.0182777084423686, - "grad_norm": 0.0014350726269185543, - "learning_rate": 0.00019999550956965466, - "loss": 46.0, - "step": 18742 - }, - { - "epoch": 3.018438745521156, - "grad_norm": 0.0027885762974619865, - "learning_rate": 0.00019999550909020596, - "loss": 46.0, - "step": 18743 - }, - { - "epoch": 3.0185997825999435, - "grad_norm": 0.005374143365770578, - "learning_rate": 0.00019999550861073167, - "loss": 46.0, - "step": 18744 - }, - { - "epoch": 3.018760819678731, - "grad_norm": 0.004725174512714148, - "learning_rate": 0.0001999955081312318, - "loss": 46.0, - "step": 18745 - }, - { - "epoch": 3.0189218567575185, - "grad_norm": 0.004258784465491772, - "learning_rate": 0.0001999955076517063, - "loss": 46.0, - "step": 18746 - }, - { - "epoch": 3.019082893836306, - "grad_norm": 0.003756534308195114, - "learning_rate": 0.0001999955071721552, - "loss": 46.0, - "step": 18747 - }, - { - "epoch": 3.0192439309150934, - "grad_norm": 0.0032782878261059523, - "learning_rate": 0.00019999550669257853, - "loss": 46.0, - "step": 18748 - }, - { - "epoch": 3.019404967993881, - "grad_norm": 0.000980779412202537, - "learning_rate": 0.00019999550621297628, - "loss": 46.0, - "step": 18749 - }, - { - "epoch": 3.019566005072668, - "grad_norm": 0.003476217156276107, - "learning_rate": 0.00019999550573334839, - "loss": 46.0, - "step": 18750 - }, - { - "epoch": 3.0197270421514553, - "grad_norm": 0.005282679107040167, - "learning_rate": 0.00019999550525369496, - "loss": 46.0, - "step": 18751 - }, - { - "epoch": 3.0198880792302427, - "grad_norm": 0.007214238867163658, - "learning_rate": 0.0001999955047740159, - "loss": 46.0, - "step": 18752 - }, - { - "epoch": 3.02004911630903, - "grad_norm": 0.0019638959784060717, - "learning_rate": 0.00019999550429431124, - "loss": 46.0, - "step": 18753 - }, - { - "epoch": 3.0202101533878176, - "grad_norm": 0.005872043315321207, - "learning_rate": 0.00019999550381458102, - "loss": 46.0, - "step": 18754 - }, - { - "epoch": 3.020371190466605, - "grad_norm": 0.007495333906263113, - "learning_rate": 0.00019999550333482517, - "loss": 46.0, - "step": 18755 - }, - { - "epoch": 3.0205322275453925, - "grad_norm": 0.006799828726798296, - "learning_rate": 0.00019999550285504375, - "loss": 46.0, - "step": 18756 - }, - { - "epoch": 3.0206932646241795, - "grad_norm": 0.00994163192808628, - "learning_rate": 0.00019999550237523672, - "loss": 46.0, - "step": 18757 - }, - { - "epoch": 3.020854301702967, - "grad_norm": 0.0038626380264759064, - "learning_rate": 0.0001999955018954041, - "loss": 46.0, - "step": 18758 - }, - { - "epoch": 3.0210153387817544, - "grad_norm": 0.004801772069185972, - "learning_rate": 0.0001999955014155459, - "loss": 46.0, - "step": 18759 - }, - { - "epoch": 3.021176375860542, - "grad_norm": 0.001676554442383349, - "learning_rate": 0.0001999955009356621, - "loss": 46.0, - "step": 18760 - }, - { - "epoch": 3.0213374129393293, - "grad_norm": 0.003552692010998726, - "learning_rate": 0.0001999955004557527, - "loss": 46.0, - "step": 18761 - }, - { - "epoch": 3.021498450018117, - "grad_norm": 0.0015632720896974206, - "learning_rate": 0.00019999549997581773, - "loss": 46.0, - "step": 18762 - }, - { - "epoch": 3.0216594870969042, - "grad_norm": 0.007069546729326248, - "learning_rate": 0.00019999549949585715, - "loss": 46.0, - "step": 18763 - }, - { - "epoch": 3.0218205241756912, - "grad_norm": 0.005410053767263889, - "learning_rate": 0.00019999549901587095, - "loss": 46.0, - "step": 18764 - }, - { - "epoch": 3.0219815612544787, - "grad_norm": 0.0020497876685112715, - "learning_rate": 0.0001999954985358592, - "loss": 46.0, - "step": 18765 - }, - { - "epoch": 3.022142598333266, - "grad_norm": 0.0029364845249801874, - "learning_rate": 0.00019999549805582183, - "loss": 46.0, - "step": 18766 - }, - { - "epoch": 3.0223036354120536, - "grad_norm": 0.006781401578336954, - "learning_rate": 0.00019999549757575887, - "loss": 46.0, - "step": 18767 - }, - { - "epoch": 3.022464672490841, - "grad_norm": 0.004405970685184002, - "learning_rate": 0.00019999549709567033, - "loss": 46.0, - "step": 18768 - }, - { - "epoch": 3.0226257095696285, - "grad_norm": 0.003249007510021329, - "learning_rate": 0.00019999549661555617, - "loss": 46.0, - "step": 18769 - }, - { - "epoch": 3.022786746648416, - "grad_norm": 0.0037679567467421293, - "learning_rate": 0.00019999549613541643, - "loss": 46.0, - "step": 18770 - }, - { - "epoch": 3.0229477837272034, - "grad_norm": 0.0030146290082484484, - "learning_rate": 0.00019999549565525112, - "loss": 46.0, - "step": 18771 - }, - { - "epoch": 3.0231088208059904, - "grad_norm": 0.003766910172998905, - "learning_rate": 0.00019999549517506017, - "loss": 46.0, - "step": 18772 - }, - { - "epoch": 3.023269857884778, - "grad_norm": 0.004299858585000038, - "learning_rate": 0.00019999549469484367, - "loss": 46.0, - "step": 18773 - }, - { - "epoch": 3.0234308949635653, - "grad_norm": 0.010553083382546902, - "learning_rate": 0.00019999549421460155, - "loss": 46.0, - "step": 18774 - }, - { - "epoch": 3.0235919320423528, - "grad_norm": 0.002172702457755804, - "learning_rate": 0.00019999549373433384, - "loss": 46.0, - "step": 18775 - }, - { - "epoch": 3.02375296912114, - "grad_norm": 0.01052631251513958, - "learning_rate": 0.00019999549325404054, - "loss": 46.0, - "step": 18776 - }, - { - "epoch": 3.0239140061999277, - "grad_norm": 0.0020577621180564165, - "learning_rate": 0.00019999549277372166, - "loss": 46.0, - "step": 18777 - }, - { - "epoch": 3.024075043278715, - "grad_norm": 0.009155402891337872, - "learning_rate": 0.00019999549229337716, - "loss": 46.0, - "step": 18778 - }, - { - "epoch": 3.024236080357502, - "grad_norm": 0.006087441463023424, - "learning_rate": 0.0001999954918130071, - "loss": 46.0, - "step": 18779 - }, - { - "epoch": 3.0243971174362896, - "grad_norm": 0.003831608220934868, - "learning_rate": 0.0001999954913326114, - "loss": 46.0, - "step": 18780 - }, - { - "epoch": 3.024558154515077, - "grad_norm": 0.0005081454291939735, - "learning_rate": 0.00019999549085219015, - "loss": 46.0, - "step": 18781 - }, - { - "epoch": 3.0247191915938645, - "grad_norm": 0.012818376533687115, - "learning_rate": 0.00019999549037174328, - "loss": 46.0, - "step": 18782 - }, - { - "epoch": 3.024880228672652, - "grad_norm": 0.007063471246510744, - "learning_rate": 0.00019999548989127082, - "loss": 46.0, - "step": 18783 - }, - { - "epoch": 3.0250412657514394, - "grad_norm": 0.0033594570122659206, - "learning_rate": 0.00019999548941077277, - "loss": 46.0, - "step": 18784 - }, - { - "epoch": 3.025202302830227, - "grad_norm": 0.007352369837462902, - "learning_rate": 0.00019999548893024914, - "loss": 46.0, - "step": 18785 - }, - { - "epoch": 3.025363339909014, - "grad_norm": 0.0030238190665841103, - "learning_rate": 0.00019999548844969992, - "loss": 46.0, - "step": 18786 - }, - { - "epoch": 3.0255243769878013, - "grad_norm": 0.002009775722399354, - "learning_rate": 0.00019999548796912505, - "loss": 46.0, - "step": 18787 - }, - { - "epoch": 3.0256854140665888, - "grad_norm": 0.002662340411916375, - "learning_rate": 0.00019999548748852463, - "loss": 46.0, - "step": 18788 - }, - { - "epoch": 3.025846451145376, - "grad_norm": 0.0009317048243246973, - "learning_rate": 0.00019999548700789862, - "loss": 46.0, - "step": 18789 - }, - { - "epoch": 3.0260074882241637, - "grad_norm": 0.0025243947748094797, - "learning_rate": 0.000199995486527247, - "loss": 46.0, - "step": 18790 - }, - { - "epoch": 3.026168525302951, - "grad_norm": 0.005571222864091396, - "learning_rate": 0.0001999954860465698, - "loss": 46.0, - "step": 18791 - }, - { - "epoch": 3.0263295623817386, - "grad_norm": 0.0019863948691636324, - "learning_rate": 0.000199995485565867, - "loss": 46.0, - "step": 18792 - }, - { - "epoch": 3.0264905994605256, - "grad_norm": 0.001520515768788755, - "learning_rate": 0.0001999954850851386, - "loss": 46.0, - "step": 18793 - }, - { - "epoch": 3.026651636539313, - "grad_norm": 0.0017588242189958692, - "learning_rate": 0.00019999548460438463, - "loss": 46.0, - "step": 18794 - }, - { - "epoch": 3.0268126736181005, - "grad_norm": 0.0005717701860703528, - "learning_rate": 0.00019999548412360504, - "loss": 46.0, - "step": 18795 - }, - { - "epoch": 3.026973710696888, - "grad_norm": 0.0031505892984569073, - "learning_rate": 0.00019999548364279987, - "loss": 46.0, - "step": 18796 - }, - { - "epoch": 3.0271347477756754, - "grad_norm": 0.002870397176593542, - "learning_rate": 0.0001999954831619691, - "loss": 46.0, - "step": 18797 - }, - { - "epoch": 3.027295784854463, - "grad_norm": 0.0012424769811332226, - "learning_rate": 0.00019999548268111276, - "loss": 46.0, - "step": 18798 - }, - { - "epoch": 3.0274568219332503, - "grad_norm": 0.0012431961949914694, - "learning_rate": 0.0001999954822002308, - "loss": 46.0, - "step": 18799 - }, - { - "epoch": 3.0276178590120377, - "grad_norm": 0.014016887173056602, - "learning_rate": 0.00019999548171932324, - "loss": 46.0, - "step": 18800 - }, - { - "epoch": 3.0277788960908247, - "grad_norm": 0.0014831143198534846, - "learning_rate": 0.0001999954812383901, - "loss": 46.0, - "step": 18801 - }, - { - "epoch": 3.027939933169612, - "grad_norm": 0.004771762993186712, - "learning_rate": 0.00019999548075743135, - "loss": 46.0, - "step": 18802 - }, - { - "epoch": 3.0281009702483996, - "grad_norm": 0.0055425758473575115, - "learning_rate": 0.00019999548027644704, - "loss": 46.0, - "step": 18803 - }, - { - "epoch": 3.028262007327187, - "grad_norm": 0.003356646979227662, - "learning_rate": 0.0001999954797954371, - "loss": 46.0, - "step": 18804 - }, - { - "epoch": 3.0284230444059745, - "grad_norm": 0.001957725500687957, - "learning_rate": 0.00019999547931440158, - "loss": 46.0, - "step": 18805 - }, - { - "epoch": 3.028584081484762, - "grad_norm": 0.0031272321939468384, - "learning_rate": 0.00019999547883334048, - "loss": 46.0, - "step": 18806 - }, - { - "epoch": 3.0287451185635494, - "grad_norm": 0.00486453901976347, - "learning_rate": 0.00019999547835225376, - "loss": 46.0, - "step": 18807 - }, - { - "epoch": 3.0289061556423365, - "grad_norm": 0.0048525077290833, - "learning_rate": 0.00019999547787114146, - "loss": 46.0, - "step": 18808 - }, - { - "epoch": 3.029067192721124, - "grad_norm": 0.01169600710272789, - "learning_rate": 0.00019999547739000357, - "loss": 46.0, - "step": 18809 - }, - { - "epoch": 3.0292282297999114, - "grad_norm": 0.002238035900518298, - "learning_rate": 0.0001999954769088401, - "loss": 46.0, - "step": 18810 - }, - { - "epoch": 3.029389266878699, - "grad_norm": 0.0010283234296366572, - "learning_rate": 0.000199995476427651, - "loss": 46.0, - "step": 18811 - }, - { - "epoch": 3.0295503039574863, - "grad_norm": 0.00341443857178092, - "learning_rate": 0.00019999547594643633, - "loss": 46.0, - "step": 18812 - }, - { - "epoch": 3.0297113410362737, - "grad_norm": 0.00789582822471857, - "learning_rate": 0.00019999547546519606, - "loss": 46.0, - "step": 18813 - }, - { - "epoch": 3.029872378115061, - "grad_norm": 0.004375720862299204, - "learning_rate": 0.0001999954749839302, - "loss": 46.0, - "step": 18814 - }, - { - "epoch": 3.030033415193848, - "grad_norm": 0.004484332632273436, - "learning_rate": 0.00019999547450263874, - "loss": 46.0, - "step": 18815 - }, - { - "epoch": 3.0301944522726356, - "grad_norm": 0.004468945786356926, - "learning_rate": 0.0001999954740213217, - "loss": 46.0, - "step": 18816 - }, - { - "epoch": 3.030355489351423, - "grad_norm": 0.0017636411357671022, - "learning_rate": 0.00019999547353997905, - "loss": 46.0, - "step": 18817 - }, - { - "epoch": 3.0305165264302105, - "grad_norm": 0.0004101128433831036, - "learning_rate": 0.00019999547305861082, - "loss": 46.0, - "step": 18818 - }, - { - "epoch": 3.030677563508998, - "grad_norm": 0.002128963591530919, - "learning_rate": 0.00019999547257721698, - "loss": 46.0, - "step": 18819 - }, - { - "epoch": 3.0308386005877854, - "grad_norm": 0.0025925440713763237, - "learning_rate": 0.00019999547209579755, - "loss": 46.0, - "step": 18820 - }, - { - "epoch": 3.030999637666573, - "grad_norm": 0.002610244555398822, - "learning_rate": 0.00019999547161435254, - "loss": 46.0, - "step": 18821 - }, - { - "epoch": 3.0311606747453603, - "grad_norm": 0.0006201292271725833, - "learning_rate": 0.00019999547113288193, - "loss": 46.0, - "step": 18822 - }, - { - "epoch": 3.0313217118241473, - "grad_norm": 0.00097772351000458, - "learning_rate": 0.00019999547065138574, - "loss": 46.0, - "step": 18823 - }, - { - "epoch": 3.031482748902935, - "grad_norm": 0.0012892846716567874, - "learning_rate": 0.00019999547016986394, - "loss": 46.0, - "step": 18824 - }, - { - "epoch": 3.0316437859817222, - "grad_norm": 0.003242732025682926, - "learning_rate": 0.00019999546968831652, - "loss": 46.0, - "step": 18825 - }, - { - "epoch": 3.0318048230605097, - "grad_norm": 0.0049265166744589806, - "learning_rate": 0.00019999546920674357, - "loss": 46.0, - "step": 18826 - }, - { - "epoch": 3.031965860139297, - "grad_norm": 0.005905521102249622, - "learning_rate": 0.00019999546872514497, - "loss": 46.0, - "step": 18827 - }, - { - "epoch": 3.0321268972180846, - "grad_norm": 0.003319772658869624, - "learning_rate": 0.0001999954682435208, - "loss": 46.0, - "step": 18828 - }, - { - "epoch": 3.032287934296872, - "grad_norm": 0.0021305489353835583, - "learning_rate": 0.00019999546776187102, - "loss": 46.0, - "step": 18829 - }, - { - "epoch": 3.032448971375659, - "grad_norm": 0.0016076359897851944, - "learning_rate": 0.00019999546728019567, - "loss": 46.0, - "step": 18830 - }, - { - "epoch": 3.0326100084544465, - "grad_norm": 0.004042502958327532, - "learning_rate": 0.00019999546679849473, - "loss": 46.0, - "step": 18831 - }, - { - "epoch": 3.032771045533234, - "grad_norm": 0.004754027351737022, - "learning_rate": 0.00019999546631676817, - "loss": 46.0, - "step": 18832 - }, - { - "epoch": 3.0329320826120214, - "grad_norm": 0.003226912347599864, - "learning_rate": 0.00019999546583501603, - "loss": 46.0, - "step": 18833 - }, - { - "epoch": 3.033093119690809, - "grad_norm": 0.027438033372163773, - "learning_rate": 0.0001999954653532383, - "loss": 46.0, - "step": 18834 - }, - { - "epoch": 3.0332541567695963, - "grad_norm": 0.0032605247106403112, - "learning_rate": 0.00019999546487143498, - "loss": 46.0, - "step": 18835 - }, - { - "epoch": 3.0334151938483838, - "grad_norm": 0.004450343083590269, - "learning_rate": 0.00019999546438960605, - "loss": 46.0, - "step": 18836 - }, - { - "epoch": 3.0335762309271708, - "grad_norm": 0.0030439465772360563, - "learning_rate": 0.00019999546390775153, - "loss": 46.0, - "step": 18837 - }, - { - "epoch": 3.033737268005958, - "grad_norm": 0.0020435668993741274, - "learning_rate": 0.00019999546342587142, - "loss": 46.0, - "step": 18838 - }, - { - "epoch": 3.0338983050847457, - "grad_norm": 0.0011062038829550147, - "learning_rate": 0.00019999546294396573, - "loss": 46.0, - "step": 18839 - }, - { - "epoch": 3.034059342163533, - "grad_norm": 0.006097679957747459, - "learning_rate": 0.00019999546246203442, - "loss": 46.0, - "step": 18840 - }, - { - "epoch": 3.0342203792423206, - "grad_norm": 0.005517052952200174, - "learning_rate": 0.00019999546198007752, - "loss": 46.0, - "step": 18841 - }, - { - "epoch": 3.034381416321108, - "grad_norm": 0.0010438545141369104, - "learning_rate": 0.00019999546149809504, - "loss": 46.0, - "step": 18842 - }, - { - "epoch": 3.0345424533998955, - "grad_norm": 0.0011138978879898787, - "learning_rate": 0.00019999546101608697, - "loss": 46.0, - "step": 18843 - }, - { - "epoch": 3.034703490478683, - "grad_norm": 0.007553721312433481, - "learning_rate": 0.0001999954605340533, - "loss": 46.0, - "step": 18844 - }, - { - "epoch": 3.03486452755747, - "grad_norm": 0.012318179942667484, - "learning_rate": 0.00019999546005199404, - "loss": 46.0, - "step": 18845 - }, - { - "epoch": 3.0350255646362574, - "grad_norm": 0.008240317925810814, - "learning_rate": 0.00019999545956990916, - "loss": 46.0, - "step": 18846 - }, - { - "epoch": 3.035186601715045, - "grad_norm": 0.0020806973334401846, - "learning_rate": 0.0001999954590877987, - "loss": 46.0, - "step": 18847 - }, - { - "epoch": 3.0353476387938323, - "grad_norm": 0.0019417419098317623, - "learning_rate": 0.00019999545860566265, - "loss": 46.0, - "step": 18848 - }, - { - "epoch": 3.0355086758726197, - "grad_norm": 0.00509099243208766, - "learning_rate": 0.00019999545812350103, - "loss": 46.0, - "step": 18849 - }, - { - "epoch": 3.035669712951407, - "grad_norm": 0.003336383495479822, - "learning_rate": 0.0001999954576413138, - "loss": 46.0, - "step": 18850 - }, - { - "epoch": 3.0358307500301946, - "grad_norm": 0.0034357572440057993, - "learning_rate": 0.00019999545715910095, - "loss": 46.0, - "step": 18851 - }, - { - "epoch": 3.0359917871089817, - "grad_norm": 0.007890604436397552, - "learning_rate": 0.00019999545667686252, - "loss": 46.0, - "step": 18852 - }, - { - "epoch": 3.036152824187769, - "grad_norm": 0.006422179751098156, - "learning_rate": 0.00019999545619459852, - "loss": 46.0, - "step": 18853 - }, - { - "epoch": 3.0363138612665566, - "grad_norm": 0.0009265092085115612, - "learning_rate": 0.0001999954557123089, - "loss": 46.0, - "step": 18854 - }, - { - "epoch": 3.036474898345344, - "grad_norm": 0.012349448166787624, - "learning_rate": 0.00019999545522999371, - "loss": 46.0, - "step": 18855 - }, - { - "epoch": 3.0366359354241315, - "grad_norm": 0.0021277754567563534, - "learning_rate": 0.0001999954547476529, - "loss": 46.0, - "step": 18856 - }, - { - "epoch": 3.036796972502919, - "grad_norm": 0.0139263104647398, - "learning_rate": 0.00019999545426528653, - "loss": 46.0, - "step": 18857 - }, - { - "epoch": 3.0369580095817064, - "grad_norm": 0.0029302441980689764, - "learning_rate": 0.00019999545378289452, - "loss": 46.0, - "step": 18858 - }, - { - "epoch": 3.0371190466604934, - "grad_norm": 0.0013795375125482678, - "learning_rate": 0.00019999545330047695, - "loss": 46.0, - "step": 18859 - }, - { - "epoch": 3.037280083739281, - "grad_norm": 0.007578321732580662, - "learning_rate": 0.00019999545281803381, - "loss": 46.0, - "step": 18860 - }, - { - "epoch": 3.0374411208180683, - "grad_norm": 0.0029816231690347195, - "learning_rate": 0.000199995452335565, - "loss": 46.0, - "step": 18861 - }, - { - "epoch": 3.0376021578968557, - "grad_norm": 0.0016293191583827138, - "learning_rate": 0.00019999545185307068, - "loss": 46.0, - "step": 18862 - }, - { - "epoch": 3.037763194975643, - "grad_norm": 0.0036395173519849777, - "learning_rate": 0.0001999954513705507, - "loss": 46.0, - "step": 18863 - }, - { - "epoch": 3.0379242320544306, - "grad_norm": 0.010569578036665916, - "learning_rate": 0.00019999545088800517, - "loss": 46.0, - "step": 18864 - }, - { - "epoch": 3.038085269133218, - "grad_norm": 0.001031906926073134, - "learning_rate": 0.00019999545040543402, - "loss": 46.0, - "step": 18865 - }, - { - "epoch": 3.0382463062120055, - "grad_norm": 0.008065774105489254, - "learning_rate": 0.00019999544992283728, - "loss": 46.0, - "step": 18866 - }, - { - "epoch": 3.0384073432907925, - "grad_norm": 0.0038250968791544437, - "learning_rate": 0.00019999544944021496, - "loss": 46.0, - "step": 18867 - }, - { - "epoch": 3.03856838036958, - "grad_norm": 0.007237450685352087, - "learning_rate": 0.00019999544895756704, - "loss": 46.0, - "step": 18868 - }, - { - "epoch": 3.0387294174483674, - "grad_norm": 0.0022652449551969767, - "learning_rate": 0.00019999544847489352, - "loss": 46.0, - "step": 18869 - }, - { - "epoch": 3.038890454527155, - "grad_norm": 0.005361523479223251, - "learning_rate": 0.00019999544799219443, - "loss": 46.0, - "step": 18870 - }, - { - "epoch": 3.0390514916059423, - "grad_norm": 0.0017716326983645558, - "learning_rate": 0.00019999544750946973, - "loss": 46.0, - "step": 18871 - }, - { - "epoch": 3.03921252868473, - "grad_norm": 0.0035376125015318394, - "learning_rate": 0.00019999544702671942, - "loss": 46.0, - "step": 18872 - }, - { - "epoch": 3.0393735657635172, - "grad_norm": 0.0018904684111475945, - "learning_rate": 0.00019999544654394354, - "loss": 46.0, - "step": 18873 - }, - { - "epoch": 3.0395346028423043, - "grad_norm": 0.013746327720582485, - "learning_rate": 0.00019999544606114206, - "loss": 46.0, - "step": 18874 - }, - { - "epoch": 3.0396956399210917, - "grad_norm": 0.0016141963424161077, - "learning_rate": 0.00019999544557831498, - "loss": 46.0, - "step": 18875 - }, - { - "epoch": 3.039856676999879, - "grad_norm": 0.007423622068017721, - "learning_rate": 0.0001999954450954623, - "loss": 46.0, - "step": 18876 - }, - { - "epoch": 3.0400177140786666, - "grad_norm": 0.0047676232643425465, - "learning_rate": 0.000199995444612584, - "loss": 46.0, - "step": 18877 - }, - { - "epoch": 3.040178751157454, - "grad_norm": 0.006295583210885525, - "learning_rate": 0.00019999544412968017, - "loss": 46.0, - "step": 18878 - }, - { - "epoch": 3.0403397882362415, - "grad_norm": 0.0018172429408878088, - "learning_rate": 0.00019999544364675072, - "loss": 46.0, - "step": 18879 - }, - { - "epoch": 3.040500825315029, - "grad_norm": 0.005919110961258411, - "learning_rate": 0.00019999544316379568, - "loss": 46.0, - "step": 18880 - }, - { - "epoch": 3.040661862393816, - "grad_norm": 0.0012296631466597319, - "learning_rate": 0.00019999544268081506, - "loss": 46.0, - "step": 18881 - }, - { - "epoch": 3.0408228994726034, - "grad_norm": 0.007597137708216906, - "learning_rate": 0.0001999954421978088, - "loss": 46.0, - "step": 18882 - }, - { - "epoch": 3.040983936551391, - "grad_norm": 0.0016721213469281793, - "learning_rate": 0.00019999544171477696, - "loss": 46.0, - "step": 18883 - }, - { - "epoch": 3.0411449736301783, - "grad_norm": 0.0032726868521422148, - "learning_rate": 0.00019999544123171955, - "loss": 46.0, - "step": 18884 - }, - { - "epoch": 3.041306010708966, - "grad_norm": 0.0037985057570040226, - "learning_rate": 0.00019999544074863655, - "loss": 46.0, - "step": 18885 - }, - { - "epoch": 3.0414670477877532, - "grad_norm": 0.005214225966483355, - "learning_rate": 0.00019999544026552793, - "loss": 46.0, - "step": 18886 - }, - { - "epoch": 3.0416280848665407, - "grad_norm": 0.0033551345113664865, - "learning_rate": 0.00019999543978239373, - "loss": 46.0, - "step": 18887 - }, - { - "epoch": 3.041789121945328, - "grad_norm": 0.0038539241068065166, - "learning_rate": 0.00019999543929923394, - "loss": 46.0, - "step": 18888 - }, - { - "epoch": 3.041950159024115, - "grad_norm": 0.004168844781816006, - "learning_rate": 0.00019999543881604853, - "loss": 46.0, - "step": 18889 - }, - { - "epoch": 3.0421111961029026, - "grad_norm": 0.003933880012482405, - "learning_rate": 0.00019999543833283757, - "loss": 46.0, - "step": 18890 - }, - { - "epoch": 3.04227223318169, - "grad_norm": 0.001300912699662149, - "learning_rate": 0.000199995437849601, - "loss": 46.0, - "step": 18891 - }, - { - "epoch": 3.0424332702604775, - "grad_norm": 0.0013871589908376336, - "learning_rate": 0.00019999543736633882, - "loss": 46.0, - "step": 18892 - }, - { - "epoch": 3.042594307339265, - "grad_norm": 0.00045556091936305165, - "learning_rate": 0.00019999543688305104, - "loss": 46.0, - "step": 18893 - }, - { - "epoch": 3.0427553444180524, - "grad_norm": 0.005769932176917791, - "learning_rate": 0.0001999954363997377, - "loss": 46.0, - "step": 18894 - }, - { - "epoch": 3.04291638149684, - "grad_norm": 0.003269238630309701, - "learning_rate": 0.00019999543591639872, - "loss": 46.0, - "step": 18895 - }, - { - "epoch": 3.043077418575627, - "grad_norm": 0.00249124295078218, - "learning_rate": 0.00019999543543303418, - "loss": 46.0, - "step": 18896 - }, - { - "epoch": 3.0432384556544143, - "grad_norm": 0.004293342120945454, - "learning_rate": 0.00019999543494964405, - "loss": 46.0, - "step": 18897 - }, - { - "epoch": 3.0433994927332018, - "grad_norm": 0.004066897556185722, - "learning_rate": 0.00019999543446622833, - "loss": 46.0, - "step": 18898 - }, - { - "epoch": 3.043560529811989, - "grad_norm": 0.002417211188003421, - "learning_rate": 0.000199995433982787, - "loss": 46.0, - "step": 18899 - }, - { - "epoch": 3.0437215668907767, - "grad_norm": 0.002351326635107398, - "learning_rate": 0.00019999543349932006, - "loss": 46.0, - "step": 18900 - }, - { - "epoch": 3.043882603969564, - "grad_norm": 0.005535287782549858, - "learning_rate": 0.00019999543301582758, - "loss": 46.0, - "step": 18901 - }, - { - "epoch": 3.0440436410483516, - "grad_norm": 0.0021434519439935684, - "learning_rate": 0.00019999543253230946, - "loss": 46.0, - "step": 18902 - }, - { - "epoch": 3.0442046781271386, - "grad_norm": 0.0018834639340639114, - "learning_rate": 0.00019999543204876575, - "loss": 46.0, - "step": 18903 - }, - { - "epoch": 3.044365715205926, - "grad_norm": 0.017911570146679878, - "learning_rate": 0.00019999543156519643, - "loss": 46.0, - "step": 18904 - }, - { - "epoch": 3.0445267522847135, - "grad_norm": 0.003974526654928923, - "learning_rate": 0.00019999543108160158, - "loss": 46.0, - "step": 18905 - }, - { - "epoch": 3.044687789363501, - "grad_norm": 0.0032931321766227484, - "learning_rate": 0.00019999543059798108, - "loss": 46.0, - "step": 18906 - }, - { - "epoch": 3.0448488264422884, - "grad_norm": 0.0022926926612854004, - "learning_rate": 0.000199995430114335, - "loss": 46.0, - "step": 18907 - }, - { - "epoch": 3.045009863521076, - "grad_norm": 0.0005494573269970715, - "learning_rate": 0.00019999542963066333, - "loss": 46.0, - "step": 18908 - }, - { - "epoch": 3.0451709005998633, - "grad_norm": 0.010758903808891773, - "learning_rate": 0.00019999542914696608, - "loss": 46.0, - "step": 18909 - }, - { - "epoch": 3.0453319376786503, - "grad_norm": 0.010110465809702873, - "learning_rate": 0.00019999542866324323, - "loss": 46.0, - "step": 18910 - }, - { - "epoch": 3.0454929747574377, - "grad_norm": 0.0005439491360448301, - "learning_rate": 0.00019999542817949477, - "loss": 46.0, - "step": 18911 - }, - { - "epoch": 3.045654011836225, - "grad_norm": 0.0037642328534275293, - "learning_rate": 0.00019999542769572073, - "loss": 46.0, - "step": 18912 - }, - { - "epoch": 3.0458150489150126, - "grad_norm": 0.0009757018415257335, - "learning_rate": 0.0001999954272119211, - "loss": 46.0, - "step": 18913 - }, - { - "epoch": 3.0459760859938, - "grad_norm": 0.010762848891317844, - "learning_rate": 0.00019999542672809585, - "loss": 46.0, - "step": 18914 - }, - { - "epoch": 3.0461371230725875, - "grad_norm": 0.003563212463632226, - "learning_rate": 0.00019999542624424504, - "loss": 46.0, - "step": 18915 - }, - { - "epoch": 3.046298160151375, - "grad_norm": 0.003709547221660614, - "learning_rate": 0.00019999542576036862, - "loss": 46.0, - "step": 18916 - }, - { - "epoch": 3.0464591972301625, - "grad_norm": 0.006384704262018204, - "learning_rate": 0.0001999954252764666, - "loss": 46.0, - "step": 18917 - }, - { - "epoch": 3.0466202343089495, - "grad_norm": 0.0021330039016902447, - "learning_rate": 0.000199995424792539, - "loss": 46.0, - "step": 18918 - }, - { - "epoch": 3.046781271387737, - "grad_norm": 0.0026270258240401745, - "learning_rate": 0.0001999954243085858, - "loss": 46.0, - "step": 18919 - }, - { - "epoch": 3.0469423084665244, - "grad_norm": 0.003202127991244197, - "learning_rate": 0.000199995423824607, - "loss": 46.0, - "step": 18920 - }, - { - "epoch": 3.047103345545312, - "grad_norm": 0.001913890358991921, - "learning_rate": 0.00019999542334060263, - "loss": 46.0, - "step": 18921 - }, - { - "epoch": 3.0472643826240993, - "grad_norm": 0.0006028263596817851, - "learning_rate": 0.00019999542285657263, - "loss": 46.0, - "step": 18922 - }, - { - "epoch": 3.0474254197028867, - "grad_norm": 0.003960226662456989, - "learning_rate": 0.00019999542237251707, - "loss": 46.0, - "step": 18923 - }, - { - "epoch": 3.047586456781674, - "grad_norm": 0.005489690694957972, - "learning_rate": 0.0001999954218884359, - "loss": 46.0, - "step": 18924 - }, - { - "epoch": 3.047747493860461, - "grad_norm": 0.0021123201586306095, - "learning_rate": 0.00019999542140432914, - "loss": 46.0, - "step": 18925 - }, - { - "epoch": 3.0479085309392486, - "grad_norm": 0.0012478644493967295, - "learning_rate": 0.00019999542092019677, - "loss": 46.0, - "step": 18926 - }, - { - "epoch": 3.048069568018036, - "grad_norm": 0.012074966914951801, - "learning_rate": 0.0001999954204360388, - "loss": 46.0, - "step": 18927 - }, - { - "epoch": 3.0482306050968235, - "grad_norm": 0.001110857934691012, - "learning_rate": 0.00019999541995185526, - "loss": 46.0, - "step": 18928 - }, - { - "epoch": 3.048391642175611, - "grad_norm": 0.011390984058380127, - "learning_rate": 0.00019999541946764615, - "loss": 46.0, - "step": 18929 - }, - { - "epoch": 3.0485526792543984, - "grad_norm": 0.0035159746184945107, - "learning_rate": 0.0001999954189834114, - "loss": 46.0, - "step": 18930 - }, - { - "epoch": 3.048713716333186, - "grad_norm": 0.00884826760739088, - "learning_rate": 0.0001999954184991511, - "loss": 46.0, - "step": 18931 - }, - { - "epoch": 3.048874753411973, - "grad_norm": 0.007816282100975513, - "learning_rate": 0.00019999541801486517, - "loss": 46.0, - "step": 18932 - }, - { - "epoch": 3.0490357904907603, - "grad_norm": 0.007879632525146008, - "learning_rate": 0.00019999541753055365, - "loss": 46.0, - "step": 18933 - }, - { - "epoch": 3.049196827569548, - "grad_norm": 0.002085912972688675, - "learning_rate": 0.00019999541704621656, - "loss": 46.0, - "step": 18934 - }, - { - "epoch": 3.0493578646483352, - "grad_norm": 0.0005128393531776965, - "learning_rate": 0.00019999541656185384, - "loss": 46.0, - "step": 18935 - }, - { - "epoch": 3.0495189017271227, - "grad_norm": 0.0019889206159859896, - "learning_rate": 0.00019999541607746557, - "loss": 46.0, - "step": 18936 - }, - { - "epoch": 3.04967993880591, - "grad_norm": 0.002107777167111635, - "learning_rate": 0.00019999541559305168, - "loss": 46.0, - "step": 18937 - }, - { - "epoch": 3.0498409758846976, - "grad_norm": 0.0053547583520412445, - "learning_rate": 0.00019999541510861218, - "loss": 46.0, - "step": 18938 - }, - { - "epoch": 3.050002012963485, - "grad_norm": 0.007552565541118383, - "learning_rate": 0.00019999541462414712, - "loss": 46.0, - "step": 18939 - }, - { - "epoch": 3.050163050042272, - "grad_norm": 0.005827803630381823, - "learning_rate": 0.00019999541413965645, - "loss": 46.0, - "step": 18940 - }, - { - "epoch": 3.0503240871210595, - "grad_norm": 0.003172443248331547, - "learning_rate": 0.00019999541365514018, - "loss": 46.0, - "step": 18941 - }, - { - "epoch": 3.050485124199847, - "grad_norm": 0.0031627239659428596, - "learning_rate": 0.00019999541317059833, - "loss": 46.0, - "step": 18942 - }, - { - "epoch": 3.0506461612786344, - "grad_norm": 0.007425693329423666, - "learning_rate": 0.00019999541268603087, - "loss": 46.0, - "step": 18943 - }, - { - "epoch": 3.050807198357422, - "grad_norm": 0.003946628887206316, - "learning_rate": 0.00019999541220143784, - "loss": 46.0, - "step": 18944 - }, - { - "epoch": 3.0509682354362093, - "grad_norm": 0.004186005797237158, - "learning_rate": 0.0001999954117168192, - "loss": 46.0, - "step": 18945 - }, - { - "epoch": 3.0511292725149968, - "grad_norm": 0.0015321825630962849, - "learning_rate": 0.00019999541123217498, - "loss": 46.0, - "step": 18946 - }, - { - "epoch": 3.051290309593784, - "grad_norm": 0.0016872929409146309, - "learning_rate": 0.00019999541074750514, - "loss": 46.0, - "step": 18947 - }, - { - "epoch": 3.0514513466725712, - "grad_norm": 0.0020634718239307404, - "learning_rate": 0.00019999541026280974, - "loss": 46.0, - "step": 18948 - }, - { - "epoch": 3.0516123837513587, - "grad_norm": 0.0044443742372095585, - "learning_rate": 0.00019999540977808873, - "loss": 46.0, - "step": 18949 - }, - { - "epoch": 3.051773420830146, - "grad_norm": 0.0053870584815740585, - "learning_rate": 0.00019999540929334212, - "loss": 46.0, - "step": 18950 - }, - { - "epoch": 3.0519344579089336, - "grad_norm": 0.0009179266635328531, - "learning_rate": 0.0001999954088085699, - "loss": 46.0, - "step": 18951 - }, - { - "epoch": 3.052095494987721, - "grad_norm": 0.007610139437019825, - "learning_rate": 0.00019999540832377213, - "loss": 46.0, - "step": 18952 - }, - { - "epoch": 3.0522565320665085, - "grad_norm": 0.009230876341462135, - "learning_rate": 0.00019999540783894874, - "loss": 46.0, - "step": 18953 - }, - { - "epoch": 3.0524175691452955, - "grad_norm": 0.0012778345262631774, - "learning_rate": 0.00019999540735409976, - "loss": 46.0, - "step": 18954 - }, - { - "epoch": 3.052578606224083, - "grad_norm": 0.006814607884734869, - "learning_rate": 0.00019999540686922517, - "loss": 46.0, - "step": 18955 - }, - { - "epoch": 3.0527396433028704, - "grad_norm": 0.004744658712297678, - "learning_rate": 0.00019999540638432502, - "loss": 46.0, - "step": 18956 - }, - { - "epoch": 3.052900680381658, - "grad_norm": 0.00651804031804204, - "learning_rate": 0.00019999540589939928, - "loss": 46.0, - "step": 18957 - }, - { - "epoch": 3.0530617174604453, - "grad_norm": 0.0016507715918123722, - "learning_rate": 0.0001999954054144479, - "loss": 46.0, - "step": 18958 - }, - { - "epoch": 3.0532227545392328, - "grad_norm": 0.0009587332606315613, - "learning_rate": 0.00019999540492947096, - "loss": 46.0, - "step": 18959 - }, - { - "epoch": 3.05338379161802, - "grad_norm": 0.005978851579129696, - "learning_rate": 0.00019999540444446843, - "loss": 46.0, - "step": 18960 - }, - { - "epoch": 3.0535448286968077, - "grad_norm": 0.005107056815177202, - "learning_rate": 0.00019999540395944026, - "loss": 46.0, - "step": 18961 - }, - { - "epoch": 3.0537058657755947, - "grad_norm": 0.0059722792357206345, - "learning_rate": 0.00019999540347438654, - "loss": 46.0, - "step": 18962 - }, - { - "epoch": 3.053866902854382, - "grad_norm": 0.007648978848010302, - "learning_rate": 0.00019999540298930725, - "loss": 46.0, - "step": 18963 - }, - { - "epoch": 3.0540279399331696, - "grad_norm": 0.00248450948856771, - "learning_rate": 0.0001999954025042023, - "loss": 46.0, - "step": 18964 - }, - { - "epoch": 3.054188977011957, - "grad_norm": 0.0018780005630105734, - "learning_rate": 0.0001999954020190718, - "loss": 46.0, - "step": 18965 - }, - { - "epoch": 3.0543500140907445, - "grad_norm": 0.0059234327636659145, - "learning_rate": 0.0001999954015339157, - "loss": 46.0, - "step": 18966 - }, - { - "epoch": 3.054511051169532, - "grad_norm": 0.0022601732052862644, - "learning_rate": 0.000199995401048734, - "loss": 46.0, - "step": 18967 - }, - { - "epoch": 3.0546720882483194, - "grad_norm": 0.001392781618051231, - "learning_rate": 0.00019999540056352672, - "loss": 46.0, - "step": 18968 - }, - { - "epoch": 3.0548331253271064, - "grad_norm": 0.0023862901143729687, - "learning_rate": 0.00019999540007829383, - "loss": 46.0, - "step": 18969 - }, - { - "epoch": 3.054994162405894, - "grad_norm": 0.005048906430602074, - "learning_rate": 0.00019999539959303535, - "loss": 46.0, - "step": 18970 - }, - { - "epoch": 3.0551551994846813, - "grad_norm": 0.003923005424439907, - "learning_rate": 0.00019999539910775128, - "loss": 46.0, - "step": 18971 - }, - { - "epoch": 3.0553162365634687, - "grad_norm": 0.0021009675692766905, - "learning_rate": 0.0001999953986224416, - "loss": 46.0, - "step": 18972 - }, - { - "epoch": 3.055477273642256, - "grad_norm": 0.0029160145204514265, - "learning_rate": 0.00019999539813710636, - "loss": 46.0, - "step": 18973 - }, - { - "epoch": 3.0556383107210436, - "grad_norm": 0.002000607782974839, - "learning_rate": 0.0001999953976517455, - "loss": 46.0, - "step": 18974 - }, - { - "epoch": 3.055799347799831, - "grad_norm": 0.003365874756127596, - "learning_rate": 0.00019999539716635906, - "loss": 46.0, - "step": 18975 - }, - { - "epoch": 3.055960384878618, - "grad_norm": 0.002717716619372368, - "learning_rate": 0.000199995396680947, - "loss": 46.0, - "step": 18976 - }, - { - "epoch": 3.0561214219574055, - "grad_norm": 0.001957734115421772, - "learning_rate": 0.00019999539619550938, - "loss": 46.0, - "step": 18977 - }, - { - "epoch": 3.056282459036193, - "grad_norm": 0.0007823628839105368, - "learning_rate": 0.00019999539571004612, - "loss": 46.0, - "step": 18978 - }, - { - "epoch": 3.0564434961149805, - "grad_norm": 0.007330520544201136, - "learning_rate": 0.00019999539522455733, - "loss": 46.0, - "step": 18979 - }, - { - "epoch": 3.056604533193768, - "grad_norm": 0.000815938925370574, - "learning_rate": 0.0001999953947390429, - "loss": 46.0, - "step": 18980 - }, - { - "epoch": 3.0567655702725554, - "grad_norm": 0.002668933942914009, - "learning_rate": 0.00019999539425350288, - "loss": 46.0, - "step": 18981 - }, - { - "epoch": 3.056926607351343, - "grad_norm": 0.0015341124963015318, - "learning_rate": 0.0001999953937679373, - "loss": 46.0, - "step": 18982 - }, - { - "epoch": 3.05708764443013, - "grad_norm": 0.0014781627105548978, - "learning_rate": 0.0001999953932823461, - "loss": 46.0, - "step": 18983 - }, - { - "epoch": 3.0572486815089173, - "grad_norm": 0.0025400114245712757, - "learning_rate": 0.0001999953927967293, - "loss": 46.0, - "step": 18984 - }, - { - "epoch": 3.0574097185877047, - "grad_norm": 0.01987171173095703, - "learning_rate": 0.00019999539231108692, - "loss": 46.0, - "step": 18985 - }, - { - "epoch": 3.057570755666492, - "grad_norm": 0.002904577413573861, - "learning_rate": 0.00019999539182541894, - "loss": 46.0, - "step": 18986 - }, - { - "epoch": 3.0577317927452796, - "grad_norm": 0.0034273648634552956, - "learning_rate": 0.00019999539133972537, - "loss": 46.0, - "step": 18987 - }, - { - "epoch": 3.057892829824067, - "grad_norm": 0.005315632093697786, - "learning_rate": 0.0001999953908540062, - "loss": 46.0, - "step": 18988 - }, - { - "epoch": 3.0580538669028545, - "grad_norm": 0.0017369046108797193, - "learning_rate": 0.00019999539036826144, - "loss": 46.0, - "step": 18989 - }, - { - "epoch": 3.058214903981642, - "grad_norm": 0.003790066344663501, - "learning_rate": 0.0001999953898824911, - "loss": 46.0, - "step": 18990 - }, - { - "epoch": 3.058375941060429, - "grad_norm": 0.004123042803257704, - "learning_rate": 0.00019999538939669516, - "loss": 46.0, - "step": 18991 - }, - { - "epoch": 3.0585369781392164, - "grad_norm": 0.0032799781765788794, - "learning_rate": 0.00019999538891087362, - "loss": 46.0, - "step": 18992 - }, - { - "epoch": 3.058698015218004, - "grad_norm": 0.000961768499109894, - "learning_rate": 0.00019999538842502648, - "loss": 46.0, - "step": 18993 - }, - { - "epoch": 3.0588590522967913, - "grad_norm": 0.0013210336910560727, - "learning_rate": 0.00019999538793915377, - "loss": 46.0, - "step": 18994 - }, - { - "epoch": 3.059020089375579, - "grad_norm": 0.015877913683652878, - "learning_rate": 0.00019999538745325545, - "loss": 46.0, - "step": 18995 - }, - { - "epoch": 3.0591811264543662, - "grad_norm": 0.002419903641566634, - "learning_rate": 0.0001999953869673315, - "loss": 46.0, - "step": 18996 - }, - { - "epoch": 3.0593421635331537, - "grad_norm": 0.002488799626007676, - "learning_rate": 0.000199995386481382, - "loss": 46.0, - "step": 18997 - }, - { - "epoch": 3.0595032006119407, - "grad_norm": 0.002177231013774872, - "learning_rate": 0.0001999953859954069, - "loss": 46.0, - "step": 18998 - }, - { - "epoch": 3.059664237690728, - "grad_norm": 0.008219020441174507, - "learning_rate": 0.0001999953855094062, - "loss": 46.0, - "step": 18999 - }, - { - "epoch": 3.0598252747695156, - "grad_norm": 0.00157107412815094, - "learning_rate": 0.00019999538502337992, - "loss": 46.0, - "step": 19000 - }, - { - "epoch": 3.059986311848303, - "grad_norm": 0.0013721369905397296, - "learning_rate": 0.00019999538453732804, - "loss": 46.0, - "step": 19001 - }, - { - "epoch": 3.0601473489270905, - "grad_norm": 0.003930696751922369, - "learning_rate": 0.00019999538405125056, - "loss": 46.0, - "step": 19002 - }, - { - "epoch": 3.060308386005878, - "grad_norm": 0.005053957924246788, - "learning_rate": 0.00019999538356514748, - "loss": 46.0, - "step": 19003 - }, - { - "epoch": 3.0604694230846654, - "grad_norm": 0.0066785081289708614, - "learning_rate": 0.00019999538307901885, - "loss": 46.0, - "step": 19004 - }, - { - "epoch": 3.060630460163453, - "grad_norm": 0.0007520662620663643, - "learning_rate": 0.00019999538259286457, - "loss": 46.0, - "step": 19005 - }, - { - "epoch": 3.06079149724224, - "grad_norm": 0.0018778488738462329, - "learning_rate": 0.0001999953821066847, - "loss": 46.0, - "step": 19006 - }, - { - "epoch": 3.0609525343210273, - "grad_norm": 0.002723777899518609, - "learning_rate": 0.0001999953816204793, - "loss": 46.0, - "step": 19007 - }, - { - "epoch": 3.0611135713998148, - "grad_norm": 0.0024173678830266, - "learning_rate": 0.00019999538113424822, - "loss": 46.0, - "step": 19008 - }, - { - "epoch": 3.061274608478602, - "grad_norm": 0.0023062312975525856, - "learning_rate": 0.0001999953806479916, - "loss": 46.0, - "step": 19009 - }, - { - "epoch": 3.0614356455573897, - "grad_norm": 0.005425425246357918, - "learning_rate": 0.0001999953801617094, - "loss": 46.0, - "step": 19010 - }, - { - "epoch": 3.061596682636177, - "grad_norm": 0.00368660781532526, - "learning_rate": 0.00019999537967540154, - "loss": 46.0, - "step": 19011 - }, - { - "epoch": 3.0617577197149646, - "grad_norm": 0.005451664328575134, - "learning_rate": 0.00019999537918906815, - "loss": 46.0, - "step": 19012 - }, - { - "epoch": 3.0619187567937516, - "grad_norm": 0.005232551135122776, - "learning_rate": 0.00019999537870270912, - "loss": 46.0, - "step": 19013 - }, - { - "epoch": 3.062079793872539, - "grad_norm": 0.002887663897126913, - "learning_rate": 0.00019999537821632454, - "loss": 46.0, - "step": 19014 - }, - { - "epoch": 3.0622408309513265, - "grad_norm": 0.0026603098958730698, - "learning_rate": 0.00019999537772991434, - "loss": 46.0, - "step": 19015 - }, - { - "epoch": 3.062401868030114, - "grad_norm": 0.0033915177918970585, - "learning_rate": 0.00019999537724347857, - "loss": 46.0, - "step": 19016 - }, - { - "epoch": 3.0625629051089014, - "grad_norm": 0.010073371231555939, - "learning_rate": 0.00019999537675701717, - "loss": 46.0, - "step": 19017 - }, - { - "epoch": 3.062723942187689, - "grad_norm": 0.011034855619072914, - "learning_rate": 0.00019999537627053018, - "loss": 46.0, - "step": 19018 - }, - { - "epoch": 3.0628849792664763, - "grad_norm": 0.002768860897049308, - "learning_rate": 0.0001999953757840176, - "loss": 46.0, - "step": 19019 - }, - { - "epoch": 3.0630460163452633, - "grad_norm": 0.0018350209575146437, - "learning_rate": 0.00019999537529747947, - "loss": 46.0, - "step": 19020 - }, - { - "epoch": 3.0632070534240508, - "grad_norm": 0.000755294575355947, - "learning_rate": 0.00019999537481091571, - "loss": 46.0, - "step": 19021 - }, - { - "epoch": 3.063368090502838, - "grad_norm": 0.001323820324614644, - "learning_rate": 0.00019999537432432635, - "loss": 46.0, - "step": 19022 - }, - { - "epoch": 3.0635291275816257, - "grad_norm": 0.002677367767319083, - "learning_rate": 0.0001999953738377114, - "loss": 46.0, - "step": 19023 - }, - { - "epoch": 3.063690164660413, - "grad_norm": 0.00567894522100687, - "learning_rate": 0.00019999537335107088, - "loss": 46.0, - "step": 19024 - }, - { - "epoch": 3.0638512017392006, - "grad_norm": 0.0025094083976000547, - "learning_rate": 0.00019999537286440476, - "loss": 46.0, - "step": 19025 - }, - { - "epoch": 3.064012238817988, - "grad_norm": 0.0041719041764736176, - "learning_rate": 0.00019999537237771301, - "loss": 46.0, - "step": 19026 - }, - { - "epoch": 3.064173275896775, - "grad_norm": 0.001999187283217907, - "learning_rate": 0.0001999953718909957, - "loss": 46.0, - "step": 19027 - }, - { - "epoch": 3.0643343129755625, - "grad_norm": 0.0008871268946677446, - "learning_rate": 0.0001999953714042528, - "loss": 46.0, - "step": 19028 - }, - { - "epoch": 3.06449535005435, - "grad_norm": 0.0022008302621543407, - "learning_rate": 0.0001999953709174843, - "loss": 46.0, - "step": 19029 - }, - { - "epoch": 3.0646563871331374, - "grad_norm": 0.0015959098236635327, - "learning_rate": 0.00019999537043069018, - "loss": 46.0, - "step": 19030 - }, - { - "epoch": 3.064817424211925, - "grad_norm": 0.005434796214103699, - "learning_rate": 0.0001999953699438705, - "loss": 46.0, - "step": 19031 - }, - { - "epoch": 3.0649784612907123, - "grad_norm": 0.0031477482989430428, - "learning_rate": 0.0001999953694570252, - "loss": 46.0, - "step": 19032 - }, - { - "epoch": 3.0651394983694997, - "grad_norm": 0.003985613118857145, - "learning_rate": 0.00019999536897015433, - "loss": 46.0, - "step": 19033 - }, - { - "epoch": 3.065300535448287, - "grad_norm": 0.0015886675100773573, - "learning_rate": 0.00019999536848325783, - "loss": 46.0, - "step": 19034 - }, - { - "epoch": 3.065461572527074, - "grad_norm": 0.003924034535884857, - "learning_rate": 0.00019999536799633578, - "loss": 46.0, - "step": 19035 - }, - { - "epoch": 3.0656226096058616, - "grad_norm": 0.005063912365585566, - "learning_rate": 0.0001999953675093881, - "loss": 46.0, - "step": 19036 - }, - { - "epoch": 3.065783646684649, - "grad_norm": 0.0011748622637242079, - "learning_rate": 0.00019999536702241486, - "loss": 46.0, - "step": 19037 - }, - { - "epoch": 3.0659446837634365, - "grad_norm": 0.003807569621130824, - "learning_rate": 0.000199995366535416, - "loss": 46.0, - "step": 19038 - }, - { - "epoch": 3.066105720842224, - "grad_norm": 0.002891937969252467, - "learning_rate": 0.00019999536604839156, - "loss": 46.0, - "step": 19039 - }, - { - "epoch": 3.0662667579210114, - "grad_norm": 0.004636032972484827, - "learning_rate": 0.00019999536556134152, - "loss": 46.0, - "step": 19040 - }, - { - "epoch": 3.066427794999799, - "grad_norm": 0.003022376913577318, - "learning_rate": 0.00019999536507426591, - "loss": 46.0, - "step": 19041 - }, - { - "epoch": 3.066588832078586, - "grad_norm": 0.0022735067177563906, - "learning_rate": 0.00019999536458716467, - "loss": 46.0, - "step": 19042 - }, - { - "epoch": 3.0667498691573734, - "grad_norm": 0.002569965785369277, - "learning_rate": 0.00019999536410003787, - "loss": 46.0, - "step": 19043 - }, - { - "epoch": 3.066910906236161, - "grad_norm": 0.004043196327984333, - "learning_rate": 0.00019999536361288545, - "loss": 46.0, - "step": 19044 - }, - { - "epoch": 3.0670719433149483, - "grad_norm": 0.0024684718810021877, - "learning_rate": 0.00019999536312570744, - "loss": 46.0, - "step": 19045 - }, - { - "epoch": 3.0672329803937357, - "grad_norm": 0.0024127268698066473, - "learning_rate": 0.00019999536263850382, - "loss": 46.0, - "step": 19046 - }, - { - "epoch": 3.067394017472523, - "grad_norm": 0.002998227719217539, - "learning_rate": 0.00019999536215127464, - "loss": 46.0, - "step": 19047 - }, - { - "epoch": 3.0675550545513106, - "grad_norm": 0.0041608987376093864, - "learning_rate": 0.00019999536166401987, - "loss": 46.0, - "step": 19048 - }, - { - "epoch": 3.0677160916300976, - "grad_norm": 0.0011057513765990734, - "learning_rate": 0.00019999536117673946, - "loss": 46.0, - "step": 19049 - }, - { - "epoch": 3.067877128708885, - "grad_norm": 0.007981859147548676, - "learning_rate": 0.0001999953606894335, - "loss": 46.0, - "step": 19050 - }, - { - "epoch": 3.0680381657876725, - "grad_norm": 0.004173414781689644, - "learning_rate": 0.00019999536020210194, - "loss": 46.0, - "step": 19051 - }, - { - "epoch": 3.06819920286646, - "grad_norm": 0.007655048277229071, - "learning_rate": 0.00019999535971474477, - "loss": 46.0, - "step": 19052 - }, - { - "epoch": 3.0683602399452474, - "grad_norm": 0.006797769572585821, - "learning_rate": 0.000199995359227362, - "loss": 46.0, - "step": 19053 - }, - { - "epoch": 3.068521277024035, - "grad_norm": 0.004133382346481085, - "learning_rate": 0.00019999535873995366, - "loss": 46.0, - "step": 19054 - }, - { - "epoch": 3.0686823141028223, - "grad_norm": 0.0009931714739650488, - "learning_rate": 0.00019999535825251973, - "loss": 46.0, - "step": 19055 - }, - { - "epoch": 3.06884335118161, - "grad_norm": 0.005739261396229267, - "learning_rate": 0.0001999953577650602, - "loss": 46.0, - "step": 19056 - }, - { - "epoch": 3.069004388260397, - "grad_norm": 0.004316538106650114, - "learning_rate": 0.00019999535727757505, - "loss": 46.0, - "step": 19057 - }, - { - "epoch": 3.0691654253391842, - "grad_norm": 0.0019707847386598587, - "learning_rate": 0.00019999535679006436, - "loss": 46.0, - "step": 19058 - }, - { - "epoch": 3.0693264624179717, - "grad_norm": 0.005369000602513552, - "learning_rate": 0.00019999535630252805, - "loss": 46.0, - "step": 19059 - }, - { - "epoch": 3.069487499496759, - "grad_norm": 0.00812921766191721, - "learning_rate": 0.0001999953558149661, - "loss": 46.0, - "step": 19060 - }, - { - "epoch": 3.0696485365755466, - "grad_norm": 0.010572869330644608, - "learning_rate": 0.00019999535532737862, - "loss": 46.0, - "step": 19061 - }, - { - "epoch": 3.069809573654334, - "grad_norm": 0.002061629667878151, - "learning_rate": 0.0001999953548397655, - "loss": 46.0, - "step": 19062 - }, - { - "epoch": 3.0699706107331215, - "grad_norm": 0.005293956026434898, - "learning_rate": 0.0001999953543521268, - "loss": 46.0, - "step": 19063 - }, - { - "epoch": 3.0701316478119085, - "grad_norm": 0.0014090079348534346, - "learning_rate": 0.00019999535386446257, - "loss": 46.0, - "step": 19064 - }, - { - "epoch": 3.070292684890696, - "grad_norm": 0.0025850203819572926, - "learning_rate": 0.00019999535337677265, - "loss": 46.0, - "step": 19065 - }, - { - "epoch": 3.0704537219694834, - "grad_norm": 0.0011749042896553874, - "learning_rate": 0.0001999953528890572, - "loss": 46.0, - "step": 19066 - }, - { - "epoch": 3.070614759048271, - "grad_norm": 0.014405600726604462, - "learning_rate": 0.00019999535240131612, - "loss": 46.0, - "step": 19067 - }, - { - "epoch": 3.0707757961270583, - "grad_norm": 0.0006996396114118397, - "learning_rate": 0.00019999535191354947, - "loss": 46.0, - "step": 19068 - }, - { - "epoch": 3.0709368332058458, - "grad_norm": 0.0014793199952691793, - "learning_rate": 0.0001999953514257572, - "loss": 46.0, - "step": 19069 - }, - { - "epoch": 3.071097870284633, - "grad_norm": 0.002936831908300519, - "learning_rate": 0.00019999535093793937, - "loss": 46.0, - "step": 19070 - }, - { - "epoch": 3.07125890736342, - "grad_norm": 0.0036574266850948334, - "learning_rate": 0.00019999535045009593, - "loss": 46.0, - "step": 19071 - }, - { - "epoch": 3.0714199444422077, - "grad_norm": 0.008239139802753925, - "learning_rate": 0.00019999534996222688, - "loss": 46.0, - "step": 19072 - }, - { - "epoch": 3.071580981520995, - "grad_norm": 0.00396543787792325, - "learning_rate": 0.00019999534947433224, - "loss": 46.0, - "step": 19073 - }, - { - "epoch": 3.0717420185997826, - "grad_norm": 0.0028600955847650766, - "learning_rate": 0.00019999534898641205, - "loss": 46.0, - "step": 19074 - }, - { - "epoch": 3.07190305567857, - "grad_norm": 0.0016765448963269591, - "learning_rate": 0.0001999953484984662, - "loss": 46.0, - "step": 19075 - }, - { - "epoch": 3.0720640927573575, - "grad_norm": 0.0019038845784962177, - "learning_rate": 0.0001999953480104948, - "loss": 46.0, - "step": 19076 - }, - { - "epoch": 3.072225129836145, - "grad_norm": 0.005017343908548355, - "learning_rate": 0.00019999534752249782, - "loss": 46.0, - "step": 19077 - }, - { - "epoch": 3.0723861669149324, - "grad_norm": 0.002789796097204089, - "learning_rate": 0.00019999534703447522, - "loss": 46.0, - "step": 19078 - }, - { - "epoch": 3.0725472039937194, - "grad_norm": 0.013695823960006237, - "learning_rate": 0.000199995346546427, - "loss": 46.0, - "step": 19079 - }, - { - "epoch": 3.072708241072507, - "grad_norm": 0.0012747837463393807, - "learning_rate": 0.00019999534605835324, - "loss": 46.0, - "step": 19080 - }, - { - "epoch": 3.0728692781512943, - "grad_norm": 0.0010314909741282463, - "learning_rate": 0.00019999534557025385, - "loss": 46.0, - "step": 19081 - }, - { - "epoch": 3.0730303152300817, - "grad_norm": 0.0006177301402203739, - "learning_rate": 0.0001999953450821289, - "loss": 46.0, - "step": 19082 - }, - { - "epoch": 3.073191352308869, - "grad_norm": 0.0034139796625822783, - "learning_rate": 0.00019999534459397834, - "loss": 46.0, - "step": 19083 - }, - { - "epoch": 3.0733523893876566, - "grad_norm": 0.003577446099370718, - "learning_rate": 0.00019999534410580216, - "loss": 46.0, - "step": 19084 - }, - { - "epoch": 3.073513426466444, - "grad_norm": 0.008248341269791126, - "learning_rate": 0.00019999534361760042, - "loss": 46.0, - "step": 19085 - }, - { - "epoch": 3.073674463545231, - "grad_norm": 0.00657343864440918, - "learning_rate": 0.00019999534312937307, - "loss": 46.0, - "step": 19086 - }, - { - "epoch": 3.0738355006240186, - "grad_norm": 0.00760771194472909, - "learning_rate": 0.00019999534264112013, - "loss": 46.0, - "step": 19087 - }, - { - "epoch": 3.073996537702806, - "grad_norm": 0.0028835826087743044, - "learning_rate": 0.00019999534215284158, - "loss": 46.0, - "step": 19088 - }, - { - "epoch": 3.0741575747815935, - "grad_norm": 0.001613772357814014, - "learning_rate": 0.00019999534166453747, - "loss": 46.0, - "step": 19089 - }, - { - "epoch": 3.074318611860381, - "grad_norm": 0.0063636754639446735, - "learning_rate": 0.00019999534117620774, - "loss": 46.0, - "step": 19090 - }, - { - "epoch": 3.0744796489391684, - "grad_norm": 0.0012422911822795868, - "learning_rate": 0.00019999534068785243, - "loss": 46.0, - "step": 19091 - }, - { - "epoch": 3.074640686017956, - "grad_norm": 0.017037486657500267, - "learning_rate": 0.00019999534019947152, - "loss": 46.0, - "step": 19092 - }, - { - "epoch": 3.074801723096743, - "grad_norm": 0.0009254264296032488, - "learning_rate": 0.00019999533971106504, - "loss": 46.0, - "step": 19093 - }, - { - "epoch": 3.0749627601755303, - "grad_norm": 0.0015830083284527063, - "learning_rate": 0.00019999533922263293, - "loss": 46.0, - "step": 19094 - }, - { - "epoch": 3.0751237972543177, - "grad_norm": 0.0027457610704004765, - "learning_rate": 0.00019999533873417524, - "loss": 46.0, - "step": 19095 - }, - { - "epoch": 3.075284834333105, - "grad_norm": 0.0009954030392691493, - "learning_rate": 0.00019999533824569197, - "loss": 46.0, - "step": 19096 - }, - { - "epoch": 3.0754458714118926, - "grad_norm": 0.0013651358895003796, - "learning_rate": 0.0001999953377571831, - "loss": 46.0, - "step": 19097 - }, - { - "epoch": 3.07560690849068, - "grad_norm": 0.005430291406810284, - "learning_rate": 0.0001999953372686486, - "loss": 46.0, - "step": 19098 - }, - { - "epoch": 3.0757679455694675, - "grad_norm": 0.006404591724276543, - "learning_rate": 0.00019999533678008856, - "loss": 46.0, - "step": 19099 - }, - { - "epoch": 3.0759289826482545, - "grad_norm": 0.0009197668405249715, - "learning_rate": 0.00019999533629150288, - "loss": 46.0, - "step": 19100 - }, - { - "epoch": 3.076090019727042, - "grad_norm": 0.002305516740307212, - "learning_rate": 0.00019999533580289164, - "loss": 46.0, - "step": 19101 - }, - { - "epoch": 3.0762510568058294, - "grad_norm": 0.0021574273705482483, - "learning_rate": 0.00019999533531425478, - "loss": 46.0, - "step": 19102 - }, - { - "epoch": 3.076412093884617, - "grad_norm": 0.0009340643882751465, - "learning_rate": 0.00019999533482559234, - "loss": 46.0, - "step": 19103 - }, - { - "epoch": 3.0765731309634043, - "grad_norm": 0.0005300783668644726, - "learning_rate": 0.0001999953343369043, - "loss": 46.0, - "step": 19104 - }, - { - "epoch": 3.076734168042192, - "grad_norm": 0.0010991146555170417, - "learning_rate": 0.0001999953338481907, - "loss": 46.0, - "step": 19105 - }, - { - "epoch": 3.0768952051209792, - "grad_norm": 0.002562117064371705, - "learning_rate": 0.0001999953333594515, - "loss": 46.0, - "step": 19106 - }, - { - "epoch": 3.0770562421997667, - "grad_norm": 0.003121523419395089, - "learning_rate": 0.00019999533287068665, - "loss": 46.0, - "step": 19107 - }, - { - "epoch": 3.0772172792785537, - "grad_norm": 0.0018334800843149424, - "learning_rate": 0.00019999533238189624, - "loss": 46.0, - "step": 19108 - }, - { - "epoch": 3.077378316357341, - "grad_norm": 0.002573322504758835, - "learning_rate": 0.00019999533189308025, - "loss": 46.0, - "step": 19109 - }, - { - "epoch": 3.0775393534361286, - "grad_norm": 0.00606690626591444, - "learning_rate": 0.00019999533140423864, - "loss": 46.0, - "step": 19110 - }, - { - "epoch": 3.077700390514916, - "grad_norm": 0.007160976063460112, - "learning_rate": 0.00019999533091537148, - "loss": 46.0, - "step": 19111 - }, - { - "epoch": 3.0778614275937035, - "grad_norm": 0.005840762052685022, - "learning_rate": 0.0001999953304264787, - "loss": 46.0, - "step": 19112 - }, - { - "epoch": 3.078022464672491, - "grad_norm": 0.0024405154399573803, - "learning_rate": 0.0001999953299375603, - "loss": 46.0, - "step": 19113 - }, - { - "epoch": 3.0781835017512784, - "grad_norm": 0.0037817552220076323, - "learning_rate": 0.00019999532944861635, - "loss": 46.0, - "step": 19114 - }, - { - "epoch": 3.0783445388300654, - "grad_norm": 0.0029663031455129385, - "learning_rate": 0.00019999532895964678, - "loss": 46.0, - "step": 19115 - }, - { - "epoch": 3.078505575908853, - "grad_norm": 0.0030656771268695593, - "learning_rate": 0.00019999532847065162, - "loss": 46.0, - "step": 19116 - }, - { - "epoch": 3.0786666129876403, - "grad_norm": 0.0016240831464529037, - "learning_rate": 0.00019999532798163088, - "loss": 46.0, - "step": 19117 - }, - { - "epoch": 3.078827650066428, - "grad_norm": 0.00464076129719615, - "learning_rate": 0.00019999532749258455, - "loss": 46.0, - "step": 19118 - }, - { - "epoch": 3.0789886871452152, - "grad_norm": 0.0023065260611474514, - "learning_rate": 0.0001999953270035126, - "loss": 46.0, - "step": 19119 - }, - { - "epoch": 3.0791497242240027, - "grad_norm": 0.005764540750533342, - "learning_rate": 0.00019999532651441507, - "loss": 46.0, - "step": 19120 - }, - { - "epoch": 3.07931076130279, - "grad_norm": 0.0040515111759305, - "learning_rate": 0.00019999532602529195, - "loss": 46.0, - "step": 19121 - }, - { - "epoch": 3.0794717983815776, - "grad_norm": 0.007397829554975033, - "learning_rate": 0.00019999532553614322, - "loss": 46.0, - "step": 19122 - }, - { - "epoch": 3.0796328354603646, - "grad_norm": 0.003961391746997833, - "learning_rate": 0.00019999532504696893, - "loss": 46.0, - "step": 19123 - }, - { - "epoch": 3.079793872539152, - "grad_norm": 0.0016536946641281247, - "learning_rate": 0.000199995324557769, - "loss": 46.0, - "step": 19124 - }, - { - "epoch": 3.0799549096179395, - "grad_norm": 0.0029958044178783894, - "learning_rate": 0.00019999532406854352, - "loss": 46.0, - "step": 19125 - }, - { - "epoch": 3.080115946696727, - "grad_norm": 0.0008020913228392601, - "learning_rate": 0.00019999532357929241, - "loss": 46.0, - "step": 19126 - }, - { - "epoch": 3.0802769837755144, - "grad_norm": 0.0029873119201511145, - "learning_rate": 0.00019999532309001575, - "loss": 46.0, - "step": 19127 - }, - { - "epoch": 3.080438020854302, - "grad_norm": 0.0005914492066949606, - "learning_rate": 0.00019999532260071346, - "loss": 46.0, - "step": 19128 - }, - { - "epoch": 3.0805990579330893, - "grad_norm": 0.010760613717138767, - "learning_rate": 0.0001999953221113856, - "loss": 46.0, - "step": 19129 - }, - { - "epoch": 3.0807600950118763, - "grad_norm": 0.003076407825574279, - "learning_rate": 0.0001999953216220321, - "loss": 46.0, - "step": 19130 - }, - { - "epoch": 3.0809211320906638, - "grad_norm": 0.008108134381473064, - "learning_rate": 0.00019999532113265303, - "loss": 46.0, - "step": 19131 - }, - { - "epoch": 3.081082169169451, - "grad_norm": 0.0010789695661514997, - "learning_rate": 0.00019999532064324837, - "loss": 46.0, - "step": 19132 - }, - { - "epoch": 3.0812432062482387, - "grad_norm": 0.0018693257588893175, - "learning_rate": 0.00019999532015381815, - "loss": 46.0, - "step": 19133 - }, - { - "epoch": 3.081404243327026, - "grad_norm": 0.006474814377725124, - "learning_rate": 0.0001999953196643623, - "loss": 46.0, - "step": 19134 - }, - { - "epoch": 3.0815652804058136, - "grad_norm": 0.0029409306589514017, - "learning_rate": 0.00019999531917488085, - "loss": 46.0, - "step": 19135 - }, - { - "epoch": 3.081726317484601, - "grad_norm": 0.010037630796432495, - "learning_rate": 0.00019999531868537384, - "loss": 46.0, - "step": 19136 - }, - { - "epoch": 3.081887354563388, - "grad_norm": 0.003775344928726554, - "learning_rate": 0.00019999531819584122, - "loss": 46.0, - "step": 19137 - }, - { - "epoch": 3.0820483916421755, - "grad_norm": 0.001168327871710062, - "learning_rate": 0.00019999531770628298, - "loss": 46.0, - "step": 19138 - }, - { - "epoch": 3.082209428720963, - "grad_norm": 0.005866238847374916, - "learning_rate": 0.00019999531721669918, - "loss": 46.0, - "step": 19139 - }, - { - "epoch": 3.0823704657997504, - "grad_norm": 0.0037726270966231823, - "learning_rate": 0.00019999531672708977, - "loss": 46.0, - "step": 19140 - }, - { - "epoch": 3.082531502878538, - "grad_norm": 0.0007196885999292135, - "learning_rate": 0.00019999531623745477, - "loss": 46.0, - "step": 19141 - }, - { - "epoch": 3.0826925399573253, - "grad_norm": 0.0031213753391057253, - "learning_rate": 0.0001999953157477942, - "loss": 46.0, - "step": 19142 - }, - { - "epoch": 3.0828535770361127, - "grad_norm": 0.0013955564936622977, - "learning_rate": 0.000199995315258108, - "loss": 46.0, - "step": 19143 - }, - { - "epoch": 3.0830146141148997, - "grad_norm": 0.012316612526774406, - "learning_rate": 0.0001999953147683962, - "loss": 46.0, - "step": 19144 - }, - { - "epoch": 3.083175651193687, - "grad_norm": 0.002138487994670868, - "learning_rate": 0.00019999531427865885, - "loss": 46.0, - "step": 19145 - }, - { - "epoch": 3.0833366882724746, - "grad_norm": 0.0034975174348801374, - "learning_rate": 0.00019999531378889587, - "loss": 46.0, - "step": 19146 - }, - { - "epoch": 3.083497725351262, - "grad_norm": 0.0024908240884542465, - "learning_rate": 0.00019999531329910732, - "loss": 46.0, - "step": 19147 - }, - { - "epoch": 3.0836587624300495, - "grad_norm": 0.004345782566815615, - "learning_rate": 0.00019999531280929316, - "loss": 46.0, - "step": 19148 - }, - { - "epoch": 3.083819799508837, - "grad_norm": 0.001221984624862671, - "learning_rate": 0.00019999531231945343, - "loss": 46.0, - "step": 19149 - }, - { - "epoch": 3.0839808365876245, - "grad_norm": 0.0036923657171428204, - "learning_rate": 0.0001999953118295881, - "loss": 46.0, - "step": 19150 - }, - { - "epoch": 3.084141873666412, - "grad_norm": 0.00238083116710186, - "learning_rate": 0.00019999531133969715, - "loss": 46.0, - "step": 19151 - }, - { - "epoch": 3.084302910745199, - "grad_norm": 0.0021271277219057083, - "learning_rate": 0.0001999953108497806, - "loss": 46.0, - "step": 19152 - }, - { - "epoch": 3.0844639478239864, - "grad_norm": 0.005991783458739519, - "learning_rate": 0.00019999531035983848, - "loss": 46.0, - "step": 19153 - }, - { - "epoch": 3.084624984902774, - "grad_norm": 0.002737136324867606, - "learning_rate": 0.00019999530986987077, - "loss": 46.0, - "step": 19154 - }, - { - "epoch": 3.0847860219815613, - "grad_norm": 0.005372087936848402, - "learning_rate": 0.00019999530937987747, - "loss": 46.0, - "step": 19155 - }, - { - "epoch": 3.0849470590603487, - "grad_norm": 0.003594535170122981, - "learning_rate": 0.00019999530888985855, - "loss": 46.0, - "step": 19156 - }, - { - "epoch": 3.085108096139136, - "grad_norm": 0.0014518267707899213, - "learning_rate": 0.00019999530839981405, - "loss": 46.0, - "step": 19157 - }, - { - "epoch": 3.0852691332179236, - "grad_norm": 0.0033178445883095264, - "learning_rate": 0.000199995307909744, - "loss": 46.0, - "step": 19158 - }, - { - "epoch": 3.0854301702967106, - "grad_norm": 0.005538743920624256, - "learning_rate": 0.0001999953074196483, - "loss": 46.0, - "step": 19159 - }, - { - "epoch": 3.085591207375498, - "grad_norm": 0.0009418170084245503, - "learning_rate": 0.00019999530692952703, - "loss": 46.0, - "step": 19160 - }, - { - "epoch": 3.0857522444542855, - "grad_norm": 0.005124872550368309, - "learning_rate": 0.00019999530643938015, - "loss": 46.0, - "step": 19161 - }, - { - "epoch": 3.085913281533073, - "grad_norm": 0.0015647117979824543, - "learning_rate": 0.00019999530594920769, - "loss": 46.0, - "step": 19162 - }, - { - "epoch": 3.0860743186118604, - "grad_norm": 0.0010755390394479036, - "learning_rate": 0.0001999953054590096, - "loss": 46.0, - "step": 19163 - }, - { - "epoch": 3.086235355690648, - "grad_norm": 0.003597024828195572, - "learning_rate": 0.00019999530496878597, - "loss": 46.0, - "step": 19164 - }, - { - "epoch": 3.0863963927694353, - "grad_norm": 0.005692549515515566, - "learning_rate": 0.00019999530447853672, - "loss": 46.0, - "step": 19165 - }, - { - "epoch": 3.0865574298482223, - "grad_norm": 0.00314165442250669, - "learning_rate": 0.00019999530398826188, - "loss": 46.0, - "step": 19166 - }, - { - "epoch": 3.08671846692701, - "grad_norm": 0.0061389184556901455, - "learning_rate": 0.00019999530349796145, - "loss": 46.0, - "step": 19167 - }, - { - "epoch": 3.0868795040057972, - "grad_norm": 0.005200805142521858, - "learning_rate": 0.00019999530300763543, - "loss": 46.0, - "step": 19168 - }, - { - "epoch": 3.0870405410845847, - "grad_norm": 0.011876535601913929, - "learning_rate": 0.0001999953025172838, - "loss": 46.0, - "step": 19169 - }, - { - "epoch": 3.087201578163372, - "grad_norm": 0.003240771358832717, - "learning_rate": 0.00019999530202690656, - "loss": 46.0, - "step": 19170 - }, - { - "epoch": 3.0873626152421596, - "grad_norm": 0.00891954731196165, - "learning_rate": 0.00019999530153650379, - "loss": 46.0, - "step": 19171 - }, - { - "epoch": 3.087523652320947, - "grad_norm": 0.002669870387762785, - "learning_rate": 0.00019999530104607537, - "loss": 46.0, - "step": 19172 - }, - { - "epoch": 3.087684689399734, - "grad_norm": 0.0055496483109891415, - "learning_rate": 0.0001999953005556214, - "loss": 46.0, - "step": 19173 - }, - { - "epoch": 3.0878457264785215, - "grad_norm": 0.004981720820069313, - "learning_rate": 0.00019999530006514177, - "loss": 46.0, - "step": 19174 - }, - { - "epoch": 3.088006763557309, - "grad_norm": 0.0006577274180017412, - "learning_rate": 0.00019999529957463662, - "loss": 46.0, - "step": 19175 - }, - { - "epoch": 3.0881678006360964, - "grad_norm": 0.0017516218358650804, - "learning_rate": 0.00019999529908410583, - "loss": 46.0, - "step": 19176 - }, - { - "epoch": 3.088328837714884, - "grad_norm": 0.0026046959683299065, - "learning_rate": 0.00019999529859354947, - "loss": 46.0, - "step": 19177 - }, - { - "epoch": 3.0884898747936713, - "grad_norm": 0.004757405258715153, - "learning_rate": 0.00019999529810296748, - "loss": 46.0, - "step": 19178 - }, - { - "epoch": 3.0886509118724588, - "grad_norm": 0.011958839371800423, - "learning_rate": 0.00019999529761235993, - "loss": 46.0, - "step": 19179 - }, - { - "epoch": 3.0888119489512462, - "grad_norm": 0.0006466091726906598, - "learning_rate": 0.0001999952971217268, - "loss": 46.0, - "step": 19180 - }, - { - "epoch": 3.0889729860300332, - "grad_norm": 0.0010535804321989417, - "learning_rate": 0.00019999529663106803, - "loss": 46.0, - "step": 19181 - }, - { - "epoch": 3.0891340231088207, - "grad_norm": 0.0012020752765238285, - "learning_rate": 0.0001999952961403837, - "loss": 46.0, - "step": 19182 - }, - { - "epoch": 3.089295060187608, - "grad_norm": 0.007841324433684349, - "learning_rate": 0.00019999529564967378, - "loss": 46.0, - "step": 19183 - }, - { - "epoch": 3.0894560972663956, - "grad_norm": 0.004968698602169752, - "learning_rate": 0.00019999529515893824, - "loss": 46.0, - "step": 19184 - }, - { - "epoch": 3.089617134345183, - "grad_norm": 0.002287528244778514, - "learning_rate": 0.00019999529466817713, - "loss": 46.0, - "step": 19185 - }, - { - "epoch": 3.0897781714239705, - "grad_norm": 0.001124705420807004, - "learning_rate": 0.0001999952941773904, - "loss": 46.0, - "step": 19186 - }, - { - "epoch": 3.089939208502758, - "grad_norm": 0.0076405955478549, - "learning_rate": 0.0001999952936865781, - "loss": 46.0, - "step": 19187 - }, - { - "epoch": 3.090100245581545, - "grad_norm": 0.004483805038034916, - "learning_rate": 0.0001999952931957402, - "loss": 46.0, - "step": 19188 - }, - { - "epoch": 3.0902612826603324, - "grad_norm": 0.002658623270690441, - "learning_rate": 0.0001999952927048767, - "loss": 46.0, - "step": 19189 - }, - { - "epoch": 3.09042231973912, - "grad_norm": 0.002346122870221734, - "learning_rate": 0.00019999529221398764, - "loss": 46.0, - "step": 19190 - }, - { - "epoch": 3.0905833568179073, - "grad_norm": 0.0032003053929656744, - "learning_rate": 0.00019999529172307293, - "loss": 46.0, - "step": 19191 - }, - { - "epoch": 3.0907443938966948, - "grad_norm": 0.007972792722284794, - "learning_rate": 0.0001999952912321327, - "loss": 46.0, - "step": 19192 - }, - { - "epoch": 3.090905430975482, - "grad_norm": 0.002378620905801654, - "learning_rate": 0.0001999952907411668, - "loss": 46.0, - "step": 19193 - }, - { - "epoch": 3.0910664680542697, - "grad_norm": 0.0011053212219849229, - "learning_rate": 0.00019999529025017533, - "loss": 46.0, - "step": 19194 - }, - { - "epoch": 3.091227505133057, - "grad_norm": 0.0009945402853190899, - "learning_rate": 0.00019999528975915828, - "loss": 46.0, - "step": 19195 - }, - { - "epoch": 3.091388542211844, - "grad_norm": 0.000790278660133481, - "learning_rate": 0.00019999528926811563, - "loss": 46.0, - "step": 19196 - }, - { - "epoch": 3.0915495792906316, - "grad_norm": 0.002588866511359811, - "learning_rate": 0.00019999528877704737, - "loss": 46.0, - "step": 19197 - }, - { - "epoch": 3.091710616369419, - "grad_norm": 0.0033637951128184795, - "learning_rate": 0.00019999528828595355, - "loss": 46.0, - "step": 19198 - }, - { - "epoch": 3.0918716534482065, - "grad_norm": 0.002044109860435128, - "learning_rate": 0.00019999528779483412, - "loss": 46.0, - "step": 19199 - }, - { - "epoch": 3.092032690526994, - "grad_norm": 0.0008687538793310523, - "learning_rate": 0.00019999528730368907, - "loss": 46.0, - "step": 19200 - }, - { - "epoch": 3.0921937276057814, - "grad_norm": 0.005857252515852451, - "learning_rate": 0.00019999528681251846, - "loss": 46.0, - "step": 19201 - }, - { - "epoch": 3.092354764684569, - "grad_norm": 0.001143820583820343, - "learning_rate": 0.00019999528632132227, - "loss": 46.0, - "step": 19202 - }, - { - "epoch": 3.092515801763356, - "grad_norm": 0.004507384262979031, - "learning_rate": 0.00019999528583010046, - "loss": 46.0, - "step": 19203 - }, - { - "epoch": 3.0926768388421433, - "grad_norm": 0.0022199093364179134, - "learning_rate": 0.00019999528533885306, - "loss": 46.0, - "step": 19204 - }, - { - "epoch": 3.0928378759209307, - "grad_norm": 0.0016401053871959448, - "learning_rate": 0.00019999528484758008, - "loss": 46.0, - "step": 19205 - }, - { - "epoch": 3.092998912999718, - "grad_norm": 0.0032426516991108656, - "learning_rate": 0.00019999528435628148, - "loss": 46.0, - "step": 19206 - }, - { - "epoch": 3.0931599500785056, - "grad_norm": 0.001714535872451961, - "learning_rate": 0.0001999952838649573, - "loss": 46.0, - "step": 19207 - }, - { - "epoch": 3.093320987157293, - "grad_norm": 0.0034224370028823614, - "learning_rate": 0.00019999528337360752, - "loss": 46.0, - "step": 19208 - }, - { - "epoch": 3.0934820242360805, - "grad_norm": 0.003591538406908512, - "learning_rate": 0.00019999528288223214, - "loss": 46.0, - "step": 19209 - }, - { - "epoch": 3.0936430613148675, - "grad_norm": 0.005739274434745312, - "learning_rate": 0.00019999528239083122, - "loss": 46.0, - "step": 19210 - }, - { - "epoch": 3.093804098393655, - "grad_norm": 0.001627480611205101, - "learning_rate": 0.00019999528189940463, - "loss": 46.0, - "step": 19211 - }, - { - "epoch": 3.0939651354724425, - "grad_norm": 0.0010919008636847138, - "learning_rate": 0.0001999952814079525, - "loss": 46.0, - "step": 19212 - }, - { - "epoch": 3.09412617255123, - "grad_norm": 0.008490863256156445, - "learning_rate": 0.00019999528091647477, - "loss": 46.0, - "step": 19213 - }, - { - "epoch": 3.0942872096300174, - "grad_norm": 0.005402341019362211, - "learning_rate": 0.00019999528042497142, - "loss": 46.0, - "step": 19214 - }, - { - "epoch": 3.094448246708805, - "grad_norm": 0.0018791462061926723, - "learning_rate": 0.0001999952799334425, - "loss": 46.0, - "step": 19215 - }, - { - "epoch": 3.0946092837875923, - "grad_norm": 0.0056406366638839245, - "learning_rate": 0.00019999527944188796, - "loss": 46.0, - "step": 19216 - }, - { - "epoch": 3.0947703208663793, - "grad_norm": 0.0020144961308687925, - "learning_rate": 0.00019999527895030785, - "loss": 46.0, - "step": 19217 - }, - { - "epoch": 3.0949313579451667, - "grad_norm": 0.003603936405852437, - "learning_rate": 0.00019999527845870213, - "loss": 46.0, - "step": 19218 - }, - { - "epoch": 3.095092395023954, - "grad_norm": 0.001043665106408298, - "learning_rate": 0.00019999527796707084, - "loss": 46.0, - "step": 19219 - }, - { - "epoch": 3.0952534321027416, - "grad_norm": 0.006417769473046064, - "learning_rate": 0.00019999527747541397, - "loss": 46.0, - "step": 19220 - }, - { - "epoch": 3.095414469181529, - "grad_norm": 0.007026469800621271, - "learning_rate": 0.00019999527698373143, - "loss": 46.0, - "step": 19221 - }, - { - "epoch": 3.0955755062603165, - "grad_norm": 0.005228106398135424, - "learning_rate": 0.00019999527649202335, - "loss": 46.0, - "step": 19222 - }, - { - "epoch": 3.095736543339104, - "grad_norm": 0.0035425720270723104, - "learning_rate": 0.0001999952760002897, - "loss": 46.0, - "step": 19223 - }, - { - "epoch": 3.0958975804178914, - "grad_norm": 0.003815435105934739, - "learning_rate": 0.00019999527550853042, - "loss": 46.0, - "step": 19224 - }, - { - "epoch": 3.0960586174966784, - "grad_norm": 0.0014954148791730404, - "learning_rate": 0.00019999527501674553, - "loss": 46.0, - "step": 19225 - }, - { - "epoch": 3.096219654575466, - "grad_norm": 0.011047880165278912, - "learning_rate": 0.00019999527452493508, - "loss": 46.0, - "step": 19226 - }, - { - "epoch": 3.0963806916542533, - "grad_norm": 0.001661021844483912, - "learning_rate": 0.00019999527403309904, - "loss": 46.0, - "step": 19227 - }, - { - "epoch": 3.096541728733041, - "grad_norm": 0.0013593442272394896, - "learning_rate": 0.0001999952735412374, - "loss": 46.0, - "step": 19228 - }, - { - "epoch": 3.0967027658118282, - "grad_norm": 0.009929660707712173, - "learning_rate": 0.00019999527304935015, - "loss": 46.0, - "step": 19229 - }, - { - "epoch": 3.0968638028906157, - "grad_norm": 0.0025440549943596125, - "learning_rate": 0.0001999952725574373, - "loss": 46.0, - "step": 19230 - }, - { - "epoch": 3.097024839969403, - "grad_norm": 0.0015816837549209595, - "learning_rate": 0.00019999527206549889, - "loss": 46.0, - "step": 19231 - }, - { - "epoch": 3.09718587704819, - "grad_norm": 0.003044366603717208, - "learning_rate": 0.00019999527157353486, - "loss": 46.0, - "step": 19232 - }, - { - "epoch": 3.0973469141269776, - "grad_norm": 0.003409534227102995, - "learning_rate": 0.00019999527108154524, - "loss": 46.0, - "step": 19233 - }, - { - "epoch": 3.097507951205765, - "grad_norm": 0.0026044296100735664, - "learning_rate": 0.00019999527058953004, - "loss": 46.0, - "step": 19234 - }, - { - "epoch": 3.0976689882845525, - "grad_norm": 0.0004715148243121803, - "learning_rate": 0.00019999527009748923, - "loss": 46.0, - "step": 19235 - }, - { - "epoch": 3.09783002536334, - "grad_norm": 0.0017794615123420954, - "learning_rate": 0.00019999526960542285, - "loss": 46.0, - "step": 19236 - }, - { - "epoch": 3.0979910624421274, - "grad_norm": 0.0054541523568332195, - "learning_rate": 0.00019999526911333083, - "loss": 46.0, - "step": 19237 - }, - { - "epoch": 3.098152099520915, - "grad_norm": 0.004682239145040512, - "learning_rate": 0.00019999526862121328, - "loss": 46.0, - "step": 19238 - }, - { - "epoch": 3.098313136599702, - "grad_norm": 0.006904846057295799, - "learning_rate": 0.00019999526812907007, - "loss": 46.0, - "step": 19239 - }, - { - "epoch": 3.0984741736784893, - "grad_norm": 0.004432819318026304, - "learning_rate": 0.00019999526763690134, - "loss": 46.0, - "step": 19240 - }, - { - "epoch": 3.0986352107572768, - "grad_norm": 0.0037251566536724567, - "learning_rate": 0.00019999526714470695, - "loss": 46.0, - "step": 19241 - }, - { - "epoch": 3.098796247836064, - "grad_norm": 0.004127888940274715, - "learning_rate": 0.000199995266652487, - "loss": 46.0, - "step": 19242 - }, - { - "epoch": 3.0989572849148517, - "grad_norm": 0.0057748123072087765, - "learning_rate": 0.00019999526616024146, - "loss": 46.0, - "step": 19243 - }, - { - "epoch": 3.099118321993639, - "grad_norm": 0.0029337021987885237, - "learning_rate": 0.0001999952656679703, - "loss": 46.0, - "step": 19244 - }, - { - "epoch": 3.0992793590724266, - "grad_norm": 0.002829350531101227, - "learning_rate": 0.0001999952651756736, - "loss": 46.0, - "step": 19245 - }, - { - "epoch": 3.099440396151214, - "grad_norm": 0.003779337042942643, - "learning_rate": 0.0001999952646833512, - "loss": 46.0, - "step": 19246 - }, - { - "epoch": 3.099601433230001, - "grad_norm": 0.009024163708090782, - "learning_rate": 0.00019999526419100331, - "loss": 46.0, - "step": 19247 - }, - { - "epoch": 3.0997624703087885, - "grad_norm": 0.004311509430408478, - "learning_rate": 0.00019999526369862978, - "loss": 46.0, - "step": 19248 - }, - { - "epoch": 3.099923507387576, - "grad_norm": 0.0021545386407524347, - "learning_rate": 0.00019999526320623067, - "loss": 46.0, - "step": 19249 - }, - { - "epoch": 3.1000845444663634, - "grad_norm": 0.001936333836056292, - "learning_rate": 0.00019999526271380596, - "loss": 46.0, - "step": 19250 - }, - { - "epoch": 3.100245581545151, - "grad_norm": 0.002758914837613702, - "learning_rate": 0.00019999526222135567, - "loss": 46.0, - "step": 19251 - }, - { - "epoch": 3.1004066186239383, - "grad_norm": 0.0029531673062592745, - "learning_rate": 0.00019999526172887977, - "loss": 46.0, - "step": 19252 - }, - { - "epoch": 3.1005676557027257, - "grad_norm": 0.005109885707497597, - "learning_rate": 0.00019999526123637827, - "loss": 46.0, - "step": 19253 - }, - { - "epoch": 3.1007286927815128, - "grad_norm": 0.004946262110024691, - "learning_rate": 0.0001999952607438512, - "loss": 46.0, - "step": 19254 - }, - { - "epoch": 3.1008897298603, - "grad_norm": 0.0007333557005040348, - "learning_rate": 0.00019999526025129852, - "loss": 46.0, - "step": 19255 - }, - { - "epoch": 3.1010507669390877, - "grad_norm": 0.0016733099473640323, - "learning_rate": 0.00019999525975872024, - "loss": 46.0, - "step": 19256 - }, - { - "epoch": 3.101211804017875, - "grad_norm": 0.0009643870871514082, - "learning_rate": 0.00019999525926611637, - "loss": 46.0, - "step": 19257 - }, - { - "epoch": 3.1013728410966626, - "grad_norm": 0.005745363887399435, - "learning_rate": 0.00019999525877348692, - "loss": 46.0, - "step": 19258 - }, - { - "epoch": 3.10153387817545, - "grad_norm": 0.010808480903506279, - "learning_rate": 0.00019999525828083187, - "loss": 46.0, - "step": 19259 - }, - { - "epoch": 3.1016949152542375, - "grad_norm": 0.0015271289739757776, - "learning_rate": 0.00019999525778815124, - "loss": 46.0, - "step": 19260 - }, - { - "epoch": 3.1018559523330245, - "grad_norm": 0.0021282541565597057, - "learning_rate": 0.000199995257295445, - "loss": 46.0, - "step": 19261 - }, - { - "epoch": 3.102016989411812, - "grad_norm": 0.0022353986278176308, - "learning_rate": 0.00019999525680271317, - "loss": 46.0, - "step": 19262 - }, - { - "epoch": 3.1021780264905994, - "grad_norm": 0.0010247172322124243, - "learning_rate": 0.00019999525630995572, - "loss": 46.0, - "step": 19263 - }, - { - "epoch": 3.102339063569387, - "grad_norm": 0.0007396000437438488, - "learning_rate": 0.00019999525581717271, - "loss": 46.0, - "step": 19264 - }, - { - "epoch": 3.1025001006481743, - "grad_norm": 0.002451027510687709, - "learning_rate": 0.00019999525532436412, - "loss": 46.0, - "step": 19265 - }, - { - "epoch": 3.1026611377269617, - "grad_norm": 0.008563502691686153, - "learning_rate": 0.00019999525483152989, - "loss": 46.0, - "step": 19266 - }, - { - "epoch": 3.102822174805749, - "grad_norm": 0.0020906063728034496, - "learning_rate": 0.0001999952543386701, - "loss": 46.0, - "step": 19267 - }, - { - "epoch": 3.1029832118845366, - "grad_norm": 0.005402501206845045, - "learning_rate": 0.00019999525384578474, - "loss": 46.0, - "step": 19268 - }, - { - "epoch": 3.1031442489633236, - "grad_norm": 0.0021859346888959408, - "learning_rate": 0.0001999952533528737, - "loss": 46.0, - "step": 19269 - }, - { - "epoch": 3.103305286042111, - "grad_norm": 0.011181621812283993, - "learning_rate": 0.00019999525285993716, - "loss": 46.0, - "step": 19270 - }, - { - "epoch": 3.1034663231208985, - "grad_norm": 0.0070154485292732716, - "learning_rate": 0.00019999525236697499, - "loss": 46.0, - "step": 19271 - }, - { - "epoch": 3.103627360199686, - "grad_norm": 0.0041419873014092445, - "learning_rate": 0.0001999952518739872, - "loss": 46.0, - "step": 19272 - }, - { - "epoch": 3.1037883972784734, - "grad_norm": 0.005718898959457874, - "learning_rate": 0.00019999525138097383, - "loss": 46.0, - "step": 19273 - }, - { - "epoch": 3.103949434357261, - "grad_norm": 0.003628492821007967, - "learning_rate": 0.0001999952508879349, - "loss": 46.0, - "step": 19274 - }, - { - "epoch": 3.1041104714360483, - "grad_norm": 0.003389259334653616, - "learning_rate": 0.00019999525039487032, - "loss": 46.0, - "step": 19275 - }, - { - "epoch": 3.1042715085148354, - "grad_norm": 0.0023660468868911266, - "learning_rate": 0.0001999952499017802, - "loss": 46.0, - "step": 19276 - }, - { - "epoch": 3.104432545593623, - "grad_norm": 0.00660520326346159, - "learning_rate": 0.00019999524940866444, - "loss": 46.0, - "step": 19277 - }, - { - "epoch": 3.1045935826724103, - "grad_norm": 0.0017505967989563942, - "learning_rate": 0.00019999524891552313, - "loss": 46.0, - "step": 19278 - }, - { - "epoch": 3.1047546197511977, - "grad_norm": 0.011438223533332348, - "learning_rate": 0.0001999952484223562, - "loss": 46.0, - "step": 19279 - }, - { - "epoch": 3.104915656829985, - "grad_norm": 0.0010729064233601093, - "learning_rate": 0.00019999524792916368, - "loss": 46.0, - "step": 19280 - }, - { - "epoch": 3.1050766939087726, - "grad_norm": 0.0010524013778194785, - "learning_rate": 0.00019999524743594558, - "loss": 46.0, - "step": 19281 - }, - { - "epoch": 3.10523773098756, - "grad_norm": 0.0019382539903745055, - "learning_rate": 0.00019999524694270187, - "loss": 46.0, - "step": 19282 - }, - { - "epoch": 3.105398768066347, - "grad_norm": 0.002451792825013399, - "learning_rate": 0.00019999524644943257, - "loss": 46.0, - "step": 19283 - }, - { - "epoch": 3.1055598051451345, - "grad_norm": 0.008010496385395527, - "learning_rate": 0.0001999952459561377, - "loss": 46.0, - "step": 19284 - }, - { - "epoch": 3.105720842223922, - "grad_norm": 0.0073457336984574795, - "learning_rate": 0.0001999952454628172, - "loss": 46.0, - "step": 19285 - }, - { - "epoch": 3.1058818793027094, - "grad_norm": 0.0011635952396318316, - "learning_rate": 0.00019999524496947113, - "loss": 46.0, - "step": 19286 - }, - { - "epoch": 3.106042916381497, - "grad_norm": 0.002992767607793212, - "learning_rate": 0.00019999524447609945, - "loss": 46.0, - "step": 19287 - }, - { - "epoch": 3.1062039534602843, - "grad_norm": 0.0036717213224619627, - "learning_rate": 0.0001999952439827022, - "loss": 46.0, - "step": 19288 - }, - { - "epoch": 3.106364990539072, - "grad_norm": 0.0024175650905817747, - "learning_rate": 0.00019999524348927934, - "loss": 46.0, - "step": 19289 - }, - { - "epoch": 3.106526027617859, - "grad_norm": 0.003215793753042817, - "learning_rate": 0.00019999524299583085, - "loss": 46.0, - "step": 19290 - }, - { - "epoch": 3.1066870646966462, - "grad_norm": 0.004355255514383316, - "learning_rate": 0.0001999952425023568, - "loss": 46.0, - "step": 19291 - }, - { - "epoch": 3.1068481017754337, - "grad_norm": 0.0015961091266945004, - "learning_rate": 0.00019999524200885717, - "loss": 46.0, - "step": 19292 - }, - { - "epoch": 3.107009138854221, - "grad_norm": 0.0033468592446297407, - "learning_rate": 0.00019999524151533194, - "loss": 46.0, - "step": 19293 - }, - { - "epoch": 3.1071701759330086, - "grad_norm": 0.0016286567552015185, - "learning_rate": 0.00019999524102178113, - "loss": 46.0, - "step": 19294 - }, - { - "epoch": 3.107331213011796, - "grad_norm": 0.0033984172623604536, - "learning_rate": 0.0001999952405282047, - "loss": 46.0, - "step": 19295 - }, - { - "epoch": 3.1074922500905835, - "grad_norm": 0.005804160609841347, - "learning_rate": 0.0001999952400346027, - "loss": 46.0, - "step": 19296 - }, - { - "epoch": 3.107653287169371, - "grad_norm": 0.005598474759608507, - "learning_rate": 0.00019999523954097507, - "loss": 46.0, - "step": 19297 - }, - { - "epoch": 3.107814324248158, - "grad_norm": 0.0022115791216492653, - "learning_rate": 0.00019999523904732188, - "loss": 46.0, - "step": 19298 - }, - { - "epoch": 3.1079753613269454, - "grad_norm": 0.008368846029043198, - "learning_rate": 0.00019999523855364308, - "loss": 46.0, - "step": 19299 - }, - { - "epoch": 3.108136398405733, - "grad_norm": 0.0006773097557015717, - "learning_rate": 0.00019999523805993866, - "loss": 46.0, - "step": 19300 - }, - { - "epoch": 3.1082974354845203, - "grad_norm": 0.0026804094668477774, - "learning_rate": 0.0001999952375662087, - "loss": 46.0, - "step": 19301 - }, - { - "epoch": 3.1084584725633078, - "grad_norm": 0.0038352832198143005, - "learning_rate": 0.00019999523707245313, - "loss": 46.0, - "step": 19302 - }, - { - "epoch": 3.108619509642095, - "grad_norm": 0.00030896722455509007, - "learning_rate": 0.00019999523657867198, - "loss": 46.0, - "step": 19303 - }, - { - "epoch": 3.1087805467208827, - "grad_norm": 0.004510022699832916, - "learning_rate": 0.00019999523608486518, - "loss": 46.0, - "step": 19304 - }, - { - "epoch": 3.1089415837996697, - "grad_norm": 0.004488205537199974, - "learning_rate": 0.00019999523559103283, - "loss": 46.0, - "step": 19305 - }, - { - "epoch": 3.109102620878457, - "grad_norm": 0.0026863000821322203, - "learning_rate": 0.00019999523509717487, - "loss": 46.0, - "step": 19306 - }, - { - "epoch": 3.1092636579572446, - "grad_norm": 0.0026817957405000925, - "learning_rate": 0.00019999523460329131, - "loss": 46.0, - "step": 19307 - }, - { - "epoch": 3.109424695036032, - "grad_norm": 0.0047181290574371815, - "learning_rate": 0.0001999952341093822, - "loss": 46.0, - "step": 19308 - }, - { - "epoch": 3.1095857321148195, - "grad_norm": 0.008078353479504585, - "learning_rate": 0.00019999523361544745, - "loss": 46.0, - "step": 19309 - }, - { - "epoch": 3.109746769193607, - "grad_norm": 0.0031496433075517416, - "learning_rate": 0.00019999523312148713, - "loss": 46.0, - "step": 19310 - }, - { - "epoch": 3.1099078062723944, - "grad_norm": 0.0007472268189303577, - "learning_rate": 0.0001999952326275012, - "loss": 46.0, - "step": 19311 - }, - { - "epoch": 3.110068843351182, - "grad_norm": 0.0006258013308979571, - "learning_rate": 0.0001999952321334897, - "loss": 46.0, - "step": 19312 - }, - { - "epoch": 3.110229880429969, - "grad_norm": 0.0041410732083022594, - "learning_rate": 0.00019999523163945259, - "loss": 46.0, - "step": 19313 - }, - { - "epoch": 3.1103909175087563, - "grad_norm": 0.001808946137316525, - "learning_rate": 0.00019999523114538987, - "loss": 46.0, - "step": 19314 - }, - { - "epoch": 3.1105519545875437, - "grad_norm": 0.0007960887160152197, - "learning_rate": 0.0001999952306513016, - "loss": 46.0, - "step": 19315 - }, - { - "epoch": 3.110712991666331, - "grad_norm": 0.0029116705991327763, - "learning_rate": 0.0001999952301571877, - "loss": 46.0, - "step": 19316 - }, - { - "epoch": 3.1108740287451186, - "grad_norm": 0.0008015538332983851, - "learning_rate": 0.00019999522966304822, - "loss": 46.0, - "step": 19317 - }, - { - "epoch": 3.111035065823906, - "grad_norm": 0.00940077193081379, - "learning_rate": 0.00019999522916888313, - "loss": 46.0, - "step": 19318 - }, - { - "epoch": 3.1111961029026935, - "grad_norm": 0.002370478818193078, - "learning_rate": 0.0001999952286746925, - "loss": 46.0, - "step": 19319 - }, - { - "epoch": 3.1113571399814806, - "grad_norm": 0.0028829616494476795, - "learning_rate": 0.00019999522818047624, - "loss": 46.0, - "step": 19320 - }, - { - "epoch": 3.111518177060268, - "grad_norm": 0.001068557146936655, - "learning_rate": 0.00019999522768623435, - "loss": 46.0, - "step": 19321 - }, - { - "epoch": 3.1116792141390555, - "grad_norm": 0.002573475707322359, - "learning_rate": 0.0001999952271919669, - "loss": 46.0, - "step": 19322 - }, - { - "epoch": 3.111840251217843, - "grad_norm": 0.0021205635275691748, - "learning_rate": 0.00019999522669767386, - "loss": 46.0, - "step": 19323 - }, - { - "epoch": 3.1120012882966304, - "grad_norm": 0.00813312828540802, - "learning_rate": 0.0001999952262033552, - "loss": 46.0, - "step": 19324 - }, - { - "epoch": 3.112162325375418, - "grad_norm": 0.004118777811527252, - "learning_rate": 0.00019999522570901098, - "loss": 46.0, - "step": 19325 - }, - { - "epoch": 3.1123233624542053, - "grad_norm": 0.0015542840119451284, - "learning_rate": 0.0001999952252146412, - "loss": 46.0, - "step": 19326 - }, - { - "epoch": 3.1124843995329923, - "grad_norm": 0.007820246741175652, - "learning_rate": 0.00019999522472024576, - "loss": 46.0, - "step": 19327 - }, - { - "epoch": 3.1126454366117797, - "grad_norm": 0.002289773663505912, - "learning_rate": 0.00019999522422582474, - "loss": 46.0, - "step": 19328 - }, - { - "epoch": 3.112806473690567, - "grad_norm": 0.009453109465539455, - "learning_rate": 0.00019999522373137814, - "loss": 46.0, - "step": 19329 - }, - { - "epoch": 3.1129675107693546, - "grad_norm": 0.004741839598864317, - "learning_rate": 0.00019999522323690594, - "loss": 46.0, - "step": 19330 - }, - { - "epoch": 3.113128547848142, - "grad_norm": 0.0024593514390289783, - "learning_rate": 0.00019999522274240813, - "loss": 46.0, - "step": 19331 - }, - { - "epoch": 3.1132895849269295, - "grad_norm": 0.0018467070767655969, - "learning_rate": 0.00019999522224788474, - "loss": 46.0, - "step": 19332 - }, - { - "epoch": 3.113450622005717, - "grad_norm": 0.003571482840925455, - "learning_rate": 0.00019999522175333576, - "loss": 46.0, - "step": 19333 - }, - { - "epoch": 3.113611659084504, - "grad_norm": 0.0040740324184298515, - "learning_rate": 0.0001999952212587612, - "loss": 46.0, - "step": 19334 - }, - { - "epoch": 3.1137726961632914, - "grad_norm": 0.0014620781876146793, - "learning_rate": 0.000199995220764161, - "loss": 46.0, - "step": 19335 - }, - { - "epoch": 3.113933733242079, - "grad_norm": 0.0028638429939746857, - "learning_rate": 0.00019999522026953526, - "loss": 46.0, - "step": 19336 - }, - { - "epoch": 3.1140947703208663, - "grad_norm": 0.001697445404715836, - "learning_rate": 0.0001999952197748839, - "loss": 46.0, - "step": 19337 - }, - { - "epoch": 3.114255807399654, - "grad_norm": 0.004957149736583233, - "learning_rate": 0.00019999521928020696, - "loss": 46.0, - "step": 19338 - }, - { - "epoch": 3.1144168444784412, - "grad_norm": 0.0012377359671518207, - "learning_rate": 0.00019999521878550443, - "loss": 46.0, - "step": 19339 - }, - { - "epoch": 3.1145778815572287, - "grad_norm": 0.005835159216076136, - "learning_rate": 0.00019999521829077628, - "loss": 46.0, - "step": 19340 - }, - { - "epoch": 3.114738918636016, - "grad_norm": 0.0023562470450997353, - "learning_rate": 0.00019999521779602258, - "loss": 46.0, - "step": 19341 - }, - { - "epoch": 3.114899955714803, - "grad_norm": 0.004596192389726639, - "learning_rate": 0.00019999521730124326, - "loss": 46.0, - "step": 19342 - }, - { - "epoch": 3.1150609927935906, - "grad_norm": 0.0037462601903826, - "learning_rate": 0.00019999521680643832, - "loss": 46.0, - "step": 19343 - }, - { - "epoch": 3.115222029872378, - "grad_norm": 0.0022621031384915113, - "learning_rate": 0.0001999952163116078, - "loss": 46.0, - "step": 19344 - }, - { - "epoch": 3.1153830669511655, - "grad_norm": 0.0028835791163146496, - "learning_rate": 0.0001999952158167517, - "loss": 46.0, - "step": 19345 - }, - { - "epoch": 3.115544104029953, - "grad_norm": 0.0014843278331682086, - "learning_rate": 0.00019999521532187, - "loss": 46.0, - "step": 19346 - }, - { - "epoch": 3.1157051411087404, - "grad_norm": 0.004026864189654589, - "learning_rate": 0.00019999521482696274, - "loss": 46.0, - "step": 19347 - }, - { - "epoch": 3.115866178187528, - "grad_norm": 0.0040955375880002975, - "learning_rate": 0.00019999521433202984, - "loss": 46.0, - "step": 19348 - }, - { - "epoch": 3.116027215266315, - "grad_norm": 0.0012072190875187516, - "learning_rate": 0.00019999521383707135, - "loss": 46.0, - "step": 19349 - }, - { - "epoch": 3.1161882523451023, - "grad_norm": 0.007035098038613796, - "learning_rate": 0.00019999521334208725, - "loss": 46.0, - "step": 19350 - }, - { - "epoch": 3.11634928942389, - "grad_norm": 0.0031054222490638494, - "learning_rate": 0.0001999952128470776, - "loss": 46.0, - "step": 19351 - }, - { - "epoch": 3.1165103265026772, - "grad_norm": 0.0011674832785502076, - "learning_rate": 0.00019999521235204235, - "loss": 46.0, - "step": 19352 - }, - { - "epoch": 3.1166713635814647, - "grad_norm": 0.0017470386810600758, - "learning_rate": 0.0001999952118569815, - "loss": 46.0, - "step": 19353 - }, - { - "epoch": 3.116832400660252, - "grad_norm": 0.009978740476071835, - "learning_rate": 0.00019999521136189504, - "loss": 46.0, - "step": 19354 - }, - { - "epoch": 3.1169934377390396, - "grad_norm": 0.0017703694757074118, - "learning_rate": 0.000199995210866783, - "loss": 46.0, - "step": 19355 - }, - { - "epoch": 3.1171544748178266, - "grad_norm": 0.0020083021372556686, - "learning_rate": 0.00019999521037164538, - "loss": 46.0, - "step": 19356 - }, - { - "epoch": 3.117315511896614, - "grad_norm": 0.0012780020479112864, - "learning_rate": 0.00019999520987648215, - "loss": 46.0, - "step": 19357 - }, - { - "epoch": 3.1174765489754015, - "grad_norm": 0.002130125882104039, - "learning_rate": 0.00019999520938129332, - "loss": 46.0, - "step": 19358 - }, - { - "epoch": 3.117637586054189, - "grad_norm": 0.001066154451109469, - "learning_rate": 0.0001999952088860789, - "loss": 46.0, - "step": 19359 - }, - { - "epoch": 3.1177986231329764, - "grad_norm": 0.0016177851939573884, - "learning_rate": 0.0001999952083908389, - "loss": 46.0, - "step": 19360 - }, - { - "epoch": 3.117959660211764, - "grad_norm": 0.0016625664429739118, - "learning_rate": 0.0001999952078955733, - "loss": 46.0, - "step": 19361 - }, - { - "epoch": 3.1181206972905513, - "grad_norm": 0.0017413728637620807, - "learning_rate": 0.0001999952074002821, - "loss": 46.0, - "step": 19362 - }, - { - "epoch": 3.1182817343693388, - "grad_norm": 0.013530614785850048, - "learning_rate": 0.00019999520690496531, - "loss": 46.0, - "step": 19363 - }, - { - "epoch": 3.1184427714481258, - "grad_norm": 0.003448938950896263, - "learning_rate": 0.0001999952064096229, - "loss": 46.0, - "step": 19364 - }, - { - "epoch": 3.118603808526913, - "grad_norm": 0.004797689151018858, - "learning_rate": 0.00019999520591425495, - "loss": 46.0, - "step": 19365 - }, - { - "epoch": 3.1187648456057007, - "grad_norm": 0.003533191978931427, - "learning_rate": 0.00019999520541886138, - "loss": 46.0, - "step": 19366 - }, - { - "epoch": 3.118925882684488, - "grad_norm": 0.0009692597086541355, - "learning_rate": 0.00019999520492344221, - "loss": 46.0, - "step": 19367 - }, - { - "epoch": 3.1190869197632756, - "grad_norm": 0.006410129833966494, - "learning_rate": 0.00019999520442799746, - "loss": 46.0, - "step": 19368 - }, - { - "epoch": 3.119247956842063, - "grad_norm": 0.001012086751870811, - "learning_rate": 0.0001999952039325271, - "loss": 46.0, - "step": 19369 - }, - { - "epoch": 3.1194089939208505, - "grad_norm": 0.0004513339954428375, - "learning_rate": 0.00019999520343703115, - "loss": 46.0, - "step": 19370 - }, - { - "epoch": 3.1195700309996375, - "grad_norm": 0.00431870250031352, - "learning_rate": 0.0001999952029415096, - "loss": 46.0, - "step": 19371 - }, - { - "epoch": 3.119731068078425, - "grad_norm": 0.0027193366549909115, - "learning_rate": 0.0001999952024459625, - "loss": 46.0, - "step": 19372 - }, - { - "epoch": 3.1198921051572124, - "grad_norm": 0.0023377921897917986, - "learning_rate": 0.00019999520195038977, - "loss": 46.0, - "step": 19373 - }, - { - "epoch": 3.120053142236, - "grad_norm": 0.001031764899380505, - "learning_rate": 0.00019999520145479145, - "loss": 46.0, - "step": 19374 - }, - { - "epoch": 3.1202141793147873, - "grad_norm": 0.0024424795992672443, - "learning_rate": 0.00019999520095916753, - "loss": 46.0, - "step": 19375 - }, - { - "epoch": 3.1203752163935747, - "grad_norm": 0.0005334223387762904, - "learning_rate": 0.00019999520046351803, - "loss": 46.0, - "step": 19376 - }, - { - "epoch": 3.120536253472362, - "grad_norm": 0.005903015844523907, - "learning_rate": 0.00019999519996784291, - "loss": 46.0, - "step": 19377 - }, - { - "epoch": 3.120697290551149, - "grad_norm": 0.0012081346940249205, - "learning_rate": 0.0001999951994721422, - "loss": 46.0, - "step": 19378 - }, - { - "epoch": 3.1208583276299366, - "grad_norm": 0.0010159856174141169, - "learning_rate": 0.00019999519897641595, - "loss": 46.0, - "step": 19379 - }, - { - "epoch": 3.121019364708724, - "grad_norm": 0.0038528433069586754, - "learning_rate": 0.00019999519848066405, - "loss": 46.0, - "step": 19380 - }, - { - "epoch": 3.1211804017875115, - "grad_norm": 0.0011607845081016421, - "learning_rate": 0.0001999951979848866, - "loss": 46.0, - "step": 19381 - }, - { - "epoch": 3.121341438866299, - "grad_norm": 0.002512323437258601, - "learning_rate": 0.0001999951974890835, - "loss": 46.0, - "step": 19382 - }, - { - "epoch": 3.1215024759450865, - "grad_norm": 0.0061342050321400166, - "learning_rate": 0.00019999519699325484, - "loss": 46.0, - "step": 19383 - }, - { - "epoch": 3.121663513023874, - "grad_norm": 0.004224579315632582, - "learning_rate": 0.00019999519649740058, - "loss": 46.0, - "step": 19384 - }, - { - "epoch": 3.1218245501026614, - "grad_norm": 0.010057457722723484, - "learning_rate": 0.00019999519600152074, - "loss": 46.0, - "step": 19385 - }, - { - "epoch": 3.1219855871814484, - "grad_norm": 0.0026779703330248594, - "learning_rate": 0.00019999519550561532, - "loss": 46.0, - "step": 19386 - }, - { - "epoch": 3.122146624260236, - "grad_norm": 0.006408275105059147, - "learning_rate": 0.00019999519500968428, - "loss": 46.0, - "step": 19387 - }, - { - "epoch": 3.1223076613390233, - "grad_norm": 0.0007667880854569376, - "learning_rate": 0.00019999519451372765, - "loss": 46.0, - "step": 19388 - }, - { - "epoch": 3.1224686984178107, - "grad_norm": 0.008423435501754284, - "learning_rate": 0.0001999951940177454, - "loss": 46.0, - "step": 19389 - }, - { - "epoch": 3.122629735496598, - "grad_norm": 0.001704515889286995, - "learning_rate": 0.0001999951935217376, - "loss": 46.0, - "step": 19390 - }, - { - "epoch": 3.1227907725753856, - "grad_norm": 0.0006694623734802008, - "learning_rate": 0.00019999519302570418, - "loss": 46.0, - "step": 19391 - }, - { - "epoch": 3.122951809654173, - "grad_norm": 0.001778366044163704, - "learning_rate": 0.00019999519252964518, - "loss": 46.0, - "step": 19392 - }, - { - "epoch": 3.12311284673296, - "grad_norm": 0.005164226517081261, - "learning_rate": 0.0001999951920335606, - "loss": 46.0, - "step": 19393 - }, - { - "epoch": 3.1232738838117475, - "grad_norm": 0.00195777858607471, - "learning_rate": 0.00019999519153745038, - "loss": 46.0, - "step": 19394 - }, - { - "epoch": 3.123434920890535, - "grad_norm": 0.004751776345074177, - "learning_rate": 0.00019999519104131462, - "loss": 46.0, - "step": 19395 - }, - { - "epoch": 3.1235959579693224, - "grad_norm": 0.007243297062814236, - "learning_rate": 0.00019999519054515324, - "loss": 46.0, - "step": 19396 - }, - { - "epoch": 3.12375699504811, - "grad_norm": 0.009361706674098969, - "learning_rate": 0.00019999519004896624, - "loss": 46.0, - "step": 19397 - }, - { - "epoch": 3.1239180321268973, - "grad_norm": 0.004897703416645527, - "learning_rate": 0.0001999951895527537, - "loss": 46.0, - "step": 19398 - }, - { - "epoch": 3.124079069205685, - "grad_norm": 0.007208153605461121, - "learning_rate": 0.00019999518905651552, - "loss": 46.0, - "step": 19399 - }, - { - "epoch": 3.124240106284472, - "grad_norm": 0.004397696815431118, - "learning_rate": 0.00019999518856025176, - "loss": 46.0, - "step": 19400 - }, - { - "epoch": 3.1244011433632592, - "grad_norm": 0.0012879247078672051, - "learning_rate": 0.00019999518806396242, - "loss": 46.0, - "step": 19401 - }, - { - "epoch": 3.1245621804420467, - "grad_norm": 0.002920987317338586, - "learning_rate": 0.0001999951875676475, - "loss": 46.0, - "step": 19402 - }, - { - "epoch": 3.124723217520834, - "grad_norm": 0.003686473472043872, - "learning_rate": 0.00019999518707130694, - "loss": 46.0, - "step": 19403 - }, - { - "epoch": 3.1248842545996216, - "grad_norm": 0.0036910930648446083, - "learning_rate": 0.0001999951865749408, - "loss": 46.0, - "step": 19404 - }, - { - "epoch": 3.125045291678409, - "grad_norm": 0.0038740267045795918, - "learning_rate": 0.0001999951860785491, - "loss": 46.0, - "step": 19405 - }, - { - "epoch": 3.1252063287571965, - "grad_norm": 0.009565374813973904, - "learning_rate": 0.00019999518558213176, - "loss": 46.0, - "step": 19406 - }, - { - "epoch": 3.1253673658359835, - "grad_norm": 0.002369903726503253, - "learning_rate": 0.00019999518508568887, - "loss": 46.0, - "step": 19407 - }, - { - "epoch": 3.125528402914771, - "grad_norm": 0.0029599997214972973, - "learning_rate": 0.00019999518458922036, - "loss": 46.0, - "step": 19408 - }, - { - "epoch": 3.1256894399935584, - "grad_norm": 0.0015792009653523564, - "learning_rate": 0.00019999518409272626, - "loss": 46.0, - "step": 19409 - }, - { - "epoch": 3.125850477072346, - "grad_norm": 0.0016246461309492588, - "learning_rate": 0.00019999518359620658, - "loss": 46.0, - "step": 19410 - }, - { - "epoch": 3.1260115141511333, - "grad_norm": 0.0009963264456018806, - "learning_rate": 0.00019999518309966129, - "loss": 46.0, - "step": 19411 - }, - { - "epoch": 3.1261725512299208, - "grad_norm": 0.0022691329941153526, - "learning_rate": 0.0001999951826030904, - "loss": 46.0, - "step": 19412 - }, - { - "epoch": 3.1263335883087082, - "grad_norm": 0.001222363207489252, - "learning_rate": 0.00019999518210649396, - "loss": 46.0, - "step": 19413 - }, - { - "epoch": 3.1264946253874957, - "grad_norm": 0.0015487669734284282, - "learning_rate": 0.00019999518160987187, - "loss": 46.0, - "step": 19414 - }, - { - "epoch": 3.1266556624662827, - "grad_norm": 0.005512857344001532, - "learning_rate": 0.00019999518111322423, - "loss": 46.0, - "step": 19415 - }, - { - "epoch": 3.12681669954507, - "grad_norm": 0.009297710843384266, - "learning_rate": 0.00019999518061655097, - "loss": 46.0, - "step": 19416 - }, - { - "epoch": 3.1269777366238576, - "grad_norm": 0.01024394016712904, - "learning_rate": 0.00019999518011985212, - "loss": 46.0, - "step": 19417 - }, - { - "epoch": 3.127138773702645, - "grad_norm": 0.006012026220560074, - "learning_rate": 0.0001999951796231277, - "loss": 46.0, - "step": 19418 - }, - { - "epoch": 3.1272998107814325, - "grad_norm": 0.001367154880426824, - "learning_rate": 0.00019999517912637764, - "loss": 46.0, - "step": 19419 - }, - { - "epoch": 3.12746084786022, - "grad_norm": 0.036792658269405365, - "learning_rate": 0.000199995178629602, - "loss": 46.0, - "step": 19420 - }, - { - "epoch": 3.1276218849390074, - "grad_norm": 0.007829583249986172, - "learning_rate": 0.00019999517813280079, - "loss": 46.0, - "step": 19421 - }, - { - "epoch": 3.1277829220177944, - "grad_norm": 0.004285490605980158, - "learning_rate": 0.00019999517763597398, - "loss": 46.0, - "step": 19422 - }, - { - "epoch": 3.127943959096582, - "grad_norm": 0.001631292630918324, - "learning_rate": 0.00019999517713912158, - "loss": 46.0, - "step": 19423 - }, - { - "epoch": 3.1281049961753693, - "grad_norm": 0.008447431959211826, - "learning_rate": 0.00019999517664224357, - "loss": 46.0, - "step": 19424 - }, - { - "epoch": 3.1282660332541568, - "grad_norm": 0.00588274234905839, - "learning_rate": 0.00019999517614533997, - "loss": 46.0, - "step": 19425 - }, - { - "epoch": 3.128427070332944, - "grad_norm": 0.002601267071440816, - "learning_rate": 0.00019999517564841076, - "loss": 46.0, - "step": 19426 - }, - { - "epoch": 3.1285881074117317, - "grad_norm": 0.007402703166007996, - "learning_rate": 0.000199995175151456, - "loss": 46.0, - "step": 19427 - }, - { - "epoch": 3.128749144490519, - "grad_norm": 0.004526689648628235, - "learning_rate": 0.00019999517465447563, - "loss": 46.0, - "step": 19428 - }, - { - "epoch": 3.1289101815693066, - "grad_norm": 0.0032449415884912014, - "learning_rate": 0.00019999517415746965, - "loss": 46.0, - "step": 19429 - }, - { - "epoch": 3.1290712186480936, - "grad_norm": 0.0029983920976519585, - "learning_rate": 0.0001999951736604381, - "loss": 46.0, - "step": 19430 - }, - { - "epoch": 3.129232255726881, - "grad_norm": 0.00463849538937211, - "learning_rate": 0.00019999517316338094, - "loss": 46.0, - "step": 19431 - }, - { - "epoch": 3.1293932928056685, - "grad_norm": 0.002553614554926753, - "learning_rate": 0.00019999517266629818, - "loss": 46.0, - "step": 19432 - }, - { - "epoch": 3.129554329884456, - "grad_norm": 0.0014503775164484978, - "learning_rate": 0.00019999517216918983, - "loss": 46.0, - "step": 19433 - }, - { - "epoch": 3.1297153669632434, - "grad_norm": 0.003753715194761753, - "learning_rate": 0.0001999951716720559, - "loss": 46.0, - "step": 19434 - }, - { - "epoch": 3.129876404042031, - "grad_norm": 0.0017359053017571568, - "learning_rate": 0.00019999517117489637, - "loss": 46.0, - "step": 19435 - }, - { - "epoch": 3.1300374411208183, - "grad_norm": 0.00038306316128000617, - "learning_rate": 0.00019999517067771123, - "loss": 46.0, - "step": 19436 - }, - { - "epoch": 3.1301984781996053, - "grad_norm": 0.0006059609586372972, - "learning_rate": 0.0001999951701805005, - "loss": 46.0, - "step": 19437 - }, - { - "epoch": 3.1303595152783927, - "grad_norm": 0.001608496531844139, - "learning_rate": 0.00019999516968326422, - "loss": 46.0, - "step": 19438 - }, - { - "epoch": 3.13052055235718, - "grad_norm": 0.001452327356673777, - "learning_rate": 0.0001999951691860023, - "loss": 46.0, - "step": 19439 - }, - { - "epoch": 3.1306815894359676, - "grad_norm": 0.0008440740057267249, - "learning_rate": 0.00019999516868871478, - "loss": 46.0, - "step": 19440 - }, - { - "epoch": 3.130842626514755, - "grad_norm": 0.000956208270508796, - "learning_rate": 0.0001999951681914017, - "loss": 46.0, - "step": 19441 - }, - { - "epoch": 3.1310036635935425, - "grad_norm": 0.004498219583183527, - "learning_rate": 0.00019999516769406302, - "loss": 46.0, - "step": 19442 - }, - { - "epoch": 3.13116470067233, - "grad_norm": 0.0007291391375474632, - "learning_rate": 0.00019999516719669872, - "loss": 46.0, - "step": 19443 - }, - { - "epoch": 3.131325737751117, - "grad_norm": 0.008510365150868893, - "learning_rate": 0.00019999516669930886, - "loss": 46.0, - "step": 19444 - }, - { - "epoch": 3.1314867748299045, - "grad_norm": 0.004410738591104746, - "learning_rate": 0.0001999951662018934, - "loss": 46.0, - "step": 19445 - }, - { - "epoch": 3.131647811908692, - "grad_norm": 0.0032752861734479666, - "learning_rate": 0.00019999516570445234, - "loss": 46.0, - "step": 19446 - }, - { - "epoch": 3.1318088489874794, - "grad_norm": 0.004121949430555105, - "learning_rate": 0.00019999516520698566, - "loss": 46.0, - "step": 19447 - }, - { - "epoch": 3.131969886066267, - "grad_norm": 0.0009498496074229479, - "learning_rate": 0.00019999516470949345, - "loss": 46.0, - "step": 19448 - }, - { - "epoch": 3.1321309231450543, - "grad_norm": 0.007713876198977232, - "learning_rate": 0.0001999951642119756, - "loss": 46.0, - "step": 19449 - }, - { - "epoch": 3.1322919602238417, - "grad_norm": 0.004907692316919565, - "learning_rate": 0.00019999516371443216, - "loss": 46.0, - "step": 19450 - }, - { - "epoch": 3.1324529973026287, - "grad_norm": 0.0027944075409322977, - "learning_rate": 0.0001999951632168631, - "loss": 46.0, - "step": 19451 - }, - { - "epoch": 3.132614034381416, - "grad_norm": 0.0088083790615201, - "learning_rate": 0.0001999951627192685, - "loss": 46.0, - "step": 19452 - }, - { - "epoch": 3.1327750714602036, - "grad_norm": 0.0007252052309922874, - "learning_rate": 0.00019999516222164826, - "loss": 46.0, - "step": 19453 - }, - { - "epoch": 3.132936108538991, - "grad_norm": 0.008067525923252106, - "learning_rate": 0.00019999516172400247, - "loss": 46.0, - "step": 19454 - }, - { - "epoch": 3.1330971456177785, - "grad_norm": 0.001005639904178679, - "learning_rate": 0.00019999516122633107, - "loss": 46.0, - "step": 19455 - }, - { - "epoch": 3.133258182696566, - "grad_norm": 0.006840166635811329, - "learning_rate": 0.00019999516072863405, - "loss": 46.0, - "step": 19456 - }, - { - "epoch": 3.1334192197753534, - "grad_norm": 0.013018138706684113, - "learning_rate": 0.0001999951602309115, - "loss": 46.0, - "step": 19457 - }, - { - "epoch": 3.133580256854141, - "grad_norm": 0.0010031365090981126, - "learning_rate": 0.00019999515973316329, - "loss": 46.0, - "step": 19458 - }, - { - "epoch": 3.133741293932928, - "grad_norm": 0.003623103955760598, - "learning_rate": 0.0001999951592353895, - "loss": 46.0, - "step": 19459 - }, - { - "epoch": 3.1339023310117153, - "grad_norm": 0.0018743551336228848, - "learning_rate": 0.00019999515873759014, - "loss": 46.0, - "step": 19460 - }, - { - "epoch": 3.134063368090503, - "grad_norm": 0.008622311055660248, - "learning_rate": 0.0001999951582397652, - "loss": 46.0, - "step": 19461 - }, - { - "epoch": 3.1342244051692902, - "grad_norm": 0.004155608359724283, - "learning_rate": 0.00019999515774191462, - "loss": 46.0, - "step": 19462 - }, - { - "epoch": 3.1343854422480777, - "grad_norm": 0.003546161809936166, - "learning_rate": 0.00019999515724403847, - "loss": 46.0, - "step": 19463 - }, - { - "epoch": 3.134546479326865, - "grad_norm": 0.009766785427927971, - "learning_rate": 0.00019999515674613673, - "loss": 46.0, - "step": 19464 - }, - { - "epoch": 3.1347075164056526, - "grad_norm": 0.0019418501760810614, - "learning_rate": 0.00019999515624820937, - "loss": 46.0, - "step": 19465 - }, - { - "epoch": 3.1348685534844396, - "grad_norm": 0.001314381486736238, - "learning_rate": 0.00019999515575025645, - "loss": 46.0, - "step": 19466 - }, - { - "epoch": 3.135029590563227, - "grad_norm": 0.004811578895896673, - "learning_rate": 0.0001999951552522779, - "loss": 46.0, - "step": 19467 - }, - { - "epoch": 3.1351906276420145, - "grad_norm": 0.009981920011341572, - "learning_rate": 0.0001999951547542738, - "loss": 46.0, - "step": 19468 - }, - { - "epoch": 3.135351664720802, - "grad_norm": 0.00500292656943202, - "learning_rate": 0.00019999515425624408, - "loss": 46.0, - "step": 19469 - }, - { - "epoch": 3.1355127017995894, - "grad_norm": 0.0019361722515895963, - "learning_rate": 0.00019999515375818878, - "loss": 46.0, - "step": 19470 - }, - { - "epoch": 3.135673738878377, - "grad_norm": 0.0033515747636556625, - "learning_rate": 0.00019999515326010788, - "loss": 46.0, - "step": 19471 - }, - { - "epoch": 3.1358347759571643, - "grad_norm": 0.002729524625465274, - "learning_rate": 0.00019999515276200138, - "loss": 46.0, - "step": 19472 - }, - { - "epoch": 3.1359958130359518, - "grad_norm": 0.0046296501532197, - "learning_rate": 0.00019999515226386928, - "loss": 46.0, - "step": 19473 - }, - { - "epoch": 3.1361568501147388, - "grad_norm": 0.0030332240276038647, - "learning_rate": 0.0001999951517657116, - "loss": 46.0, - "step": 19474 - }, - { - "epoch": 3.136317887193526, - "grad_norm": 0.00582721084356308, - "learning_rate": 0.00019999515126752833, - "loss": 46.0, - "step": 19475 - }, - { - "epoch": 3.1364789242723137, - "grad_norm": 0.004632348660379648, - "learning_rate": 0.00019999515076931946, - "loss": 46.0, - "step": 19476 - }, - { - "epoch": 3.136639961351101, - "grad_norm": 0.0017418634379282594, - "learning_rate": 0.00019999515027108498, - "loss": 46.0, - "step": 19477 - }, - { - "epoch": 3.1368009984298886, - "grad_norm": 0.0028542291838675737, - "learning_rate": 0.00019999514977282493, - "loss": 46.0, - "step": 19478 - }, - { - "epoch": 3.136962035508676, - "grad_norm": 0.006918790750205517, - "learning_rate": 0.00019999514927453927, - "loss": 46.0, - "step": 19479 - }, - { - "epoch": 3.137123072587463, - "grad_norm": 0.004386961460113525, - "learning_rate": 0.00019999514877622803, - "loss": 46.0, - "step": 19480 - }, - { - "epoch": 3.1372841096662505, - "grad_norm": 0.01744384691119194, - "learning_rate": 0.0001999951482778912, - "loss": 46.0, - "step": 19481 - }, - { - "epoch": 3.137445146745038, - "grad_norm": 0.007464381866157055, - "learning_rate": 0.00019999514777952878, - "loss": 46.0, - "step": 19482 - }, - { - "epoch": 3.1376061838238254, - "grad_norm": 0.005523985717445612, - "learning_rate": 0.00019999514728114072, - "loss": 46.0, - "step": 19483 - }, - { - "epoch": 3.137767220902613, - "grad_norm": 0.0058706956915557384, - "learning_rate": 0.0001999951467827271, - "loss": 46.0, - "step": 19484 - }, - { - "epoch": 3.1379282579814003, - "grad_norm": 0.0017817047191783786, - "learning_rate": 0.0001999951462842879, - "loss": 46.0, - "step": 19485 - }, - { - "epoch": 3.1380892950601877, - "grad_norm": 0.003794813994318247, - "learning_rate": 0.00019999514578582307, - "loss": 46.0, - "step": 19486 - }, - { - "epoch": 3.138250332138975, - "grad_norm": 0.00484858313575387, - "learning_rate": 0.0001999951452873327, - "loss": 46.0, - "step": 19487 - }, - { - "epoch": 3.138411369217762, - "grad_norm": 0.0012541485484689474, - "learning_rate": 0.0001999951447888167, - "loss": 46.0, - "step": 19488 - }, - { - "epoch": 3.1385724062965497, - "grad_norm": 0.003293959889560938, - "learning_rate": 0.00019999514429027513, - "loss": 46.0, - "step": 19489 - }, - { - "epoch": 3.138733443375337, - "grad_norm": 0.0022064840886741877, - "learning_rate": 0.00019999514379170794, - "loss": 46.0, - "step": 19490 - }, - { - "epoch": 3.1388944804541246, - "grad_norm": 0.0025604343973100185, - "learning_rate": 0.00019999514329311518, - "loss": 46.0, - "step": 19491 - }, - { - "epoch": 3.139055517532912, - "grad_norm": 0.0015439606504514813, - "learning_rate": 0.00019999514279449678, - "loss": 46.0, - "step": 19492 - }, - { - "epoch": 3.1392165546116995, - "grad_norm": 0.004343516193330288, - "learning_rate": 0.00019999514229585282, - "loss": 46.0, - "step": 19493 - }, - { - "epoch": 3.139377591690487, - "grad_norm": 0.0020656301639974117, - "learning_rate": 0.00019999514179718327, - "loss": 46.0, - "step": 19494 - }, - { - "epoch": 3.139538628769274, - "grad_norm": 0.005326667334884405, - "learning_rate": 0.00019999514129848814, - "loss": 46.0, - "step": 19495 - }, - { - "epoch": 3.1396996658480614, - "grad_norm": 0.003601297503337264, - "learning_rate": 0.00019999514079976737, - "loss": 46.0, - "step": 19496 - }, - { - "epoch": 3.139860702926849, - "grad_norm": 0.0009913978865370154, - "learning_rate": 0.00019999514030102106, - "loss": 46.0, - "step": 19497 - }, - { - "epoch": 3.1400217400056363, - "grad_norm": 0.0022651536855846643, - "learning_rate": 0.0001999951398022491, - "loss": 46.0, - "step": 19498 - }, - { - "epoch": 3.1401827770844237, - "grad_norm": 0.0034134145826101303, - "learning_rate": 0.0001999951393034516, - "loss": 46.0, - "step": 19499 - }, - { - "epoch": 3.140343814163211, - "grad_norm": 0.0014328915858641267, - "learning_rate": 0.00019999513880462847, - "loss": 46.0, - "step": 19500 - }, - { - "epoch": 3.1405048512419986, - "grad_norm": 0.008060412481427193, - "learning_rate": 0.00019999513830577976, - "loss": 46.0, - "step": 19501 - }, - { - "epoch": 3.140665888320786, - "grad_norm": 0.003635668894276023, - "learning_rate": 0.00019999513780690547, - "loss": 46.0, - "step": 19502 - }, - { - "epoch": 3.140826925399573, - "grad_norm": 0.0037198769859969616, - "learning_rate": 0.00019999513730800555, - "loss": 46.0, - "step": 19503 - }, - { - "epoch": 3.1409879624783605, - "grad_norm": 0.00278861285187304, - "learning_rate": 0.00019999513680908005, - "loss": 46.0, - "step": 19504 - }, - { - "epoch": 3.141148999557148, - "grad_norm": 0.004973365925252438, - "learning_rate": 0.000199995136310129, - "loss": 46.0, - "step": 19505 - }, - { - "epoch": 3.1413100366359354, - "grad_norm": 0.006277016829699278, - "learning_rate": 0.0001999951358111523, - "loss": 46.0, - "step": 19506 - }, - { - "epoch": 3.141471073714723, - "grad_norm": 0.004592635203152895, - "learning_rate": 0.00019999513531215003, - "loss": 46.0, - "step": 19507 - }, - { - "epoch": 3.1416321107935103, - "grad_norm": 0.0012343861162662506, - "learning_rate": 0.00019999513481312216, - "loss": 46.0, - "step": 19508 - }, - { - "epoch": 3.141793147872298, - "grad_norm": 0.0009086685022339225, - "learning_rate": 0.0001999951343140687, - "loss": 46.0, - "step": 19509 - }, - { - "epoch": 3.141954184951085, - "grad_norm": 0.0006483003380708396, - "learning_rate": 0.00019999513381498965, - "loss": 46.0, - "step": 19510 - }, - { - "epoch": 3.1421152220298723, - "grad_norm": 0.00268872850574553, - "learning_rate": 0.000199995133315885, - "loss": 46.0, - "step": 19511 - }, - { - "epoch": 3.1422762591086597, - "grad_norm": 0.002022774191573262, - "learning_rate": 0.00019999513281675476, - "loss": 46.0, - "step": 19512 - }, - { - "epoch": 3.142437296187447, - "grad_norm": 0.007730285171419382, - "learning_rate": 0.00019999513231759892, - "loss": 46.0, - "step": 19513 - }, - { - "epoch": 3.1425983332662346, - "grad_norm": 0.006866161711513996, - "learning_rate": 0.0001999951318184175, - "loss": 46.0, - "step": 19514 - }, - { - "epoch": 3.142759370345022, - "grad_norm": 0.003307870589196682, - "learning_rate": 0.00019999513131921045, - "loss": 46.0, - "step": 19515 - }, - { - "epoch": 3.1429204074238095, - "grad_norm": 0.003311126260086894, - "learning_rate": 0.00019999513081997786, - "loss": 46.0, - "step": 19516 - }, - { - "epoch": 3.1430814445025965, - "grad_norm": 0.000960042467340827, - "learning_rate": 0.00019999513032071967, - "loss": 46.0, - "step": 19517 - }, - { - "epoch": 3.143242481581384, - "grad_norm": 0.001321278978139162, - "learning_rate": 0.00019999512982143584, - "loss": 46.0, - "step": 19518 - }, - { - "epoch": 3.1434035186601714, - "grad_norm": 0.001282593933865428, - "learning_rate": 0.00019999512932212645, - "loss": 46.0, - "step": 19519 - }, - { - "epoch": 3.143564555738959, - "grad_norm": 0.0018429771298542619, - "learning_rate": 0.00019999512882279145, - "loss": 46.0, - "step": 19520 - }, - { - "epoch": 3.1437255928177463, - "grad_norm": 0.00237071979790926, - "learning_rate": 0.0001999951283234309, - "loss": 46.0, - "step": 19521 - }, - { - "epoch": 3.143886629896534, - "grad_norm": 0.0008535197703167796, - "learning_rate": 0.0001999951278240447, - "loss": 46.0, - "step": 19522 - }, - { - "epoch": 3.1440476669753212, - "grad_norm": 0.0074363392777740955, - "learning_rate": 0.00019999512732463292, - "loss": 46.0, - "step": 19523 - }, - { - "epoch": 3.1442087040541082, - "grad_norm": 0.00275426940061152, - "learning_rate": 0.00019999512682519557, - "loss": 46.0, - "step": 19524 - }, - { - "epoch": 3.1443697411328957, - "grad_norm": 0.0010301631409674883, - "learning_rate": 0.0001999951263257326, - "loss": 46.0, - "step": 19525 - }, - { - "epoch": 3.144530778211683, - "grad_norm": 0.008522147312760353, - "learning_rate": 0.00019999512582624405, - "loss": 46.0, - "step": 19526 - }, - { - "epoch": 3.1446918152904706, - "grad_norm": 0.0034903259947896004, - "learning_rate": 0.0001999951253267299, - "loss": 46.0, - "step": 19527 - }, - { - "epoch": 3.144852852369258, - "grad_norm": 0.007606164086610079, - "learning_rate": 0.00019999512482719015, - "loss": 46.0, - "step": 19528 - }, - { - "epoch": 3.1450138894480455, - "grad_norm": 0.0007449169643223286, - "learning_rate": 0.00019999512432762484, - "loss": 46.0, - "step": 19529 - }, - { - "epoch": 3.145174926526833, - "grad_norm": 0.003868551691994071, - "learning_rate": 0.00019999512382803388, - "loss": 46.0, - "step": 19530 - }, - { - "epoch": 3.1453359636056204, - "grad_norm": 0.0020239658188074827, - "learning_rate": 0.0001999951233284174, - "loss": 46.0, - "step": 19531 - }, - { - "epoch": 3.1454970006844074, - "grad_norm": 0.0023372266441583633, - "learning_rate": 0.00019999512282877527, - "loss": 46.0, - "step": 19532 - }, - { - "epoch": 3.145658037763195, - "grad_norm": 0.0050746239721775055, - "learning_rate": 0.00019999512232910758, - "loss": 46.0, - "step": 19533 - }, - { - "epoch": 3.1458190748419823, - "grad_norm": 0.013485323637723923, - "learning_rate": 0.00019999512182941427, - "loss": 46.0, - "step": 19534 - }, - { - "epoch": 3.1459801119207698, - "grad_norm": 0.004309992305934429, - "learning_rate": 0.00019999512132969538, - "loss": 46.0, - "step": 19535 - }, - { - "epoch": 3.146141148999557, - "grad_norm": 0.013468729332089424, - "learning_rate": 0.00019999512082995087, - "loss": 46.0, - "step": 19536 - }, - { - "epoch": 3.1463021860783447, - "grad_norm": 0.0070412238128483295, - "learning_rate": 0.0001999951203301808, - "loss": 46.0, - "step": 19537 - }, - { - "epoch": 3.146463223157132, - "grad_norm": 0.003107303287833929, - "learning_rate": 0.00019999511983038513, - "loss": 46.0, - "step": 19538 - }, - { - "epoch": 3.146624260235919, - "grad_norm": 0.0010528797283768654, - "learning_rate": 0.00019999511933056386, - "loss": 46.0, - "step": 19539 - }, - { - "epoch": 3.1467852973147066, - "grad_norm": 0.0016254577785730362, - "learning_rate": 0.000199995118830717, - "loss": 46.0, - "step": 19540 - }, - { - "epoch": 3.146946334393494, - "grad_norm": 0.008398851379752159, - "learning_rate": 0.0001999951183308445, - "loss": 46.0, - "step": 19541 - }, - { - "epoch": 3.1471073714722815, - "grad_norm": 0.0010431804694235325, - "learning_rate": 0.00019999511783094648, - "loss": 46.0, - "step": 19542 - }, - { - "epoch": 3.147268408551069, - "grad_norm": 0.002881931606680155, - "learning_rate": 0.00019999511733102284, - "loss": 46.0, - "step": 19543 - }, - { - "epoch": 3.1474294456298564, - "grad_norm": 0.001314807333983481, - "learning_rate": 0.00019999511683107358, - "loss": 46.0, - "step": 19544 - }, - { - "epoch": 3.147590482708644, - "grad_norm": 0.0066570742055773735, - "learning_rate": 0.00019999511633109876, - "loss": 46.0, - "step": 19545 - }, - { - "epoch": 3.1477515197874313, - "grad_norm": 0.001074617262929678, - "learning_rate": 0.00019999511583109836, - "loss": 46.0, - "step": 19546 - }, - { - "epoch": 3.1479125568662183, - "grad_norm": 0.0010014285799115896, - "learning_rate": 0.00019999511533107234, - "loss": 46.0, - "step": 19547 - }, - { - "epoch": 3.1480735939450057, - "grad_norm": 0.005359780043363571, - "learning_rate": 0.0001999951148310207, - "loss": 46.0, - "step": 19548 - }, - { - "epoch": 3.148234631023793, - "grad_norm": 0.004768101964145899, - "learning_rate": 0.00019999511433094354, - "loss": 46.0, - "step": 19549 - }, - { - "epoch": 3.1483956681025806, - "grad_norm": 0.0020312040578573942, - "learning_rate": 0.00019999511383084073, - "loss": 46.0, - "step": 19550 - }, - { - "epoch": 3.148556705181368, - "grad_norm": 0.001919933594763279, - "learning_rate": 0.0001999951133307123, - "loss": 46.0, - "step": 19551 - }, - { - "epoch": 3.1487177422601555, - "grad_norm": 0.004075990524142981, - "learning_rate": 0.00019999511283055833, - "loss": 46.0, - "step": 19552 - }, - { - "epoch": 3.1488787793389426, - "grad_norm": 0.007960813120007515, - "learning_rate": 0.00019999511233037876, - "loss": 46.0, - "step": 19553 - }, - { - "epoch": 3.14903981641773, - "grad_norm": 0.0069361524656414986, - "learning_rate": 0.00019999511183017358, - "loss": 46.0, - "step": 19554 - }, - { - "epoch": 3.1492008534965175, - "grad_norm": 0.003147019073367119, - "learning_rate": 0.0001999951113299428, - "loss": 46.0, - "step": 19555 - }, - { - "epoch": 3.149361890575305, - "grad_norm": 0.0016937522450461984, - "learning_rate": 0.00019999511082968648, - "loss": 46.0, - "step": 19556 - }, - { - "epoch": 3.1495229276540924, - "grad_norm": 0.0017041638493537903, - "learning_rate": 0.00019999511032940453, - "loss": 46.0, - "step": 19557 - }, - { - "epoch": 3.14968396473288, - "grad_norm": 0.007754228077828884, - "learning_rate": 0.00019999510982909695, - "loss": 46.0, - "step": 19558 - }, - { - "epoch": 3.1498450018116673, - "grad_norm": 0.005011291243135929, - "learning_rate": 0.00019999510932876383, - "loss": 46.0, - "step": 19559 - }, - { - "epoch": 3.1500060388904547, - "grad_norm": 0.0050039710476994514, - "learning_rate": 0.0001999951088284051, - "loss": 46.0, - "step": 19560 - }, - { - "epoch": 3.1501670759692417, - "grad_norm": 0.004077255725860596, - "learning_rate": 0.00019999510832802075, - "loss": 46.0, - "step": 19561 - }, - { - "epoch": 3.150328113048029, - "grad_norm": 0.0010054357117041945, - "learning_rate": 0.00019999510782761084, - "loss": 46.0, - "step": 19562 - }, - { - "epoch": 3.1504891501268166, - "grad_norm": 0.0013901363126933575, - "learning_rate": 0.00019999510732717532, - "loss": 46.0, - "step": 19563 - }, - { - "epoch": 3.150650187205604, - "grad_norm": 0.003362879855558276, - "learning_rate": 0.0001999951068267142, - "loss": 46.0, - "step": 19564 - }, - { - "epoch": 3.1508112242843915, - "grad_norm": 0.005998195614665747, - "learning_rate": 0.0001999951063262275, - "loss": 46.0, - "step": 19565 - }, - { - "epoch": 3.150972261363179, - "grad_norm": 0.0047590890899300575, - "learning_rate": 0.0001999951058257152, - "loss": 46.0, - "step": 19566 - }, - { - "epoch": 3.1511332984419664, - "grad_norm": 0.0019848798401653767, - "learning_rate": 0.0001999951053251773, - "loss": 46.0, - "step": 19567 - }, - { - "epoch": 3.1512943355207534, - "grad_norm": 0.0020959326066076756, - "learning_rate": 0.00019999510482461385, - "loss": 46.0, - "step": 19568 - }, - { - "epoch": 3.151455372599541, - "grad_norm": 0.010506482794880867, - "learning_rate": 0.00019999510432402477, - "loss": 46.0, - "step": 19569 - }, - { - "epoch": 3.1516164096783283, - "grad_norm": 0.007895024493336678, - "learning_rate": 0.0001999951038234101, - "loss": 46.0, - "step": 19570 - }, - { - "epoch": 3.151777446757116, - "grad_norm": 0.0019134761532768607, - "learning_rate": 0.00019999510332276984, - "loss": 46.0, - "step": 19571 - }, - { - "epoch": 3.1519384838359032, - "grad_norm": 0.020334867760539055, - "learning_rate": 0.00019999510282210395, - "loss": 46.0, - "step": 19572 - }, - { - "epoch": 3.1520995209146907, - "grad_norm": 0.018368765711784363, - "learning_rate": 0.00019999510232141253, - "loss": 46.0, - "step": 19573 - }, - { - "epoch": 3.152260557993478, - "grad_norm": 0.0007146418211050332, - "learning_rate": 0.00019999510182069544, - "loss": 46.0, - "step": 19574 - }, - { - "epoch": 3.1524215950722656, - "grad_norm": 0.0016286029713228345, - "learning_rate": 0.00019999510131995282, - "loss": 46.0, - "step": 19575 - }, - { - "epoch": 3.1525826321510526, - "grad_norm": 0.006618593819439411, - "learning_rate": 0.0001999951008191846, - "loss": 46.0, - "step": 19576 - }, - { - "epoch": 3.15274366922984, - "grad_norm": 0.0036968053318560123, - "learning_rate": 0.00019999510031839076, - "loss": 46.0, - "step": 19577 - }, - { - "epoch": 3.1529047063086275, - "grad_norm": 0.008197592571377754, - "learning_rate": 0.00019999509981757135, - "loss": 46.0, - "step": 19578 - }, - { - "epoch": 3.153065743387415, - "grad_norm": 0.005064803175628185, - "learning_rate": 0.00019999509931672632, - "loss": 46.0, - "step": 19579 - }, - { - "epoch": 3.1532267804662024, - "grad_norm": 0.0037036787252873182, - "learning_rate": 0.00019999509881585574, - "loss": 46.0, - "step": 19580 - }, - { - "epoch": 3.15338781754499, - "grad_norm": 0.001896088826470077, - "learning_rate": 0.00019999509831495954, - "loss": 46.0, - "step": 19581 - }, - { - "epoch": 3.1535488546237773, - "grad_norm": 0.0016632025362923741, - "learning_rate": 0.00019999509781403772, - "loss": 46.0, - "step": 19582 - }, - { - "epoch": 3.1537098917025643, - "grad_norm": 0.0014822020893916488, - "learning_rate": 0.00019999509731309035, - "loss": 46.0, - "step": 19583 - }, - { - "epoch": 3.153870928781352, - "grad_norm": 0.0025838366709649563, - "learning_rate": 0.00019999509681211736, - "loss": 46.0, - "step": 19584 - }, - { - "epoch": 3.1540319658601392, - "grad_norm": 0.00504272012040019, - "learning_rate": 0.0001999950963111188, - "loss": 46.0, - "step": 19585 - }, - { - "epoch": 3.1541930029389267, - "grad_norm": 0.0012272970052435994, - "learning_rate": 0.00019999509581009462, - "loss": 46.0, - "step": 19586 - }, - { - "epoch": 3.154354040017714, - "grad_norm": 0.0008530979393981397, - "learning_rate": 0.00019999509530904485, - "loss": 46.0, - "step": 19587 - }, - { - "epoch": 3.1545150770965016, - "grad_norm": 0.0011674538254737854, - "learning_rate": 0.00019999509480796948, - "loss": 46.0, - "step": 19588 - }, - { - "epoch": 3.154676114175289, - "grad_norm": 0.0010253567015752196, - "learning_rate": 0.00019999509430686856, - "loss": 46.0, - "step": 19589 - }, - { - "epoch": 3.154837151254076, - "grad_norm": 0.0034656180068850517, - "learning_rate": 0.000199995093805742, - "loss": 46.0, - "step": 19590 - }, - { - "epoch": 3.1549981883328635, - "grad_norm": 0.00491347024217248, - "learning_rate": 0.00019999509330458987, - "loss": 46.0, - "step": 19591 - }, - { - "epoch": 3.155159225411651, - "grad_norm": 0.006057028193026781, - "learning_rate": 0.00019999509280341213, - "loss": 46.0, - "step": 19592 - }, - { - "epoch": 3.1553202624904384, - "grad_norm": 0.002717996947467327, - "learning_rate": 0.00019999509230220883, - "loss": 46.0, - "step": 19593 - }, - { - "epoch": 3.155481299569226, - "grad_norm": 0.0031754192896187305, - "learning_rate": 0.00019999509180097992, - "loss": 46.0, - "step": 19594 - }, - { - "epoch": 3.1556423366480133, - "grad_norm": 0.005409469828009605, - "learning_rate": 0.00019999509129972541, - "loss": 46.0, - "step": 19595 - }, - { - "epoch": 3.1558033737268008, - "grad_norm": 0.002848827512934804, - "learning_rate": 0.00019999509079844527, - "loss": 46.0, - "step": 19596 - }, - { - "epoch": 3.1559644108055878, - "grad_norm": 0.00689384201541543, - "learning_rate": 0.0001999950902971396, - "loss": 46.0, - "step": 19597 - }, - { - "epoch": 3.156125447884375, - "grad_norm": 0.016865484416484833, - "learning_rate": 0.00019999508979580828, - "loss": 46.0, - "step": 19598 - }, - { - "epoch": 3.1562864849631627, - "grad_norm": 0.003080186899751425, - "learning_rate": 0.0001999950892944514, - "loss": 46.0, - "step": 19599 - }, - { - "epoch": 3.15644752204195, - "grad_norm": 0.0034886961802840233, - "learning_rate": 0.0001999950887930689, - "loss": 46.0, - "step": 19600 - }, - { - "epoch": 3.1566085591207376, - "grad_norm": 0.0019938715267926455, - "learning_rate": 0.00019999508829166086, - "loss": 46.0, - "step": 19601 - }, - { - "epoch": 3.156769596199525, - "grad_norm": 0.0033503801096230745, - "learning_rate": 0.0001999950877902272, - "loss": 46.0, - "step": 19602 - }, - { - "epoch": 3.1569306332783125, - "grad_norm": 0.0061528743244707584, - "learning_rate": 0.00019999508728876791, - "loss": 46.0, - "step": 19603 - }, - { - "epoch": 3.1570916703571, - "grad_norm": 0.0010879081673920155, - "learning_rate": 0.00019999508678728307, - "loss": 46.0, - "step": 19604 - }, - { - "epoch": 3.157252707435887, - "grad_norm": 0.007956474088132381, - "learning_rate": 0.00019999508628577265, - "loss": 46.0, - "step": 19605 - }, - { - "epoch": 3.1574137445146744, - "grad_norm": 0.007138898596167564, - "learning_rate": 0.00019999508578423658, - "loss": 46.0, - "step": 19606 - }, - { - "epoch": 3.157574781593462, - "grad_norm": 0.00251348246820271, - "learning_rate": 0.00019999508528267495, - "loss": 46.0, - "step": 19607 - }, - { - "epoch": 3.1577358186722493, - "grad_norm": 0.006476631388068199, - "learning_rate": 0.00019999508478108773, - "loss": 46.0, - "step": 19608 - }, - { - "epoch": 3.1578968557510367, - "grad_norm": 0.002998229581862688, - "learning_rate": 0.00019999508427947488, - "loss": 46.0, - "step": 19609 - }, - { - "epoch": 3.158057892829824, - "grad_norm": 0.004335950128734112, - "learning_rate": 0.00019999508377783646, - "loss": 46.0, - "step": 19610 - }, - { - "epoch": 3.1582189299086116, - "grad_norm": 0.0023647022899240255, - "learning_rate": 0.00019999508327617246, - "loss": 46.0, - "step": 19611 - }, - { - "epoch": 3.1583799669873986, - "grad_norm": 0.004215963184833527, - "learning_rate": 0.00019999508277448286, - "loss": 46.0, - "step": 19612 - }, - { - "epoch": 3.158541004066186, - "grad_norm": 0.00445952732115984, - "learning_rate": 0.00019999508227276769, - "loss": 46.0, - "step": 19613 - }, - { - "epoch": 3.1587020411449735, - "grad_norm": 0.0020399068016558886, - "learning_rate": 0.0001999950817710269, - "loss": 46.0, - "step": 19614 - }, - { - "epoch": 3.158863078223761, - "grad_norm": 0.0036151974927634, - "learning_rate": 0.00019999508126926048, - "loss": 46.0, - "step": 19615 - }, - { - "epoch": 3.1590241153025485, - "grad_norm": 0.004897549748420715, - "learning_rate": 0.00019999508076746852, - "loss": 46.0, - "step": 19616 - }, - { - "epoch": 3.159185152381336, - "grad_norm": 0.001708691823296249, - "learning_rate": 0.00019999508026565094, - "loss": 46.0, - "step": 19617 - }, - { - "epoch": 3.1593461894601234, - "grad_norm": 0.0015553038101643324, - "learning_rate": 0.0001999950797638078, - "loss": 46.0, - "step": 19618 - }, - { - "epoch": 3.159507226538911, - "grad_norm": 0.0017440100200474262, - "learning_rate": 0.00019999507926193904, - "loss": 46.0, - "step": 19619 - }, - { - "epoch": 3.159668263617698, - "grad_norm": 0.0018952075624838471, - "learning_rate": 0.0001999950787600447, - "loss": 46.0, - "step": 19620 - }, - { - "epoch": 3.1598293006964853, - "grad_norm": 0.011986377649009228, - "learning_rate": 0.00019999507825812474, - "loss": 46.0, - "step": 19621 - }, - { - "epoch": 3.1599903377752727, - "grad_norm": 0.0020185604225844145, - "learning_rate": 0.0001999950777561792, - "loss": 46.0, - "step": 19622 - }, - { - "epoch": 3.16015137485406, - "grad_norm": 0.00163203664124012, - "learning_rate": 0.00019999507725420806, - "loss": 46.0, - "step": 19623 - }, - { - "epoch": 3.1603124119328476, - "grad_norm": 0.006958762649446726, - "learning_rate": 0.00019999507675221134, - "loss": 46.0, - "step": 19624 - }, - { - "epoch": 3.160473449011635, - "grad_norm": 0.0051189446821808815, - "learning_rate": 0.00019999507625018904, - "loss": 46.0, - "step": 19625 - }, - { - "epoch": 3.1606344860904225, - "grad_norm": 0.003895426169037819, - "learning_rate": 0.00019999507574814112, - "loss": 46.0, - "step": 19626 - }, - { - "epoch": 3.1607955231692095, - "grad_norm": 0.01487557590007782, - "learning_rate": 0.0001999950752460676, - "loss": 46.0, - "step": 19627 - }, - { - "epoch": 3.160956560247997, - "grad_norm": 0.0015625845408067107, - "learning_rate": 0.00019999507474396851, - "loss": 46.0, - "step": 19628 - }, - { - "epoch": 3.1611175973267844, - "grad_norm": 0.0030629541724920273, - "learning_rate": 0.0001999950742418438, - "loss": 46.0, - "step": 19629 - }, - { - "epoch": 3.161278634405572, - "grad_norm": 0.002921532606706023, - "learning_rate": 0.00019999507373969354, - "loss": 46.0, - "step": 19630 - }, - { - "epoch": 3.1614396714843593, - "grad_norm": 0.0029720959719270468, - "learning_rate": 0.00019999507323751763, - "loss": 46.0, - "step": 19631 - }, - { - "epoch": 3.161600708563147, - "grad_norm": 0.0026841016951948404, - "learning_rate": 0.00019999507273531615, - "loss": 46.0, - "step": 19632 - }, - { - "epoch": 3.1617617456419342, - "grad_norm": 0.024503102526068687, - "learning_rate": 0.0001999950722330891, - "loss": 46.0, - "step": 19633 - }, - { - "epoch": 3.1619227827207212, - "grad_norm": 0.0014513133792206645, - "learning_rate": 0.00019999507173083642, - "loss": 46.0, - "step": 19634 - }, - { - "epoch": 3.1620838197995087, - "grad_norm": 0.0015343984123319387, - "learning_rate": 0.0001999950712285582, - "loss": 46.0, - "step": 19635 - }, - { - "epoch": 3.162244856878296, - "grad_norm": 0.0011266530491411686, - "learning_rate": 0.00019999507072625432, - "loss": 46.0, - "step": 19636 - }, - { - "epoch": 3.1624058939570836, - "grad_norm": 0.006266784854233265, - "learning_rate": 0.00019999507022392489, - "loss": 46.0, - "step": 19637 - }, - { - "epoch": 3.162566931035871, - "grad_norm": 0.0020720544271171093, - "learning_rate": 0.00019999506972156984, - "loss": 46.0, - "step": 19638 - }, - { - "epoch": 3.1627279681146585, - "grad_norm": 0.0019829608500003815, - "learning_rate": 0.0001999950692191892, - "loss": 46.0, - "step": 19639 - }, - { - "epoch": 3.162889005193446, - "grad_norm": 0.0038157084491103888, - "learning_rate": 0.000199995068716783, - "loss": 46.0, - "step": 19640 - }, - { - "epoch": 3.163050042272233, - "grad_norm": 0.0044404580257833, - "learning_rate": 0.00019999506821435117, - "loss": 46.0, - "step": 19641 - }, - { - "epoch": 3.1632110793510204, - "grad_norm": 0.002125629223883152, - "learning_rate": 0.00019999506771189375, - "loss": 46.0, - "step": 19642 - }, - { - "epoch": 3.163372116429808, - "grad_norm": 0.0030336007475852966, - "learning_rate": 0.00019999506720941076, - "loss": 46.0, - "step": 19643 - }, - { - "epoch": 3.1635331535085953, - "grad_norm": 0.003360901726409793, - "learning_rate": 0.00019999506670690217, - "loss": 46.0, - "step": 19644 - }, - { - "epoch": 3.1636941905873828, - "grad_norm": 0.0054570771753787994, - "learning_rate": 0.00019999506620436795, - "loss": 46.0, - "step": 19645 - }, - { - "epoch": 3.1638552276661702, - "grad_norm": 0.0010881581110879779, - "learning_rate": 0.00019999506570180818, - "loss": 46.0, - "step": 19646 - }, - { - "epoch": 3.1640162647449577, - "grad_norm": 0.002101463731378317, - "learning_rate": 0.0001999950651992228, - "loss": 46.0, - "step": 19647 - }, - { - "epoch": 3.164177301823745, - "grad_norm": 0.0011758340988308191, - "learning_rate": 0.00019999506469661182, - "loss": 46.0, - "step": 19648 - }, - { - "epoch": 3.164338338902532, - "grad_norm": 0.005233744625002146, - "learning_rate": 0.00019999506419397526, - "loss": 46.0, - "step": 19649 - }, - { - "epoch": 3.1644993759813196, - "grad_norm": 0.0012121361214667559, - "learning_rate": 0.0001999950636913131, - "loss": 46.0, - "step": 19650 - }, - { - "epoch": 3.164660413060107, - "grad_norm": 0.010114284232258797, - "learning_rate": 0.00019999506318862535, - "loss": 46.0, - "step": 19651 - }, - { - "epoch": 3.1648214501388945, - "grad_norm": 0.011123177595436573, - "learning_rate": 0.000199995062685912, - "loss": 46.0, - "step": 19652 - }, - { - "epoch": 3.164982487217682, - "grad_norm": 0.0038304938934743404, - "learning_rate": 0.00019999506218317307, - "loss": 46.0, - "step": 19653 - }, - { - "epoch": 3.1651435242964694, - "grad_norm": 0.0006281990790739655, - "learning_rate": 0.00019999506168040854, - "loss": 46.0, - "step": 19654 - }, - { - "epoch": 3.165304561375257, - "grad_norm": 0.005770376417785883, - "learning_rate": 0.0001999950611776184, - "loss": 46.0, - "step": 19655 - }, - { - "epoch": 3.165465598454044, - "grad_norm": 0.002181033371016383, - "learning_rate": 0.00019999506067480268, - "loss": 46.0, - "step": 19656 - }, - { - "epoch": 3.1656266355328313, - "grad_norm": 0.0061495923437178135, - "learning_rate": 0.00019999506017196134, - "loss": 46.0, - "step": 19657 - }, - { - "epoch": 3.1657876726116188, - "grad_norm": 0.012446665205061436, - "learning_rate": 0.00019999505966909444, - "loss": 46.0, - "step": 19658 - }, - { - "epoch": 3.165948709690406, - "grad_norm": 0.0030658040195703506, - "learning_rate": 0.00019999505916620195, - "loss": 46.0, - "step": 19659 - }, - { - "epoch": 3.1661097467691937, - "grad_norm": 0.0023766884114593267, - "learning_rate": 0.00019999505866328385, - "loss": 46.0, - "step": 19660 - }, - { - "epoch": 3.166270783847981, - "grad_norm": 0.007102517876774073, - "learning_rate": 0.00019999505816034016, - "loss": 46.0, - "step": 19661 - }, - { - "epoch": 3.1664318209267686, - "grad_norm": 0.005342569667845964, - "learning_rate": 0.0001999950576573709, - "loss": 46.0, - "step": 19662 - }, - { - "epoch": 3.166592858005556, - "grad_norm": 0.0024085398763418198, - "learning_rate": 0.000199995057154376, - "loss": 46.0, - "step": 19663 - }, - { - "epoch": 3.166753895084343, - "grad_norm": 0.006693227216601372, - "learning_rate": 0.00019999505665135555, - "loss": 46.0, - "step": 19664 - }, - { - "epoch": 3.1669149321631305, - "grad_norm": 0.0009110403480008245, - "learning_rate": 0.00019999505614830946, - "loss": 46.0, - "step": 19665 - }, - { - "epoch": 3.167075969241918, - "grad_norm": 0.0009074477129615843, - "learning_rate": 0.0001999950556452378, - "loss": 46.0, - "step": 19666 - }, - { - "epoch": 3.1672370063207054, - "grad_norm": 0.002921093488112092, - "learning_rate": 0.00019999505514214057, - "loss": 46.0, - "step": 19667 - }, - { - "epoch": 3.167398043399493, - "grad_norm": 0.006306065712124109, - "learning_rate": 0.00019999505463901774, - "loss": 46.0, - "step": 19668 - }, - { - "epoch": 3.1675590804782803, - "grad_norm": 0.002673779847100377, - "learning_rate": 0.00019999505413586928, - "loss": 46.0, - "step": 19669 - }, - { - "epoch": 3.1677201175570673, - "grad_norm": 0.002985946135595441, - "learning_rate": 0.00019999505363269525, - "loss": 46.0, - "step": 19670 - }, - { - "epoch": 3.1678811546358547, - "grad_norm": 0.0014855649787932634, - "learning_rate": 0.00019999505312949563, - "loss": 46.0, - "step": 19671 - }, - { - "epoch": 3.168042191714642, - "grad_norm": 0.0018030541250482202, - "learning_rate": 0.0001999950526262704, - "loss": 46.0, - "step": 19672 - }, - { - "epoch": 3.1682032287934296, - "grad_norm": 0.0012917385902255774, - "learning_rate": 0.0001999950521230196, - "loss": 46.0, - "step": 19673 - }, - { - "epoch": 3.168364265872217, - "grad_norm": 0.002281046472489834, - "learning_rate": 0.00019999505161974316, - "loss": 46.0, - "step": 19674 - }, - { - "epoch": 3.1685253029510045, - "grad_norm": 0.005363738164305687, - "learning_rate": 0.0001999950511164412, - "loss": 46.0, - "step": 19675 - }, - { - "epoch": 3.168686340029792, - "grad_norm": 0.010731049813330173, - "learning_rate": 0.0001999950506131136, - "loss": 46.0, - "step": 19676 - }, - { - "epoch": 3.1688473771085794, - "grad_norm": 0.008554967120289803, - "learning_rate": 0.0001999950501097604, - "loss": 46.0, - "step": 19677 - }, - { - "epoch": 3.1690084141873665, - "grad_norm": 0.01828489452600479, - "learning_rate": 0.00019999504960638162, - "loss": 46.0, - "step": 19678 - }, - { - "epoch": 3.169169451266154, - "grad_norm": 0.0017399616772308946, - "learning_rate": 0.00019999504910297726, - "loss": 46.0, - "step": 19679 - }, - { - "epoch": 3.1693304883449414, - "grad_norm": 0.004214193671941757, - "learning_rate": 0.00019999504859954728, - "loss": 46.0, - "step": 19680 - }, - { - "epoch": 3.169491525423729, - "grad_norm": 0.004244233947247267, - "learning_rate": 0.0001999950480960917, - "loss": 46.0, - "step": 19681 - }, - { - "epoch": 3.1696525625025163, - "grad_norm": 0.009747356176376343, - "learning_rate": 0.00019999504759261056, - "loss": 46.0, - "step": 19682 - }, - { - "epoch": 3.1698135995813037, - "grad_norm": 0.01339562889188528, - "learning_rate": 0.00019999504708910381, - "loss": 46.0, - "step": 19683 - }, - { - "epoch": 3.169974636660091, - "grad_norm": 0.0027845578733831644, - "learning_rate": 0.00019999504658557146, - "loss": 46.0, - "step": 19684 - }, - { - "epoch": 3.170135673738878, - "grad_norm": 0.0005256269359961152, - "learning_rate": 0.00019999504608201354, - "loss": 46.0, - "step": 19685 - }, - { - "epoch": 3.1702967108176656, - "grad_norm": 0.0026178061962127686, - "learning_rate": 0.00019999504557843, - "loss": 46.0, - "step": 19686 - }, - { - "epoch": 3.170457747896453, - "grad_norm": 0.016922473907470703, - "learning_rate": 0.00019999504507482087, - "loss": 46.0, - "step": 19687 - }, - { - "epoch": 3.1706187849752405, - "grad_norm": 0.001348848338238895, - "learning_rate": 0.00019999504457118616, - "loss": 46.0, - "step": 19688 - }, - { - "epoch": 3.170779822054028, - "grad_norm": 0.002857778687030077, - "learning_rate": 0.00019999504406752587, - "loss": 46.0, - "step": 19689 - }, - { - "epoch": 3.1709408591328154, - "grad_norm": 0.00183499651029706, - "learning_rate": 0.00019999504356383997, - "loss": 46.0, - "step": 19690 - }, - { - "epoch": 3.171101896211603, - "grad_norm": 0.0021241542417556047, - "learning_rate": 0.00019999504306012847, - "loss": 46.0, - "step": 19691 - }, - { - "epoch": 3.1712629332903903, - "grad_norm": 0.02402188442647457, - "learning_rate": 0.00019999504255639137, - "loss": 46.0, - "step": 19692 - }, - { - "epoch": 3.1714239703691773, - "grad_norm": 0.005322204902768135, - "learning_rate": 0.0001999950420526287, - "loss": 46.0, - "step": 19693 - }, - { - "epoch": 3.171585007447965, - "grad_norm": 0.0005124992458149791, - "learning_rate": 0.0001999950415488404, - "loss": 46.0, - "step": 19694 - }, - { - "epoch": 3.1717460445267522, - "grad_norm": 0.0014059654204174876, - "learning_rate": 0.00019999504104502652, - "loss": 46.0, - "step": 19695 - }, - { - "epoch": 3.1719070816055397, - "grad_norm": 0.0015810240292921662, - "learning_rate": 0.00019999504054118704, - "loss": 46.0, - "step": 19696 - }, - { - "epoch": 3.172068118684327, - "grad_norm": 0.002150014042854309, - "learning_rate": 0.00019999504003732202, - "loss": 46.0, - "step": 19697 - }, - { - "epoch": 3.1722291557631146, - "grad_norm": 0.005694233346730471, - "learning_rate": 0.00019999503953343134, - "loss": 46.0, - "step": 19698 - }, - { - "epoch": 3.172390192841902, - "grad_norm": 0.004789724014699459, - "learning_rate": 0.00019999503902951512, - "loss": 46.0, - "step": 19699 - }, - { - "epoch": 3.172551229920689, - "grad_norm": 0.0008061034604907036, - "learning_rate": 0.00019999503852557326, - "loss": 46.0, - "step": 19700 - }, - { - "epoch": 3.1727122669994765, - "grad_norm": 0.0015269630821421742, - "learning_rate": 0.00019999503802160584, - "loss": 46.0, - "step": 19701 - }, - { - "epoch": 3.172873304078264, - "grad_norm": 0.0024221769999712706, - "learning_rate": 0.0001999950375176128, - "loss": 46.0, - "step": 19702 - }, - { - "epoch": 3.1730343411570514, - "grad_norm": 0.0031108269467949867, - "learning_rate": 0.00019999503701359416, - "loss": 46.0, - "step": 19703 - }, - { - "epoch": 3.173195378235839, - "grad_norm": 0.0027087212074548006, - "learning_rate": 0.00019999503650954998, - "loss": 46.0, - "step": 19704 - }, - { - "epoch": 3.1733564153146263, - "grad_norm": 0.0023988927714526653, - "learning_rate": 0.00019999503600548019, - "loss": 46.0, - "step": 19705 - }, - { - "epoch": 3.1735174523934138, - "grad_norm": 0.0014148681657388806, - "learning_rate": 0.00019999503550138475, - "loss": 46.0, - "step": 19706 - }, - { - "epoch": 3.1736784894722008, - "grad_norm": 0.007136817555874586, - "learning_rate": 0.00019999503499726378, - "loss": 46.0, - "step": 19707 - }, - { - "epoch": 3.173839526550988, - "grad_norm": 0.002934227231889963, - "learning_rate": 0.0001999950344931172, - "loss": 46.0, - "step": 19708 - }, - { - "epoch": 3.1740005636297757, - "grad_norm": 0.008164871484041214, - "learning_rate": 0.000199995033988945, - "loss": 46.0, - "step": 19709 - }, - { - "epoch": 3.174161600708563, - "grad_norm": 0.011660341173410416, - "learning_rate": 0.00019999503348474721, - "loss": 46.0, - "step": 19710 - }, - { - "epoch": 3.1743226377873506, - "grad_norm": 0.0023767270613461733, - "learning_rate": 0.00019999503298052384, - "loss": 46.0, - "step": 19711 - }, - { - "epoch": 3.174483674866138, - "grad_norm": 0.0013685694430023432, - "learning_rate": 0.00019999503247627488, - "loss": 46.0, - "step": 19712 - }, - { - "epoch": 3.1746447119449255, - "grad_norm": 0.0022953858133405447, - "learning_rate": 0.0001999950319720003, - "loss": 46.0, - "step": 19713 - }, - { - "epoch": 3.1748057490237125, - "grad_norm": 0.0007009068503975868, - "learning_rate": 0.00019999503146770018, - "loss": 46.0, - "step": 19714 - }, - { - "epoch": 3.1749667861025, - "grad_norm": 0.0010385161731392145, - "learning_rate": 0.00019999503096337443, - "loss": 46.0, - "step": 19715 - }, - { - "epoch": 3.1751278231812874, - "grad_norm": 0.0037897727452218533, - "learning_rate": 0.0001999950304590231, - "loss": 46.0, - "step": 19716 - }, - { - "epoch": 3.175288860260075, - "grad_norm": 0.005629927385598421, - "learning_rate": 0.00019999502995464617, - "loss": 46.0, - "step": 19717 - }, - { - "epoch": 3.1754498973388623, - "grad_norm": 0.0012219124473631382, - "learning_rate": 0.00019999502945024363, - "loss": 46.0, - "step": 19718 - }, - { - "epoch": 3.1756109344176497, - "grad_norm": 0.002781735733151436, - "learning_rate": 0.0001999950289458155, - "loss": 46.0, - "step": 19719 - }, - { - "epoch": 3.175771971496437, - "grad_norm": 0.003969190176576376, - "learning_rate": 0.00019999502844136183, - "loss": 46.0, - "step": 19720 - }, - { - "epoch": 3.1759330085752246, - "grad_norm": 0.0014375975588336587, - "learning_rate": 0.00019999502793688253, - "loss": 46.0, - "step": 19721 - }, - { - "epoch": 3.1760940456540117, - "grad_norm": 0.0033689807169139385, - "learning_rate": 0.0001999950274323776, - "loss": 46.0, - "step": 19722 - }, - { - "epoch": 3.176255082732799, - "grad_norm": 0.0009516612044535577, - "learning_rate": 0.00019999502692784712, - "loss": 46.0, - "step": 19723 - }, - { - "epoch": 3.1764161198115866, - "grad_norm": 0.003003350691869855, - "learning_rate": 0.00019999502642329103, - "loss": 46.0, - "step": 19724 - }, - { - "epoch": 3.176577156890374, - "grad_norm": 0.0013025281950831413, - "learning_rate": 0.00019999502591870933, - "loss": 46.0, - "step": 19725 - }, - { - "epoch": 3.1767381939691615, - "grad_norm": 0.0008496972732245922, - "learning_rate": 0.00019999502541410207, - "loss": 46.0, - "step": 19726 - }, - { - "epoch": 3.176899231047949, - "grad_norm": 0.0034412203822284937, - "learning_rate": 0.00019999502490946922, - "loss": 46.0, - "step": 19727 - }, - { - "epoch": 3.1770602681267364, - "grad_norm": 0.003885657526552677, - "learning_rate": 0.00019999502440481073, - "loss": 46.0, - "step": 19728 - }, - { - "epoch": 3.1772213052055234, - "grad_norm": 0.0019904214423149824, - "learning_rate": 0.0001999950239001267, - "loss": 46.0, - "step": 19729 - }, - { - "epoch": 3.177382342284311, - "grad_norm": 0.007293752394616604, - "learning_rate": 0.00019999502339541704, - "loss": 46.0, - "step": 19730 - }, - { - "epoch": 3.1775433793630983, - "grad_norm": 0.0009874945972114801, - "learning_rate": 0.00019999502289068182, - "loss": 46.0, - "step": 19731 - }, - { - "epoch": 3.1777044164418857, - "grad_norm": 0.0016249839682132006, - "learning_rate": 0.00019999502238592096, - "loss": 46.0, - "step": 19732 - }, - { - "epoch": 3.177865453520673, - "grad_norm": 0.0013751467922702432, - "learning_rate": 0.00019999502188113453, - "loss": 46.0, - "step": 19733 - }, - { - "epoch": 3.1780264905994606, - "grad_norm": 0.007252424024045467, - "learning_rate": 0.00019999502137632252, - "loss": 46.0, - "step": 19734 - }, - { - "epoch": 3.178187527678248, - "grad_norm": 0.004219766706228256, - "learning_rate": 0.0001999950208714849, - "loss": 46.0, - "step": 19735 - }, - { - "epoch": 3.1783485647570355, - "grad_norm": 0.00420208228752017, - "learning_rate": 0.0001999950203666217, - "loss": 46.0, - "step": 19736 - }, - { - "epoch": 3.1785096018358225, - "grad_norm": 0.003922350239008665, - "learning_rate": 0.00019999501986173288, - "loss": 46.0, - "step": 19737 - }, - { - "epoch": 3.17867063891461, - "grad_norm": 0.006449609529227018, - "learning_rate": 0.0001999950193568185, - "loss": 46.0, - "step": 19738 - }, - { - "epoch": 3.1788316759933974, - "grad_norm": 0.011483429931104183, - "learning_rate": 0.0001999950188518785, - "loss": 46.0, - "step": 19739 - }, - { - "epoch": 3.178992713072185, - "grad_norm": 0.004094013012945652, - "learning_rate": 0.0001999950183469129, - "loss": 46.0, - "step": 19740 - }, - { - "epoch": 3.1791537501509723, - "grad_norm": 0.003709926037117839, - "learning_rate": 0.00019999501784192175, - "loss": 46.0, - "step": 19741 - }, - { - "epoch": 3.17931478722976, - "grad_norm": 0.0010222380515187979, - "learning_rate": 0.00019999501733690495, - "loss": 46.0, - "step": 19742 - }, - { - "epoch": 3.1794758243085472, - "grad_norm": 0.0028213472105562687, - "learning_rate": 0.0001999950168318626, - "loss": 46.0, - "step": 19743 - }, - { - "epoch": 3.1796368613873343, - "grad_norm": 0.008547100238502026, - "learning_rate": 0.00019999501632679464, - "loss": 46.0, - "step": 19744 - }, - { - "epoch": 3.1797978984661217, - "grad_norm": 0.003768955823034048, - "learning_rate": 0.00019999501582170108, - "loss": 46.0, - "step": 19745 - }, - { - "epoch": 3.179958935544909, - "grad_norm": 0.002996401395648718, - "learning_rate": 0.00019999501531658195, - "loss": 46.0, - "step": 19746 - }, - { - "epoch": 3.1801199726236966, - "grad_norm": 0.004124962259083986, - "learning_rate": 0.00019999501481143722, - "loss": 46.0, - "step": 19747 - }, - { - "epoch": 3.180281009702484, - "grad_norm": 0.0015998920425772667, - "learning_rate": 0.00019999501430626685, - "loss": 46.0, - "step": 19748 - }, - { - "epoch": 3.1804420467812715, - "grad_norm": 0.002692669630050659, - "learning_rate": 0.00019999501380107095, - "loss": 46.0, - "step": 19749 - }, - { - "epoch": 3.180603083860059, - "grad_norm": 0.005620558280497789, - "learning_rate": 0.00019999501329584943, - "loss": 46.0, - "step": 19750 - }, - { - "epoch": 3.180764120938846, - "grad_norm": 0.011771776713430882, - "learning_rate": 0.0001999950127906023, - "loss": 46.0, - "step": 19751 - }, - { - "epoch": 3.1809251580176334, - "grad_norm": 0.002610383555293083, - "learning_rate": 0.0001999950122853296, - "loss": 46.0, - "step": 19752 - }, - { - "epoch": 3.181086195096421, - "grad_norm": 0.0015096035785973072, - "learning_rate": 0.0001999950117800313, - "loss": 46.0, - "step": 19753 - }, - { - "epoch": 3.1812472321752083, - "grad_norm": 0.0026770683471113443, - "learning_rate": 0.00019999501127470742, - "loss": 46.0, - "step": 19754 - }, - { - "epoch": 3.181408269253996, - "grad_norm": 0.0055971224792301655, - "learning_rate": 0.0001999950107693579, - "loss": 46.0, - "step": 19755 - }, - { - "epoch": 3.1815693063327832, - "grad_norm": 0.004829136189073324, - "learning_rate": 0.00019999501026398285, - "loss": 46.0, - "step": 19756 - }, - { - "epoch": 3.1817303434115707, - "grad_norm": 0.001391859375871718, - "learning_rate": 0.00019999500975858217, - "loss": 46.0, - "step": 19757 - }, - { - "epoch": 3.1818913804903577, - "grad_norm": 0.0016644347924739122, - "learning_rate": 0.0001999950092531559, - "loss": 46.0, - "step": 19758 - }, - { - "epoch": 3.182052417569145, - "grad_norm": 0.002994673326611519, - "learning_rate": 0.00019999500874770404, - "loss": 46.0, - "step": 19759 - }, - { - "epoch": 3.1822134546479326, - "grad_norm": 0.003188950940966606, - "learning_rate": 0.00019999500824222657, - "loss": 46.0, - "step": 19760 - }, - { - "epoch": 3.18237449172672, - "grad_norm": 0.008136718533933163, - "learning_rate": 0.00019999500773672352, - "loss": 46.0, - "step": 19761 - }, - { - "epoch": 3.1825355288055075, - "grad_norm": 0.0007128171855583787, - "learning_rate": 0.00019999500723119488, - "loss": 46.0, - "step": 19762 - }, - { - "epoch": 3.182696565884295, - "grad_norm": 0.001606535050086677, - "learning_rate": 0.00019999500672564065, - "loss": 46.0, - "step": 19763 - }, - { - "epoch": 3.1828576029630824, - "grad_norm": 0.0011022391263395548, - "learning_rate": 0.00019999500622006083, - "loss": 46.0, - "step": 19764 - }, - { - "epoch": 3.18301864004187, - "grad_norm": 0.0026857571210712194, - "learning_rate": 0.0001999950057144554, - "loss": 46.0, - "step": 19765 - }, - { - "epoch": 3.183179677120657, - "grad_norm": 0.0010084382956847548, - "learning_rate": 0.00019999500520882438, - "loss": 46.0, - "step": 19766 - }, - { - "epoch": 3.1833407141994443, - "grad_norm": 0.005197951104491949, - "learning_rate": 0.0001999950047031678, - "loss": 46.0, - "step": 19767 - }, - { - "epoch": 3.1835017512782318, - "grad_norm": 0.004256872925907373, - "learning_rate": 0.00019999500419748558, - "loss": 46.0, - "step": 19768 - }, - { - "epoch": 3.183662788357019, - "grad_norm": 0.007161410059779882, - "learning_rate": 0.00019999500369177777, - "loss": 46.0, - "step": 19769 - }, - { - "epoch": 3.1838238254358067, - "grad_norm": 0.0011229043593630195, - "learning_rate": 0.00019999500318604438, - "loss": 46.0, - "step": 19770 - }, - { - "epoch": 3.183984862514594, - "grad_norm": 0.0037400221917778254, - "learning_rate": 0.0001999950026802854, - "loss": 46.0, - "step": 19771 - }, - { - "epoch": 3.1841458995933816, - "grad_norm": 0.00586453964933753, - "learning_rate": 0.00019999500217450083, - "loss": 46.0, - "step": 19772 - }, - { - "epoch": 3.1843069366721686, - "grad_norm": 0.0017640376463532448, - "learning_rate": 0.00019999500166869062, - "loss": 46.0, - "step": 19773 - }, - { - "epoch": 3.184467973750956, - "grad_norm": 0.005383512005209923, - "learning_rate": 0.00019999500116285487, - "loss": 46.0, - "step": 19774 - }, - { - "epoch": 3.1846290108297435, - "grad_norm": 0.0017542550340294838, - "learning_rate": 0.00019999500065699351, - "loss": 46.0, - "step": 19775 - }, - { - "epoch": 3.184790047908531, - "grad_norm": 0.005183705594390631, - "learning_rate": 0.00019999500015110657, - "loss": 46.0, - "step": 19776 - }, - { - "epoch": 3.1849510849873184, - "grad_norm": 0.0045154201798141, - "learning_rate": 0.000199994999645194, - "loss": 46.0, - "step": 19777 - }, - { - "epoch": 3.185112122066106, - "grad_norm": 0.0083194300532341, - "learning_rate": 0.0001999949991392559, - "loss": 46.0, - "step": 19778 - }, - { - "epoch": 3.1852731591448933, - "grad_norm": 0.009240538813173771, - "learning_rate": 0.00019999499863329213, - "loss": 46.0, - "step": 19779 - }, - { - "epoch": 3.1854341962236807, - "grad_norm": 0.001111158519051969, - "learning_rate": 0.0001999949981273028, - "loss": 46.0, - "step": 19780 - }, - { - "epoch": 3.1855952333024677, - "grad_norm": 0.006247317884117365, - "learning_rate": 0.0001999949976212879, - "loss": 46.0, - "step": 19781 - }, - { - "epoch": 3.185756270381255, - "grad_norm": 0.008027937263250351, - "learning_rate": 0.00019999499711524735, - "loss": 46.0, - "step": 19782 - }, - { - "epoch": 3.1859173074600426, - "grad_norm": 0.0036614250857383013, - "learning_rate": 0.00019999499660918127, - "loss": 46.0, - "step": 19783 - }, - { - "epoch": 3.18607834453883, - "grad_norm": 0.00796531606465578, - "learning_rate": 0.00019999499610308958, - "loss": 46.0, - "step": 19784 - }, - { - "epoch": 3.1862393816176175, - "grad_norm": 0.013205653987824917, - "learning_rate": 0.00019999499559697227, - "loss": 46.0, - "step": 19785 - }, - { - "epoch": 3.186400418696405, - "grad_norm": 0.0025140228681266308, - "learning_rate": 0.0001999949950908294, - "loss": 46.0, - "step": 19786 - }, - { - "epoch": 3.186561455775192, - "grad_norm": 0.011965310201048851, - "learning_rate": 0.0001999949945846609, - "loss": 46.0, - "step": 19787 - }, - { - "epoch": 3.1867224928539795, - "grad_norm": 0.004593444988131523, - "learning_rate": 0.00019999499407846684, - "loss": 46.0, - "step": 19788 - }, - { - "epoch": 3.186883529932767, - "grad_norm": 0.005895540118217468, - "learning_rate": 0.00019999499357224715, - "loss": 46.0, - "step": 19789 - }, - { - "epoch": 3.1870445670115544, - "grad_norm": 0.002427153056487441, - "learning_rate": 0.00019999499306600188, - "loss": 46.0, - "step": 19790 - }, - { - "epoch": 3.187205604090342, - "grad_norm": 0.009925429709255695, - "learning_rate": 0.00019999499255973104, - "loss": 46.0, - "step": 19791 - }, - { - "epoch": 3.1873666411691293, - "grad_norm": 0.002062993822619319, - "learning_rate": 0.00019999499205343457, - "loss": 46.0, - "step": 19792 - }, - { - "epoch": 3.1875276782479167, - "grad_norm": 0.002389253117144108, - "learning_rate": 0.00019999499154711253, - "loss": 46.0, - "step": 19793 - }, - { - "epoch": 3.187688715326704, - "grad_norm": 0.003960807342082262, - "learning_rate": 0.00019999499104076488, - "loss": 46.0, - "step": 19794 - }, - { - "epoch": 3.187849752405491, - "grad_norm": 0.0035039905924350023, - "learning_rate": 0.00019999499053439167, - "loss": 46.0, - "step": 19795 - }, - { - "epoch": 3.1880107894842786, - "grad_norm": 0.002037300728261471, - "learning_rate": 0.00019999499002799285, - "loss": 46.0, - "step": 19796 - }, - { - "epoch": 3.188171826563066, - "grad_norm": 0.0019222765695303679, - "learning_rate": 0.00019999498952156844, - "loss": 46.0, - "step": 19797 - }, - { - "epoch": 3.1883328636418535, - "grad_norm": 0.0019225234864279628, - "learning_rate": 0.00019999498901511842, - "loss": 46.0, - "step": 19798 - }, - { - "epoch": 3.188493900720641, - "grad_norm": 0.01437341421842575, - "learning_rate": 0.00019999498850864283, - "loss": 46.0, - "step": 19799 - }, - { - "epoch": 3.1886549377994284, - "grad_norm": 0.008268388919532299, - "learning_rate": 0.00019999498800214163, - "loss": 46.0, - "step": 19800 - }, - { - "epoch": 3.188815974878216, - "grad_norm": 0.00804196484386921, - "learning_rate": 0.00019999498749561482, - "loss": 46.0, - "step": 19801 - }, - { - "epoch": 3.188977011957003, - "grad_norm": 0.005860279779881239, - "learning_rate": 0.00019999498698906242, - "loss": 46.0, - "step": 19802 - }, - { - "epoch": 3.1891380490357903, - "grad_norm": 0.0023101232945919037, - "learning_rate": 0.00019999498648248445, - "loss": 46.0, - "step": 19803 - }, - { - "epoch": 3.189299086114578, - "grad_norm": 0.0016302675940096378, - "learning_rate": 0.00019999498597588088, - "loss": 46.0, - "step": 19804 - }, - { - "epoch": 3.1894601231933652, - "grad_norm": 0.00838552601635456, - "learning_rate": 0.00019999498546925172, - "loss": 46.0, - "step": 19805 - }, - { - "epoch": 3.1896211602721527, - "grad_norm": 0.015919389203190804, - "learning_rate": 0.00019999498496259697, - "loss": 46.0, - "step": 19806 - }, - { - "epoch": 3.18978219735094, - "grad_norm": 0.0009158725151792169, - "learning_rate": 0.00019999498445591658, - "loss": 46.0, - "step": 19807 - }, - { - "epoch": 3.1899432344297276, - "grad_norm": 0.012250055558979511, - "learning_rate": 0.00019999498394921065, - "loss": 46.0, - "step": 19808 - }, - { - "epoch": 3.190104271508515, - "grad_norm": 0.0009662067168392241, - "learning_rate": 0.00019999498344247911, - "loss": 46.0, - "step": 19809 - }, - { - "epoch": 3.190265308587302, - "grad_norm": 0.005286928731948137, - "learning_rate": 0.00019999498293572196, - "loss": 46.0, - "step": 19810 - }, - { - "epoch": 3.1904263456660895, - "grad_norm": 0.00653979042544961, - "learning_rate": 0.00019999498242893925, - "loss": 46.0, - "step": 19811 - }, - { - "epoch": 3.190587382744877, - "grad_norm": 0.0033032251521945, - "learning_rate": 0.0001999949819221309, - "loss": 46.0, - "step": 19812 - }, - { - "epoch": 3.1907484198236644, - "grad_norm": 0.0006170686101540923, - "learning_rate": 0.000199994981415297, - "loss": 46.0, - "step": 19813 - }, - { - "epoch": 3.190909456902452, - "grad_norm": 0.006778727751225233, - "learning_rate": 0.00019999498090843748, - "loss": 46.0, - "step": 19814 - }, - { - "epoch": 3.1910704939812393, - "grad_norm": 0.006944861263036728, - "learning_rate": 0.0001999949804015524, - "loss": 46.0, - "step": 19815 - }, - { - "epoch": 3.1912315310600268, - "grad_norm": 0.002142457291483879, - "learning_rate": 0.00019999497989464172, - "loss": 46.0, - "step": 19816 - }, - { - "epoch": 3.191392568138814, - "grad_norm": 0.0023954843636602163, - "learning_rate": 0.0001999949793877054, - "loss": 46.0, - "step": 19817 - }, - { - "epoch": 3.1915536052176012, - "grad_norm": 0.0009656427428126335, - "learning_rate": 0.00019999497888074352, - "loss": 46.0, - "step": 19818 - }, - { - "epoch": 3.1917146422963887, - "grad_norm": 0.008898722007870674, - "learning_rate": 0.00019999497837375603, - "loss": 46.0, - "step": 19819 - }, - { - "epoch": 3.191875679375176, - "grad_norm": 0.0036675261799246073, - "learning_rate": 0.00019999497786674298, - "loss": 46.0, - "step": 19820 - }, - { - "epoch": 3.1920367164539636, - "grad_norm": 0.007081733550876379, - "learning_rate": 0.0001999949773597043, - "loss": 46.0, - "step": 19821 - }, - { - "epoch": 3.192197753532751, - "grad_norm": 0.0029735665302723646, - "learning_rate": 0.00019999497685264003, - "loss": 46.0, - "step": 19822 - }, - { - "epoch": 3.1923587906115385, - "grad_norm": 0.003738351399078965, - "learning_rate": 0.0001999949763455502, - "loss": 46.0, - "step": 19823 - }, - { - "epoch": 3.1925198276903255, - "grad_norm": 0.0013856987934559584, - "learning_rate": 0.00019999497583843474, - "loss": 46.0, - "step": 19824 - }, - { - "epoch": 3.192680864769113, - "grad_norm": 0.0017351857386529446, - "learning_rate": 0.0001999949753312937, - "loss": 46.0, - "step": 19825 - }, - { - "epoch": 3.1928419018479004, - "grad_norm": 0.017825378105044365, - "learning_rate": 0.00019999497482412707, - "loss": 46.0, - "step": 19826 - }, - { - "epoch": 3.193002938926688, - "grad_norm": 0.0014785912353545427, - "learning_rate": 0.00019999497431693485, - "loss": 46.0, - "step": 19827 - }, - { - "epoch": 3.1931639760054753, - "grad_norm": 0.0010605257702991366, - "learning_rate": 0.00019999497380971702, - "loss": 46.0, - "step": 19828 - }, - { - "epoch": 3.1933250130842628, - "grad_norm": 0.004136411938816309, - "learning_rate": 0.0001999949733024736, - "loss": 46.0, - "step": 19829 - }, - { - "epoch": 3.19348605016305, - "grad_norm": 0.0009177191532216966, - "learning_rate": 0.00019999497279520463, - "loss": 46.0, - "step": 19830 - }, - { - "epoch": 3.193647087241837, - "grad_norm": 0.004069517366588116, - "learning_rate": 0.00019999497228791, - "loss": 46.0, - "step": 19831 - }, - { - "epoch": 3.1938081243206247, - "grad_norm": 0.0018785659922286868, - "learning_rate": 0.00019999497178058983, - "loss": 46.0, - "step": 19832 - }, - { - "epoch": 3.193969161399412, - "grad_norm": 0.000989588093943894, - "learning_rate": 0.000199994971273244, - "loss": 46.0, - "step": 19833 - }, - { - "epoch": 3.1941301984781996, - "grad_norm": 0.008440427482128143, - "learning_rate": 0.00019999497076587263, - "loss": 46.0, - "step": 19834 - }, - { - "epoch": 3.194291235556987, - "grad_norm": 0.0034890423994511366, - "learning_rate": 0.00019999497025847566, - "loss": 46.0, - "step": 19835 - }, - { - "epoch": 3.1944522726357745, - "grad_norm": 0.0015511981910094619, - "learning_rate": 0.0001999949697510531, - "loss": 46.0, - "step": 19836 - }, - { - "epoch": 3.194613309714562, - "grad_norm": 0.002736080205067992, - "learning_rate": 0.00019999496924360494, - "loss": 46.0, - "step": 19837 - }, - { - "epoch": 3.1947743467933494, - "grad_norm": 0.0018079903675243258, - "learning_rate": 0.00019999496873613119, - "loss": 46.0, - "step": 19838 - }, - { - "epoch": 3.1949353838721364, - "grad_norm": 0.0037863650359213352, - "learning_rate": 0.00019999496822863184, - "loss": 46.0, - "step": 19839 - }, - { - "epoch": 3.195096420950924, - "grad_norm": 0.0018491890514269471, - "learning_rate": 0.0001999949677211069, - "loss": 46.0, - "step": 19840 - }, - { - "epoch": 3.1952574580297113, - "grad_norm": 0.0014581484720110893, - "learning_rate": 0.00019999496721355637, - "loss": 46.0, - "step": 19841 - }, - { - "epoch": 3.1954184951084987, - "grad_norm": 0.0014562910655513406, - "learning_rate": 0.00019999496670598024, - "loss": 46.0, - "step": 19842 - }, - { - "epoch": 3.195579532187286, - "grad_norm": 0.003640740644186735, - "learning_rate": 0.00019999496619837852, - "loss": 46.0, - "step": 19843 - }, - { - "epoch": 3.1957405692660736, - "grad_norm": 0.0029499635566025972, - "learning_rate": 0.0001999949656907512, - "loss": 46.0, - "step": 19844 - }, - { - "epoch": 3.195901606344861, - "grad_norm": 0.0012260916410014033, - "learning_rate": 0.0001999949651830983, - "loss": 46.0, - "step": 19845 - }, - { - "epoch": 3.196062643423648, - "grad_norm": 0.0007619918324053288, - "learning_rate": 0.00019999496467541978, - "loss": 46.0, - "step": 19846 - }, - { - "epoch": 3.1962236805024355, - "grad_norm": 0.005921292584389448, - "learning_rate": 0.0001999949641677157, - "loss": 46.0, - "step": 19847 - }, - { - "epoch": 3.196384717581223, - "grad_norm": 0.0020439988002181053, - "learning_rate": 0.000199994963659986, - "loss": 46.0, - "step": 19848 - }, - { - "epoch": 3.1965457546600105, - "grad_norm": 0.008453522808849812, - "learning_rate": 0.00019999496315223068, - "loss": 46.0, - "step": 19849 - }, - { - "epoch": 3.196706791738798, - "grad_norm": 0.007206012029200792, - "learning_rate": 0.0001999949626444498, - "loss": 46.0, - "step": 19850 - }, - { - "epoch": 3.1968678288175854, - "grad_norm": 0.010151001624763012, - "learning_rate": 0.00019999496213664336, - "loss": 46.0, - "step": 19851 - }, - { - "epoch": 3.197028865896373, - "grad_norm": 0.0028867933433502913, - "learning_rate": 0.00019999496162881127, - "loss": 46.0, - "step": 19852 - }, - { - "epoch": 3.1971899029751603, - "grad_norm": 0.005977341905236244, - "learning_rate": 0.00019999496112095363, - "loss": 46.0, - "step": 19853 - }, - { - "epoch": 3.1973509400539473, - "grad_norm": 0.007132588420063257, - "learning_rate": 0.0001999949606130704, - "loss": 46.0, - "step": 19854 - }, - { - "epoch": 3.1975119771327347, - "grad_norm": 0.0011176193365827203, - "learning_rate": 0.00019999496010516152, - "loss": 46.0, - "step": 19855 - }, - { - "epoch": 3.197673014211522, - "grad_norm": 0.003553074086084962, - "learning_rate": 0.0001999949595972271, - "loss": 46.0, - "step": 19856 - }, - { - "epoch": 3.1978340512903096, - "grad_norm": 0.001030263607390225, - "learning_rate": 0.00019999495908926704, - "loss": 46.0, - "step": 19857 - }, - { - "epoch": 3.197995088369097, - "grad_norm": 0.0005515654920600355, - "learning_rate": 0.00019999495858128143, - "loss": 46.0, - "step": 19858 - }, - { - "epoch": 3.1981561254478845, - "grad_norm": 0.0038948762230575085, - "learning_rate": 0.0001999949580732702, - "loss": 46.0, - "step": 19859 - }, - { - "epoch": 3.1983171625266715, - "grad_norm": 0.012143001891672611, - "learning_rate": 0.00019999495756523337, - "loss": 46.0, - "step": 19860 - }, - { - "epoch": 3.198478199605459, - "grad_norm": 0.0031428919173777103, - "learning_rate": 0.000199994957057171, - "loss": 46.0, - "step": 19861 - }, - { - "epoch": 3.1986392366842464, - "grad_norm": 0.007391807623207569, - "learning_rate": 0.00019999495654908297, - "loss": 46.0, - "step": 19862 - }, - { - "epoch": 3.198800273763034, - "grad_norm": 0.0018393341451883316, - "learning_rate": 0.0001999949560409694, - "loss": 46.0, - "step": 19863 - }, - { - "epoch": 3.1989613108418213, - "grad_norm": 0.0036613158881664276, - "learning_rate": 0.0001999949555328302, - "loss": 46.0, - "step": 19864 - }, - { - "epoch": 3.199122347920609, - "grad_norm": 0.001508887391537428, - "learning_rate": 0.0001999949550246654, - "loss": 46.0, - "step": 19865 - }, - { - "epoch": 3.1992833849993962, - "grad_norm": 0.005387998651713133, - "learning_rate": 0.00019999495451647503, - "loss": 46.0, - "step": 19866 - }, - { - "epoch": 3.1994444220781837, - "grad_norm": 0.01063913106918335, - "learning_rate": 0.00019999495400825908, - "loss": 46.0, - "step": 19867 - }, - { - "epoch": 3.1996054591569707, - "grad_norm": 0.0027136628050357103, - "learning_rate": 0.00019999495350001752, - "loss": 46.0, - "step": 19868 - }, - { - "epoch": 3.199766496235758, - "grad_norm": 0.0026820707134902477, - "learning_rate": 0.00019999495299175034, - "loss": 46.0, - "step": 19869 - }, - { - "epoch": 3.1999275333145456, - "grad_norm": 0.0017326547531411052, - "learning_rate": 0.0001999949524834576, - "loss": 46.0, - "step": 19870 - }, - { - "epoch": 3.200088570393333, - "grad_norm": 0.004446511156857014, - "learning_rate": 0.00019999495197513926, - "loss": 46.0, - "step": 19871 - }, - { - "epoch": 3.2002496074721205, - "grad_norm": 0.0013527220580726862, - "learning_rate": 0.00019999495146679532, - "loss": 46.0, - "step": 19872 - }, - { - "epoch": 3.200410644550908, - "grad_norm": 0.008834746666252613, - "learning_rate": 0.0001999949509584258, - "loss": 46.0, - "step": 19873 - }, - { - "epoch": 3.2005716816296954, - "grad_norm": 0.010460826568305492, - "learning_rate": 0.00019999495045003066, - "loss": 46.0, - "step": 19874 - }, - { - "epoch": 3.2007327187084824, - "grad_norm": 0.0028877719305455685, - "learning_rate": 0.00019999494994160996, - "loss": 46.0, - "step": 19875 - }, - { - "epoch": 3.20089375578727, - "grad_norm": 0.0012199478223919868, - "learning_rate": 0.00019999494943316365, - "loss": 46.0, - "step": 19876 - }, - { - "epoch": 3.2010547928660573, - "grad_norm": 0.0007976951310411096, - "learning_rate": 0.00019999494892469172, - "loss": 46.0, - "step": 19877 - }, - { - "epoch": 3.2012158299448448, - "grad_norm": 0.0009848412591964006, - "learning_rate": 0.00019999494841619424, - "loss": 46.0, - "step": 19878 - }, - { - "epoch": 3.2013768670236322, - "grad_norm": 0.0006164132500998676, - "learning_rate": 0.00019999494790767114, - "loss": 46.0, - "step": 19879 - }, - { - "epoch": 3.2015379041024197, - "grad_norm": 0.006771137472242117, - "learning_rate": 0.00019999494739912245, - "loss": 46.0, - "step": 19880 - }, - { - "epoch": 3.201698941181207, - "grad_norm": 0.0028572995215654373, - "learning_rate": 0.00019999494689054817, - "loss": 46.0, - "step": 19881 - }, - { - "epoch": 3.2018599782599946, - "grad_norm": 0.0038422876968979836, - "learning_rate": 0.0001999949463819483, - "loss": 46.0, - "step": 19882 - }, - { - "epoch": 3.2020210153387816, - "grad_norm": 0.013455407693982124, - "learning_rate": 0.00019999494587332283, - "loss": 46.0, - "step": 19883 - }, - { - "epoch": 3.202182052417569, - "grad_norm": 0.01469449419528246, - "learning_rate": 0.0001999949453646718, - "loss": 46.0, - "step": 19884 - }, - { - "epoch": 3.2023430894963565, - "grad_norm": 0.0018090845551341772, - "learning_rate": 0.00019999494485599512, - "loss": 46.0, - "step": 19885 - }, - { - "epoch": 3.202504126575144, - "grad_norm": 0.000849371834192425, - "learning_rate": 0.00019999494434729288, - "loss": 46.0, - "step": 19886 - }, - { - "epoch": 3.2026651636539314, - "grad_norm": 0.0017237755237147212, - "learning_rate": 0.00019999494383856505, - "loss": 46.0, - "step": 19887 - }, - { - "epoch": 3.202826200732719, - "grad_norm": 0.0022191668394953012, - "learning_rate": 0.0001999949433298116, - "loss": 46.0, - "step": 19888 - }, - { - "epoch": 3.2029872378115063, - "grad_norm": 0.008016888052225113, - "learning_rate": 0.00019999494282103256, - "loss": 46.0, - "step": 19889 - }, - { - "epoch": 3.2031482748902933, - "grad_norm": 0.0024024019949138165, - "learning_rate": 0.00019999494231222795, - "loss": 46.0, - "step": 19890 - }, - { - "epoch": 3.2033093119690808, - "grad_norm": 0.001229557441547513, - "learning_rate": 0.00019999494180339772, - "loss": 46.0, - "step": 19891 - }, - { - "epoch": 3.203470349047868, - "grad_norm": 0.0033974347170442343, - "learning_rate": 0.00019999494129454193, - "loss": 46.0, - "step": 19892 - }, - { - "epoch": 3.2036313861266557, - "grad_norm": 0.0015074391849339008, - "learning_rate": 0.00019999494078566055, - "loss": 46.0, - "step": 19893 - }, - { - "epoch": 3.203792423205443, - "grad_norm": 0.008552758023142815, - "learning_rate": 0.00019999494027675354, - "loss": 46.0, - "step": 19894 - }, - { - "epoch": 3.2039534602842306, - "grad_norm": 0.002506379736587405, - "learning_rate": 0.00019999493976782096, - "loss": 46.0, - "step": 19895 - }, - { - "epoch": 3.204114497363018, - "grad_norm": 0.0010894174920395017, - "learning_rate": 0.00019999493925886274, - "loss": 46.0, - "step": 19896 - }, - { - "epoch": 3.204275534441805, - "grad_norm": 0.0034105246886610985, - "learning_rate": 0.000199994938749879, - "loss": 46.0, - "step": 19897 - }, - { - "epoch": 3.2044365715205925, - "grad_norm": 0.02548542059957981, - "learning_rate": 0.00019999493824086962, - "loss": 46.0, - "step": 19898 - }, - { - "epoch": 3.20459760859938, - "grad_norm": 0.012117553502321243, - "learning_rate": 0.00019999493773183464, - "loss": 46.0, - "step": 19899 - }, - { - "epoch": 3.2047586456781674, - "grad_norm": 0.002581066219136119, - "learning_rate": 0.0001999949372227741, - "loss": 46.0, - "step": 19900 - }, - { - "epoch": 3.204919682756955, - "grad_norm": 0.0008938151295296848, - "learning_rate": 0.00019999493671368795, - "loss": 46.0, - "step": 19901 - }, - { - "epoch": 3.2050807198357423, - "grad_norm": 0.0030661774799227715, - "learning_rate": 0.00019999493620457618, - "loss": 46.0, - "step": 19902 - }, - { - "epoch": 3.2052417569145297, - "grad_norm": 0.0034973423462361097, - "learning_rate": 0.00019999493569543885, - "loss": 46.0, - "step": 19903 - }, - { - "epoch": 3.2054027939933167, - "grad_norm": 0.001700281398370862, - "learning_rate": 0.00019999493518627594, - "loss": 46.0, - "step": 19904 - }, - { - "epoch": 3.205563831072104, - "grad_norm": 0.012507494539022446, - "learning_rate": 0.00019999493467708738, - "loss": 46.0, - "step": 19905 - }, - { - "epoch": 3.2057248681508916, - "grad_norm": 0.002298508770763874, - "learning_rate": 0.0001999949341678733, - "loss": 46.0, - "step": 19906 - }, - { - "epoch": 3.205885905229679, - "grad_norm": 0.0019406152423471212, - "learning_rate": 0.00019999493365863358, - "loss": 46.0, - "step": 19907 - }, - { - "epoch": 3.2060469423084665, - "grad_norm": 0.0018927935743704438, - "learning_rate": 0.0001999949331493683, - "loss": 46.0, - "step": 19908 - }, - { - "epoch": 3.206207979387254, - "grad_norm": 0.0038575297221541405, - "learning_rate": 0.00019999493264007736, - "loss": 46.0, - "step": 19909 - }, - { - "epoch": 3.2063690164660414, - "grad_norm": 0.003207950619980693, - "learning_rate": 0.00019999493213076087, - "loss": 46.0, - "step": 19910 - }, - { - "epoch": 3.206530053544829, - "grad_norm": 0.0022205302957445383, - "learning_rate": 0.00019999493162141879, - "loss": 46.0, - "step": 19911 - }, - { - "epoch": 3.206691090623616, - "grad_norm": 0.0017520474502816796, - "learning_rate": 0.00019999493111205112, - "loss": 46.0, - "step": 19912 - }, - { - "epoch": 3.2068521277024034, - "grad_norm": 0.0014618110144510865, - "learning_rate": 0.00019999493060265784, - "loss": 46.0, - "step": 19913 - }, - { - "epoch": 3.207013164781191, - "grad_norm": 0.00253496365621686, - "learning_rate": 0.00019999493009323897, - "loss": 46.0, - "step": 19914 - }, - { - "epoch": 3.2071742018599783, - "grad_norm": 0.0010251352796331048, - "learning_rate": 0.0001999949295837945, - "loss": 46.0, - "step": 19915 - }, - { - "epoch": 3.2073352389387657, - "grad_norm": 0.017363103106617928, - "learning_rate": 0.00019999492907432447, - "loss": 46.0, - "step": 19916 - }, - { - "epoch": 3.207496276017553, - "grad_norm": 0.006027654744684696, - "learning_rate": 0.0001999949285648288, - "loss": 46.0, - "step": 19917 - }, - { - "epoch": 3.2076573130963406, - "grad_norm": 0.005080610979348421, - "learning_rate": 0.00019999492805530754, - "loss": 46.0, - "step": 19918 - }, - { - "epoch": 3.2078183501751276, - "grad_norm": 0.008073265664279461, - "learning_rate": 0.00019999492754576074, - "loss": 46.0, - "step": 19919 - }, - { - "epoch": 3.207979387253915, - "grad_norm": 0.0013680735137313604, - "learning_rate": 0.0001999949270361883, - "loss": 46.0, - "step": 19920 - }, - { - "epoch": 3.2081404243327025, - "grad_norm": 0.00872149970382452, - "learning_rate": 0.00019999492652659026, - "loss": 46.0, - "step": 19921 - }, - { - "epoch": 3.20830146141149, - "grad_norm": 0.01572323590517044, - "learning_rate": 0.00019999492601696666, - "loss": 46.0, - "step": 19922 - }, - { - "epoch": 3.2084624984902774, - "grad_norm": 0.004353813827037811, - "learning_rate": 0.00019999492550731745, - "loss": 46.0, - "step": 19923 - }, - { - "epoch": 3.208623535569065, - "grad_norm": 0.0010871444828808308, - "learning_rate": 0.00019999492499764266, - "loss": 46.0, - "step": 19924 - }, - { - "epoch": 3.2087845726478523, - "grad_norm": 0.0056698499247431755, - "learning_rate": 0.00019999492448794225, - "loss": 46.0, - "step": 19925 - }, - { - "epoch": 3.20894560972664, - "grad_norm": 0.0003940163878723979, - "learning_rate": 0.00019999492397821625, - "loss": 46.0, - "step": 19926 - }, - { - "epoch": 3.209106646805427, - "grad_norm": 0.00767147121950984, - "learning_rate": 0.0001999949234684647, - "loss": 46.0, - "step": 19927 - }, - { - "epoch": 3.2092676838842142, - "grad_norm": 0.0029793891590088606, - "learning_rate": 0.0001999949229586875, - "loss": 46.0, - "step": 19928 - }, - { - "epoch": 3.2094287209630017, - "grad_norm": 0.002021720865741372, - "learning_rate": 0.00019999492244888472, - "loss": 46.0, - "step": 19929 - }, - { - "epoch": 3.209589758041789, - "grad_norm": 0.005881303455680609, - "learning_rate": 0.00019999492193905634, - "loss": 46.0, - "step": 19930 - }, - { - "epoch": 3.2097507951205766, - "grad_norm": 0.0014934591017663479, - "learning_rate": 0.0001999949214292024, - "loss": 46.0, - "step": 19931 - }, - { - "epoch": 3.209911832199364, - "grad_norm": 0.004109697882086039, - "learning_rate": 0.00019999492091932287, - "loss": 46.0, - "step": 19932 - }, - { - "epoch": 3.2100728692781515, - "grad_norm": 0.0014857043279334903, - "learning_rate": 0.00019999492040941773, - "loss": 46.0, - "step": 19933 - }, - { - "epoch": 3.2102339063569385, - "grad_norm": 0.0015882095322012901, - "learning_rate": 0.00019999491989948696, - "loss": 46.0, - "step": 19934 - }, - { - "epoch": 3.210394943435726, - "grad_norm": 0.01587427780032158, - "learning_rate": 0.00019999491938953065, - "loss": 46.0, - "step": 19935 - }, - { - "epoch": 3.2105559805145134, - "grad_norm": 0.0029281058814376593, - "learning_rate": 0.0001999949188795487, - "loss": 46.0, - "step": 19936 - }, - { - "epoch": 3.210717017593301, - "grad_norm": 0.001928345998749137, - "learning_rate": 0.0001999949183695412, - "loss": 46.0, - "step": 19937 - }, - { - "epoch": 3.2108780546720883, - "grad_norm": 0.012835285626351833, - "learning_rate": 0.00019999491785950807, - "loss": 46.0, - "step": 19938 - }, - { - "epoch": 3.2110390917508758, - "grad_norm": 0.0009133763378486037, - "learning_rate": 0.00019999491734944938, - "loss": 46.0, - "step": 19939 - }, - { - "epoch": 3.211200128829663, - "grad_norm": 0.0032668181229382753, - "learning_rate": 0.00019999491683936506, - "loss": 46.0, - "step": 19940 - }, - { - "epoch": 3.21136116590845, - "grad_norm": 0.003891577711328864, - "learning_rate": 0.00019999491632925517, - "loss": 46.0, - "step": 19941 - }, - { - "epoch": 3.2115222029872377, - "grad_norm": 0.0018773623742163181, - "learning_rate": 0.0001999949158191197, - "loss": 46.0, - "step": 19942 - }, - { - "epoch": 3.211683240066025, - "grad_norm": 0.0008994661038741469, - "learning_rate": 0.0001999949153089586, - "loss": 46.0, - "step": 19943 - }, - { - "epoch": 3.2118442771448126, - "grad_norm": 0.0037934351712465286, - "learning_rate": 0.00019999491479877194, - "loss": 46.0, - "step": 19944 - }, - { - "epoch": 3.2120053142236, - "grad_norm": 0.000621344312094152, - "learning_rate": 0.00019999491428855968, - "loss": 46.0, - "step": 19945 - }, - { - "epoch": 3.2121663513023875, - "grad_norm": 0.0006715527852065861, - "learning_rate": 0.00019999491377832183, - "loss": 46.0, - "step": 19946 - }, - { - "epoch": 3.212327388381175, - "grad_norm": 0.007685655262321234, - "learning_rate": 0.00019999491326805834, - "loss": 46.0, - "step": 19947 - }, - { - "epoch": 3.212488425459962, - "grad_norm": 0.003946251701563597, - "learning_rate": 0.0001999949127577693, - "loss": 46.0, - "step": 19948 - }, - { - "epoch": 3.2126494625387494, - "grad_norm": 0.004356798250228167, - "learning_rate": 0.00019999491224745466, - "loss": 46.0, - "step": 19949 - }, - { - "epoch": 3.212810499617537, - "grad_norm": 0.003964099567383528, - "learning_rate": 0.00019999491173711443, - "loss": 46.0, - "step": 19950 - }, - { - "epoch": 3.2129715366963243, - "grad_norm": 0.0004672715731430799, - "learning_rate": 0.0001999949112267486, - "loss": 46.0, - "step": 19951 - }, - { - "epoch": 3.2131325737751117, - "grad_norm": 0.0037271385081112385, - "learning_rate": 0.0001999949107163572, - "loss": 46.0, - "step": 19952 - }, - { - "epoch": 3.213293610853899, - "grad_norm": 0.001955863321200013, - "learning_rate": 0.00019999491020594016, - "loss": 46.0, - "step": 19953 - }, - { - "epoch": 3.2134546479326866, - "grad_norm": 0.004761870950460434, - "learning_rate": 0.00019999490969549756, - "loss": 46.0, - "step": 19954 - }, - { - "epoch": 3.213615685011474, - "grad_norm": 0.005952753592282534, - "learning_rate": 0.00019999490918502934, - "loss": 46.0, - "step": 19955 - }, - { - "epoch": 3.213776722090261, - "grad_norm": 0.00965246744453907, - "learning_rate": 0.00019999490867453554, - "loss": 46.0, - "step": 19956 - }, - { - "epoch": 3.2139377591690486, - "grad_norm": 0.007838096469640732, - "learning_rate": 0.00019999490816401615, - "loss": 46.0, - "step": 19957 - }, - { - "epoch": 3.214098796247836, - "grad_norm": 0.0017091804184019566, - "learning_rate": 0.00019999490765347118, - "loss": 46.0, - "step": 19958 - }, - { - "epoch": 3.2142598333266235, - "grad_norm": 0.0024706751573830843, - "learning_rate": 0.0001999949071429006, - "loss": 46.0, - "step": 19959 - }, - { - "epoch": 3.214420870405411, - "grad_norm": 0.003551687579602003, - "learning_rate": 0.0001999949066323044, - "loss": 46.0, - "step": 19960 - }, - { - "epoch": 3.2145819074841984, - "grad_norm": 0.00403203209862113, - "learning_rate": 0.00019999490612168265, - "loss": 46.0, - "step": 19961 - }, - { - "epoch": 3.214742944562986, - "grad_norm": 0.0007498914492316544, - "learning_rate": 0.0001999949056110353, - "loss": 46.0, - "step": 19962 - }, - { - "epoch": 3.214903981641773, - "grad_norm": 0.0055478839203715324, - "learning_rate": 0.00019999490510036236, - "loss": 46.0, - "step": 19963 - }, - { - "epoch": 3.2150650187205603, - "grad_norm": 0.0013714124215766788, - "learning_rate": 0.0001999949045896638, - "loss": 46.0, - "step": 19964 - }, - { - "epoch": 3.2152260557993477, - "grad_norm": 0.002369512338191271, - "learning_rate": 0.00019999490407893967, - "loss": 46.0, - "step": 19965 - }, - { - "epoch": 3.215387092878135, - "grad_norm": 0.005854782182723284, - "learning_rate": 0.00019999490356818994, - "loss": 46.0, - "step": 19966 - }, - { - "epoch": 3.2155481299569226, - "grad_norm": 0.002678430173546076, - "learning_rate": 0.0001999949030574146, - "loss": 46.0, - "step": 19967 - }, - { - "epoch": 3.21570916703571, - "grad_norm": 0.0008392783929593861, - "learning_rate": 0.0001999949025466137, - "loss": 46.0, - "step": 19968 - }, - { - "epoch": 3.2158702041144975, - "grad_norm": 0.015743868425488472, - "learning_rate": 0.00019999490203578718, - "loss": 46.0, - "step": 19969 - }, - { - "epoch": 3.216031241193285, - "grad_norm": 0.001104449387639761, - "learning_rate": 0.00019999490152493508, - "loss": 46.0, - "step": 19970 - }, - { - "epoch": 3.216192278272072, - "grad_norm": 0.0076009598560631275, - "learning_rate": 0.0001999949010140574, - "loss": 46.0, - "step": 19971 - }, - { - "epoch": 3.2163533153508594, - "grad_norm": 0.003827478038147092, - "learning_rate": 0.00019999490050315406, - "loss": 46.0, - "step": 19972 - }, - { - "epoch": 3.216514352429647, - "grad_norm": 0.004382527898997068, - "learning_rate": 0.0001999948999922252, - "loss": 46.0, - "step": 19973 - }, - { - "epoch": 3.2166753895084343, - "grad_norm": 0.004581006243824959, - "learning_rate": 0.0001999948994812707, - "loss": 46.0, - "step": 19974 - }, - { - "epoch": 3.216836426587222, - "grad_norm": 0.002011304022744298, - "learning_rate": 0.00019999489897029062, - "loss": 46.0, - "step": 19975 - }, - { - "epoch": 3.2169974636660092, - "grad_norm": 0.005309367086738348, - "learning_rate": 0.00019999489845928494, - "loss": 46.0, - "step": 19976 - }, - { - "epoch": 3.2171585007447963, - "grad_norm": 0.0010016907472163439, - "learning_rate": 0.00019999489794825367, - "loss": 46.0, - "step": 19977 - }, - { - "epoch": 3.2173195378235837, - "grad_norm": 0.0038207981269806623, - "learning_rate": 0.00019999489743719685, - "loss": 46.0, - "step": 19978 - }, - { - "epoch": 3.217480574902371, - "grad_norm": 0.0033337429631501436, - "learning_rate": 0.0001999948969261144, - "loss": 46.0, - "step": 19979 - }, - { - "epoch": 3.2176416119811586, - "grad_norm": 0.011218244209885597, - "learning_rate": 0.00019999489641500635, - "loss": 46.0, - "step": 19980 - }, - { - "epoch": 3.217802649059946, - "grad_norm": 0.009353700093925, - "learning_rate": 0.0001999948959038727, - "loss": 46.0, - "step": 19981 - }, - { - "epoch": 3.2179636861387335, - "grad_norm": 0.005936374422162771, - "learning_rate": 0.00019999489539271347, - "loss": 46.0, - "step": 19982 - }, - { - "epoch": 3.218124723217521, - "grad_norm": 0.008977899327874184, - "learning_rate": 0.00019999489488152868, - "loss": 46.0, - "step": 19983 - }, - { - "epoch": 3.2182857602963084, - "grad_norm": 0.007583903148770332, - "learning_rate": 0.00019999489437031825, - "loss": 46.0, - "step": 19984 - }, - { - "epoch": 3.2184467973750954, - "grad_norm": 0.006460958626121283, - "learning_rate": 0.00019999489385908223, - "loss": 46.0, - "step": 19985 - }, - { - "epoch": 3.218607834453883, - "grad_norm": 0.00982884131371975, - "learning_rate": 0.00019999489334782063, - "loss": 46.0, - "step": 19986 - }, - { - "epoch": 3.2187688715326703, - "grad_norm": 0.0028576948679983616, - "learning_rate": 0.00019999489283653344, - "loss": 46.0, - "step": 19987 - }, - { - "epoch": 3.218929908611458, - "grad_norm": 0.005682682618498802, - "learning_rate": 0.00019999489232522063, - "loss": 46.0, - "step": 19988 - }, - { - "epoch": 3.2190909456902452, - "grad_norm": 0.005526925902813673, - "learning_rate": 0.00019999489181388226, - "loss": 46.0, - "step": 19989 - }, - { - "epoch": 3.2192519827690327, - "grad_norm": 0.0011081219417974353, - "learning_rate": 0.00019999489130251828, - "loss": 46.0, - "step": 19990 - }, - { - "epoch": 3.21941301984782, - "grad_norm": 0.002907171845436096, - "learning_rate": 0.0001999948907911287, - "loss": 46.0, - "step": 19991 - }, - { - "epoch": 3.219574056926607, - "grad_norm": 0.0026894756592810154, - "learning_rate": 0.00019999489027971353, - "loss": 46.0, - "step": 19992 - }, - { - "epoch": 3.2197350940053946, - "grad_norm": 0.002433567075058818, - "learning_rate": 0.00019999488976827276, - "loss": 46.0, - "step": 19993 - }, - { - "epoch": 3.219896131084182, - "grad_norm": 0.0030539012514054775, - "learning_rate": 0.00019999488925680643, - "loss": 46.0, - "step": 19994 - }, - { - "epoch": 3.2200571681629695, - "grad_norm": 0.0007772623794153333, - "learning_rate": 0.00019999488874531448, - "loss": 46.0, - "step": 19995 - }, - { - "epoch": 3.220218205241757, - "grad_norm": 0.0018702943343669176, - "learning_rate": 0.00019999488823379692, - "loss": 46.0, - "step": 19996 - }, - { - "epoch": 3.2203792423205444, - "grad_norm": 0.001668772310949862, - "learning_rate": 0.0001999948877222538, - "loss": 46.0, - "step": 19997 - }, - { - "epoch": 3.220540279399332, - "grad_norm": 0.0018319544615224004, - "learning_rate": 0.00019999488721068507, - "loss": 46.0, - "step": 19998 - }, - { - "epoch": 3.2207013164781193, - "grad_norm": 0.002652161056175828, - "learning_rate": 0.00019999488669909075, - "loss": 46.0, - "step": 19999 - }, - { - "epoch": 3.2208623535569063, - "grad_norm": 0.0018790222238749266, - "learning_rate": 0.00019999488618747084, - "loss": 46.0, - "step": 20000 - }, - { - "epoch": 3.2210233906356938, - "grad_norm": 0.009816550649702549, - "learning_rate": 0.00019999488567582535, - "loss": 46.0, - "step": 20001 - }, - { - "epoch": 3.221184427714481, - "grad_norm": 0.0014191671507433057, - "learning_rate": 0.00019999488516415424, - "loss": 46.0, - "step": 20002 - }, - { - "epoch": 3.2213454647932687, - "grad_norm": 0.002027905313298106, - "learning_rate": 0.00019999488465245751, - "loss": 46.0, - "step": 20003 - }, - { - "epoch": 3.221506501872056, - "grad_norm": 0.006543018389493227, - "learning_rate": 0.00019999488414073526, - "loss": 46.0, - "step": 20004 - }, - { - "epoch": 3.2216675389508436, - "grad_norm": 0.0009746000287123024, - "learning_rate": 0.00019999488362898736, - "loss": 46.0, - "step": 20005 - }, - { - "epoch": 3.221828576029631, - "grad_norm": 0.0026498446241021156, - "learning_rate": 0.0001999948831172139, - "loss": 46.0, - "step": 20006 - }, - { - "epoch": 3.221989613108418, - "grad_norm": 0.0013738729758188128, - "learning_rate": 0.0001999948826054148, - "loss": 46.0, - "step": 20007 - }, - { - "epoch": 3.2221506501872055, - "grad_norm": 0.004459552001208067, - "learning_rate": 0.00019999488209359017, - "loss": 46.0, - "step": 20008 - }, - { - "epoch": 3.222311687265993, - "grad_norm": 0.0033183731138706207, - "learning_rate": 0.00019999488158173992, - "loss": 46.0, - "step": 20009 - }, - { - "epoch": 3.2224727243447804, - "grad_norm": 0.002151679014787078, - "learning_rate": 0.00019999488106986406, - "loss": 46.0, - "step": 20010 - }, - { - "epoch": 3.222633761423568, - "grad_norm": 0.0021975645795464516, - "learning_rate": 0.00019999488055796262, - "loss": 46.0, - "step": 20011 - }, - { - "epoch": 3.2227947985023553, - "grad_norm": 0.004420607816427946, - "learning_rate": 0.00019999488004603558, - "loss": 46.0, - "step": 20012 - }, - { - "epoch": 3.2229558355811427, - "grad_norm": 0.006460062228143215, - "learning_rate": 0.00019999487953408296, - "loss": 46.0, - "step": 20013 - }, - { - "epoch": 3.2231168726599297, - "grad_norm": 0.00763485673815012, - "learning_rate": 0.00019999487902210472, - "loss": 46.0, - "step": 20014 - }, - { - "epoch": 3.223277909738717, - "grad_norm": 0.003281361423432827, - "learning_rate": 0.0001999948785101009, - "loss": 46.0, - "step": 20015 - }, - { - "epoch": 3.2234389468175046, - "grad_norm": 0.0022874479182064533, - "learning_rate": 0.00019999487799807151, - "loss": 46.0, - "step": 20016 - }, - { - "epoch": 3.223599983896292, - "grad_norm": 0.0014630361692979932, - "learning_rate": 0.0001999948774860165, - "loss": 46.0, - "step": 20017 - }, - { - "epoch": 3.2237610209750795, - "grad_norm": 0.009154632687568665, - "learning_rate": 0.00019999487697393588, - "loss": 46.0, - "step": 20018 - }, - { - "epoch": 3.223922058053867, - "grad_norm": 0.0021597500890493393, - "learning_rate": 0.0001999948764618297, - "loss": 46.0, - "step": 20019 - }, - { - "epoch": 3.2240830951326545, - "grad_norm": 0.005203419830650091, - "learning_rate": 0.0001999948759496979, - "loss": 46.0, - "step": 20020 - }, - { - "epoch": 3.2242441322114415, - "grad_norm": 0.0008442361722700298, - "learning_rate": 0.00019999487543754054, - "loss": 46.0, - "step": 20021 - }, - { - "epoch": 3.224405169290229, - "grad_norm": 0.000546953349839896, - "learning_rate": 0.00019999487492535756, - "loss": 46.0, - "step": 20022 - }, - { - "epoch": 3.2245662063690164, - "grad_norm": 0.0030808565206825733, - "learning_rate": 0.000199994874413149, - "loss": 46.0, - "step": 20023 - }, - { - "epoch": 3.224727243447804, - "grad_norm": 0.00448093144223094, - "learning_rate": 0.00019999487390091485, - "loss": 46.0, - "step": 20024 - }, - { - "epoch": 3.2248882805265913, - "grad_norm": 0.002609221264719963, - "learning_rate": 0.0001999948733886551, - "loss": 46.0, - "step": 20025 - }, - { - "epoch": 3.2250493176053787, - "grad_norm": 0.0006908430950716138, - "learning_rate": 0.00019999487287636973, - "loss": 46.0, - "step": 20026 - }, - { - "epoch": 3.225210354684166, - "grad_norm": 0.0007829205715097487, - "learning_rate": 0.0001999948723640588, - "loss": 46.0, - "step": 20027 - }, - { - "epoch": 3.2253713917629536, - "grad_norm": 0.004746154882013798, - "learning_rate": 0.00019999487185172224, - "loss": 46.0, - "step": 20028 - }, - { - "epoch": 3.2255324288417406, - "grad_norm": 0.0032963696867227554, - "learning_rate": 0.00019999487133936014, - "loss": 46.0, - "step": 20029 - }, - { - "epoch": 3.225693465920528, - "grad_norm": 0.0027679477352648973, - "learning_rate": 0.0001999948708269724, - "loss": 46.0, - "step": 20030 - }, - { - "epoch": 3.2258545029993155, - "grad_norm": 0.003499365644529462, - "learning_rate": 0.0001999948703145591, - "loss": 46.0, - "step": 20031 - }, - { - "epoch": 3.226015540078103, - "grad_norm": 0.0031455690041184425, - "learning_rate": 0.00019999486980212022, - "loss": 46.0, - "step": 20032 - }, - { - "epoch": 3.2261765771568904, - "grad_norm": 0.006072757765650749, - "learning_rate": 0.0001999948692896557, - "loss": 46.0, - "step": 20033 - }, - { - "epoch": 3.226337614235678, - "grad_norm": 0.002154717454686761, - "learning_rate": 0.0001999948687771656, - "loss": 46.0, - "step": 20034 - }, - { - "epoch": 3.2264986513144653, - "grad_norm": 0.0023426564875990152, - "learning_rate": 0.00019999486826464993, - "loss": 46.0, - "step": 20035 - }, - { - "epoch": 3.2266596883932523, - "grad_norm": 0.003374747931957245, - "learning_rate": 0.00019999486775210864, - "loss": 46.0, - "step": 20036 - }, - { - "epoch": 3.22682072547204, - "grad_norm": 0.0027175594586879015, - "learning_rate": 0.00019999486723954176, - "loss": 46.0, - "step": 20037 - }, - { - "epoch": 3.2269817625508272, - "grad_norm": 0.001925921649672091, - "learning_rate": 0.0001999948667269493, - "loss": 46.0, - "step": 20038 - }, - { - "epoch": 3.2271427996296147, - "grad_norm": 0.004148443695157766, - "learning_rate": 0.00019999486621433122, - "loss": 46.0, - "step": 20039 - }, - { - "epoch": 3.227303836708402, - "grad_norm": 0.0011738608591258526, - "learning_rate": 0.00019999486570168758, - "loss": 46.0, - "step": 20040 - }, - { - "epoch": 3.2274648737871896, - "grad_norm": 0.0023432662710547447, - "learning_rate": 0.0001999948651890183, - "loss": 46.0, - "step": 20041 - }, - { - "epoch": 3.227625910865977, - "grad_norm": 0.0011990058701485395, - "learning_rate": 0.00019999486467632349, - "loss": 46.0, - "step": 20042 - }, - { - "epoch": 3.2277869479447645, - "grad_norm": 0.0035896487534046173, - "learning_rate": 0.00019999486416360306, - "loss": 46.0, - "step": 20043 - }, - { - "epoch": 3.2279479850235515, - "grad_norm": 0.0007968611898832023, - "learning_rate": 0.00019999486365085702, - "loss": 46.0, - "step": 20044 - }, - { - "epoch": 3.228109022102339, - "grad_norm": 0.006229350343346596, - "learning_rate": 0.0001999948631380854, - "loss": 46.0, - "step": 20045 - }, - { - "epoch": 3.2282700591811264, - "grad_norm": 0.0033001210540533066, - "learning_rate": 0.00019999486262528817, - "loss": 46.0, - "step": 20046 - }, - { - "epoch": 3.228431096259914, - "grad_norm": 0.002701087621971965, - "learning_rate": 0.00019999486211246534, - "loss": 46.0, - "step": 20047 - }, - { - "epoch": 3.2285921333387013, - "grad_norm": 0.0010901725618168712, - "learning_rate": 0.00019999486159961695, - "loss": 46.0, - "step": 20048 - }, - { - "epoch": 3.2287531704174888, - "grad_norm": 0.0037155996542423964, - "learning_rate": 0.00019999486108674295, - "loss": 46.0, - "step": 20049 - }, - { - "epoch": 3.2289142074962762, - "grad_norm": 0.0026516560465097427, - "learning_rate": 0.00019999486057384336, - "loss": 46.0, - "step": 20050 - }, - { - "epoch": 3.2290752445750632, - "grad_norm": 0.0006564582581631839, - "learning_rate": 0.00019999486006091818, - "loss": 46.0, - "step": 20051 - }, - { - "epoch": 3.2292362816538507, - "grad_norm": 0.0029622321017086506, - "learning_rate": 0.00019999485954796739, - "loss": 46.0, - "step": 20052 - }, - { - "epoch": 3.229397318732638, - "grad_norm": 0.005908967927098274, - "learning_rate": 0.00019999485903499103, - "loss": 46.0, - "step": 20053 - }, - { - "epoch": 3.2295583558114256, - "grad_norm": 0.00589406955987215, - "learning_rate": 0.00019999485852198906, - "loss": 46.0, - "step": 20054 - }, - { - "epoch": 3.229719392890213, - "grad_norm": 0.0016828267835080624, - "learning_rate": 0.00019999485800896148, - "loss": 46.0, - "step": 20055 - }, - { - "epoch": 3.2298804299690005, - "grad_norm": 0.0031218871008604765, - "learning_rate": 0.00019999485749590834, - "loss": 46.0, - "step": 20056 - }, - { - "epoch": 3.230041467047788, - "grad_norm": 0.0008189807995222509, - "learning_rate": 0.0001999948569828296, - "loss": 46.0, - "step": 20057 - }, - { - "epoch": 3.230202504126575, - "grad_norm": 0.0042694020085036755, - "learning_rate": 0.00019999485646972524, - "loss": 46.0, - "step": 20058 - }, - { - "epoch": 3.2303635412053624, - "grad_norm": 0.004502309486269951, - "learning_rate": 0.00019999485595659534, - "loss": 46.0, - "step": 20059 - }, - { - "epoch": 3.23052457828415, - "grad_norm": 0.0020823825616389513, - "learning_rate": 0.0001999948554434398, - "loss": 46.0, - "step": 20060 - }, - { - "epoch": 3.2306856153629373, - "grad_norm": 0.012499809265136719, - "learning_rate": 0.0001999948549302587, - "loss": 46.0, - "step": 20061 - }, - { - "epoch": 3.2308466524417248, - "grad_norm": 0.00125453423243016, - "learning_rate": 0.00019999485441705197, - "loss": 46.0, - "step": 20062 - }, - { - "epoch": 3.231007689520512, - "grad_norm": 0.0053497618064284325, - "learning_rate": 0.00019999485390381967, - "loss": 46.0, - "step": 20063 - }, - { - "epoch": 3.2311687265992997, - "grad_norm": 0.001731640542857349, - "learning_rate": 0.00019999485339056175, - "loss": 46.0, - "step": 20064 - }, - { - "epoch": 3.2313297636780867, - "grad_norm": 0.01032224204391241, - "learning_rate": 0.00019999485287727826, - "loss": 46.0, - "step": 20065 - }, - { - "epoch": 3.231490800756874, - "grad_norm": 0.0032352639827877283, - "learning_rate": 0.00019999485236396914, - "loss": 46.0, - "step": 20066 - }, - { - "epoch": 3.2316518378356616, - "grad_norm": 0.0018381490372121334, - "learning_rate": 0.0001999948518506345, - "loss": 46.0, - "step": 20067 - }, - { - "epoch": 3.231812874914449, - "grad_norm": 0.003487006528303027, - "learning_rate": 0.0001999948513372742, - "loss": 46.0, - "step": 20068 - }, - { - "epoch": 3.2319739119932365, - "grad_norm": 0.007057672366499901, - "learning_rate": 0.00019999485082388834, - "loss": 46.0, - "step": 20069 - }, - { - "epoch": 3.232134949072024, - "grad_norm": 0.007005220744758844, - "learning_rate": 0.0001999948503104769, - "loss": 46.0, - "step": 20070 - }, - { - "epoch": 3.2322959861508114, - "grad_norm": 0.0008962152060121298, - "learning_rate": 0.0001999948497970398, - "loss": 46.0, - "step": 20071 - }, - { - "epoch": 3.232457023229599, - "grad_norm": 0.007168622687458992, - "learning_rate": 0.00019999484928357716, - "loss": 46.0, - "step": 20072 - }, - { - "epoch": 3.232618060308386, - "grad_norm": 0.0006051333621144295, - "learning_rate": 0.00019999484877008893, - "loss": 46.0, - "step": 20073 - }, - { - "epoch": 3.2327790973871733, - "grad_norm": 0.010350801050662994, - "learning_rate": 0.00019999484825657508, - "loss": 46.0, - "step": 20074 - }, - { - "epoch": 3.2329401344659607, - "grad_norm": 0.0038665507454425097, - "learning_rate": 0.00019999484774303565, - "loss": 46.0, - "step": 20075 - }, - { - "epoch": 3.233101171544748, - "grad_norm": 0.0037180643994361162, - "learning_rate": 0.00019999484722947063, - "loss": 46.0, - "step": 20076 - }, - { - "epoch": 3.2332622086235356, - "grad_norm": 0.0025976565666496754, - "learning_rate": 0.00019999484671588002, - "loss": 46.0, - "step": 20077 - }, - { - "epoch": 3.233423245702323, - "grad_norm": 0.006441825069487095, - "learning_rate": 0.0001999948462022638, - "loss": 46.0, - "step": 20078 - }, - { - "epoch": 3.2335842827811105, - "grad_norm": 0.005928924772888422, - "learning_rate": 0.000199994845688622, - "loss": 46.0, - "step": 20079 - }, - { - "epoch": 3.2337453198598975, - "grad_norm": 0.012846838682889938, - "learning_rate": 0.00019999484517495462, - "loss": 46.0, - "step": 20080 - }, - { - "epoch": 3.233906356938685, - "grad_norm": 0.0015549047384411097, - "learning_rate": 0.0001999948446612616, - "loss": 46.0, - "step": 20081 - }, - { - "epoch": 3.2340673940174725, - "grad_norm": 0.008976079523563385, - "learning_rate": 0.00019999484414754304, - "loss": 46.0, - "step": 20082 - }, - { - "epoch": 3.23422843109626, - "grad_norm": 0.0017978926189243793, - "learning_rate": 0.00019999484363379883, - "loss": 46.0, - "step": 20083 - }, - { - "epoch": 3.2343894681750474, - "grad_norm": 0.007539292331784964, - "learning_rate": 0.00019999484312002908, - "loss": 46.0, - "step": 20084 - }, - { - "epoch": 3.234550505253835, - "grad_norm": 0.0007488857954740524, - "learning_rate": 0.00019999484260623372, - "loss": 46.0, - "step": 20085 - }, - { - "epoch": 3.2347115423326223, - "grad_norm": 0.00465942919254303, - "learning_rate": 0.00019999484209241275, - "loss": 46.0, - "step": 20086 - }, - { - "epoch": 3.2348725794114097, - "grad_norm": 0.005457698367536068, - "learning_rate": 0.0001999948415785662, - "loss": 46.0, - "step": 20087 - }, - { - "epoch": 3.2350336164901967, - "grad_norm": 0.004229591228067875, - "learning_rate": 0.00019999484106469404, - "loss": 46.0, - "step": 20088 - }, - { - "epoch": 3.235194653568984, - "grad_norm": 0.011333261616528034, - "learning_rate": 0.0001999948405507963, - "loss": 46.0, - "step": 20089 - }, - { - "epoch": 3.2353556906477716, - "grad_norm": 0.0010983553947880864, - "learning_rate": 0.00019999484003687296, - "loss": 46.0, - "step": 20090 - }, - { - "epoch": 3.235516727726559, - "grad_norm": 0.0011428326833993196, - "learning_rate": 0.00019999483952292405, - "loss": 46.0, - "step": 20091 - }, - { - "epoch": 3.2356777648053465, - "grad_norm": 0.0009357786038890481, - "learning_rate": 0.00019999483900894952, - "loss": 46.0, - "step": 20092 - }, - { - "epoch": 3.235838801884134, - "grad_norm": 0.007837435230612755, - "learning_rate": 0.0001999948384949494, - "loss": 46.0, - "step": 20093 - }, - { - "epoch": 3.235999838962921, - "grad_norm": 0.0028274888172745705, - "learning_rate": 0.00019999483798092372, - "loss": 46.0, - "step": 20094 - }, - { - "epoch": 3.2361608760417084, - "grad_norm": 0.004880876280367374, - "learning_rate": 0.0001999948374668724, - "loss": 46.0, - "step": 20095 - }, - { - "epoch": 3.236321913120496, - "grad_norm": 0.0009277403587475419, - "learning_rate": 0.0001999948369527955, - "loss": 46.0, - "step": 20096 - }, - { - "epoch": 3.2364829501992833, - "grad_norm": 0.0020233565010130405, - "learning_rate": 0.00019999483643869304, - "loss": 46.0, - "step": 20097 - }, - { - "epoch": 3.236643987278071, - "grad_norm": 0.003471504896879196, - "learning_rate": 0.00019999483592456494, - "loss": 46.0, - "step": 20098 - }, - { - "epoch": 3.2368050243568582, - "grad_norm": 0.000386683561373502, - "learning_rate": 0.00019999483541041126, - "loss": 46.0, - "step": 20099 - }, - { - "epoch": 3.2369660614356457, - "grad_norm": 0.004984088707715273, - "learning_rate": 0.00019999483489623198, - "loss": 46.0, - "step": 20100 - }, - { - "epoch": 3.237127098514433, - "grad_norm": 0.0024535865522921085, - "learning_rate": 0.00019999483438202712, - "loss": 46.0, - "step": 20101 - }, - { - "epoch": 3.23728813559322, - "grad_norm": 0.0008911170880310237, - "learning_rate": 0.00019999483386779667, - "loss": 46.0, - "step": 20102 - }, - { - "epoch": 3.2374491726720076, - "grad_norm": 0.0036983625032007694, - "learning_rate": 0.00019999483335354063, - "loss": 46.0, - "step": 20103 - }, - { - "epoch": 3.237610209750795, - "grad_norm": 0.0018291639862582088, - "learning_rate": 0.000199994832839259, - "loss": 46.0, - "step": 20104 - }, - { - "epoch": 3.2377712468295825, - "grad_norm": 0.003597784787416458, - "learning_rate": 0.00019999483232495174, - "loss": 46.0, - "step": 20105 - }, - { - "epoch": 3.23793228390837, - "grad_norm": 0.0036381843965500593, - "learning_rate": 0.00019999483181061892, - "loss": 46.0, - "step": 20106 - }, - { - "epoch": 3.2380933209871574, - "grad_norm": 0.0008736033923923969, - "learning_rate": 0.00019999483129626048, - "loss": 46.0, - "step": 20107 - }, - { - "epoch": 3.238254358065945, - "grad_norm": 0.0012813173234462738, - "learning_rate": 0.00019999483078187648, - "loss": 46.0, - "step": 20108 - }, - { - "epoch": 3.238415395144732, - "grad_norm": 0.004531661048531532, - "learning_rate": 0.00019999483026746686, - "loss": 46.0, - "step": 20109 - }, - { - "epoch": 3.2385764322235193, - "grad_norm": 0.0021447124890983105, - "learning_rate": 0.00019999482975303166, - "loss": 46.0, - "step": 20110 - }, - { - "epoch": 3.2387374693023068, - "grad_norm": 0.010657808743417263, - "learning_rate": 0.00019999482923857085, - "loss": 46.0, - "step": 20111 - }, - { - "epoch": 3.2388985063810942, - "grad_norm": 0.0017723551718518138, - "learning_rate": 0.00019999482872408447, - "loss": 46.0, - "step": 20112 - }, - { - "epoch": 3.2390595434598817, - "grad_norm": 0.002096558688208461, - "learning_rate": 0.00019999482820957248, - "loss": 46.0, - "step": 20113 - }, - { - "epoch": 3.239220580538669, - "grad_norm": 0.0033797612413764, - "learning_rate": 0.0001999948276950349, - "loss": 46.0, - "step": 20114 - }, - { - "epoch": 3.2393816176174566, - "grad_norm": 0.0021784815471619368, - "learning_rate": 0.00019999482718047174, - "loss": 46.0, - "step": 20115 - }, - { - "epoch": 3.239542654696244, - "grad_norm": 0.004739631898701191, - "learning_rate": 0.00019999482666588296, - "loss": 46.0, - "step": 20116 - }, - { - "epoch": 3.239703691775031, - "grad_norm": 0.005034257657825947, - "learning_rate": 0.00019999482615126862, - "loss": 46.0, - "step": 20117 - }, - { - "epoch": 3.2398647288538185, - "grad_norm": 0.005472995340824127, - "learning_rate": 0.00019999482563662864, - "loss": 46.0, - "step": 20118 - }, - { - "epoch": 3.240025765932606, - "grad_norm": 0.001128678792156279, - "learning_rate": 0.0001999948251219631, - "loss": 46.0, - "step": 20119 - }, - { - "epoch": 3.2401868030113934, - "grad_norm": 0.0017618119018152356, - "learning_rate": 0.00019999482460727195, - "loss": 46.0, - "step": 20120 - }, - { - "epoch": 3.240347840090181, - "grad_norm": 0.0010891658021137118, - "learning_rate": 0.00019999482409255523, - "loss": 46.0, - "step": 20121 - }, - { - "epoch": 3.2405088771689683, - "grad_norm": 0.0016678066458553076, - "learning_rate": 0.0001999948235778129, - "loss": 46.0, - "step": 20122 - }, - { - "epoch": 3.2406699142477557, - "grad_norm": 0.006061645690351725, - "learning_rate": 0.000199994823063045, - "loss": 46.0, - "step": 20123 - }, - { - "epoch": 3.2408309513265428, - "grad_norm": 0.010113246738910675, - "learning_rate": 0.00019999482254825149, - "loss": 46.0, - "step": 20124 - }, - { - "epoch": 3.24099198840533, - "grad_norm": 0.003650469006970525, - "learning_rate": 0.00019999482203343237, - "loss": 46.0, - "step": 20125 - }, - { - "epoch": 3.2411530254841177, - "grad_norm": 0.0164493415504694, - "learning_rate": 0.00019999482151858766, - "loss": 46.0, - "step": 20126 - }, - { - "epoch": 3.241314062562905, - "grad_norm": 0.0008639883599244058, - "learning_rate": 0.00019999482100371735, - "loss": 46.0, - "step": 20127 - }, - { - "epoch": 3.2414750996416926, - "grad_norm": 0.003550648922100663, - "learning_rate": 0.00019999482048882147, - "loss": 46.0, - "step": 20128 - }, - { - "epoch": 3.24163613672048, - "grad_norm": 0.003403973998501897, - "learning_rate": 0.0001999948199739, - "loss": 46.0, - "step": 20129 - }, - { - "epoch": 3.2417971737992675, - "grad_norm": 0.0042611416429281235, - "learning_rate": 0.00019999481945895292, - "loss": 46.0, - "step": 20130 - }, - { - "epoch": 3.2419582108780545, - "grad_norm": 0.0019452397245913744, - "learning_rate": 0.00019999481894398025, - "loss": 46.0, - "step": 20131 - }, - { - "epoch": 3.242119247956842, - "grad_norm": 0.003827241715043783, - "learning_rate": 0.00019999481842898197, - "loss": 46.0, - "step": 20132 - }, - { - "epoch": 3.2422802850356294, - "grad_norm": 0.0025218441151082516, - "learning_rate": 0.00019999481791395813, - "loss": 46.0, - "step": 20133 - }, - { - "epoch": 3.242441322114417, - "grad_norm": 0.006772161927074194, - "learning_rate": 0.00019999481739890867, - "loss": 46.0, - "step": 20134 - }, - { - "epoch": 3.2426023591932043, - "grad_norm": 0.007145663257688284, - "learning_rate": 0.00019999481688383363, - "loss": 46.0, - "step": 20135 - }, - { - "epoch": 3.2427633962719917, - "grad_norm": 0.001702795852907002, - "learning_rate": 0.00019999481636873297, - "loss": 46.0, - "step": 20136 - }, - { - "epoch": 3.242924433350779, - "grad_norm": 0.014489852823317051, - "learning_rate": 0.00019999481585360678, - "loss": 46.0, - "step": 20137 - }, - { - "epoch": 3.243085470429566, - "grad_norm": 0.0037290044128894806, - "learning_rate": 0.00019999481533845495, - "loss": 46.0, - "step": 20138 - }, - { - "epoch": 3.2432465075083536, - "grad_norm": 0.0073197754099965096, - "learning_rate": 0.00019999481482327753, - "loss": 46.0, - "step": 20139 - }, - { - "epoch": 3.243407544587141, - "grad_norm": 0.00508636562153697, - "learning_rate": 0.00019999481430807453, - "loss": 46.0, - "step": 20140 - }, - { - "epoch": 3.2435685816659285, - "grad_norm": 0.0035158093087375164, - "learning_rate": 0.00019999481379284593, - "loss": 46.0, - "step": 20141 - }, - { - "epoch": 3.243729618744716, - "grad_norm": 0.003287319326773286, - "learning_rate": 0.0001999948132775917, - "loss": 46.0, - "step": 20142 - }, - { - "epoch": 3.2438906558235034, - "grad_norm": 0.004204927012324333, - "learning_rate": 0.0001999948127623119, - "loss": 46.0, - "step": 20143 - }, - { - "epoch": 3.244051692902291, - "grad_norm": 0.0014636429259553552, - "learning_rate": 0.00019999481224700655, - "loss": 46.0, - "step": 20144 - }, - { - "epoch": 3.2442127299810783, - "grad_norm": 0.001967041287571192, - "learning_rate": 0.00019999481173167555, - "loss": 46.0, - "step": 20145 - }, - { - "epoch": 3.2443737670598654, - "grad_norm": 0.0039205183275043964, - "learning_rate": 0.00019999481121631897, - "loss": 46.0, - "step": 20146 - }, - { - "epoch": 3.244534804138653, - "grad_norm": 0.0016264102887362242, - "learning_rate": 0.00019999481070093683, - "loss": 46.0, - "step": 20147 - }, - { - "epoch": 3.2446958412174403, - "grad_norm": 0.0010595329804345965, - "learning_rate": 0.00019999481018552907, - "loss": 46.0, - "step": 20148 - }, - { - "epoch": 3.2448568782962277, - "grad_norm": 0.0006715874187648296, - "learning_rate": 0.0001999948096700957, - "loss": 46.0, - "step": 20149 - }, - { - "epoch": 3.245017915375015, - "grad_norm": 0.008115099743008614, - "learning_rate": 0.00019999480915463676, - "loss": 46.0, - "step": 20150 - }, - { - "epoch": 3.2451789524538026, - "grad_norm": 0.0025919293984770775, - "learning_rate": 0.00019999480863915224, - "loss": 46.0, - "step": 20151 - }, - { - "epoch": 3.24533998953259, - "grad_norm": 0.0009808718459680676, - "learning_rate": 0.00019999480812364208, - "loss": 46.0, - "step": 20152 - }, - { - "epoch": 3.245501026611377, - "grad_norm": 0.003987419884651899, - "learning_rate": 0.00019999480760810636, - "loss": 46.0, - "step": 20153 - }, - { - "epoch": 3.2456620636901645, - "grad_norm": 0.010316704399883747, - "learning_rate": 0.00019999480709254503, - "loss": 46.0, - "step": 20154 - }, - { - "epoch": 3.245823100768952, - "grad_norm": 0.005609240382909775, - "learning_rate": 0.00019999480657695813, - "loss": 46.0, - "step": 20155 - }, - { - "epoch": 3.2459841378477394, - "grad_norm": 0.008579924702644348, - "learning_rate": 0.00019999480606134562, - "loss": 46.0, - "step": 20156 - }, - { - "epoch": 3.246145174926527, - "grad_norm": 0.004717460833489895, - "learning_rate": 0.00019999480554570753, - "loss": 46.0, - "step": 20157 - }, - { - "epoch": 3.2463062120053143, - "grad_norm": 0.015387299470603466, - "learning_rate": 0.00019999480503004382, - "loss": 46.0, - "step": 20158 - }, - { - "epoch": 3.246467249084102, - "grad_norm": 0.0026779938489198685, - "learning_rate": 0.00019999480451435452, - "loss": 46.0, - "step": 20159 - }, - { - "epoch": 3.2466282861628892, - "grad_norm": 0.0012958986917510629, - "learning_rate": 0.00019999480399863963, - "loss": 46.0, - "step": 20160 - }, - { - "epoch": 3.2467893232416762, - "grad_norm": 0.010639621876180172, - "learning_rate": 0.00019999480348289916, - "loss": 46.0, - "step": 20161 - }, - { - "epoch": 3.2469503603204637, - "grad_norm": 0.001926374388858676, - "learning_rate": 0.0001999948029671331, - "loss": 46.0, - "step": 20162 - }, - { - "epoch": 3.247111397399251, - "grad_norm": 0.0015899967402219772, - "learning_rate": 0.00019999480245134143, - "loss": 46.0, - "step": 20163 - }, - { - "epoch": 3.2472724344780386, - "grad_norm": 0.001845594379119575, - "learning_rate": 0.00019999480193552417, - "loss": 46.0, - "step": 20164 - }, - { - "epoch": 3.247433471556826, - "grad_norm": 0.004314039368182421, - "learning_rate": 0.00019999480141968135, - "loss": 46.0, - "step": 20165 - }, - { - "epoch": 3.2475945086356135, - "grad_norm": 0.004243573639541864, - "learning_rate": 0.00019999480090381288, - "loss": 46.0, - "step": 20166 - }, - { - "epoch": 3.247755545714401, - "grad_norm": 0.0025196417700499296, - "learning_rate": 0.00019999480038791883, - "loss": 46.0, - "step": 20167 - }, - { - "epoch": 3.247916582793188, - "grad_norm": 0.005762726068496704, - "learning_rate": 0.00019999479987199923, - "loss": 46.0, - "step": 20168 - }, - { - "epoch": 3.2480776198719754, - "grad_norm": 0.0007746221963316202, - "learning_rate": 0.000199994799356054, - "loss": 46.0, - "step": 20169 - }, - { - "epoch": 3.248238656950763, - "grad_norm": 0.0020391386933624744, - "learning_rate": 0.0001999947988400832, - "loss": 46.0, - "step": 20170 - }, - { - "epoch": 3.2483996940295503, - "grad_norm": 0.004793431144207716, - "learning_rate": 0.00019999479832408677, - "loss": 46.0, - "step": 20171 - }, - { - "epoch": 3.2485607311083378, - "grad_norm": 0.0006186505197547376, - "learning_rate": 0.00019999479780806478, - "loss": 46.0, - "step": 20172 - }, - { - "epoch": 3.248721768187125, - "grad_norm": 0.004951554350554943, - "learning_rate": 0.00019999479729201718, - "loss": 46.0, - "step": 20173 - }, - { - "epoch": 3.2488828052659127, - "grad_norm": 0.004281815141439438, - "learning_rate": 0.000199994796775944, - "loss": 46.0, - "step": 20174 - }, - { - "epoch": 3.2490438423446997, - "grad_norm": 0.001294878195039928, - "learning_rate": 0.0001999947962598452, - "loss": 46.0, - "step": 20175 - }, - { - "epoch": 3.249204879423487, - "grad_norm": 0.0044972640462219715, - "learning_rate": 0.00019999479574372083, - "loss": 46.0, - "step": 20176 - }, - { - "epoch": 3.2493659165022746, - "grad_norm": 0.003758159698918462, - "learning_rate": 0.00019999479522757083, - "loss": 46.0, - "step": 20177 - }, - { - "epoch": 3.249526953581062, - "grad_norm": 0.002592820441350341, - "learning_rate": 0.0001999947947113953, - "loss": 46.0, - "step": 20178 - }, - { - "epoch": 3.2496879906598495, - "grad_norm": 0.006812127307057381, - "learning_rate": 0.00019999479419519412, - "loss": 46.0, - "step": 20179 - }, - { - "epoch": 3.249849027738637, - "grad_norm": 0.001565458718687296, - "learning_rate": 0.00019999479367896735, - "loss": 46.0, - "step": 20180 - }, - { - "epoch": 3.2500100648174244, - "grad_norm": 0.004508504178375006, - "learning_rate": 0.00019999479316271503, - "loss": 46.0, - "step": 20181 - }, - { - "epoch": 3.2501711018962114, - "grad_norm": 0.00215457403101027, - "learning_rate": 0.0001999947926464371, - "loss": 46.0, - "step": 20182 - }, - { - "epoch": 3.250332138974999, - "grad_norm": 0.0008396839839406312, - "learning_rate": 0.00019999479213013354, - "loss": 46.0, - "step": 20183 - }, - { - "epoch": 3.2504931760537863, - "grad_norm": 0.002159419935196638, - "learning_rate": 0.00019999479161380442, - "loss": 46.0, - "step": 20184 - }, - { - "epoch": 3.2506542131325737, - "grad_norm": 0.0007195051875896752, - "learning_rate": 0.0001999947910974497, - "loss": 46.0, - "step": 20185 - }, - { - "epoch": 3.250815250211361, - "grad_norm": 0.00041729031363502145, - "learning_rate": 0.00019999479058106938, - "loss": 46.0, - "step": 20186 - }, - { - "epoch": 3.2509762872901486, - "grad_norm": 0.0020426579285413027, - "learning_rate": 0.00019999479006466348, - "loss": 46.0, - "step": 20187 - }, - { - "epoch": 3.251137324368936, - "grad_norm": 0.0027418043464422226, - "learning_rate": 0.00019999478954823197, - "loss": 46.0, - "step": 20188 - }, - { - "epoch": 3.2512983614477236, - "grad_norm": 0.001304731471464038, - "learning_rate": 0.0001999947890317749, - "loss": 46.0, - "step": 20189 - }, - { - "epoch": 3.2514593985265106, - "grad_norm": 0.0023751843255013227, - "learning_rate": 0.00019999478851529217, - "loss": 46.0, - "step": 20190 - }, - { - "epoch": 3.251620435605298, - "grad_norm": 0.0017244471237063408, - "learning_rate": 0.00019999478799878392, - "loss": 46.0, - "step": 20191 - }, - { - "epoch": 3.2517814726840855, - "grad_norm": 0.004116279538720846, - "learning_rate": 0.00019999478748225003, - "loss": 46.0, - "step": 20192 - }, - { - "epoch": 3.251942509762873, - "grad_norm": 0.0018614638829603791, - "learning_rate": 0.00019999478696569058, - "loss": 46.0, - "step": 20193 - }, - { - "epoch": 3.2521035468416604, - "grad_norm": 0.001488811569288373, - "learning_rate": 0.0001999947864491055, - "loss": 46.0, - "step": 20194 - }, - { - "epoch": 3.252264583920448, - "grad_norm": 0.0035445280373096466, - "learning_rate": 0.00019999478593249486, - "loss": 46.0, - "step": 20195 - }, - { - "epoch": 3.2524256209992353, - "grad_norm": 0.005500501021742821, - "learning_rate": 0.00019999478541585862, - "loss": 46.0, - "step": 20196 - }, - { - "epoch": 3.2525866580780223, - "grad_norm": 0.0012397834798321128, - "learning_rate": 0.00019999478489919677, - "loss": 46.0, - "step": 20197 - }, - { - "epoch": 3.2527476951568097, - "grad_norm": 0.004841955844312906, - "learning_rate": 0.00019999478438250933, - "loss": 46.0, - "step": 20198 - }, - { - "epoch": 3.252908732235597, - "grad_norm": 0.0013978432398289442, - "learning_rate": 0.0001999947838657963, - "loss": 46.0, - "step": 20199 - }, - { - "epoch": 3.2530697693143846, - "grad_norm": 0.005983384791761637, - "learning_rate": 0.00019999478334905768, - "loss": 46.0, - "step": 20200 - }, - { - "epoch": 3.253230806393172, - "grad_norm": 0.0022298377007246017, - "learning_rate": 0.00019999478283229345, - "loss": 46.0, - "step": 20201 - }, - { - "epoch": 3.2533918434719595, - "grad_norm": 0.002500379690900445, - "learning_rate": 0.00019999478231550363, - "loss": 46.0, - "step": 20202 - }, - { - "epoch": 3.253552880550747, - "grad_norm": 0.00448454637080431, - "learning_rate": 0.00019999478179868826, - "loss": 46.0, - "step": 20203 - }, - { - "epoch": 3.2537139176295344, - "grad_norm": 0.0016677109524607658, - "learning_rate": 0.00019999478128184724, - "loss": 46.0, - "step": 20204 - }, - { - "epoch": 3.2538749547083214, - "grad_norm": 0.0017818021588027477, - "learning_rate": 0.00019999478076498063, - "loss": 46.0, - "step": 20205 - }, - { - "epoch": 3.254035991787109, - "grad_norm": 0.005662537645548582, - "learning_rate": 0.00019999478024808847, - "loss": 46.0, - "step": 20206 - }, - { - "epoch": 3.2541970288658963, - "grad_norm": 0.0025463267229497433, - "learning_rate": 0.00019999477973117069, - "loss": 46.0, - "step": 20207 - }, - { - "epoch": 3.254358065944684, - "grad_norm": 0.002630396280437708, - "learning_rate": 0.00019999477921422735, - "loss": 46.0, - "step": 20208 - }, - { - "epoch": 3.2545191030234712, - "grad_norm": 0.0034333046060055494, - "learning_rate": 0.00019999477869725836, - "loss": 46.0, - "step": 20209 - }, - { - "epoch": 3.2546801401022587, - "grad_norm": 0.0009719359804876149, - "learning_rate": 0.0001999947781802638, - "loss": 46.0, - "step": 20210 - }, - { - "epoch": 3.2548411771810457, - "grad_norm": 0.00383931677788496, - "learning_rate": 0.00019999477766324364, - "loss": 46.0, - "step": 20211 - }, - { - "epoch": 3.255002214259833, - "grad_norm": 0.0007152375183068216, - "learning_rate": 0.0001999947771461979, - "loss": 46.0, - "step": 20212 - }, - { - "epoch": 3.2551632513386206, - "grad_norm": 0.002020742278546095, - "learning_rate": 0.00019999477662912657, - "loss": 46.0, - "step": 20213 - }, - { - "epoch": 3.255324288417408, - "grad_norm": 0.002276879968121648, - "learning_rate": 0.00019999477611202965, - "loss": 46.0, - "step": 20214 - }, - { - "epoch": 3.2554853254961955, - "grad_norm": 0.002185808029025793, - "learning_rate": 0.00019999477559490712, - "loss": 46.0, - "step": 20215 - }, - { - "epoch": 3.255646362574983, - "grad_norm": 0.0019452732522040606, - "learning_rate": 0.00019999477507775897, - "loss": 46.0, - "step": 20216 - }, - { - "epoch": 3.2558073996537704, - "grad_norm": 0.0019966145046055317, - "learning_rate": 0.00019999477456058526, - "loss": 46.0, - "step": 20217 - }, - { - "epoch": 3.255968436732558, - "grad_norm": 0.003022721502929926, - "learning_rate": 0.00019999477404338597, - "loss": 46.0, - "step": 20218 - }, - { - "epoch": 3.256129473811345, - "grad_norm": 0.0034912684932351112, - "learning_rate": 0.00019999477352616106, - "loss": 46.0, - "step": 20219 - }, - { - "epoch": 3.2562905108901323, - "grad_norm": 0.003489930182695389, - "learning_rate": 0.00019999477300891057, - "loss": 46.0, - "step": 20220 - }, - { - "epoch": 3.25645154796892, - "grad_norm": 0.00244498229585588, - "learning_rate": 0.00019999477249163449, - "loss": 46.0, - "step": 20221 - }, - { - "epoch": 3.2566125850477072, - "grad_norm": 0.004198648501187563, - "learning_rate": 0.0001999947719743328, - "loss": 46.0, - "step": 20222 - }, - { - "epoch": 3.2567736221264947, - "grad_norm": 0.010327229276299477, - "learning_rate": 0.00019999477145700553, - "loss": 46.0, - "step": 20223 - }, - { - "epoch": 3.256934659205282, - "grad_norm": 0.003642395371571183, - "learning_rate": 0.00019999477093965266, - "loss": 46.0, - "step": 20224 - }, - { - "epoch": 3.2570956962840696, - "grad_norm": 0.002781534567475319, - "learning_rate": 0.0001999947704222742, - "loss": 46.0, - "step": 20225 - }, - { - "epoch": 3.2572567333628566, - "grad_norm": 0.0007274140953086317, - "learning_rate": 0.00019999476990487013, - "loss": 46.0, - "step": 20226 - }, - { - "epoch": 3.257417770441644, - "grad_norm": 0.002433524699881673, - "learning_rate": 0.00019999476938744047, - "loss": 46.0, - "step": 20227 - }, - { - "epoch": 3.2575788075204315, - "grad_norm": 0.0006209313869476318, - "learning_rate": 0.00019999476886998525, - "loss": 46.0, - "step": 20228 - }, - { - "epoch": 3.257739844599219, - "grad_norm": 0.0038785021752119064, - "learning_rate": 0.00019999476835250442, - "loss": 46.0, - "step": 20229 - }, - { - "epoch": 3.2579008816780064, - "grad_norm": 0.0014134763041511178, - "learning_rate": 0.00019999476783499797, - "loss": 46.0, - "step": 20230 - }, - { - "epoch": 3.258061918756794, - "grad_norm": 0.00252518686465919, - "learning_rate": 0.00019999476731746596, - "loss": 46.0, - "step": 20231 - }, - { - "epoch": 3.2582229558355813, - "grad_norm": 0.0059619261883199215, - "learning_rate": 0.00019999476679990834, - "loss": 46.0, - "step": 20232 - }, - { - "epoch": 3.2583839929143688, - "grad_norm": 0.0005177116836421192, - "learning_rate": 0.0001999947662823251, - "loss": 46.0, - "step": 20233 - }, - { - "epoch": 3.2585450299931558, - "grad_norm": 0.0035683391615748405, - "learning_rate": 0.0001999947657647163, - "loss": 46.0, - "step": 20234 - }, - { - "epoch": 3.258706067071943, - "grad_norm": 0.007995419204235077, - "learning_rate": 0.00019999476524708193, - "loss": 46.0, - "step": 20235 - }, - { - "epoch": 3.2588671041507307, - "grad_norm": 0.0016613744664937258, - "learning_rate": 0.00019999476472942193, - "loss": 46.0, - "step": 20236 - }, - { - "epoch": 3.259028141229518, - "grad_norm": 0.0028535693418234587, - "learning_rate": 0.00019999476421173634, - "loss": 46.0, - "step": 20237 - }, - { - "epoch": 3.2591891783083056, - "grad_norm": 0.0012323361588642001, - "learning_rate": 0.0001999947636940252, - "loss": 46.0, - "step": 20238 - }, - { - "epoch": 3.259350215387093, - "grad_norm": 0.005233832634985447, - "learning_rate": 0.0001999947631762884, - "loss": 46.0, - "step": 20239 - }, - { - "epoch": 3.25951125246588, - "grad_norm": 0.008632045239210129, - "learning_rate": 0.00019999476265852604, - "loss": 46.0, - "step": 20240 - }, - { - "epoch": 3.2596722895446675, - "grad_norm": 0.000558165367692709, - "learning_rate": 0.0001999947621407381, - "loss": 46.0, - "step": 20241 - }, - { - "epoch": 3.259833326623455, - "grad_norm": 0.007402414921671152, - "learning_rate": 0.00019999476162292453, - "loss": 46.0, - "step": 20242 - }, - { - "epoch": 3.2599943637022424, - "grad_norm": 0.0020012983586639166, - "learning_rate": 0.00019999476110508537, - "loss": 46.0, - "step": 20243 - }, - { - "epoch": 3.26015540078103, - "grad_norm": 0.008699571713805199, - "learning_rate": 0.00019999476058722064, - "loss": 46.0, - "step": 20244 - }, - { - "epoch": 3.2603164378598173, - "grad_norm": 0.0037860579323023558, - "learning_rate": 0.0001999947600693303, - "loss": 46.0, - "step": 20245 - }, - { - "epoch": 3.2604774749386047, - "grad_norm": 0.0028904483187943697, - "learning_rate": 0.0001999947595514144, - "loss": 46.0, - "step": 20246 - }, - { - "epoch": 3.260638512017392, - "grad_norm": 0.009074817411601543, - "learning_rate": 0.00019999475903347287, - "loss": 46.0, - "step": 20247 - }, - { - "epoch": 3.2607995490961796, - "grad_norm": 0.002737767994403839, - "learning_rate": 0.00019999475851550575, - "loss": 46.0, - "step": 20248 - }, - { - "epoch": 3.2609605861749666, - "grad_norm": 0.0028765045572072268, - "learning_rate": 0.00019999475799751303, - "loss": 46.0, - "step": 20249 - }, - { - "epoch": 3.261121623253754, - "grad_norm": 0.0038798353634774685, - "learning_rate": 0.00019999475747949473, - "loss": 46.0, - "step": 20250 - }, - { - "epoch": 3.2612826603325415, - "grad_norm": 0.001062415773048997, - "learning_rate": 0.00019999475696145087, - "loss": 46.0, - "step": 20251 - }, - { - "epoch": 3.261443697411329, - "grad_norm": 0.005231064278632402, - "learning_rate": 0.00019999475644338137, - "loss": 46.0, - "step": 20252 - }, - { - "epoch": 3.2616047344901165, - "grad_norm": 0.0025006080977618694, - "learning_rate": 0.00019999475592528628, - "loss": 46.0, - "step": 20253 - }, - { - "epoch": 3.261765771568904, - "grad_norm": 0.0017447317950427532, - "learning_rate": 0.0001999947554071656, - "loss": 46.0, - "step": 20254 - }, - { - "epoch": 3.261926808647691, - "grad_norm": 0.003976028878241777, - "learning_rate": 0.00019999475488901935, - "loss": 46.0, - "step": 20255 - }, - { - "epoch": 3.2620878457264784, - "grad_norm": 0.004502329509705305, - "learning_rate": 0.0001999947543708475, - "loss": 46.0, - "step": 20256 - }, - { - "epoch": 3.262248882805266, - "grad_norm": 0.0025826182682067156, - "learning_rate": 0.00019999475385265003, - "loss": 46.0, - "step": 20257 - }, - { - "epoch": 3.2624099198840533, - "grad_norm": 0.0040931422263383865, - "learning_rate": 0.00019999475333442698, - "loss": 46.0, - "step": 20258 - }, - { - "epoch": 3.2625709569628407, - "grad_norm": 0.005306491162627935, - "learning_rate": 0.00019999475281617834, - "loss": 46.0, - "step": 20259 - }, - { - "epoch": 3.262731994041628, - "grad_norm": 0.00695073651149869, - "learning_rate": 0.00019999475229790412, - "loss": 46.0, - "step": 20260 - }, - { - "epoch": 3.2628930311204156, - "grad_norm": 0.003346773562952876, - "learning_rate": 0.00019999475177960428, - "loss": 46.0, - "step": 20261 - }, - { - "epoch": 3.263054068199203, - "grad_norm": 0.008357105776667595, - "learning_rate": 0.00019999475126127885, - "loss": 46.0, - "step": 20262 - }, - { - "epoch": 3.26321510527799, - "grad_norm": 0.005580850411206484, - "learning_rate": 0.00019999475074292784, - "loss": 46.0, - "step": 20263 - }, - { - "epoch": 3.2633761423567775, - "grad_norm": 0.003865674138069153, - "learning_rate": 0.00019999475022455123, - "loss": 46.0, - "step": 20264 - }, - { - "epoch": 3.263537179435565, - "grad_norm": 0.007624385412782431, - "learning_rate": 0.00019999474970614905, - "loss": 46.0, - "step": 20265 - }, - { - "epoch": 3.2636982165143524, - "grad_norm": 0.0013032362330704927, - "learning_rate": 0.00019999474918772124, - "loss": 46.0, - "step": 20266 - }, - { - "epoch": 3.26385925359314, - "grad_norm": 0.005241406615823507, - "learning_rate": 0.00019999474866926785, - "loss": 46.0, - "step": 20267 - }, - { - "epoch": 3.2640202906719273, - "grad_norm": 0.00929312314838171, - "learning_rate": 0.00019999474815078885, - "loss": 46.0, - "step": 20268 - }, - { - "epoch": 3.264181327750715, - "grad_norm": 0.003043997334316373, - "learning_rate": 0.00019999474763228428, - "loss": 46.0, - "step": 20269 - }, - { - "epoch": 3.264342364829502, - "grad_norm": 0.002271262463182211, - "learning_rate": 0.00019999474711375413, - "loss": 46.0, - "step": 20270 - }, - { - "epoch": 3.2645034019082892, - "grad_norm": 0.00334842293523252, - "learning_rate": 0.00019999474659519834, - "loss": 46.0, - "step": 20271 - }, - { - "epoch": 3.2646644389870767, - "grad_norm": 0.0010278024710714817, - "learning_rate": 0.000199994746076617, - "loss": 46.0, - "step": 20272 - }, - { - "epoch": 3.264825476065864, - "grad_norm": 0.0020910913590341806, - "learning_rate": 0.00019999474555801005, - "loss": 46.0, - "step": 20273 - }, - { - "epoch": 3.2649865131446516, - "grad_norm": 0.003604491939768195, - "learning_rate": 0.0001999947450393775, - "loss": 46.0, - "step": 20274 - }, - { - "epoch": 3.265147550223439, - "grad_norm": 0.003708265256136656, - "learning_rate": 0.00019999474452071935, - "loss": 46.0, - "step": 20275 - }, - { - "epoch": 3.2653085873022265, - "grad_norm": 0.002162586199119687, - "learning_rate": 0.00019999474400203565, - "loss": 46.0, - "step": 20276 - }, - { - "epoch": 3.265469624381014, - "grad_norm": 0.005173577461391687, - "learning_rate": 0.0001999947434833263, - "loss": 46.0, - "step": 20277 - }, - { - "epoch": 3.265630661459801, - "grad_norm": 0.004262245260179043, - "learning_rate": 0.00019999474296459138, - "loss": 46.0, - "step": 20278 - }, - { - "epoch": 3.2657916985385884, - "grad_norm": 0.002357808407396078, - "learning_rate": 0.00019999474244583089, - "loss": 46.0, - "step": 20279 - }, - { - "epoch": 3.265952735617376, - "grad_norm": 0.0014336516615003347, - "learning_rate": 0.00019999474192704478, - "loss": 46.0, - "step": 20280 - }, - { - "epoch": 3.2661137726961633, - "grad_norm": 0.0024729929864406586, - "learning_rate": 0.00019999474140823306, - "loss": 46.0, - "step": 20281 - }, - { - "epoch": 3.2662748097749508, - "grad_norm": 0.0009040281875059009, - "learning_rate": 0.00019999474088939576, - "loss": 46.0, - "step": 20282 - }, - { - "epoch": 3.2664358468537382, - "grad_norm": 0.010032703168690205, - "learning_rate": 0.0001999947403705329, - "loss": 46.0, - "step": 20283 - }, - { - "epoch": 3.2665968839325252, - "grad_norm": 0.0009144143550656736, - "learning_rate": 0.0001999947398516444, - "loss": 46.0, - "step": 20284 - }, - { - "epoch": 3.2667579210113127, - "grad_norm": 0.0071673267520964146, - "learning_rate": 0.00019999473933273034, - "loss": 46.0, - "step": 20285 - }, - { - "epoch": 3.2669189580901, - "grad_norm": 0.006829691119492054, - "learning_rate": 0.00019999473881379066, - "loss": 46.0, - "step": 20286 - }, - { - "epoch": 3.2670799951688876, - "grad_norm": 0.0014621352311223745, - "learning_rate": 0.00019999473829482542, - "loss": 46.0, - "step": 20287 - }, - { - "epoch": 3.267241032247675, - "grad_norm": 0.0033235775772482157, - "learning_rate": 0.00019999473777583456, - "loss": 46.0, - "step": 20288 - }, - { - "epoch": 3.2674020693264625, - "grad_norm": 0.002992296824231744, - "learning_rate": 0.0001999947372568181, - "loss": 46.0, - "step": 20289 - }, - { - "epoch": 3.26756310640525, - "grad_norm": 0.005860642530024052, - "learning_rate": 0.00019999473673777606, - "loss": 46.0, - "step": 20290 - }, - { - "epoch": 3.2677241434840374, - "grad_norm": 0.001699344371445477, - "learning_rate": 0.00019999473621870844, - "loss": 46.0, - "step": 20291 - }, - { - "epoch": 3.2678851805628244, - "grad_norm": 0.0011733805295079947, - "learning_rate": 0.0001999947356996152, - "loss": 46.0, - "step": 20292 - }, - { - "epoch": 3.268046217641612, - "grad_norm": 0.0076037305407226086, - "learning_rate": 0.0001999947351804964, - "loss": 46.0, - "step": 20293 - }, - { - "epoch": 3.2682072547203993, - "grad_norm": 0.00441789161413908, - "learning_rate": 0.00019999473466135196, - "loss": 46.0, - "step": 20294 - }, - { - "epoch": 3.2683682917991868, - "grad_norm": 0.005370750557631254, - "learning_rate": 0.00019999473414218194, - "loss": 46.0, - "step": 20295 - }, - { - "epoch": 3.268529328877974, - "grad_norm": 0.011534000746905804, - "learning_rate": 0.00019999473362298636, - "loss": 46.0, - "step": 20296 - }, - { - "epoch": 3.2686903659567617, - "grad_norm": 0.0047613345086574554, - "learning_rate": 0.00019999473310376516, - "loss": 46.0, - "step": 20297 - }, - { - "epoch": 3.268851403035549, - "grad_norm": 0.0040600174106657505, - "learning_rate": 0.00019999473258451838, - "loss": 46.0, - "step": 20298 - }, - { - "epoch": 3.269012440114336, - "grad_norm": 0.005267912056297064, - "learning_rate": 0.00019999473206524598, - "loss": 46.0, - "step": 20299 - }, - { - "epoch": 3.2691734771931236, - "grad_norm": 0.0023280796594917774, - "learning_rate": 0.000199994731545948, - "loss": 46.0, - "step": 20300 - }, - { - "epoch": 3.269334514271911, - "grad_norm": 0.00952787883579731, - "learning_rate": 0.00019999473102662446, - "loss": 46.0, - "step": 20301 - }, - { - "epoch": 3.2694955513506985, - "grad_norm": 0.002269686432555318, - "learning_rate": 0.00019999473050727527, - "loss": 46.0, - "step": 20302 - }, - { - "epoch": 3.269656588429486, - "grad_norm": 0.00834378320723772, - "learning_rate": 0.00019999472998790053, - "loss": 46.0, - "step": 20303 - }, - { - "epoch": 3.2698176255082734, - "grad_norm": 0.0015597398160025477, - "learning_rate": 0.00019999472946850017, - "loss": 46.0, - "step": 20304 - }, - { - "epoch": 3.269978662587061, - "grad_norm": 0.0025558231864124537, - "learning_rate": 0.00019999472894907422, - "loss": 46.0, - "step": 20305 - }, - { - "epoch": 3.2701396996658483, - "grad_norm": 0.003061908995732665, - "learning_rate": 0.0001999947284296227, - "loss": 46.0, - "step": 20306 - }, - { - "epoch": 3.2703007367446353, - "grad_norm": 0.008666201494634151, - "learning_rate": 0.00019999472791014554, - "loss": 46.0, - "step": 20307 - }, - { - "epoch": 3.2704617738234227, - "grad_norm": 0.0032079971861094236, - "learning_rate": 0.00019999472739064283, - "loss": 46.0, - "step": 20308 - }, - { - "epoch": 3.27062281090221, - "grad_norm": 0.0030128522776067257, - "learning_rate": 0.0001999947268711145, - "loss": 46.0, - "step": 20309 - }, - { - "epoch": 3.2707838479809976, - "grad_norm": 0.005816400051116943, - "learning_rate": 0.0001999947263515606, - "loss": 46.0, - "step": 20310 - }, - { - "epoch": 3.270944885059785, - "grad_norm": 0.003315188456326723, - "learning_rate": 0.0001999947258319811, - "loss": 46.0, - "step": 20311 - }, - { - "epoch": 3.2711059221385725, - "grad_norm": 0.0037742629647254944, - "learning_rate": 0.000199994725312376, - "loss": 46.0, - "step": 20312 - }, - { - "epoch": 3.2712669592173595, - "grad_norm": 0.002245318843051791, - "learning_rate": 0.0001999947247927453, - "loss": 46.0, - "step": 20313 - }, - { - "epoch": 3.271427996296147, - "grad_norm": 0.0032121038530021906, - "learning_rate": 0.00019999472427308904, - "loss": 46.0, - "step": 20314 - }, - { - "epoch": 3.2715890333749345, - "grad_norm": 0.006445621140301228, - "learning_rate": 0.00019999472375340716, - "loss": 46.0, - "step": 20315 - }, - { - "epoch": 3.271750070453722, - "grad_norm": 0.004758812487125397, - "learning_rate": 0.00019999472323369968, - "loss": 46.0, - "step": 20316 - }, - { - "epoch": 3.2719111075325094, - "grad_norm": 0.011252738535404205, - "learning_rate": 0.0001999947227139666, - "loss": 46.0, - "step": 20317 - }, - { - "epoch": 3.272072144611297, - "grad_norm": 0.00478716753423214, - "learning_rate": 0.00019999472219420797, - "loss": 46.0, - "step": 20318 - }, - { - "epoch": 3.2722331816900843, - "grad_norm": 0.009921620599925518, - "learning_rate": 0.0001999947216744237, - "loss": 46.0, - "step": 20319 - }, - { - "epoch": 3.2723942187688717, - "grad_norm": 0.001872863038443029, - "learning_rate": 0.00019999472115461386, - "loss": 46.0, - "step": 20320 - }, - { - "epoch": 3.272555255847659, - "grad_norm": 0.0028653375338763, - "learning_rate": 0.0001999947206347784, - "loss": 46.0, - "step": 20321 - }, - { - "epoch": 3.272716292926446, - "grad_norm": 0.0021373596973717213, - "learning_rate": 0.00019999472011491737, - "loss": 46.0, - "step": 20322 - }, - { - "epoch": 3.2728773300052336, - "grad_norm": 0.00657370500266552, - "learning_rate": 0.00019999471959503075, - "loss": 46.0, - "step": 20323 - }, - { - "epoch": 3.273038367084021, - "grad_norm": 0.001477471087127924, - "learning_rate": 0.00019999471907511854, - "loss": 46.0, - "step": 20324 - }, - { - "epoch": 3.2731994041628085, - "grad_norm": 0.0005806459812447429, - "learning_rate": 0.0001999947185551807, - "loss": 46.0, - "step": 20325 - }, - { - "epoch": 3.273360441241596, - "grad_norm": 0.000988700077868998, - "learning_rate": 0.0001999947180352173, - "loss": 46.0, - "step": 20326 - }, - { - "epoch": 3.2735214783203834, - "grad_norm": 0.001616235706023872, - "learning_rate": 0.00019999471751522833, - "loss": 46.0, - "step": 20327 - }, - { - "epoch": 3.2736825153991704, - "grad_norm": 0.004434907343238592, - "learning_rate": 0.0001999947169952137, - "loss": 46.0, - "step": 20328 - }, - { - "epoch": 3.273843552477958, - "grad_norm": 0.011576223187148571, - "learning_rate": 0.0001999947164751735, - "loss": 46.0, - "step": 20329 - }, - { - "epoch": 3.2740045895567453, - "grad_norm": 0.0007288128254003823, - "learning_rate": 0.00019999471595510775, - "loss": 46.0, - "step": 20330 - }, - { - "epoch": 3.274165626635533, - "grad_norm": 0.00329859578050673, - "learning_rate": 0.00019999471543501635, - "loss": 46.0, - "step": 20331 - }, - { - "epoch": 3.2743266637143202, - "grad_norm": 0.008171114139258862, - "learning_rate": 0.0001999947149148994, - "loss": 46.0, - "step": 20332 - }, - { - "epoch": 3.2744877007931077, - "grad_norm": 0.0018043456366285682, - "learning_rate": 0.00019999471439475683, - "loss": 46.0, - "step": 20333 - }, - { - "epoch": 3.274648737871895, - "grad_norm": 0.006257224828004837, - "learning_rate": 0.00019999471387458867, - "loss": 46.0, - "step": 20334 - }, - { - "epoch": 3.2748097749506826, - "grad_norm": 0.004654455464333296, - "learning_rate": 0.00019999471335439495, - "loss": 46.0, - "step": 20335 - }, - { - "epoch": 3.2749708120294696, - "grad_norm": 0.002779880305752158, - "learning_rate": 0.00019999471283417558, - "loss": 46.0, - "step": 20336 - }, - { - "epoch": 3.275131849108257, - "grad_norm": 0.0016182982362806797, - "learning_rate": 0.00019999471231393066, - "loss": 46.0, - "step": 20337 - }, - { - "epoch": 3.2752928861870445, - "grad_norm": 0.008424478583037853, - "learning_rate": 0.00019999471179366012, - "loss": 46.0, - "step": 20338 - }, - { - "epoch": 3.275453923265832, - "grad_norm": 0.003382084658369422, - "learning_rate": 0.000199994711273364, - "loss": 46.0, - "step": 20339 - }, - { - "epoch": 3.2756149603446194, - "grad_norm": 0.0006228085840120912, - "learning_rate": 0.00019999471075304227, - "loss": 46.0, - "step": 20340 - }, - { - "epoch": 3.275775997423407, - "grad_norm": 0.0029146980959922075, - "learning_rate": 0.00019999471023269497, - "loss": 46.0, - "step": 20341 - }, - { - "epoch": 3.2759370345021943, - "grad_norm": 0.003615321358665824, - "learning_rate": 0.00019999470971232208, - "loss": 46.0, - "step": 20342 - }, - { - "epoch": 3.2760980715809813, - "grad_norm": 0.004053561948239803, - "learning_rate": 0.00019999470919192355, - "loss": 46.0, - "step": 20343 - }, - { - "epoch": 3.2762591086597688, - "grad_norm": 0.00173073576297611, - "learning_rate": 0.0001999947086714995, - "loss": 46.0, - "step": 20344 - }, - { - "epoch": 3.2764201457385562, - "grad_norm": 0.0049613104201853275, - "learning_rate": 0.0001999947081510498, - "loss": 46.0, - "step": 20345 - }, - { - "epoch": 3.2765811828173437, - "grad_norm": 0.0024762353859841824, - "learning_rate": 0.0001999947076305745, - "loss": 46.0, - "step": 20346 - }, - { - "epoch": 3.276742219896131, - "grad_norm": 0.015306377783417702, - "learning_rate": 0.00019999470711007367, - "loss": 46.0, - "step": 20347 - }, - { - "epoch": 3.2769032569749186, - "grad_norm": 0.0014777373289689422, - "learning_rate": 0.0001999947065895472, - "loss": 46.0, - "step": 20348 - }, - { - "epoch": 3.277064294053706, - "grad_norm": 0.0022636540234088898, - "learning_rate": 0.00019999470606899512, - "loss": 46.0, - "step": 20349 - }, - { - "epoch": 3.2772253311324935, - "grad_norm": 0.0004711646761279553, - "learning_rate": 0.00019999470554841748, - "loss": 46.0, - "step": 20350 - }, - { - "epoch": 3.2773863682112805, - "grad_norm": 0.0030130173545330763, - "learning_rate": 0.00019999470502781423, - "loss": 46.0, - "step": 20351 - }, - { - "epoch": 3.277547405290068, - "grad_norm": 0.0039952476508915424, - "learning_rate": 0.00019999470450718541, - "loss": 46.0, - "step": 20352 - }, - { - "epoch": 3.2777084423688554, - "grad_norm": 0.00043090531835332513, - "learning_rate": 0.00019999470398653096, - "loss": 46.0, - "step": 20353 - }, - { - "epoch": 3.277869479447643, - "grad_norm": 0.002169425366446376, - "learning_rate": 0.00019999470346585092, - "loss": 46.0, - "step": 20354 - }, - { - "epoch": 3.2780305165264303, - "grad_norm": 0.0063762301579117775, - "learning_rate": 0.0001999947029451453, - "loss": 46.0, - "step": 20355 - }, - { - "epoch": 3.2781915536052177, - "grad_norm": 0.0014614181127399206, - "learning_rate": 0.0001999947024244141, - "loss": 46.0, - "step": 20356 - }, - { - "epoch": 3.2783525906840048, - "grad_norm": 0.015582134015858173, - "learning_rate": 0.0001999947019036573, - "loss": 46.0, - "step": 20357 - }, - { - "epoch": 3.278513627762792, - "grad_norm": 0.001045424840413034, - "learning_rate": 0.0001999947013828749, - "loss": 46.0, - "step": 20358 - }, - { - "epoch": 3.2786746648415797, - "grad_norm": 0.006103664170950651, - "learning_rate": 0.0001999947008620669, - "loss": 46.0, - "step": 20359 - }, - { - "epoch": 3.278835701920367, - "grad_norm": 0.007698753383010626, - "learning_rate": 0.0001999947003412333, - "loss": 46.0, - "step": 20360 - }, - { - "epoch": 3.2789967389991546, - "grad_norm": 0.003799412166699767, - "learning_rate": 0.00019999469982037412, - "loss": 46.0, - "step": 20361 - }, - { - "epoch": 3.279157776077942, - "grad_norm": 0.0030289930291473866, - "learning_rate": 0.00019999469929948935, - "loss": 46.0, - "step": 20362 - }, - { - "epoch": 3.2793188131567295, - "grad_norm": 0.0024464610032737255, - "learning_rate": 0.000199994698778579, - "loss": 46.0, - "step": 20363 - }, - { - "epoch": 3.279479850235517, - "grad_norm": 0.0048452941700816154, - "learning_rate": 0.00019999469825764303, - "loss": 46.0, - "step": 20364 - }, - { - "epoch": 3.2796408873143044, - "grad_norm": 0.0014272892149165273, - "learning_rate": 0.00019999469773668147, - "loss": 46.0, - "step": 20365 - }, - { - "epoch": 3.2798019243930914, - "grad_norm": 0.004037264734506607, - "learning_rate": 0.00019999469721569433, - "loss": 46.0, - "step": 20366 - }, - { - "epoch": 3.279962961471879, - "grad_norm": 0.002270028693601489, - "learning_rate": 0.00019999469669468157, - "loss": 46.0, - "step": 20367 - }, - { - "epoch": 3.2801239985506663, - "grad_norm": 0.0011525989975780249, - "learning_rate": 0.00019999469617364323, - "loss": 46.0, - "step": 20368 - }, - { - "epoch": 3.2802850356294537, - "grad_norm": 0.015889935195446014, - "learning_rate": 0.00019999469565257932, - "loss": 46.0, - "step": 20369 - }, - { - "epoch": 3.280446072708241, - "grad_norm": 0.0024111676029860973, - "learning_rate": 0.0001999946951314898, - "loss": 46.0, - "step": 20370 - }, - { - "epoch": 3.2806071097870286, - "grad_norm": 0.005994080100208521, - "learning_rate": 0.00019999469461037467, - "loss": 46.0, - "step": 20371 - }, - { - "epoch": 3.2807681468658156, - "grad_norm": 0.002925029257312417, - "learning_rate": 0.00019999469408923397, - "loss": 46.0, - "step": 20372 - }, - { - "epoch": 3.280929183944603, - "grad_norm": 0.00670371251180768, - "learning_rate": 0.00019999469356806767, - "loss": 46.0, - "step": 20373 - }, - { - "epoch": 3.2810902210233905, - "grad_norm": 0.005363027565181255, - "learning_rate": 0.00019999469304687577, - "loss": 46.0, - "step": 20374 - }, - { - "epoch": 3.281251258102178, - "grad_norm": 0.005633344408124685, - "learning_rate": 0.0001999946925256583, - "loss": 46.0, - "step": 20375 - }, - { - "epoch": 3.2814122951809654, - "grad_norm": 0.00604011956602335, - "learning_rate": 0.00019999469200441522, - "loss": 46.0, - "step": 20376 - }, - { - "epoch": 3.281573332259753, - "grad_norm": 0.0011458328226581216, - "learning_rate": 0.00019999469148314654, - "loss": 46.0, - "step": 20377 - }, - { - "epoch": 3.2817343693385403, - "grad_norm": 0.007230387534946203, - "learning_rate": 0.00019999469096185224, - "loss": 46.0, - "step": 20378 - }, - { - "epoch": 3.281895406417328, - "grad_norm": 0.0023067896254360676, - "learning_rate": 0.00019999469044053238, - "loss": 46.0, - "step": 20379 - }, - { - "epoch": 3.282056443496115, - "grad_norm": 0.004255686420947313, - "learning_rate": 0.00019999468991918694, - "loss": 46.0, - "step": 20380 - }, - { - "epoch": 3.2822174805749023, - "grad_norm": 0.0031535234302282333, - "learning_rate": 0.00019999468939781588, - "loss": 46.0, - "step": 20381 - }, - { - "epoch": 3.2823785176536897, - "grad_norm": 0.00668123085051775, - "learning_rate": 0.00019999468887641923, - "loss": 46.0, - "step": 20382 - }, - { - "epoch": 3.282539554732477, - "grad_norm": 0.002975334180518985, - "learning_rate": 0.000199994688354997, - "loss": 46.0, - "step": 20383 - }, - { - "epoch": 3.2827005918112646, - "grad_norm": 0.00376451900228858, - "learning_rate": 0.00019999468783354915, - "loss": 46.0, - "step": 20384 - }, - { - "epoch": 3.282861628890052, - "grad_norm": 0.002300912281498313, - "learning_rate": 0.00019999468731207577, - "loss": 46.0, - "step": 20385 - }, - { - "epoch": 3.2830226659688395, - "grad_norm": 0.002907482208684087, - "learning_rate": 0.00019999468679057674, - "loss": 46.0, - "step": 20386 - }, - { - "epoch": 3.2831837030476265, - "grad_norm": 0.0014460888924077153, - "learning_rate": 0.0001999946862690521, - "loss": 46.0, - "step": 20387 - }, - { - "epoch": 3.283344740126414, - "grad_norm": 0.0010876660235226154, - "learning_rate": 0.0001999946857475019, - "loss": 46.0, - "step": 20388 - }, - { - "epoch": 3.2835057772052014, - "grad_norm": 0.0024834831710904837, - "learning_rate": 0.0001999946852259261, - "loss": 46.0, - "step": 20389 - }, - { - "epoch": 3.283666814283989, - "grad_norm": 0.0009719815570861101, - "learning_rate": 0.00019999468470432473, - "loss": 46.0, - "step": 20390 - }, - { - "epoch": 3.2838278513627763, - "grad_norm": 0.009112821891903877, - "learning_rate": 0.00019999468418269774, - "loss": 46.0, - "step": 20391 - }, - { - "epoch": 3.283988888441564, - "grad_norm": 0.0022545999381691217, - "learning_rate": 0.00019999468366104514, - "loss": 46.0, - "step": 20392 - }, - { - "epoch": 3.2841499255203512, - "grad_norm": 0.000750127190258354, - "learning_rate": 0.00019999468313936698, - "loss": 46.0, - "step": 20393 - }, - { - "epoch": 3.2843109625991387, - "grad_norm": 0.002832061145454645, - "learning_rate": 0.0001999946826176632, - "loss": 46.0, - "step": 20394 - }, - { - "epoch": 3.2844719996779257, - "grad_norm": 0.0018956970889121294, - "learning_rate": 0.00019999468209593385, - "loss": 46.0, - "step": 20395 - }, - { - "epoch": 3.284633036756713, - "grad_norm": 0.0022145244292914867, - "learning_rate": 0.0001999946815741789, - "loss": 46.0, - "step": 20396 - }, - { - "epoch": 3.2847940738355006, - "grad_norm": 0.011509504169225693, - "learning_rate": 0.00019999468105239837, - "loss": 46.0, - "step": 20397 - }, - { - "epoch": 3.284955110914288, - "grad_norm": 0.002674112329259515, - "learning_rate": 0.00019999468053059222, - "loss": 46.0, - "step": 20398 - }, - { - "epoch": 3.2851161479930755, - "grad_norm": 0.007826443761587143, - "learning_rate": 0.00019999468000876048, - "loss": 46.0, - "step": 20399 - }, - { - "epoch": 3.285277185071863, - "grad_norm": 0.0027850372716784477, - "learning_rate": 0.00019999467948690315, - "loss": 46.0, - "step": 20400 - }, - { - "epoch": 3.28543822215065, - "grad_norm": 0.003031131811439991, - "learning_rate": 0.00019999467896502022, - "loss": 46.0, - "step": 20401 - }, - { - "epoch": 3.2855992592294374, - "grad_norm": 0.0012760359095409513, - "learning_rate": 0.00019999467844311172, - "loss": 46.0, - "step": 20402 - }, - { - "epoch": 3.285760296308225, - "grad_norm": 0.007209960371255875, - "learning_rate": 0.0001999946779211776, - "loss": 46.0, - "step": 20403 - }, - { - "epoch": 3.2859213333870123, - "grad_norm": 0.0016973179299384356, - "learning_rate": 0.00019999467739921788, - "loss": 46.0, - "step": 20404 - }, - { - "epoch": 3.2860823704657998, - "grad_norm": 0.004956655669957399, - "learning_rate": 0.00019999467687723262, - "loss": 46.0, - "step": 20405 - }, - { - "epoch": 3.286243407544587, - "grad_norm": 0.0013060332275927067, - "learning_rate": 0.00019999467635522172, - "loss": 46.0, - "step": 20406 - }, - { - "epoch": 3.2864044446233747, - "grad_norm": 0.003149711061269045, - "learning_rate": 0.00019999467583318526, - "loss": 46.0, - "step": 20407 - }, - { - "epoch": 3.286565481702162, - "grad_norm": 0.000669082859531045, - "learning_rate": 0.00019999467531112318, - "loss": 46.0, - "step": 20408 - }, - { - "epoch": 3.286726518780949, - "grad_norm": 0.004308716394007206, - "learning_rate": 0.00019999467478903552, - "loss": 46.0, - "step": 20409 - }, - { - "epoch": 3.2868875558597366, - "grad_norm": 0.0035340306349098682, - "learning_rate": 0.0001999946742669222, - "loss": 46.0, - "step": 20410 - }, - { - "epoch": 3.287048592938524, - "grad_norm": 0.008656601421535015, - "learning_rate": 0.00019999467374478338, - "loss": 46.0, - "step": 20411 - }, - { - "epoch": 3.2872096300173115, - "grad_norm": 0.004896919708698988, - "learning_rate": 0.00019999467322261892, - "loss": 46.0, - "step": 20412 - }, - { - "epoch": 3.287370667096099, - "grad_norm": 0.001092921826057136, - "learning_rate": 0.00019999467270042889, - "loss": 46.0, - "step": 20413 - }, - { - "epoch": 3.2875317041748864, - "grad_norm": 0.0023733729030936956, - "learning_rate": 0.00019999467217821323, - "loss": 46.0, - "step": 20414 - }, - { - "epoch": 3.287692741253674, - "grad_norm": 0.012586906552314758, - "learning_rate": 0.00019999467165597202, - "loss": 46.0, - "step": 20415 - }, - { - "epoch": 3.287853778332461, - "grad_norm": 0.00931586418300867, - "learning_rate": 0.0001999946711337052, - "loss": 46.0, - "step": 20416 - }, - { - "epoch": 3.2880148154112483, - "grad_norm": 0.00513248797506094, - "learning_rate": 0.00019999467061141278, - "loss": 46.0, - "step": 20417 - }, - { - "epoch": 3.2881758524900357, - "grad_norm": 0.0017419853247702122, - "learning_rate": 0.00019999467008909475, - "loss": 46.0, - "step": 20418 - }, - { - "epoch": 3.288336889568823, - "grad_norm": 0.0012986648362129927, - "learning_rate": 0.00019999466956675116, - "loss": 46.0, - "step": 20419 - }, - { - "epoch": 3.2884979266476106, - "grad_norm": 0.005398717243224382, - "learning_rate": 0.00019999466904438196, - "loss": 46.0, - "step": 20420 - }, - { - "epoch": 3.288658963726398, - "grad_norm": 0.0048441351391375065, - "learning_rate": 0.00019999466852198717, - "loss": 46.0, - "step": 20421 - }, - { - "epoch": 3.2888200008051856, - "grad_norm": 0.0017884616972878575, - "learning_rate": 0.0001999946679995668, - "loss": 46.0, - "step": 20422 - }, - { - "epoch": 3.288981037883973, - "grad_norm": 0.003438309533521533, - "learning_rate": 0.0001999946674771208, - "loss": 46.0, - "step": 20423 - }, - { - "epoch": 3.28914207496276, - "grad_norm": 0.010533241555094719, - "learning_rate": 0.00019999466695464925, - "loss": 46.0, - "step": 20424 - }, - { - "epoch": 3.2893031120415475, - "grad_norm": 0.0010520125506445765, - "learning_rate": 0.00019999466643215205, - "loss": 46.0, - "step": 20425 - }, - { - "epoch": 3.289464149120335, - "grad_norm": 0.010319353081285954, - "learning_rate": 0.00019999466590962933, - "loss": 46.0, - "step": 20426 - }, - { - "epoch": 3.2896251861991224, - "grad_norm": 0.0007083070231601596, - "learning_rate": 0.00019999466538708096, - "loss": 46.0, - "step": 20427 - }, - { - "epoch": 3.28978622327791, - "grad_norm": 0.005335061810910702, - "learning_rate": 0.00019999466486450703, - "loss": 46.0, - "step": 20428 - }, - { - "epoch": 3.2899472603566973, - "grad_norm": 0.0022226402070373297, - "learning_rate": 0.0001999946643419075, - "loss": 46.0, - "step": 20429 - }, - { - "epoch": 3.2901082974354843, - "grad_norm": 0.0023903241381049156, - "learning_rate": 0.00019999466381928236, - "loss": 46.0, - "step": 20430 - }, - { - "epoch": 3.2902693345142717, - "grad_norm": 0.0005466773873195052, - "learning_rate": 0.00019999466329663162, - "loss": 46.0, - "step": 20431 - }, - { - "epoch": 3.290430371593059, - "grad_norm": 0.0009620157652534544, - "learning_rate": 0.0001999946627739553, - "loss": 46.0, - "step": 20432 - }, - { - "epoch": 3.2905914086718466, - "grad_norm": 0.004091755487024784, - "learning_rate": 0.00019999466225125337, - "loss": 46.0, - "step": 20433 - }, - { - "epoch": 3.290752445750634, - "grad_norm": 0.002535921987146139, - "learning_rate": 0.00019999466172852587, - "loss": 46.0, - "step": 20434 - }, - { - "epoch": 3.2909134828294215, - "grad_norm": 0.0007435278384946287, - "learning_rate": 0.00019999466120577277, - "loss": 46.0, - "step": 20435 - }, - { - "epoch": 3.291074519908209, - "grad_norm": 0.001149357995018363, - "learning_rate": 0.0001999946606829941, - "loss": 46.0, - "step": 20436 - }, - { - "epoch": 3.2912355569869964, - "grad_norm": 0.003767345566302538, - "learning_rate": 0.0001999946601601898, - "loss": 46.0, - "step": 20437 - }, - { - "epoch": 3.291396594065784, - "grad_norm": 0.0013400643365457654, - "learning_rate": 0.0001999946596373599, - "loss": 46.0, - "step": 20438 - }, - { - "epoch": 3.291557631144571, - "grad_norm": 0.0023799403570592403, - "learning_rate": 0.00019999465911450445, - "loss": 46.0, - "step": 20439 - }, - { - "epoch": 3.2917186682233583, - "grad_norm": 0.0017399901989847422, - "learning_rate": 0.00019999465859162337, - "loss": 46.0, - "step": 20440 - }, - { - "epoch": 3.291879705302146, - "grad_norm": 0.0016450092662125826, - "learning_rate": 0.0001999946580687167, - "loss": 46.0, - "step": 20441 - }, - { - "epoch": 3.2920407423809332, - "grad_norm": 0.0027623374480754137, - "learning_rate": 0.00019999465754578445, - "loss": 46.0, - "step": 20442 - }, - { - "epoch": 3.2922017794597207, - "grad_norm": 0.009613106027245522, - "learning_rate": 0.0001999946570228266, - "loss": 46.0, - "step": 20443 - }, - { - "epoch": 3.292362816538508, - "grad_norm": 0.008135140873491764, - "learning_rate": 0.00019999465649984317, - "loss": 46.0, - "step": 20444 - }, - { - "epoch": 3.292523853617295, - "grad_norm": 0.009226481430232525, - "learning_rate": 0.00019999465597683413, - "loss": 46.0, - "step": 20445 - }, - { - "epoch": 3.2926848906960826, - "grad_norm": 0.002246605232357979, - "learning_rate": 0.00019999465545379952, - "loss": 46.0, - "step": 20446 - }, - { - "epoch": 3.29284592777487, - "grad_norm": 0.002180477837100625, - "learning_rate": 0.00019999465493073928, - "loss": 46.0, - "step": 20447 - }, - { - "epoch": 3.2930069648536575, - "grad_norm": 0.012579609639942646, - "learning_rate": 0.00019999465440765347, - "loss": 46.0, - "step": 20448 - }, - { - "epoch": 3.293168001932445, - "grad_norm": 0.001363204326480627, - "learning_rate": 0.00019999465388454205, - "loss": 46.0, - "step": 20449 - }, - { - "epoch": 3.2933290390112324, - "grad_norm": 0.0010487326653674245, - "learning_rate": 0.00019999465336140507, - "loss": 46.0, - "step": 20450 - }, - { - "epoch": 3.29349007609002, - "grad_norm": 0.002048888709396124, - "learning_rate": 0.00019999465283824245, - "loss": 46.0, - "step": 20451 - }, - { - "epoch": 3.2936511131688073, - "grad_norm": 0.007130644749850035, - "learning_rate": 0.00019999465231505426, - "loss": 46.0, - "step": 20452 - }, - { - "epoch": 3.2938121502475943, - "grad_norm": 0.0039036625530570745, - "learning_rate": 0.00019999465179184047, - "loss": 46.0, - "step": 20453 - }, - { - "epoch": 3.293973187326382, - "grad_norm": 0.0017658513970673084, - "learning_rate": 0.0001999946512686011, - "loss": 46.0, - "step": 20454 - }, - { - "epoch": 3.2941342244051692, - "grad_norm": 0.001026177080348134, - "learning_rate": 0.00019999465074533614, - "loss": 46.0, - "step": 20455 - }, - { - "epoch": 3.2942952614839567, - "grad_norm": 0.0037942954804748297, - "learning_rate": 0.00019999465022204558, - "loss": 46.0, - "step": 20456 - }, - { - "epoch": 3.294456298562744, - "grad_norm": 0.009627923369407654, - "learning_rate": 0.0001999946496987294, - "loss": 46.0, - "step": 20457 - }, - { - "epoch": 3.2946173356415316, - "grad_norm": 0.004906713031232357, - "learning_rate": 0.00019999464917538768, - "loss": 46.0, - "step": 20458 - }, - { - "epoch": 3.294778372720319, - "grad_norm": 0.0030307569541037083, - "learning_rate": 0.00019999464865202033, - "loss": 46.0, - "step": 20459 - }, - { - "epoch": 3.294939409799106, - "grad_norm": 0.005650836508721113, - "learning_rate": 0.00019999464812862737, - "loss": 46.0, - "step": 20460 - }, - { - "epoch": 3.2951004468778935, - "grad_norm": 0.0017931369366124272, - "learning_rate": 0.00019999464760520885, - "loss": 46.0, - "step": 20461 - }, - { - "epoch": 3.295261483956681, - "grad_norm": 0.0019347493071109056, - "learning_rate": 0.0001999946470817647, - "loss": 46.0, - "step": 20462 - }, - { - "epoch": 3.2954225210354684, - "grad_norm": 0.003170775016769767, - "learning_rate": 0.00019999464655829502, - "loss": 46.0, - "step": 20463 - }, - { - "epoch": 3.295583558114256, - "grad_norm": 0.0015064164763316512, - "learning_rate": 0.0001999946460347997, - "loss": 46.0, - "step": 20464 - }, - { - "epoch": 3.2957445951930433, - "grad_norm": 0.009030257351696491, - "learning_rate": 0.0001999946455112788, - "loss": 46.0, - "step": 20465 - }, - { - "epoch": 3.2959056322718308, - "grad_norm": 0.006091805640608072, - "learning_rate": 0.0001999946449877323, - "loss": 46.0, - "step": 20466 - }, - { - "epoch": 3.296066669350618, - "grad_norm": 0.001362030627205968, - "learning_rate": 0.0001999946444641602, - "loss": 46.0, - "step": 20467 - }, - { - "epoch": 3.296227706429405, - "grad_norm": 0.0007975163171067834, - "learning_rate": 0.00019999464394056254, - "loss": 46.0, - "step": 20468 - }, - { - "epoch": 3.2963887435081927, - "grad_norm": 0.0028633701149374247, - "learning_rate": 0.00019999464341693924, - "loss": 46.0, - "step": 20469 - }, - { - "epoch": 3.29654978058698, - "grad_norm": 0.0022130045108497143, - "learning_rate": 0.00019999464289329036, - "loss": 46.0, - "step": 20470 - }, - { - "epoch": 3.2967108176657676, - "grad_norm": 0.002059884835034609, - "learning_rate": 0.0001999946423696159, - "loss": 46.0, - "step": 20471 - }, - { - "epoch": 3.296871854744555, - "grad_norm": 0.0026290572714060545, - "learning_rate": 0.00019999464184591585, - "loss": 46.0, - "step": 20472 - }, - { - "epoch": 3.2970328918233425, - "grad_norm": 0.0024627484381198883, - "learning_rate": 0.0001999946413221902, - "loss": 46.0, - "step": 20473 - }, - { - "epoch": 3.2971939289021295, - "grad_norm": 0.0047419723123312, - "learning_rate": 0.00019999464079843894, - "loss": 46.0, - "step": 20474 - }, - { - "epoch": 3.297354965980917, - "grad_norm": 0.0022318412084132433, - "learning_rate": 0.00019999464027466212, - "loss": 46.0, - "step": 20475 - }, - { - "epoch": 3.2975160030597044, - "grad_norm": 0.012493956834077835, - "learning_rate": 0.00019999463975085968, - "loss": 46.0, - "step": 20476 - }, - { - "epoch": 3.297677040138492, - "grad_norm": 0.001505105523392558, - "learning_rate": 0.00019999463922703163, - "loss": 46.0, - "step": 20477 - }, - { - "epoch": 3.2978380772172793, - "grad_norm": 0.0020163014996796846, - "learning_rate": 0.00019999463870317802, - "loss": 46.0, - "step": 20478 - }, - { - "epoch": 3.2979991142960667, - "grad_norm": 0.004755750764161348, - "learning_rate": 0.0001999946381792988, - "loss": 46.0, - "step": 20479 - }, - { - "epoch": 3.298160151374854, - "grad_norm": 0.0021764670964330435, - "learning_rate": 0.000199994637655394, - "loss": 46.0, - "step": 20480 - }, - { - "epoch": 3.2983211884536416, - "grad_norm": 0.002002222230657935, - "learning_rate": 0.0001999946371314636, - "loss": 46.0, - "step": 20481 - }, - { - "epoch": 3.298482225532429, - "grad_norm": 0.006661428604274988, - "learning_rate": 0.00019999463660750762, - "loss": 46.0, - "step": 20482 - }, - { - "epoch": 3.298643262611216, - "grad_norm": 0.007542894221842289, - "learning_rate": 0.000199994636083526, - "loss": 46.0, - "step": 20483 - }, - { - "epoch": 3.2988042996900035, - "grad_norm": 0.015227694995701313, - "learning_rate": 0.00019999463555951883, - "loss": 46.0, - "step": 20484 - }, - { - "epoch": 3.298965336768791, - "grad_norm": 0.012763544917106628, - "learning_rate": 0.00019999463503548605, - "loss": 46.0, - "step": 20485 - }, - { - "epoch": 3.2991263738475785, - "grad_norm": 0.0019030938856303692, - "learning_rate": 0.0001999946345114277, - "loss": 46.0, - "step": 20486 - }, - { - "epoch": 3.299287410926366, - "grad_norm": 0.003876202739775181, - "learning_rate": 0.00019999463398734374, - "loss": 46.0, - "step": 20487 - }, - { - "epoch": 3.2994484480051534, - "grad_norm": 0.002691423986107111, - "learning_rate": 0.00019999463346323418, - "loss": 46.0, - "step": 20488 - }, - { - "epoch": 3.2996094850839404, - "grad_norm": 0.0016912458231672645, - "learning_rate": 0.00019999463293909902, - "loss": 46.0, - "step": 20489 - }, - { - "epoch": 3.299770522162728, - "grad_norm": 0.0032249304931610823, - "learning_rate": 0.00019999463241493828, - "loss": 46.0, - "step": 20490 - }, - { - "epoch": 3.2999315592415153, - "grad_norm": 0.004676357842981815, - "learning_rate": 0.00019999463189075193, - "loss": 46.0, - "step": 20491 - }, - { - "epoch": 3.3000925963203027, - "grad_norm": 0.012644380331039429, - "learning_rate": 0.00019999463136654, - "loss": 46.0, - "step": 20492 - }, - { - "epoch": 3.30025363339909, - "grad_norm": 0.002780510112643242, - "learning_rate": 0.0001999946308423025, - "loss": 46.0, - "step": 20493 - }, - { - "epoch": 3.3004146704778776, - "grad_norm": 0.005221215542405844, - "learning_rate": 0.00019999463031803938, - "loss": 46.0, - "step": 20494 - }, - { - "epoch": 3.300575707556665, - "grad_norm": 0.0029816976748406887, - "learning_rate": 0.00019999462979375068, - "loss": 46.0, - "step": 20495 - }, - { - "epoch": 3.3007367446354525, - "grad_norm": 0.006407435983419418, - "learning_rate": 0.00019999462926943636, - "loss": 46.0, - "step": 20496 - }, - { - "epoch": 3.3008977817142395, - "grad_norm": 0.006033220328390598, - "learning_rate": 0.00019999462874509646, - "loss": 46.0, - "step": 20497 - }, - { - "epoch": 3.301058818793027, - "grad_norm": 0.008315898478031158, - "learning_rate": 0.00019999462822073097, - "loss": 46.0, - "step": 20498 - }, - { - "epoch": 3.3012198558718144, - "grad_norm": 0.004438674543052912, - "learning_rate": 0.0001999946276963399, - "loss": 46.0, - "step": 20499 - }, - { - "epoch": 3.301380892950602, - "grad_norm": 0.0006123550701886415, - "learning_rate": 0.0001999946271719232, - "loss": 46.0, - "step": 20500 - }, - { - "epoch": 3.3015419300293893, - "grad_norm": 0.0035484223626554012, - "learning_rate": 0.00019999462664748095, - "loss": 46.0, - "step": 20501 - }, - { - "epoch": 3.301702967108177, - "grad_norm": 0.0021711597219109535, - "learning_rate": 0.00019999462612301308, - "loss": 46.0, - "step": 20502 - }, - { - "epoch": 3.3018640041869642, - "grad_norm": 0.0013619862729683518, - "learning_rate": 0.00019999462559851963, - "loss": 46.0, - "step": 20503 - }, - { - "epoch": 3.3020250412657512, - "grad_norm": 0.004725265316665173, - "learning_rate": 0.00019999462507400059, - "loss": 46.0, - "step": 20504 - }, - { - "epoch": 3.3021860783445387, - "grad_norm": 0.011984646320343018, - "learning_rate": 0.00019999462454945593, - "loss": 46.0, - "step": 20505 - }, - { - "epoch": 3.302347115423326, - "grad_norm": 0.0004883562796749175, - "learning_rate": 0.0001999946240248857, - "loss": 46.0, - "step": 20506 - }, - { - "epoch": 3.3025081525021136, - "grad_norm": 0.002777355955913663, - "learning_rate": 0.00019999462350028986, - "loss": 46.0, - "step": 20507 - }, - { - "epoch": 3.302669189580901, - "grad_norm": 0.01274262834340334, - "learning_rate": 0.00019999462297566842, - "loss": 46.0, - "step": 20508 - }, - { - "epoch": 3.3028302266596885, - "grad_norm": 0.0017357562901452184, - "learning_rate": 0.0001999946224510214, - "loss": 46.0, - "step": 20509 - }, - { - "epoch": 3.302991263738476, - "grad_norm": 0.012472463771700859, - "learning_rate": 0.0001999946219263488, - "loss": 46.0, - "step": 20510 - }, - { - "epoch": 3.3031523008172634, - "grad_norm": 0.0036122689489275217, - "learning_rate": 0.0001999946214016506, - "loss": 46.0, - "step": 20511 - }, - { - "epoch": 3.3033133378960504, - "grad_norm": 0.0019842092879116535, - "learning_rate": 0.00019999462087692677, - "loss": 46.0, - "step": 20512 - }, - { - "epoch": 3.303474374974838, - "grad_norm": 0.0065961251966655254, - "learning_rate": 0.0001999946203521774, - "loss": 46.0, - "step": 20513 - }, - { - "epoch": 3.3036354120536253, - "grad_norm": 0.01669849455356598, - "learning_rate": 0.0001999946198274024, - "loss": 46.0, - "step": 20514 - }, - { - "epoch": 3.3037964491324128, - "grad_norm": 0.0029924034606665373, - "learning_rate": 0.00019999461930260185, - "loss": 46.0, - "step": 20515 - }, - { - "epoch": 3.3039574862112002, - "grad_norm": 0.0035890196450054646, - "learning_rate": 0.00019999461877777565, - "loss": 46.0, - "step": 20516 - }, - { - "epoch": 3.3041185232899877, - "grad_norm": 0.008518245071172714, - "learning_rate": 0.0001999946182529239, - "loss": 46.0, - "step": 20517 - }, - { - "epoch": 3.3042795603687747, - "grad_norm": 0.005013282876461744, - "learning_rate": 0.00019999461772804653, - "loss": 46.0, - "step": 20518 - }, - { - "epoch": 3.304440597447562, - "grad_norm": 0.005528761073946953, - "learning_rate": 0.0001999946172031436, - "loss": 46.0, - "step": 20519 - }, - { - "epoch": 3.3046016345263496, - "grad_norm": 0.0031828877981752157, - "learning_rate": 0.00019999461667821505, - "loss": 46.0, - "step": 20520 - }, - { - "epoch": 3.304762671605137, - "grad_norm": 0.00280867051333189, - "learning_rate": 0.00019999461615326087, - "loss": 46.0, - "step": 20521 - }, - { - "epoch": 3.3049237086839245, - "grad_norm": 0.007805526256561279, - "learning_rate": 0.00019999461562828118, - "loss": 46.0, - "step": 20522 - }, - { - "epoch": 3.305084745762712, - "grad_norm": 0.004497086629271507, - "learning_rate": 0.00019999461510327585, - "loss": 46.0, - "step": 20523 - }, - { - "epoch": 3.3052457828414994, - "grad_norm": 0.0031976636964827776, - "learning_rate": 0.0001999946145782449, - "loss": 46.0, - "step": 20524 - }, - { - "epoch": 3.305406819920287, - "grad_norm": 0.0019408968510106206, - "learning_rate": 0.0001999946140531884, - "loss": 46.0, - "step": 20525 - }, - { - "epoch": 3.305567856999074, - "grad_norm": 0.004233045503497124, - "learning_rate": 0.0001999946135281063, - "loss": 46.0, - "step": 20526 - }, - { - "epoch": 3.3057288940778613, - "grad_norm": 0.0015091991517692804, - "learning_rate": 0.00019999461300299856, - "loss": 46.0, - "step": 20527 - }, - { - "epoch": 3.3058899311566488, - "grad_norm": 0.008871247991919518, - "learning_rate": 0.0001999946124778653, - "loss": 46.0, - "step": 20528 - }, - { - "epoch": 3.306050968235436, - "grad_norm": 0.0023937553633004427, - "learning_rate": 0.0001999946119527064, - "loss": 46.0, - "step": 20529 - }, - { - "epoch": 3.3062120053142237, - "grad_norm": 0.0022780275903642178, - "learning_rate": 0.00019999461142752192, - "loss": 46.0, - "step": 20530 - }, - { - "epoch": 3.306373042393011, - "grad_norm": 0.00475579546764493, - "learning_rate": 0.0001999946109023118, - "loss": 46.0, - "step": 20531 - }, - { - "epoch": 3.3065340794717986, - "grad_norm": 0.0043720765970647335, - "learning_rate": 0.00019999461037707616, - "loss": 46.0, - "step": 20532 - }, - { - "epoch": 3.3066951165505856, - "grad_norm": 0.000885319896042347, - "learning_rate": 0.0001999946098518149, - "loss": 46.0, - "step": 20533 - }, - { - "epoch": 3.306856153629373, - "grad_norm": 0.0032566478475928307, - "learning_rate": 0.00019999460932652806, - "loss": 46.0, - "step": 20534 - }, - { - "epoch": 3.3070171907081605, - "grad_norm": 0.003298891708254814, - "learning_rate": 0.0001999946088012156, - "loss": 46.0, - "step": 20535 - }, - { - "epoch": 3.307178227786948, - "grad_norm": 0.0016318131238222122, - "learning_rate": 0.00019999460827587755, - "loss": 46.0, - "step": 20536 - }, - { - "epoch": 3.3073392648657354, - "grad_norm": 0.00962746050208807, - "learning_rate": 0.00019999460775051392, - "loss": 46.0, - "step": 20537 - }, - { - "epoch": 3.307500301944523, - "grad_norm": 0.003954700194299221, - "learning_rate": 0.0001999946072251247, - "loss": 46.0, - "step": 20538 - }, - { - "epoch": 3.3076613390233103, - "grad_norm": 0.00856354646384716, - "learning_rate": 0.00019999460669970986, - "loss": 46.0, - "step": 20539 - }, - { - "epoch": 3.3078223761020977, - "grad_norm": 0.0009719360386952758, - "learning_rate": 0.00019999460617426947, - "loss": 46.0, - "step": 20540 - }, - { - "epoch": 3.3079834131808847, - "grad_norm": 0.01439482718706131, - "learning_rate": 0.00019999460564880343, - "loss": 46.0, - "step": 20541 - }, - { - "epoch": 3.308144450259672, - "grad_norm": 0.004348213318735361, - "learning_rate": 0.00019999460512331183, - "loss": 46.0, - "step": 20542 - }, - { - "epoch": 3.3083054873384596, - "grad_norm": 0.002071383874863386, - "learning_rate": 0.00019999460459779465, - "loss": 46.0, - "step": 20543 - }, - { - "epoch": 3.308466524417247, - "grad_norm": 0.0018467125482857227, - "learning_rate": 0.00019999460407225182, - "loss": 46.0, - "step": 20544 - }, - { - "epoch": 3.3086275614960345, - "grad_norm": 0.002804292133077979, - "learning_rate": 0.00019999460354668344, - "loss": 46.0, - "step": 20545 - }, - { - "epoch": 3.308788598574822, - "grad_norm": 0.000851544551551342, - "learning_rate": 0.00019999460302108947, - "loss": 46.0, - "step": 20546 - }, - { - "epoch": 3.308949635653609, - "grad_norm": 0.007816920056939125, - "learning_rate": 0.0001999946024954699, - "loss": 46.0, - "step": 20547 - }, - { - "epoch": 3.3091106727323965, - "grad_norm": 0.001477054669521749, - "learning_rate": 0.00019999460196982476, - "loss": 46.0, - "step": 20548 - }, - { - "epoch": 3.309271709811184, - "grad_norm": 0.001662776106968522, - "learning_rate": 0.00019999460144415397, - "loss": 46.0, - "step": 20549 - }, - { - "epoch": 3.3094327468899714, - "grad_norm": 0.006226846016943455, - "learning_rate": 0.00019999460091845762, - "loss": 46.0, - "step": 20550 - }, - { - "epoch": 3.309593783968759, - "grad_norm": 0.001959709683433175, - "learning_rate": 0.0001999946003927357, - "loss": 46.0, - "step": 20551 - }, - { - "epoch": 3.3097548210475463, - "grad_norm": 0.0023772441782057285, - "learning_rate": 0.00019999459986698814, - "loss": 46.0, - "step": 20552 - }, - { - "epoch": 3.3099158581263337, - "grad_norm": 0.0011946274898946285, - "learning_rate": 0.00019999459934121503, - "loss": 46.0, - "step": 20553 - }, - { - "epoch": 3.310076895205121, - "grad_norm": 0.006707313004881144, - "learning_rate": 0.0001999945988154163, - "loss": 46.0, - "step": 20554 - }, - { - "epoch": 3.3102379322839086, - "grad_norm": 0.006173006258904934, - "learning_rate": 0.00019999459828959196, - "loss": 46.0, - "step": 20555 - }, - { - "epoch": 3.3103989693626956, - "grad_norm": 0.004015904385596514, - "learning_rate": 0.00019999459776374207, - "loss": 46.0, - "step": 20556 - }, - { - "epoch": 3.310560006441483, - "grad_norm": 0.00606947997584939, - "learning_rate": 0.00019999459723786655, - "loss": 46.0, - "step": 20557 - }, - { - "epoch": 3.3107210435202705, - "grad_norm": 0.009014831855893135, - "learning_rate": 0.00019999459671196545, - "loss": 46.0, - "step": 20558 - }, - { - "epoch": 3.310882080599058, - "grad_norm": 0.005148408003151417, - "learning_rate": 0.00019999459618603874, - "loss": 46.0, - "step": 20559 - }, - { - "epoch": 3.3110431176778454, - "grad_norm": 0.0055821118876338005, - "learning_rate": 0.00019999459566008646, - "loss": 46.0, - "step": 20560 - }, - { - "epoch": 3.311204154756633, - "grad_norm": 0.0021294988691806793, - "learning_rate": 0.00019999459513410858, - "loss": 46.0, - "step": 20561 - }, - { - "epoch": 3.31136519183542, - "grad_norm": 0.0019224905408918858, - "learning_rate": 0.00019999459460810513, - "loss": 46.0, - "step": 20562 - }, - { - "epoch": 3.3115262289142073, - "grad_norm": 0.005129345692694187, - "learning_rate": 0.00019999459408207604, - "loss": 46.0, - "step": 20563 - }, - { - "epoch": 3.311687265992995, - "grad_norm": 0.010406536050140858, - "learning_rate": 0.00019999459355602136, - "loss": 46.0, - "step": 20564 - }, - { - "epoch": 3.3118483030717822, - "grad_norm": 0.0063169002532958984, - "learning_rate": 0.00019999459302994112, - "loss": 46.0, - "step": 20565 - }, - { - "epoch": 3.3120093401505697, - "grad_norm": 0.004996041301637888, - "learning_rate": 0.0001999945925038353, - "loss": 46.0, - "step": 20566 - }, - { - "epoch": 3.312170377229357, - "grad_norm": 0.00403920654207468, - "learning_rate": 0.00019999459197770386, - "loss": 46.0, - "step": 20567 - }, - { - "epoch": 3.3123314143081446, - "grad_norm": 0.0018020266434177756, - "learning_rate": 0.00019999459145154683, - "loss": 46.0, - "step": 20568 - }, - { - "epoch": 3.312492451386932, - "grad_norm": 0.0040070051327347755, - "learning_rate": 0.00019999459092536417, - "loss": 46.0, - "step": 20569 - }, - { - "epoch": 3.312653488465719, - "grad_norm": 0.005236563738435507, - "learning_rate": 0.00019999459039915597, - "loss": 46.0, - "step": 20570 - }, - { - "epoch": 3.3128145255445065, - "grad_norm": 0.003927438519895077, - "learning_rate": 0.00019999458987292215, - "loss": 46.0, - "step": 20571 - }, - { - "epoch": 3.312975562623294, - "grad_norm": 0.014588958583772182, - "learning_rate": 0.00019999458934666272, - "loss": 46.0, - "step": 20572 - }, - { - "epoch": 3.3131365997020814, - "grad_norm": 0.002326664514839649, - "learning_rate": 0.0001999945888203777, - "loss": 46.0, - "step": 20573 - }, - { - "epoch": 3.313297636780869, - "grad_norm": 0.0018098813015967607, - "learning_rate": 0.00019999458829406713, - "loss": 46.0, - "step": 20574 - }, - { - "epoch": 3.3134586738596563, - "grad_norm": 0.003908189013600349, - "learning_rate": 0.00019999458776773094, - "loss": 46.0, - "step": 20575 - }, - { - "epoch": 3.3136197109384438, - "grad_norm": 0.015249301679432392, - "learning_rate": 0.00019999458724136916, - "loss": 46.0, - "step": 20576 - }, - { - "epoch": 3.3137807480172308, - "grad_norm": 0.0030181773472577333, - "learning_rate": 0.0001999945867149818, - "loss": 46.0, - "step": 20577 - }, - { - "epoch": 3.3139417850960182, - "grad_norm": 0.0014944358263164759, - "learning_rate": 0.00019999458618856882, - "loss": 46.0, - "step": 20578 - }, - { - "epoch": 3.3141028221748057, - "grad_norm": 0.00282674515619874, - "learning_rate": 0.00019999458566213025, - "loss": 46.0, - "step": 20579 - }, - { - "epoch": 3.314263859253593, - "grad_norm": 0.0022494960576295853, - "learning_rate": 0.0001999945851356661, - "loss": 46.0, - "step": 20580 - }, - { - "epoch": 3.3144248963323806, - "grad_norm": 0.0031505771912634373, - "learning_rate": 0.00019999458460917636, - "loss": 46.0, - "step": 20581 - }, - { - "epoch": 3.314585933411168, - "grad_norm": 0.002489285310730338, - "learning_rate": 0.000199994584082661, - "loss": 46.0, - "step": 20582 - }, - { - "epoch": 3.3147469704899555, - "grad_norm": 0.005234418902546167, - "learning_rate": 0.00019999458355612006, - "loss": 46.0, - "step": 20583 - }, - { - "epoch": 3.314908007568743, - "grad_norm": 0.0018298632930964231, - "learning_rate": 0.00019999458302955353, - "loss": 46.0, - "step": 20584 - }, - { - "epoch": 3.31506904464753, - "grad_norm": 0.004291005432605743, - "learning_rate": 0.00019999458250296142, - "loss": 46.0, - "step": 20585 - }, - { - "epoch": 3.3152300817263174, - "grad_norm": 0.002526351949200034, - "learning_rate": 0.0001999945819763437, - "loss": 46.0, - "step": 20586 - }, - { - "epoch": 3.315391118805105, - "grad_norm": 0.004323775414377451, - "learning_rate": 0.00019999458144970037, - "loss": 46.0, - "step": 20587 - }, - { - "epoch": 3.3155521558838923, - "grad_norm": 0.003098824294283986, - "learning_rate": 0.00019999458092303146, - "loss": 46.0, - "step": 20588 - }, - { - "epoch": 3.3157131929626797, - "grad_norm": 0.003980815876275301, - "learning_rate": 0.00019999458039633697, - "loss": 46.0, - "step": 20589 - }, - { - "epoch": 3.315874230041467, - "grad_norm": 0.0017970160115510225, - "learning_rate": 0.0001999945798696169, - "loss": 46.0, - "step": 20590 - }, - { - "epoch": 3.316035267120254, - "grad_norm": 0.0006784077850170434, - "learning_rate": 0.0001999945793428712, - "loss": 46.0, - "step": 20591 - }, - { - "epoch": 3.3161963041990417, - "grad_norm": 0.004842281807214022, - "learning_rate": 0.00019999457881609992, - "loss": 46.0, - "step": 20592 - }, - { - "epoch": 3.316357341277829, - "grad_norm": 0.0013882593484595418, - "learning_rate": 0.00019999457828930305, - "loss": 46.0, - "step": 20593 - }, - { - "epoch": 3.3165183783566166, - "grad_norm": 0.0033856176305562258, - "learning_rate": 0.0001999945777624806, - "loss": 46.0, - "step": 20594 - }, - { - "epoch": 3.316679415435404, - "grad_norm": 0.006645835470408201, - "learning_rate": 0.00019999457723563253, - "loss": 46.0, - "step": 20595 - }, - { - "epoch": 3.3168404525141915, - "grad_norm": 0.008744263090193272, - "learning_rate": 0.00019999457670875887, - "loss": 46.0, - "step": 20596 - }, - { - "epoch": 3.317001489592979, - "grad_norm": 0.004656627308577299, - "learning_rate": 0.00019999457618185963, - "loss": 46.0, - "step": 20597 - }, - { - "epoch": 3.3171625266717664, - "grad_norm": 0.0016099194763228297, - "learning_rate": 0.0001999945756549348, - "loss": 46.0, - "step": 20598 - }, - { - "epoch": 3.3173235637505534, - "grad_norm": 0.0004129723529331386, - "learning_rate": 0.00019999457512798435, - "loss": 46.0, - "step": 20599 - }, - { - "epoch": 3.317484600829341, - "grad_norm": 0.002357868477702141, - "learning_rate": 0.00019999457460100834, - "loss": 46.0, - "step": 20600 - }, - { - "epoch": 3.3176456379081283, - "grad_norm": 0.0013515668688341975, - "learning_rate": 0.0001999945740740067, - "loss": 46.0, - "step": 20601 - }, - { - "epoch": 3.3178066749869157, - "grad_norm": 0.0037109958939254284, - "learning_rate": 0.0001999945735469795, - "loss": 46.0, - "step": 20602 - }, - { - "epoch": 3.317967712065703, - "grad_norm": 0.010511173866689205, - "learning_rate": 0.00019999457301992667, - "loss": 46.0, - "step": 20603 - }, - { - "epoch": 3.3181287491444906, - "grad_norm": 0.002971078734844923, - "learning_rate": 0.0001999945724928483, - "loss": 46.0, - "step": 20604 - }, - { - "epoch": 3.318289786223278, - "grad_norm": 0.0021232753060758114, - "learning_rate": 0.0001999945719657443, - "loss": 46.0, - "step": 20605 - }, - { - "epoch": 3.318450823302065, - "grad_norm": 0.005472460761666298, - "learning_rate": 0.0001999945714386147, - "loss": 46.0, - "step": 20606 - }, - { - "epoch": 3.3186118603808525, - "grad_norm": 0.002818155800923705, - "learning_rate": 0.00019999457091145954, - "loss": 46.0, - "step": 20607 - }, - { - "epoch": 3.31877289745964, - "grad_norm": 0.002014744793996215, - "learning_rate": 0.00019999457038427876, - "loss": 46.0, - "step": 20608 - }, - { - "epoch": 3.3189339345384274, - "grad_norm": 0.003153668250888586, - "learning_rate": 0.0001999945698570724, - "loss": 46.0, - "step": 20609 - }, - { - "epoch": 3.319094971617215, - "grad_norm": 0.0007961684023030102, - "learning_rate": 0.00019999456932984043, - "loss": 46.0, - "step": 20610 - }, - { - "epoch": 3.3192560086960023, - "grad_norm": 0.0024413957726210356, - "learning_rate": 0.00019999456880258288, - "loss": 46.0, - "step": 20611 - }, - { - "epoch": 3.31941704577479, - "grad_norm": 0.004205880220979452, - "learning_rate": 0.00019999456827529972, - "loss": 46.0, - "step": 20612 - }, - { - "epoch": 3.3195780828535772, - "grad_norm": 0.0039676483720541, - "learning_rate": 0.00019999456774799098, - "loss": 46.0, - "step": 20613 - }, - { - "epoch": 3.3197391199323643, - "grad_norm": 0.0021052660886198282, - "learning_rate": 0.00019999456722065664, - "loss": 46.0, - "step": 20614 - }, - { - "epoch": 3.3199001570111517, - "grad_norm": 0.003295567352324724, - "learning_rate": 0.00019999456669329672, - "loss": 46.0, - "step": 20615 - }, - { - "epoch": 3.320061194089939, - "grad_norm": 0.0043800389394164085, - "learning_rate": 0.0001999945661659112, - "loss": 46.0, - "step": 20616 - }, - { - "epoch": 3.3202222311687266, - "grad_norm": 0.004360324703156948, - "learning_rate": 0.0001999945656385001, - "loss": 46.0, - "step": 20617 - }, - { - "epoch": 3.320383268247514, - "grad_norm": 0.002035733312368393, - "learning_rate": 0.00019999456511106338, - "loss": 46.0, - "step": 20618 - }, - { - "epoch": 3.3205443053263015, - "grad_norm": 0.0013910542475059628, - "learning_rate": 0.00019999456458360109, - "loss": 46.0, - "step": 20619 - }, - { - "epoch": 3.3207053424050885, - "grad_norm": 0.0024826517328619957, - "learning_rate": 0.00019999456405611317, - "loss": 46.0, - "step": 20620 - }, - { - "epoch": 3.320866379483876, - "grad_norm": 0.00580619927495718, - "learning_rate": 0.00019999456352859968, - "loss": 46.0, - "step": 20621 - }, - { - "epoch": 3.3210274165626634, - "grad_norm": 0.001343421288765967, - "learning_rate": 0.00019999456300106062, - "loss": 46.0, - "step": 20622 - }, - { - "epoch": 3.321188453641451, - "grad_norm": 0.0010835020802915096, - "learning_rate": 0.00019999456247349592, - "loss": 46.0, - "step": 20623 - }, - { - "epoch": 3.3213494907202383, - "grad_norm": 0.002239476889371872, - "learning_rate": 0.00019999456194590566, - "loss": 46.0, - "step": 20624 - }, - { - "epoch": 3.321510527799026, - "grad_norm": 0.0007758995052427053, - "learning_rate": 0.00019999456141828978, - "loss": 46.0, - "step": 20625 - }, - { - "epoch": 3.3216715648778132, - "grad_norm": 0.002318687504157424, - "learning_rate": 0.00019999456089064835, - "loss": 46.0, - "step": 20626 - }, - { - "epoch": 3.3218326019566007, - "grad_norm": 0.003191697411239147, - "learning_rate": 0.00019999456036298127, - "loss": 46.0, - "step": 20627 - }, - { - "epoch": 3.321993639035388, - "grad_norm": 0.0048633115366101265, - "learning_rate": 0.00019999455983528864, - "loss": 46.0, - "step": 20628 - }, - { - "epoch": 3.322154676114175, - "grad_norm": 0.004585013259202242, - "learning_rate": 0.00019999455930757042, - "loss": 46.0, - "step": 20629 - }, - { - "epoch": 3.3223157131929626, - "grad_norm": 0.002711718901991844, - "learning_rate": 0.00019999455877982658, - "loss": 46.0, - "step": 20630 - }, - { - "epoch": 3.32247675027175, - "grad_norm": 0.0012398186372593045, - "learning_rate": 0.00019999455825205715, - "loss": 46.0, - "step": 20631 - }, - { - "epoch": 3.3226377873505375, - "grad_norm": 0.0022244194988161325, - "learning_rate": 0.00019999455772426212, - "loss": 46.0, - "step": 20632 - }, - { - "epoch": 3.322798824429325, - "grad_norm": 0.000605227192863822, - "learning_rate": 0.00019999455719644154, - "loss": 46.0, - "step": 20633 - }, - { - "epoch": 3.3229598615081124, - "grad_norm": 0.013281482271850109, - "learning_rate": 0.00019999455666859533, - "loss": 46.0, - "step": 20634 - }, - { - "epoch": 3.3231208985868994, - "grad_norm": 0.003237626515328884, - "learning_rate": 0.00019999455614072353, - "loss": 46.0, - "step": 20635 - }, - { - "epoch": 3.323281935665687, - "grad_norm": 0.0018632387509569526, - "learning_rate": 0.00019999455561282612, - "loss": 46.0, - "step": 20636 - }, - { - "epoch": 3.3234429727444743, - "grad_norm": 0.0033254828304052353, - "learning_rate": 0.00019999455508490314, - "loss": 46.0, - "step": 20637 - }, - { - "epoch": 3.3236040098232618, - "grad_norm": 0.0015562031185254455, - "learning_rate": 0.00019999455455695458, - "loss": 46.0, - "step": 20638 - }, - { - "epoch": 3.323765046902049, - "grad_norm": 0.0019269655458629131, - "learning_rate": 0.0001999945540289804, - "loss": 46.0, - "step": 20639 - }, - { - "epoch": 3.3239260839808367, - "grad_norm": 0.004350072238594294, - "learning_rate": 0.00019999455350098061, - "loss": 46.0, - "step": 20640 - }, - { - "epoch": 3.324087121059624, - "grad_norm": 0.004888691008090973, - "learning_rate": 0.00019999455297295526, - "loss": 46.0, - "step": 20641 - }, - { - "epoch": 3.3242481581384116, - "grad_norm": 0.003112856298685074, - "learning_rate": 0.00019999455244490433, - "loss": 46.0, - "step": 20642 - }, - { - "epoch": 3.3244091952171986, - "grad_norm": 0.003854363923892379, - "learning_rate": 0.00019999455191682777, - "loss": 46.0, - "step": 20643 - }, - { - "epoch": 3.324570232295986, - "grad_norm": 0.010935456492006779, - "learning_rate": 0.00019999455138872564, - "loss": 46.0, - "step": 20644 - }, - { - "epoch": 3.3247312693747735, - "grad_norm": 0.004687285982072353, - "learning_rate": 0.0001999945508605979, - "loss": 46.0, - "step": 20645 - }, - { - "epoch": 3.324892306453561, - "grad_norm": 0.001487278612330556, - "learning_rate": 0.00019999455033244457, - "loss": 46.0, - "step": 20646 - }, - { - "epoch": 3.3250533435323484, - "grad_norm": 0.0009150960249826312, - "learning_rate": 0.00019999454980426564, - "loss": 46.0, - "step": 20647 - }, - { - "epoch": 3.325214380611136, - "grad_norm": 0.0037873967085033655, - "learning_rate": 0.00019999454927606113, - "loss": 46.0, - "step": 20648 - }, - { - "epoch": 3.3253754176899233, - "grad_norm": 0.0072565763257443905, - "learning_rate": 0.00019999454874783103, - "loss": 46.0, - "step": 20649 - }, - { - "epoch": 3.3255364547687103, - "grad_norm": 0.0011376062175258994, - "learning_rate": 0.00019999454821957536, - "loss": 46.0, - "step": 20650 - }, - { - "epoch": 3.3256974918474977, - "grad_norm": 0.0016701099229976535, - "learning_rate": 0.00019999454769129403, - "loss": 46.0, - "step": 20651 - }, - { - "epoch": 3.325858528926285, - "grad_norm": 0.000620406586676836, - "learning_rate": 0.00019999454716298714, - "loss": 46.0, - "step": 20652 - }, - { - "epoch": 3.3260195660050726, - "grad_norm": 0.006020907778292894, - "learning_rate": 0.00019999454663465467, - "loss": 46.0, - "step": 20653 - }, - { - "epoch": 3.32618060308386, - "grad_norm": 0.003065197728574276, - "learning_rate": 0.0001999945461062966, - "loss": 46.0, - "step": 20654 - }, - { - "epoch": 3.3263416401626476, - "grad_norm": 0.004784727469086647, - "learning_rate": 0.00019999454557791292, - "loss": 46.0, - "step": 20655 - }, - { - "epoch": 3.326502677241435, - "grad_norm": 0.0049361675046384335, - "learning_rate": 0.00019999454504950368, - "loss": 46.0, - "step": 20656 - }, - { - "epoch": 3.3266637143202225, - "grad_norm": 0.002360751386731863, - "learning_rate": 0.00019999454452106883, - "loss": 46.0, - "step": 20657 - }, - { - "epoch": 3.3268247513990095, - "grad_norm": 0.005002843681722879, - "learning_rate": 0.00019999454399260836, - "loss": 46.0, - "step": 20658 - }, - { - "epoch": 3.326985788477797, - "grad_norm": 0.005297200754284859, - "learning_rate": 0.00019999454346412233, - "loss": 46.0, - "step": 20659 - }, - { - "epoch": 3.3271468255565844, - "grad_norm": 0.004770212806761265, - "learning_rate": 0.00019999454293561072, - "loss": 46.0, - "step": 20660 - }, - { - "epoch": 3.327307862635372, - "grad_norm": 0.0026987900491803885, - "learning_rate": 0.00019999454240707346, - "loss": 46.0, - "step": 20661 - }, - { - "epoch": 3.3274688997141593, - "grad_norm": 0.0032691967207938433, - "learning_rate": 0.00019999454187851065, - "loss": 46.0, - "step": 20662 - }, - { - "epoch": 3.3276299367929467, - "grad_norm": 0.0004995659692212939, - "learning_rate": 0.00019999454134992224, - "loss": 46.0, - "step": 20663 - }, - { - "epoch": 3.3277909738717337, - "grad_norm": 0.005385675933212042, - "learning_rate": 0.00019999454082130822, - "loss": 46.0, - "step": 20664 - }, - { - "epoch": 3.327952010950521, - "grad_norm": 0.0010919248452410102, - "learning_rate": 0.00019999454029266862, - "loss": 46.0, - "step": 20665 - }, - { - "epoch": 3.3281130480293086, - "grad_norm": 0.0014182814629748464, - "learning_rate": 0.00019999453976400343, - "loss": 46.0, - "step": 20666 - }, - { - "epoch": 3.328274085108096, - "grad_norm": 0.0022046626545488834, - "learning_rate": 0.00019999453923531265, - "loss": 46.0, - "step": 20667 - }, - { - "epoch": 3.3284351221868835, - "grad_norm": 0.008756585419178009, - "learning_rate": 0.00019999453870659623, - "loss": 46.0, - "step": 20668 - }, - { - "epoch": 3.328596159265671, - "grad_norm": 0.0010858243331313133, - "learning_rate": 0.00019999453817785427, - "loss": 46.0, - "step": 20669 - }, - { - "epoch": 3.3287571963444584, - "grad_norm": 0.004240847192704678, - "learning_rate": 0.00019999453764908668, - "loss": 46.0, - "step": 20670 - }, - { - "epoch": 3.328918233423246, - "grad_norm": 0.004645610228180885, - "learning_rate": 0.00019999453712029355, - "loss": 46.0, - "step": 20671 - }, - { - "epoch": 3.3290792705020333, - "grad_norm": 0.00560349877923727, - "learning_rate": 0.00019999453659147478, - "loss": 46.0, - "step": 20672 - }, - { - "epoch": 3.3292403075808203, - "grad_norm": 0.0004197901289444417, - "learning_rate": 0.00019999453606263043, - "loss": 46.0, - "step": 20673 - }, - { - "epoch": 3.329401344659608, - "grad_norm": 0.0031594678293913603, - "learning_rate": 0.0001999945355337605, - "loss": 46.0, - "step": 20674 - }, - { - "epoch": 3.3295623817383952, - "grad_norm": 0.0054811895824968815, - "learning_rate": 0.00019999453500486495, - "loss": 46.0, - "step": 20675 - }, - { - "epoch": 3.3297234188171827, - "grad_norm": 0.006125556770712137, - "learning_rate": 0.00019999453447594383, - "loss": 46.0, - "step": 20676 - }, - { - "epoch": 3.32988445589597, - "grad_norm": 0.0014067491283640265, - "learning_rate": 0.0001999945339469971, - "loss": 46.0, - "step": 20677 - }, - { - "epoch": 3.3300454929747576, - "grad_norm": 0.004283860791474581, - "learning_rate": 0.00019999453341802478, - "loss": 46.0, - "step": 20678 - }, - { - "epoch": 3.3302065300535446, - "grad_norm": 0.006565620191395283, - "learning_rate": 0.00019999453288902687, - "loss": 46.0, - "step": 20679 - }, - { - "epoch": 3.330367567132332, - "grad_norm": 0.002439651405438781, - "learning_rate": 0.00019999453236000338, - "loss": 46.0, - "step": 20680 - }, - { - "epoch": 3.3305286042111195, - "grad_norm": 0.002741681644693017, - "learning_rate": 0.00019999453183095427, - "loss": 46.0, - "step": 20681 - }, - { - "epoch": 3.330689641289907, - "grad_norm": 0.0022727784235030413, - "learning_rate": 0.00019999453130187958, - "loss": 46.0, - "step": 20682 - }, - { - "epoch": 3.3308506783686944, - "grad_norm": 0.002211730694398284, - "learning_rate": 0.0001999945307727793, - "loss": 46.0, - "step": 20683 - }, - { - "epoch": 3.331011715447482, - "grad_norm": 0.0033187856897711754, - "learning_rate": 0.0001999945302436534, - "loss": 46.0, - "step": 20684 - }, - { - "epoch": 3.3311727525262693, - "grad_norm": 0.004431689623743296, - "learning_rate": 0.00019999452971450194, - "loss": 46.0, - "step": 20685 - }, - { - "epoch": 3.3313337896050568, - "grad_norm": 0.0006730607710778713, - "learning_rate": 0.00019999452918532487, - "loss": 46.0, - "step": 20686 - }, - { - "epoch": 3.331494826683844, - "grad_norm": 0.009347470477223396, - "learning_rate": 0.00019999452865612224, - "loss": 46.0, - "step": 20687 - }, - { - "epoch": 3.3316558637626312, - "grad_norm": 0.006930544041097164, - "learning_rate": 0.00019999452812689397, - "loss": 46.0, - "step": 20688 - }, - { - "epoch": 3.3318169008414187, - "grad_norm": 0.0034979088231921196, - "learning_rate": 0.00019999452759764013, - "loss": 46.0, - "step": 20689 - }, - { - "epoch": 3.331977937920206, - "grad_norm": 0.0031090572010725737, - "learning_rate": 0.0001999945270683607, - "loss": 46.0, - "step": 20690 - }, - { - "epoch": 3.3321389749989936, - "grad_norm": 0.0035414446610957384, - "learning_rate": 0.00019999452653905565, - "loss": 46.0, - "step": 20691 - }, - { - "epoch": 3.332300012077781, - "grad_norm": 0.0014024300035089254, - "learning_rate": 0.00019999452600972503, - "loss": 46.0, - "step": 20692 - }, - { - "epoch": 3.3324610491565685, - "grad_norm": 0.002385796280577779, - "learning_rate": 0.00019999452548036882, - "loss": 46.0, - "step": 20693 - }, - { - "epoch": 3.3326220862353555, - "grad_norm": 0.0024936727713793516, - "learning_rate": 0.000199994524950987, - "loss": 46.0, - "step": 20694 - }, - { - "epoch": 3.332783123314143, - "grad_norm": 0.0014235054841265082, - "learning_rate": 0.0001999945244215796, - "loss": 46.0, - "step": 20695 - }, - { - "epoch": 3.3329441603929304, - "grad_norm": 0.0037812914233654737, - "learning_rate": 0.0001999945238921466, - "loss": 46.0, - "step": 20696 - }, - { - "epoch": 3.333105197471718, - "grad_norm": 0.0016779991565272212, - "learning_rate": 0.000199994523362688, - "loss": 46.0, - "step": 20697 - }, - { - "epoch": 3.3332662345505053, - "grad_norm": 0.0014002828393131495, - "learning_rate": 0.0001999945228332038, - "loss": 46.0, - "step": 20698 - }, - { - "epoch": 3.3334272716292928, - "grad_norm": 0.00628651911392808, - "learning_rate": 0.00019999452230369405, - "loss": 46.0, - "step": 20699 - }, - { - "epoch": 3.33358830870808, - "grad_norm": 0.009666165336966515, - "learning_rate": 0.00019999452177415865, - "loss": 46.0, - "step": 20700 - }, - { - "epoch": 3.3337493457868677, - "grad_norm": 0.0023739002645015717, - "learning_rate": 0.00019999452124459767, - "loss": 46.0, - "step": 20701 - }, - { - "epoch": 3.3339103828656547, - "grad_norm": 0.0019127086270600557, - "learning_rate": 0.00019999452071501112, - "loss": 46.0, - "step": 20702 - }, - { - "epoch": 3.334071419944442, - "grad_norm": 0.002191151725128293, - "learning_rate": 0.00019999452018539898, - "loss": 46.0, - "step": 20703 - }, - { - "epoch": 3.3342324570232296, - "grad_norm": 0.004061200190335512, - "learning_rate": 0.00019999451965576124, - "loss": 46.0, - "step": 20704 - }, - { - "epoch": 3.334393494102017, - "grad_norm": 0.0008081065607257187, - "learning_rate": 0.00019999451912609787, - "loss": 46.0, - "step": 20705 - }, - { - "epoch": 3.3345545311808045, - "grad_norm": 0.001797973527573049, - "learning_rate": 0.00019999451859640895, - "loss": 46.0, - "step": 20706 - }, - { - "epoch": 3.334715568259592, - "grad_norm": 0.0018977973377332091, - "learning_rate": 0.0001999945180666944, - "loss": 46.0, - "step": 20707 - }, - { - "epoch": 3.334876605338379, - "grad_norm": 0.0018908806378021836, - "learning_rate": 0.0001999945175369543, - "loss": 46.0, - "step": 20708 - }, - { - "epoch": 3.3350376424171664, - "grad_norm": 0.001165832974947989, - "learning_rate": 0.00019999451700718858, - "loss": 46.0, - "step": 20709 - }, - { - "epoch": 3.335198679495954, - "grad_norm": 0.002274847123771906, - "learning_rate": 0.00019999451647739728, - "loss": 46.0, - "step": 20710 - }, - { - "epoch": 3.3353597165747413, - "grad_norm": 0.004373036324977875, - "learning_rate": 0.00019999451594758036, - "loss": 46.0, - "step": 20711 - }, - { - "epoch": 3.3355207536535287, - "grad_norm": 0.004236109554767609, - "learning_rate": 0.0001999945154177379, - "loss": 46.0, - "step": 20712 - }, - { - "epoch": 3.335681790732316, - "grad_norm": 0.005360749550163746, - "learning_rate": 0.00019999451488786978, - "loss": 46.0, - "step": 20713 - }, - { - "epoch": 3.3358428278111036, - "grad_norm": 0.0015582973137497902, - "learning_rate": 0.0001999945143579761, - "loss": 46.0, - "step": 20714 - }, - { - "epoch": 3.336003864889891, - "grad_norm": 0.007665487006306648, - "learning_rate": 0.00019999451382805684, - "loss": 46.0, - "step": 20715 - }, - { - "epoch": 3.336164901968678, - "grad_norm": 0.011600405909121037, - "learning_rate": 0.00019999451329811196, - "loss": 46.0, - "step": 20716 - }, - { - "epoch": 3.3363259390474655, - "grad_norm": 0.004321750719100237, - "learning_rate": 0.00019999451276814153, - "loss": 46.0, - "step": 20717 - }, - { - "epoch": 3.336486976126253, - "grad_norm": 0.008660581894218922, - "learning_rate": 0.00019999451223814545, - "loss": 46.0, - "step": 20718 - }, - { - "epoch": 3.3366480132050405, - "grad_norm": 0.003269096603617072, - "learning_rate": 0.0001999945117081238, - "loss": 46.0, - "step": 20719 - }, - { - "epoch": 3.336809050283828, - "grad_norm": 0.0034555543679744005, - "learning_rate": 0.00019999451117807656, - "loss": 46.0, - "step": 20720 - }, - { - "epoch": 3.3369700873626154, - "grad_norm": 0.001743566826917231, - "learning_rate": 0.00019999451064800372, - "loss": 46.0, - "step": 20721 - }, - { - "epoch": 3.337131124441403, - "grad_norm": 0.0014382983790710568, - "learning_rate": 0.0001999945101179053, - "loss": 46.0, - "step": 20722 - }, - { - "epoch": 3.33729216152019, - "grad_norm": 0.009087533690035343, - "learning_rate": 0.00019999450958778128, - "loss": 46.0, - "step": 20723 - }, - { - "epoch": 3.3374531985989773, - "grad_norm": 0.003041849471628666, - "learning_rate": 0.00019999450905763166, - "loss": 46.0, - "step": 20724 - }, - { - "epoch": 3.3376142356777647, - "grad_norm": 0.00196527736261487, - "learning_rate": 0.00019999450852745641, - "loss": 46.0, - "step": 20725 - }, - { - "epoch": 3.337775272756552, - "grad_norm": 0.003818437224254012, - "learning_rate": 0.00019999450799725564, - "loss": 46.0, - "step": 20726 - }, - { - "epoch": 3.3379363098353396, - "grad_norm": 0.004665777087211609, - "learning_rate": 0.00019999450746702922, - "loss": 46.0, - "step": 20727 - }, - { - "epoch": 3.338097346914127, - "grad_norm": 0.002855050377547741, - "learning_rate": 0.00019999450693677725, - "loss": 46.0, - "step": 20728 - }, - { - "epoch": 3.3382583839929145, - "grad_norm": 0.0007667517638765275, - "learning_rate": 0.00019999450640649966, - "loss": 46.0, - "step": 20729 - }, - { - "epoch": 3.338419421071702, - "grad_norm": 0.001363030169159174, - "learning_rate": 0.00019999450587619648, - "loss": 46.0, - "step": 20730 - }, - { - "epoch": 3.338580458150489, - "grad_norm": 0.0018201707862317562, - "learning_rate": 0.0001999945053458677, - "loss": 46.0, - "step": 20731 - }, - { - "epoch": 3.3387414952292764, - "grad_norm": 0.0019068780820816755, - "learning_rate": 0.00019999450481551336, - "loss": 46.0, - "step": 20732 - }, - { - "epoch": 3.338902532308064, - "grad_norm": 0.0014781117206439376, - "learning_rate": 0.0001999945042851334, - "loss": 46.0, - "step": 20733 - }, - { - "epoch": 3.3390635693868513, - "grad_norm": 0.017115868628025055, - "learning_rate": 0.00019999450375472784, - "loss": 46.0, - "step": 20734 - }, - { - "epoch": 3.339224606465639, - "grad_norm": 0.0011176539119333029, - "learning_rate": 0.0001999945032242967, - "loss": 46.0, - "step": 20735 - }, - { - "epoch": 3.3393856435444262, - "grad_norm": 0.002341602463275194, - "learning_rate": 0.00019999450269383995, - "loss": 46.0, - "step": 20736 - }, - { - "epoch": 3.3395466806232132, - "grad_norm": 0.0017520219553261995, - "learning_rate": 0.00019999450216335763, - "loss": 46.0, - "step": 20737 - }, - { - "epoch": 3.3397077177020007, - "grad_norm": 0.002885375637561083, - "learning_rate": 0.0001999945016328497, - "loss": 46.0, - "step": 20738 - }, - { - "epoch": 3.339868754780788, - "grad_norm": 0.005546938627958298, - "learning_rate": 0.00019999450110231619, - "loss": 46.0, - "step": 20739 - }, - { - "epoch": 3.3400297918595756, - "grad_norm": 0.002033561235293746, - "learning_rate": 0.00019999450057175708, - "loss": 46.0, - "step": 20740 - }, - { - "epoch": 3.340190828938363, - "grad_norm": 0.0027569993399083614, - "learning_rate": 0.00019999450004117234, - "loss": 46.0, - "step": 20741 - }, - { - "epoch": 3.3403518660171505, - "grad_norm": 0.00869830884039402, - "learning_rate": 0.00019999449951056206, - "loss": 46.0, - "step": 20742 - }, - { - "epoch": 3.340512903095938, - "grad_norm": 0.01141760777682066, - "learning_rate": 0.00019999449897992617, - "loss": 46.0, - "step": 20743 - }, - { - "epoch": 3.3406739401747254, - "grad_norm": 0.006766782607883215, - "learning_rate": 0.0001999944984492647, - "loss": 46.0, - "step": 20744 - }, - { - "epoch": 3.340834977253513, - "grad_norm": 0.007144279312342405, - "learning_rate": 0.0001999944979185776, - "loss": 46.0, - "step": 20745 - }, - { - "epoch": 3.3409960143323, - "grad_norm": 0.006176311988383532, - "learning_rate": 0.00019999449738786494, - "loss": 46.0, - "step": 20746 - }, - { - "epoch": 3.3411570514110873, - "grad_norm": 0.006303990725427866, - "learning_rate": 0.00019999449685712665, - "loss": 46.0, - "step": 20747 - }, - { - "epoch": 3.3413180884898748, - "grad_norm": 0.0009887849446386099, - "learning_rate": 0.0001999944963263628, - "loss": 46.0, - "step": 20748 - }, - { - "epoch": 3.3414791255686622, - "grad_norm": 0.004154487047344446, - "learning_rate": 0.00019999449579557338, - "loss": 46.0, - "step": 20749 - }, - { - "epoch": 3.3416401626474497, - "grad_norm": 0.002034200122579932, - "learning_rate": 0.00019999449526475832, - "loss": 46.0, - "step": 20750 - }, - { - "epoch": 3.341801199726237, - "grad_norm": 0.004265953321009874, - "learning_rate": 0.00019999449473391767, - "loss": 46.0, - "step": 20751 - }, - { - "epoch": 3.341962236805024, - "grad_norm": 0.0021867260802537203, - "learning_rate": 0.00019999449420305142, - "loss": 46.0, - "step": 20752 - }, - { - "epoch": 3.3421232738838116, - "grad_norm": 0.011533687822520733, - "learning_rate": 0.0001999944936721596, - "loss": 46.0, - "step": 20753 - }, - { - "epoch": 3.342284310962599, - "grad_norm": 0.0027390571776777506, - "learning_rate": 0.00019999449314124217, - "loss": 46.0, - "step": 20754 - }, - { - "epoch": 3.3424453480413865, - "grad_norm": 0.005125917959958315, - "learning_rate": 0.00019999449261029917, - "loss": 46.0, - "step": 20755 - }, - { - "epoch": 3.342606385120174, - "grad_norm": 0.0035561162512749434, - "learning_rate": 0.00019999449207933057, - "loss": 46.0, - "step": 20756 - }, - { - "epoch": 3.3427674221989614, - "grad_norm": 0.001143692061305046, - "learning_rate": 0.00019999449154833637, - "loss": 46.0, - "step": 20757 - }, - { - "epoch": 3.342928459277749, - "grad_norm": 0.0011206611525267363, - "learning_rate": 0.00019999449101731656, - "loss": 46.0, - "step": 20758 - }, - { - "epoch": 3.3430894963565363, - "grad_norm": 0.004940640646964312, - "learning_rate": 0.00019999449048627117, - "loss": 46.0, - "step": 20759 - }, - { - "epoch": 3.3432505334353233, - "grad_norm": 0.009246529079973698, - "learning_rate": 0.00019999448995520021, - "loss": 46.0, - "step": 20760 - }, - { - "epoch": 3.3434115705141108, - "grad_norm": 0.008048427291214466, - "learning_rate": 0.00019999448942410362, - "loss": 46.0, - "step": 20761 - }, - { - "epoch": 3.343572607592898, - "grad_norm": 0.004151604138314724, - "learning_rate": 0.00019999448889298143, - "loss": 46.0, - "step": 20762 - }, - { - "epoch": 3.3437336446716857, - "grad_norm": 0.002345653250813484, - "learning_rate": 0.0001999944883618337, - "loss": 46.0, - "step": 20763 - }, - { - "epoch": 3.343894681750473, - "grad_norm": 0.0022222164552658796, - "learning_rate": 0.00019999448783066033, - "loss": 46.0, - "step": 20764 - }, - { - "epoch": 3.3440557188292606, - "grad_norm": 0.0024431883357465267, - "learning_rate": 0.00019999448729946139, - "loss": 46.0, - "step": 20765 - }, - { - "epoch": 3.344216755908048, - "grad_norm": 0.0074889385141432285, - "learning_rate": 0.00019999448676823685, - "loss": 46.0, - "step": 20766 - }, - { - "epoch": 3.344377792986835, - "grad_norm": 0.0017557577230036259, - "learning_rate": 0.0001999944862369867, - "loss": 46.0, - "step": 20767 - }, - { - "epoch": 3.3445388300656225, - "grad_norm": 0.020771101117134094, - "learning_rate": 0.00019999448570571097, - "loss": 46.0, - "step": 20768 - }, - { - "epoch": 3.34469986714441, - "grad_norm": 0.008961359970271587, - "learning_rate": 0.00019999448517440965, - "loss": 46.0, - "step": 20769 - }, - { - "epoch": 3.3448609042231974, - "grad_norm": 0.002128560794517398, - "learning_rate": 0.00019999448464308274, - "loss": 46.0, - "step": 20770 - }, - { - "epoch": 3.345021941301985, - "grad_norm": 0.0015349421883001924, - "learning_rate": 0.00019999448411173022, - "loss": 46.0, - "step": 20771 - }, - { - "epoch": 3.3451829783807723, - "grad_norm": 0.0042289504781365395, - "learning_rate": 0.00019999448358035214, - "loss": 46.0, - "step": 20772 - }, - { - "epoch": 3.3453440154595597, - "grad_norm": 0.0038474167231470346, - "learning_rate": 0.00019999448304894844, - "loss": 46.0, - "step": 20773 - }, - { - "epoch": 3.345505052538347, - "grad_norm": 0.00923189613968134, - "learning_rate": 0.00019999448251751916, - "loss": 46.0, - "step": 20774 - }, - { - "epoch": 3.345666089617134, - "grad_norm": 0.0013017089804634452, - "learning_rate": 0.00019999448198606428, - "loss": 46.0, - "step": 20775 - }, - { - "epoch": 3.3458271266959216, - "grad_norm": 0.0007115930202417076, - "learning_rate": 0.00019999448145458377, - "loss": 46.0, - "step": 20776 - }, - { - "epoch": 3.345988163774709, - "grad_norm": 0.0033399956300854683, - "learning_rate": 0.0001999944809230777, - "loss": 46.0, - "step": 20777 - }, - { - "epoch": 3.3461492008534965, - "grad_norm": 0.001808327273465693, - "learning_rate": 0.00019999448039154604, - "loss": 46.0, - "step": 20778 - }, - { - "epoch": 3.346310237932284, - "grad_norm": 0.00222820439375937, - "learning_rate": 0.0001999944798599888, - "loss": 46.0, - "step": 20779 - }, - { - "epoch": 3.3464712750110714, - "grad_norm": 0.0009919166332110763, - "learning_rate": 0.00019999447932840593, - "loss": 46.0, - "step": 20780 - }, - { - "epoch": 3.3466323120898585, - "grad_norm": 0.0009279330843128264, - "learning_rate": 0.0001999944787967975, - "loss": 46.0, - "step": 20781 - }, - { - "epoch": 3.346793349168646, - "grad_norm": 0.0031397975981235504, - "learning_rate": 0.00019999447826516347, - "loss": 46.0, - "step": 20782 - }, - { - "epoch": 3.3469543862474334, - "grad_norm": 0.002643154002726078, - "learning_rate": 0.00019999447773350385, - "loss": 46.0, - "step": 20783 - }, - { - "epoch": 3.347115423326221, - "grad_norm": 0.0035772614646703005, - "learning_rate": 0.0001999944772018186, - "loss": 46.0, - "step": 20784 - }, - { - "epoch": 3.3472764604050083, - "grad_norm": 0.0010152192553505301, - "learning_rate": 0.0001999944766701078, - "loss": 46.0, - "step": 20785 - }, - { - "epoch": 3.3474374974837957, - "grad_norm": 0.0020548859611153603, - "learning_rate": 0.00019999447613837138, - "loss": 46.0, - "step": 20786 - }, - { - "epoch": 3.347598534562583, - "grad_norm": 0.0012068054638803005, - "learning_rate": 0.00019999447560660935, - "loss": 46.0, - "step": 20787 - }, - { - "epoch": 3.3477595716413706, - "grad_norm": 0.0028345868922770023, - "learning_rate": 0.00019999447507482176, - "loss": 46.0, - "step": 20788 - }, - { - "epoch": 3.347920608720158, - "grad_norm": 0.0026454576291143894, - "learning_rate": 0.0001999944745430086, - "loss": 46.0, - "step": 20789 - }, - { - "epoch": 3.348081645798945, - "grad_norm": 0.009147074073553085, - "learning_rate": 0.0001999944740111698, - "loss": 46.0, - "step": 20790 - }, - { - "epoch": 3.3482426828777325, - "grad_norm": 0.002347939182072878, - "learning_rate": 0.00019999447347930543, - "loss": 46.0, - "step": 20791 - }, - { - "epoch": 3.34840371995652, - "grad_norm": 0.004727926570922136, - "learning_rate": 0.00019999447294741544, - "loss": 46.0, - "step": 20792 - }, - { - "epoch": 3.3485647570353074, - "grad_norm": 0.002594444202259183, - "learning_rate": 0.0001999944724154999, - "loss": 46.0, - "step": 20793 - }, - { - "epoch": 3.348725794114095, - "grad_norm": 0.0014013260370120406, - "learning_rate": 0.00019999447188355873, - "loss": 46.0, - "step": 20794 - }, - { - "epoch": 3.3488868311928823, - "grad_norm": 0.005070163868367672, - "learning_rate": 0.00019999447135159198, - "loss": 46.0, - "step": 20795 - }, - { - "epoch": 3.3490478682716693, - "grad_norm": 0.0053039295598864555, - "learning_rate": 0.00019999447081959964, - "loss": 46.0, - "step": 20796 - }, - { - "epoch": 3.349208905350457, - "grad_norm": 0.005232988856732845, - "learning_rate": 0.0001999944702875817, - "loss": 46.0, - "step": 20797 - }, - { - "epoch": 3.3493699424292442, - "grad_norm": 0.0017715356079861522, - "learning_rate": 0.00019999446975553815, - "loss": 46.0, - "step": 20798 - }, - { - "epoch": 3.3495309795080317, - "grad_norm": 0.006321721710264683, - "learning_rate": 0.00019999446922346902, - "loss": 46.0, - "step": 20799 - }, - { - "epoch": 3.349692016586819, - "grad_norm": 0.00569496676325798, - "learning_rate": 0.0001999944686913743, - "loss": 46.0, - "step": 20800 - }, - { - "epoch": 3.3498530536656066, - "grad_norm": 0.005051599815487862, - "learning_rate": 0.000199994468159254, - "loss": 46.0, - "step": 20801 - }, - { - "epoch": 3.350014090744394, - "grad_norm": 0.003661056747660041, - "learning_rate": 0.00019999446762710807, - "loss": 46.0, - "step": 20802 - }, - { - "epoch": 3.3501751278231815, - "grad_norm": 0.0013093366287648678, - "learning_rate": 0.0001999944670949366, - "loss": 46.0, - "step": 20803 - }, - { - "epoch": 3.3503361649019685, - "grad_norm": 0.0018988519441336393, - "learning_rate": 0.00019999446656273948, - "loss": 46.0, - "step": 20804 - }, - { - "epoch": 3.350497201980756, - "grad_norm": 0.0051345848478376865, - "learning_rate": 0.00019999446603051683, - "loss": 46.0, - "step": 20805 - }, - { - "epoch": 3.3506582390595434, - "grad_norm": 0.0069570839405059814, - "learning_rate": 0.00019999446549826854, - "loss": 46.0, - "step": 20806 - }, - { - "epoch": 3.350819276138331, - "grad_norm": 0.008283072151243687, - "learning_rate": 0.00019999446496599466, - "loss": 46.0, - "step": 20807 - }, - { - "epoch": 3.3509803132171183, - "grad_norm": 0.0020841099321842194, - "learning_rate": 0.0001999944644336952, - "loss": 46.0, - "step": 20808 - }, - { - "epoch": 3.3511413502959058, - "grad_norm": 0.002222476527094841, - "learning_rate": 0.00019999446390137014, - "loss": 46.0, - "step": 20809 - }, - { - "epoch": 3.351302387374693, - "grad_norm": 0.005603512283414602, - "learning_rate": 0.00019999446336901947, - "loss": 46.0, - "step": 20810 - }, - { - "epoch": 3.3514634244534802, - "grad_norm": 0.008383144624531269, - "learning_rate": 0.0001999944628366432, - "loss": 46.0, - "step": 20811 - }, - { - "epoch": 3.3516244615322677, - "grad_norm": 0.0036360309459269047, - "learning_rate": 0.00019999446230424138, - "loss": 46.0, - "step": 20812 - }, - { - "epoch": 3.351785498611055, - "grad_norm": 0.005416340660303831, - "learning_rate": 0.00019999446177181395, - "loss": 46.0, - "step": 20813 - }, - { - "epoch": 3.3519465356898426, - "grad_norm": 0.002239226596429944, - "learning_rate": 0.0001999944612393609, - "loss": 46.0, - "step": 20814 - }, - { - "epoch": 3.35210757276863, - "grad_norm": 0.002920815022662282, - "learning_rate": 0.0001999944607068823, - "loss": 46.0, - "step": 20815 - }, - { - "epoch": 3.3522686098474175, - "grad_norm": 0.0020466644782572985, - "learning_rate": 0.0001999944601743781, - "loss": 46.0, - "step": 20816 - }, - { - "epoch": 3.352429646926205, - "grad_norm": 0.0018387290183454752, - "learning_rate": 0.0001999944596418483, - "loss": 46.0, - "step": 20817 - }, - { - "epoch": 3.3525906840049924, - "grad_norm": 0.003895149566233158, - "learning_rate": 0.00019999445910929287, - "loss": 46.0, - "step": 20818 - }, - { - "epoch": 3.3527517210837794, - "grad_norm": 0.002318927086889744, - "learning_rate": 0.00019999445857671186, - "loss": 46.0, - "step": 20819 - }, - { - "epoch": 3.352912758162567, - "grad_norm": 0.006851447746157646, - "learning_rate": 0.00019999445804410527, - "loss": 46.0, - "step": 20820 - }, - { - "epoch": 3.3530737952413543, - "grad_norm": 0.012200540862977505, - "learning_rate": 0.0001999944575114731, - "loss": 46.0, - "step": 20821 - }, - { - "epoch": 3.3532348323201417, - "grad_norm": 0.001614811597391963, - "learning_rate": 0.00019999445697881533, - "loss": 46.0, - "step": 20822 - }, - { - "epoch": 3.353395869398929, - "grad_norm": 0.00449097016826272, - "learning_rate": 0.00019999445644613197, - "loss": 46.0, - "step": 20823 - }, - { - "epoch": 3.3535569064777166, - "grad_norm": 0.005182918161153793, - "learning_rate": 0.00019999445591342298, - "loss": 46.0, - "step": 20824 - }, - { - "epoch": 3.3537179435565037, - "grad_norm": 0.001550475601106882, - "learning_rate": 0.00019999445538068845, - "loss": 46.0, - "step": 20825 - }, - { - "epoch": 3.353878980635291, - "grad_norm": 0.0004955654731020331, - "learning_rate": 0.00019999445484792828, - "loss": 46.0, - "step": 20826 - }, - { - "epoch": 3.3540400177140786, - "grad_norm": 0.017394132912158966, - "learning_rate": 0.00019999445431514255, - "loss": 46.0, - "step": 20827 - }, - { - "epoch": 3.354201054792866, - "grad_norm": 0.00375940278172493, - "learning_rate": 0.0001999944537823312, - "loss": 46.0, - "step": 20828 - }, - { - "epoch": 3.3543620918716535, - "grad_norm": 0.002217361005023122, - "learning_rate": 0.00019999445324949428, - "loss": 46.0, - "step": 20829 - }, - { - "epoch": 3.354523128950441, - "grad_norm": 0.007536798715591431, - "learning_rate": 0.00019999445271663176, - "loss": 46.0, - "step": 20830 - }, - { - "epoch": 3.3546841660292284, - "grad_norm": 0.0009419141570106149, - "learning_rate": 0.00019999445218374365, - "loss": 46.0, - "step": 20831 - }, - { - "epoch": 3.354845203108016, - "grad_norm": 0.0032349408138543367, - "learning_rate": 0.00019999445165082993, - "loss": 46.0, - "step": 20832 - }, - { - "epoch": 3.355006240186803, - "grad_norm": 0.007819443941116333, - "learning_rate": 0.00019999445111789063, - "loss": 46.0, - "step": 20833 - }, - { - "epoch": 3.3551672772655903, - "grad_norm": 0.014505263417959213, - "learning_rate": 0.00019999445058492573, - "loss": 46.0, - "step": 20834 - }, - { - "epoch": 3.3553283143443777, - "grad_norm": 0.001194495940580964, - "learning_rate": 0.00019999445005193525, - "loss": 46.0, - "step": 20835 - }, - { - "epoch": 3.355489351423165, - "grad_norm": 0.006805042736232281, - "learning_rate": 0.00019999444951891918, - "loss": 46.0, - "step": 20836 - }, - { - "epoch": 3.3556503885019526, - "grad_norm": 0.0037581122014671564, - "learning_rate": 0.00019999444898587747, - "loss": 46.0, - "step": 20837 - }, - { - "epoch": 3.35581142558074, - "grad_norm": 0.0020235648844391108, - "learning_rate": 0.0001999944484528102, - "loss": 46.0, - "step": 20838 - }, - { - "epoch": 3.3559724626595275, - "grad_norm": 0.00293528800830245, - "learning_rate": 0.00019999444791971732, - "loss": 46.0, - "step": 20839 - }, - { - "epoch": 3.3561334997383145, - "grad_norm": 0.008518904447555542, - "learning_rate": 0.0001999944473865989, - "loss": 46.0, - "step": 20840 - }, - { - "epoch": 3.356294536817102, - "grad_norm": 0.002349145244807005, - "learning_rate": 0.00019999444685345484, - "loss": 46.0, - "step": 20841 - }, - { - "epoch": 3.3564555738958894, - "grad_norm": 0.0014410704607143998, - "learning_rate": 0.0001999944463202852, - "loss": 46.0, - "step": 20842 - }, - { - "epoch": 3.356616610974677, - "grad_norm": 0.0021259256172925234, - "learning_rate": 0.00019999444578708996, - "loss": 46.0, - "step": 20843 - }, - { - "epoch": 3.3567776480534643, - "grad_norm": 0.007543244399130344, - "learning_rate": 0.00019999444525386912, - "loss": 46.0, - "step": 20844 - }, - { - "epoch": 3.356938685132252, - "grad_norm": 0.003808224806562066, - "learning_rate": 0.0001999944447206227, - "loss": 46.0, - "step": 20845 - }, - { - "epoch": 3.3570997222110392, - "grad_norm": 0.001223452971316874, - "learning_rate": 0.0001999944441873507, - "loss": 46.0, - "step": 20846 - }, - { - "epoch": 3.3572607592898267, - "grad_norm": 0.0026060177478939295, - "learning_rate": 0.00019999444365405305, - "loss": 46.0, - "step": 20847 - }, - { - "epoch": 3.3574217963686137, - "grad_norm": 0.0025034029968082905, - "learning_rate": 0.00019999444312072986, - "loss": 46.0, - "step": 20848 - }, - { - "epoch": 3.357582833447401, - "grad_norm": 0.004101970698684454, - "learning_rate": 0.00019999444258738107, - "loss": 46.0, - "step": 20849 - }, - { - "epoch": 3.3577438705261886, - "grad_norm": 0.006617759820073843, - "learning_rate": 0.00019999444205400665, - "loss": 46.0, - "step": 20850 - }, - { - "epoch": 3.357904907604976, - "grad_norm": 0.002592572011053562, - "learning_rate": 0.0001999944415206067, - "loss": 46.0, - "step": 20851 - }, - { - "epoch": 3.3580659446837635, - "grad_norm": 0.007662277668714523, - "learning_rate": 0.0001999944409871811, - "loss": 46.0, - "step": 20852 - }, - { - "epoch": 3.358226981762551, - "grad_norm": 0.0012603906216099858, - "learning_rate": 0.00019999444045372994, - "loss": 46.0, - "step": 20853 - }, - { - "epoch": 3.358388018841338, - "grad_norm": 0.0030396704096347094, - "learning_rate": 0.00019999443992025316, - "loss": 46.0, - "step": 20854 - }, - { - "epoch": 3.3585490559201254, - "grad_norm": 0.003122115507721901, - "learning_rate": 0.00019999443938675083, - "loss": 46.0, - "step": 20855 - }, - { - "epoch": 3.358710092998913, - "grad_norm": 0.0007894922746345401, - "learning_rate": 0.00019999443885322286, - "loss": 46.0, - "step": 20856 - }, - { - "epoch": 3.3588711300777003, - "grad_norm": 0.0028509756084531546, - "learning_rate": 0.00019999443831966932, - "loss": 46.0, - "step": 20857 - }, - { - "epoch": 3.359032167156488, - "grad_norm": 0.00408986397087574, - "learning_rate": 0.00019999443778609017, - "loss": 46.0, - "step": 20858 - }, - { - "epoch": 3.3591932042352752, - "grad_norm": 0.004244456999003887, - "learning_rate": 0.00019999443725248544, - "loss": 46.0, - "step": 20859 - }, - { - "epoch": 3.3593542413140627, - "grad_norm": 0.002518328605219722, - "learning_rate": 0.00019999443671885509, - "loss": 46.0, - "step": 20860 - }, - { - "epoch": 3.35951527839285, - "grad_norm": 0.0038628955371677876, - "learning_rate": 0.00019999443618519918, - "loss": 46.0, - "step": 20861 - }, - { - "epoch": 3.3596763154716376, - "grad_norm": 0.008417327888309956, - "learning_rate": 0.00019999443565151768, - "loss": 46.0, - "step": 20862 - }, - { - "epoch": 3.3598373525504246, - "grad_norm": 0.005671640392392874, - "learning_rate": 0.00019999443511781057, - "loss": 46.0, - "step": 20863 - }, - { - "epoch": 3.359998389629212, - "grad_norm": 0.0021164407953619957, - "learning_rate": 0.00019999443458407787, - "loss": 46.0, - "step": 20864 - }, - { - "epoch": 3.3601594267079995, - "grad_norm": 0.0012921376619488, - "learning_rate": 0.00019999443405031955, - "loss": 46.0, - "step": 20865 - }, - { - "epoch": 3.360320463786787, - "grad_norm": 0.005236590281128883, - "learning_rate": 0.00019999443351653568, - "loss": 46.0, - "step": 20866 - }, - { - "epoch": 3.3604815008655744, - "grad_norm": 0.0020093233324587345, - "learning_rate": 0.00019999443298272622, - "loss": 46.0, - "step": 20867 - }, - { - "epoch": 3.360642537944362, - "grad_norm": 0.0008796803303994238, - "learning_rate": 0.00019999443244889115, - "loss": 46.0, - "step": 20868 - }, - { - "epoch": 3.360803575023149, - "grad_norm": 0.0035525192506611347, - "learning_rate": 0.00019999443191503046, - "loss": 46.0, - "step": 20869 - }, - { - "epoch": 3.3609646121019363, - "grad_norm": 0.007032199762761593, - "learning_rate": 0.0001999944313811442, - "loss": 46.0, - "step": 20870 - }, - { - "epoch": 3.3611256491807238, - "grad_norm": 0.008084863424301147, - "learning_rate": 0.00019999443084723234, - "loss": 46.0, - "step": 20871 - }, - { - "epoch": 3.361286686259511, - "grad_norm": 0.0008347261464223266, - "learning_rate": 0.0001999944303132949, - "loss": 46.0, - "step": 20872 - }, - { - "epoch": 3.3614477233382987, - "grad_norm": 0.004060279577970505, - "learning_rate": 0.00019999442977933188, - "loss": 46.0, - "step": 20873 - }, - { - "epoch": 3.361608760417086, - "grad_norm": 0.0007594830240122974, - "learning_rate": 0.00019999442924534323, - "loss": 46.0, - "step": 20874 - }, - { - "epoch": 3.3617697974958736, - "grad_norm": 0.0027352285105735064, - "learning_rate": 0.000199994428711329, - "loss": 46.0, - "step": 20875 - }, - { - "epoch": 3.361930834574661, - "grad_norm": 0.0022483242210000753, - "learning_rate": 0.00019999442817728917, - "loss": 46.0, - "step": 20876 - }, - { - "epoch": 3.362091871653448, - "grad_norm": 0.001823390950448811, - "learning_rate": 0.00019999442764322375, - "loss": 46.0, - "step": 20877 - }, - { - "epoch": 3.3622529087322355, - "grad_norm": 0.003105304203927517, - "learning_rate": 0.00019999442710913275, - "loss": 46.0, - "step": 20878 - }, - { - "epoch": 3.362413945811023, - "grad_norm": 0.003525971435010433, - "learning_rate": 0.00019999442657501614, - "loss": 46.0, - "step": 20879 - }, - { - "epoch": 3.3625749828898104, - "grad_norm": 0.00048445616266690195, - "learning_rate": 0.00019999442604087396, - "loss": 46.0, - "step": 20880 - }, - { - "epoch": 3.362736019968598, - "grad_norm": 0.0010100880172103643, - "learning_rate": 0.00019999442550670615, - "loss": 46.0, - "step": 20881 - }, - { - "epoch": 3.3628970570473853, - "grad_norm": 0.003239578567445278, - "learning_rate": 0.0001999944249725128, - "loss": 46.0, - "step": 20882 - }, - { - "epoch": 3.3630580941261727, - "grad_norm": 0.0023300175089389086, - "learning_rate": 0.0001999944244382938, - "loss": 46.0, - "step": 20883 - }, - { - "epoch": 3.3632191312049597, - "grad_norm": 0.004318100865930319, - "learning_rate": 0.00019999442390404923, - "loss": 46.0, - "step": 20884 - }, - { - "epoch": 3.363380168283747, - "grad_norm": 0.0025229519233107567, - "learning_rate": 0.00019999442336977906, - "loss": 46.0, - "step": 20885 - }, - { - "epoch": 3.3635412053625346, - "grad_norm": 0.002387227723374963, - "learning_rate": 0.0001999944228354833, - "loss": 46.0, - "step": 20886 - }, - { - "epoch": 3.363702242441322, - "grad_norm": 0.00975829642266035, - "learning_rate": 0.00019999442230116197, - "loss": 46.0, - "step": 20887 - }, - { - "epoch": 3.3638632795201096, - "grad_norm": 0.0012357309460639954, - "learning_rate": 0.00019999442176681502, - "loss": 46.0, - "step": 20888 - }, - { - "epoch": 3.364024316598897, - "grad_norm": 0.002482104115188122, - "learning_rate": 0.0001999944212324425, - "loss": 46.0, - "step": 20889 - }, - { - "epoch": 3.3641853536776845, - "grad_norm": 0.0033045695163309574, - "learning_rate": 0.00019999442069804438, - "loss": 46.0, - "step": 20890 - }, - { - "epoch": 3.364346390756472, - "grad_norm": 0.01630895771086216, - "learning_rate": 0.00019999442016362063, - "loss": 46.0, - "step": 20891 - }, - { - "epoch": 3.364507427835259, - "grad_norm": 0.004521742928773165, - "learning_rate": 0.0001999944196291713, - "loss": 46.0, - "step": 20892 - }, - { - "epoch": 3.3646684649140464, - "grad_norm": 0.007557746022939682, - "learning_rate": 0.00019999441909469642, - "loss": 46.0, - "step": 20893 - }, - { - "epoch": 3.364829501992834, - "grad_norm": 0.002224199241027236, - "learning_rate": 0.00019999441856019589, - "loss": 46.0, - "step": 20894 - }, - { - "epoch": 3.3649905390716213, - "grad_norm": 0.001543963560834527, - "learning_rate": 0.0001999944180256698, - "loss": 46.0, - "step": 20895 - }, - { - "epoch": 3.3651515761504087, - "grad_norm": 0.001412329962477088, - "learning_rate": 0.00019999441749111812, - "loss": 46.0, - "step": 20896 - }, - { - "epoch": 3.365312613229196, - "grad_norm": 0.005306669976562262, - "learning_rate": 0.00019999441695654082, - "loss": 46.0, - "step": 20897 - }, - { - "epoch": 3.365473650307983, - "grad_norm": 0.007320174016058445, - "learning_rate": 0.00019999441642193797, - "loss": 46.0, - "step": 20898 - }, - { - "epoch": 3.3656346873867706, - "grad_norm": 0.0006209679995663464, - "learning_rate": 0.0001999944158873095, - "loss": 46.0, - "step": 20899 - }, - { - "epoch": 3.365795724465558, - "grad_norm": 0.0018192294519394636, - "learning_rate": 0.00019999441535265542, - "loss": 46.0, - "step": 20900 - }, - { - "epoch": 3.3659567615443455, - "grad_norm": 0.0006589615368284285, - "learning_rate": 0.00019999441481797576, - "loss": 46.0, - "step": 20901 - }, - { - "epoch": 3.366117798623133, - "grad_norm": 0.005652967374771833, - "learning_rate": 0.0001999944142832705, - "loss": 46.0, - "step": 20902 - }, - { - "epoch": 3.3662788357019204, - "grad_norm": 0.0008445366402156651, - "learning_rate": 0.00019999441374853966, - "loss": 46.0, - "step": 20903 - }, - { - "epoch": 3.366439872780708, - "grad_norm": 0.001166544039733708, - "learning_rate": 0.00019999441321378323, - "loss": 46.0, - "step": 20904 - }, - { - "epoch": 3.3666009098594953, - "grad_norm": 0.008133132942020893, - "learning_rate": 0.0001999944126790012, - "loss": 46.0, - "step": 20905 - }, - { - "epoch": 3.3667619469382823, - "grad_norm": 0.005981351714581251, - "learning_rate": 0.00019999441214419358, - "loss": 46.0, - "step": 20906 - }, - { - "epoch": 3.36692298401707, - "grad_norm": 0.0025186811108142138, - "learning_rate": 0.00019999441160936033, - "loss": 46.0, - "step": 20907 - }, - { - "epoch": 3.3670840210958572, - "grad_norm": 0.010102360509335995, - "learning_rate": 0.00019999441107450153, - "loss": 46.0, - "step": 20908 - }, - { - "epoch": 3.3672450581746447, - "grad_norm": 0.002637322759255767, - "learning_rate": 0.00019999441053961714, - "loss": 46.0, - "step": 20909 - }, - { - "epoch": 3.367406095253432, - "grad_norm": 0.010602711699903011, - "learning_rate": 0.00019999441000470713, - "loss": 46.0, - "step": 20910 - }, - { - "epoch": 3.3675671323322196, - "grad_norm": 0.006769978441298008, - "learning_rate": 0.00019999440946977154, - "loss": 46.0, - "step": 20911 - }, - { - "epoch": 3.367728169411007, - "grad_norm": 0.005505494307726622, - "learning_rate": 0.00019999440893481035, - "loss": 46.0, - "step": 20912 - }, - { - "epoch": 3.367889206489794, - "grad_norm": 0.0021205723751336336, - "learning_rate": 0.00019999440839982359, - "loss": 46.0, - "step": 20913 - }, - { - "epoch": 3.3680502435685815, - "grad_norm": 0.0019522298825904727, - "learning_rate": 0.0001999944078648112, - "loss": 46.0, - "step": 20914 - }, - { - "epoch": 3.368211280647369, - "grad_norm": 0.0017423428362235427, - "learning_rate": 0.00019999440732977326, - "loss": 46.0, - "step": 20915 - }, - { - "epoch": 3.3683723177261564, - "grad_norm": 0.002849302487447858, - "learning_rate": 0.00019999440679470968, - "loss": 46.0, - "step": 20916 - }, - { - "epoch": 3.368533354804944, - "grad_norm": 0.009484389796853065, - "learning_rate": 0.00019999440625962053, - "loss": 46.0, - "step": 20917 - }, - { - "epoch": 3.3686943918837313, - "grad_norm": 0.008852027356624603, - "learning_rate": 0.00019999440572450577, - "loss": 46.0, - "step": 20918 - }, - { - "epoch": 3.3688554289625188, - "grad_norm": 0.003986544441431761, - "learning_rate": 0.00019999440518936545, - "loss": 46.0, - "step": 20919 - }, - { - "epoch": 3.3690164660413062, - "grad_norm": 0.004097459837794304, - "learning_rate": 0.00019999440465419952, - "loss": 46.0, - "step": 20920 - }, - { - "epoch": 3.3691775031200932, - "grad_norm": 0.001590346684679389, - "learning_rate": 0.00019999440411900797, - "loss": 46.0, - "step": 20921 - }, - { - "epoch": 3.3693385401988807, - "grad_norm": 0.002898836974054575, - "learning_rate": 0.00019999440358379087, - "loss": 46.0, - "step": 20922 - }, - { - "epoch": 3.369499577277668, - "grad_norm": 0.0013032472925260663, - "learning_rate": 0.00019999440304854815, - "loss": 46.0, - "step": 20923 - }, - { - "epoch": 3.3696606143564556, - "grad_norm": 0.0042036776430904865, - "learning_rate": 0.00019999440251327984, - "loss": 46.0, - "step": 20924 - }, - { - "epoch": 3.369821651435243, - "grad_norm": 0.003879842348396778, - "learning_rate": 0.00019999440197798594, - "loss": 46.0, - "step": 20925 - }, - { - "epoch": 3.3699826885140305, - "grad_norm": 0.007399814669042826, - "learning_rate": 0.00019999440144266646, - "loss": 46.0, - "step": 20926 - }, - { - "epoch": 3.370143725592818, - "grad_norm": 0.00479771988466382, - "learning_rate": 0.00019999440090732136, - "loss": 46.0, - "step": 20927 - }, - { - "epoch": 3.370304762671605, - "grad_norm": 0.001091778976842761, - "learning_rate": 0.00019999440037195068, - "loss": 46.0, - "step": 20928 - }, - { - "epoch": 3.3704657997503924, - "grad_norm": 0.0009164019138552248, - "learning_rate": 0.0001999943998365544, - "loss": 46.0, - "step": 20929 - }, - { - "epoch": 3.37062683682918, - "grad_norm": 0.01292500365525484, - "learning_rate": 0.00019999439930113252, - "loss": 46.0, - "step": 20930 - }, - { - "epoch": 3.3707878739079673, - "grad_norm": 0.0014164549065753818, - "learning_rate": 0.00019999439876568507, - "loss": 46.0, - "step": 20931 - }, - { - "epoch": 3.3709489109867548, - "grad_norm": 0.0019916431047022343, - "learning_rate": 0.00019999439823021201, - "loss": 46.0, - "step": 20932 - }, - { - "epoch": 3.371109948065542, - "grad_norm": 0.002783977659419179, - "learning_rate": 0.00019999439769471337, - "loss": 46.0, - "step": 20933 - }, - { - "epoch": 3.3712709851443297, - "grad_norm": 0.012138224206864834, - "learning_rate": 0.00019999439715918916, - "loss": 46.0, - "step": 20934 - }, - { - "epoch": 3.371432022223117, - "grad_norm": 0.004607359878718853, - "learning_rate": 0.0001999943966236393, - "loss": 46.0, - "step": 20935 - }, - { - "epoch": 3.371593059301904, - "grad_norm": 0.0011318891774863005, - "learning_rate": 0.00019999439608806387, - "loss": 46.0, - "step": 20936 - }, - { - "epoch": 3.3717540963806916, - "grad_norm": 0.00155333848670125, - "learning_rate": 0.00019999439555246285, - "loss": 46.0, - "step": 20937 - }, - { - "epoch": 3.371915133459479, - "grad_norm": 0.011060883291065693, - "learning_rate": 0.00019999439501683624, - "loss": 46.0, - "step": 20938 - }, - { - "epoch": 3.3720761705382665, - "grad_norm": 0.003010694170370698, - "learning_rate": 0.00019999439448118404, - "loss": 46.0, - "step": 20939 - }, - { - "epoch": 3.372237207617054, - "grad_norm": 0.0024879195261746645, - "learning_rate": 0.00019999439394550623, - "loss": 46.0, - "step": 20940 - }, - { - "epoch": 3.3723982446958414, - "grad_norm": 0.0032771851401776075, - "learning_rate": 0.00019999439340980283, - "loss": 46.0, - "step": 20941 - }, - { - "epoch": 3.3725592817746284, - "grad_norm": 0.007326935417950153, - "learning_rate": 0.00019999439287407385, - "loss": 46.0, - "step": 20942 - }, - { - "epoch": 3.372720318853416, - "grad_norm": 0.0035892713349312544, - "learning_rate": 0.00019999439233831927, - "loss": 46.0, - "step": 20943 - }, - { - "epoch": 3.3728813559322033, - "grad_norm": 0.00965859368443489, - "learning_rate": 0.00019999439180253909, - "loss": 46.0, - "step": 20944 - }, - { - "epoch": 3.3730423930109907, - "grad_norm": 0.001588193466886878, - "learning_rate": 0.0001999943912667333, - "loss": 46.0, - "step": 20945 - }, - { - "epoch": 3.373203430089778, - "grad_norm": 0.012424630112946033, - "learning_rate": 0.00019999439073090195, - "loss": 46.0, - "step": 20946 - }, - { - "epoch": 3.3733644671685656, - "grad_norm": 0.0005508853355422616, - "learning_rate": 0.000199994390195045, - "loss": 46.0, - "step": 20947 - }, - { - "epoch": 3.373525504247353, - "grad_norm": 0.0025416258722543716, - "learning_rate": 0.00019999438965916246, - "loss": 46.0, - "step": 20948 - }, - { - "epoch": 3.3736865413261405, - "grad_norm": 0.0033665881492197514, - "learning_rate": 0.0001999943891232543, - "loss": 46.0, - "step": 20949 - }, - { - "epoch": 3.3738475784049275, - "grad_norm": 0.0008465584251098335, - "learning_rate": 0.00019999438858732055, - "loss": 46.0, - "step": 20950 - }, - { - "epoch": 3.374008615483715, - "grad_norm": 0.009099594317376614, - "learning_rate": 0.00019999438805136125, - "loss": 46.0, - "step": 20951 - }, - { - "epoch": 3.3741696525625025, - "grad_norm": 0.0026550244074314833, - "learning_rate": 0.00019999438751537634, - "loss": 46.0, - "step": 20952 - }, - { - "epoch": 3.37433068964129, - "grad_norm": 0.000848502735607326, - "learning_rate": 0.0001999943869793658, - "loss": 46.0, - "step": 20953 - }, - { - "epoch": 3.3744917267200774, - "grad_norm": 0.0026028442662209272, - "learning_rate": 0.0001999943864433297, - "loss": 46.0, - "step": 20954 - }, - { - "epoch": 3.374652763798865, - "grad_norm": 0.0013412300031632185, - "learning_rate": 0.00019999438590726797, - "loss": 46.0, - "step": 20955 - }, - { - "epoch": 3.3748138008776523, - "grad_norm": 0.005789418239146471, - "learning_rate": 0.00019999438537118068, - "loss": 46.0, - "step": 20956 - }, - { - "epoch": 3.3749748379564393, - "grad_norm": 0.0027159962337464094, - "learning_rate": 0.0001999943848350678, - "loss": 46.0, - "step": 20957 - }, - { - "epoch": 3.3751358750352267, - "grad_norm": 0.002325424226000905, - "learning_rate": 0.00019999438429892932, - "loss": 46.0, - "step": 20958 - }, - { - "epoch": 3.375296912114014, - "grad_norm": 0.0017380093922838569, - "learning_rate": 0.00019999438376276524, - "loss": 46.0, - "step": 20959 - }, - { - "epoch": 3.3754579491928016, - "grad_norm": 0.0010504063684493303, - "learning_rate": 0.00019999438322657558, - "loss": 46.0, - "step": 20960 - }, - { - "epoch": 3.375618986271589, - "grad_norm": 0.0008453310583718121, - "learning_rate": 0.0001999943826903603, - "loss": 46.0, - "step": 20961 - }, - { - "epoch": 3.3757800233503765, - "grad_norm": 0.009664828889071941, - "learning_rate": 0.00019999438215411946, - "loss": 46.0, - "step": 20962 - }, - { - "epoch": 3.375941060429164, - "grad_norm": 0.005448425188660622, - "learning_rate": 0.000199994381617853, - "loss": 46.0, - "step": 20963 - }, - { - "epoch": 3.3761020975079514, - "grad_norm": 0.0057162814773619175, - "learning_rate": 0.00019999438108156094, - "loss": 46.0, - "step": 20964 - }, - { - "epoch": 3.3762631345867384, - "grad_norm": 0.009896941483020782, - "learning_rate": 0.00019999438054524334, - "loss": 46.0, - "step": 20965 - }, - { - "epoch": 3.376424171665526, - "grad_norm": 0.0012061800807714462, - "learning_rate": 0.0001999943800089001, - "loss": 46.0, - "step": 20966 - }, - { - "epoch": 3.3765852087443133, - "grad_norm": 0.0017518760869279504, - "learning_rate": 0.00019999437947253128, - "loss": 46.0, - "step": 20967 - }, - { - "epoch": 3.376746245823101, - "grad_norm": 0.001236538402736187, - "learning_rate": 0.00019999437893613683, - "loss": 46.0, - "step": 20968 - }, - { - "epoch": 3.3769072829018882, - "grad_norm": 0.002920648083090782, - "learning_rate": 0.00019999437839971683, - "loss": 46.0, - "step": 20969 - }, - { - "epoch": 3.3770683199806757, - "grad_norm": 0.0034797561820596457, - "learning_rate": 0.00019999437786327121, - "loss": 46.0, - "step": 20970 - }, - { - "epoch": 3.3772293570594627, - "grad_norm": 0.004465105012059212, - "learning_rate": 0.00019999437732680004, - "loss": 46.0, - "step": 20971 - }, - { - "epoch": 3.37739039413825, - "grad_norm": 0.0009538336889818311, - "learning_rate": 0.00019999437679030325, - "loss": 46.0, - "step": 20972 - }, - { - "epoch": 3.3775514312170376, - "grad_norm": 0.001914582448080182, - "learning_rate": 0.00019999437625378087, - "loss": 46.0, - "step": 20973 - }, - { - "epoch": 3.377712468295825, - "grad_norm": 0.0038070983719080687, - "learning_rate": 0.00019999437571723288, - "loss": 46.0, - "step": 20974 - }, - { - "epoch": 3.3778735053746125, - "grad_norm": 0.0018001802964136004, - "learning_rate": 0.00019999437518065932, - "loss": 46.0, - "step": 20975 - }, - { - "epoch": 3.3780345424534, - "grad_norm": 0.001459547784179449, - "learning_rate": 0.00019999437464406016, - "loss": 46.0, - "step": 20976 - }, - { - "epoch": 3.3781955795321874, - "grad_norm": 0.0007161207031458616, - "learning_rate": 0.00019999437410743537, - "loss": 46.0, - "step": 20977 - }, - { - "epoch": 3.378356616610975, - "grad_norm": 0.002150939544662833, - "learning_rate": 0.00019999437357078503, - "loss": 46.0, - "step": 20978 - }, - { - "epoch": 3.3785176536897623, - "grad_norm": 0.002507770899683237, - "learning_rate": 0.00019999437303410908, - "loss": 46.0, - "step": 20979 - }, - { - "epoch": 3.3786786907685493, - "grad_norm": 0.0030368685256689787, - "learning_rate": 0.00019999437249740756, - "loss": 46.0, - "step": 20980 - }, - { - "epoch": 3.3788397278473368, - "grad_norm": 0.0024243092630058527, - "learning_rate": 0.0001999943719606804, - "loss": 46.0, - "step": 20981 - }, - { - "epoch": 3.3790007649261242, - "grad_norm": 0.013493322767317295, - "learning_rate": 0.00019999437142392766, - "loss": 46.0, - "step": 20982 - }, - { - "epoch": 3.3791618020049117, - "grad_norm": 0.001397455227561295, - "learning_rate": 0.00019999437088714936, - "loss": 46.0, - "step": 20983 - }, - { - "epoch": 3.379322839083699, - "grad_norm": 0.002155978698283434, - "learning_rate": 0.00019999437035034544, - "loss": 46.0, - "step": 20984 - }, - { - "epoch": 3.3794838761624866, - "grad_norm": 0.0027984476182609797, - "learning_rate": 0.00019999436981351596, - "loss": 46.0, - "step": 20985 - }, - { - "epoch": 3.3796449132412736, - "grad_norm": 0.012609382160007954, - "learning_rate": 0.00019999436927666084, - "loss": 46.0, - "step": 20986 - }, - { - "epoch": 3.379805950320061, - "grad_norm": 0.005818732548505068, - "learning_rate": 0.00019999436873978013, - "loss": 46.0, - "step": 20987 - }, - { - "epoch": 3.3799669873988485, - "grad_norm": 0.004531625658273697, - "learning_rate": 0.00019999436820287387, - "loss": 46.0, - "step": 20988 - }, - { - "epoch": 3.380128024477636, - "grad_norm": 0.007118010427802801, - "learning_rate": 0.00019999436766594199, - "loss": 46.0, - "step": 20989 - }, - { - "epoch": 3.3802890615564234, - "grad_norm": 0.0020114232320338488, - "learning_rate": 0.00019999436712898452, - "loss": 46.0, - "step": 20990 - }, - { - "epoch": 3.380450098635211, - "grad_norm": 0.00118837074842304, - "learning_rate": 0.00019999436659200143, - "loss": 46.0, - "step": 20991 - }, - { - "epoch": 3.3806111357139983, - "grad_norm": 0.006640010047703981, - "learning_rate": 0.00019999436605499276, - "loss": 46.0, - "step": 20992 - }, - { - "epoch": 3.3807721727927857, - "grad_norm": 0.006626845803111792, - "learning_rate": 0.00019999436551795853, - "loss": 46.0, - "step": 20993 - }, - { - "epoch": 3.3809332098715728, - "grad_norm": 0.0017630504444241524, - "learning_rate": 0.00019999436498089866, - "loss": 46.0, - "step": 20994 - }, - { - "epoch": 3.38109424695036, - "grad_norm": 0.008797770366072655, - "learning_rate": 0.00019999436444381323, - "loss": 46.0, - "step": 20995 - }, - { - "epoch": 3.3812552840291477, - "grad_norm": 0.0009718845249153674, - "learning_rate": 0.0001999943639067022, - "loss": 46.0, - "step": 20996 - }, - { - "epoch": 3.381416321107935, - "grad_norm": 0.008871011435985565, - "learning_rate": 0.00019999436336956558, - "loss": 46.0, - "step": 20997 - }, - { - "epoch": 3.3815773581867226, - "grad_norm": 0.0015421169809997082, - "learning_rate": 0.00019999436283240336, - "loss": 46.0, - "step": 20998 - }, - { - "epoch": 3.38173839526551, - "grad_norm": 0.0038646755274385214, - "learning_rate": 0.00019999436229521555, - "loss": 46.0, - "step": 20999 - }, - { - "epoch": 3.3818994323442975, - "grad_norm": 0.004479492548853159, - "learning_rate": 0.00019999436175800216, - "loss": 46.0, - "step": 21000 - }, - { - "epoch": 3.3820604694230845, - "grad_norm": 0.005288648419082165, - "learning_rate": 0.00019999436122076315, - "loss": 46.0, - "step": 21001 - }, - { - "epoch": 3.382221506501872, - "grad_norm": 0.000657967641018331, - "learning_rate": 0.00019999436068349855, - "loss": 46.0, - "step": 21002 - }, - { - "epoch": 3.3823825435806594, - "grad_norm": 0.0023342259228229523, - "learning_rate": 0.00019999436014620837, - "loss": 46.0, - "step": 21003 - }, - { - "epoch": 3.382543580659447, - "grad_norm": 0.0032856722828000784, - "learning_rate": 0.0001999943596088926, - "loss": 46.0, - "step": 21004 - }, - { - "epoch": 3.3827046177382343, - "grad_norm": 0.007843364961445332, - "learning_rate": 0.00019999435907155122, - "loss": 46.0, - "step": 21005 - }, - { - "epoch": 3.3828656548170217, - "grad_norm": 0.008105429820716381, - "learning_rate": 0.00019999435853418422, - "loss": 46.0, - "step": 21006 - }, - { - "epoch": 3.383026691895809, - "grad_norm": 0.007616278249770403, - "learning_rate": 0.00019999435799679166, - "loss": 46.0, - "step": 21007 - }, - { - "epoch": 3.3831877289745966, - "grad_norm": 0.0010869266698136926, - "learning_rate": 0.0001999943574593735, - "loss": 46.0, - "step": 21008 - }, - { - "epoch": 3.3833487660533836, - "grad_norm": 0.0077989003621041775, - "learning_rate": 0.00019999435692192975, - "loss": 46.0, - "step": 21009 - }, - { - "epoch": 3.383509803132171, - "grad_norm": 0.006215512752532959, - "learning_rate": 0.00019999435638446043, - "loss": 46.0, - "step": 21010 - }, - { - "epoch": 3.3836708402109585, - "grad_norm": 0.0038109011948108673, - "learning_rate": 0.0001999943558469655, - "loss": 46.0, - "step": 21011 - }, - { - "epoch": 3.383831877289746, - "grad_norm": 0.00961876567453146, - "learning_rate": 0.00019999435530944498, - "loss": 46.0, - "step": 21012 - }, - { - "epoch": 3.3839929143685334, - "grad_norm": 0.0007203230634331703, - "learning_rate": 0.00019999435477189884, - "loss": 46.0, - "step": 21013 - }, - { - "epoch": 3.384153951447321, - "grad_norm": 0.005657418165355921, - "learning_rate": 0.00019999435423432714, - "loss": 46.0, - "step": 21014 - }, - { - "epoch": 3.384314988526108, - "grad_norm": 0.004697679076343775, - "learning_rate": 0.00019999435369672983, - "loss": 46.0, - "step": 21015 - }, - { - "epoch": 3.3844760256048954, - "grad_norm": 0.005938044749200344, - "learning_rate": 0.0001999943531591069, - "loss": 46.0, - "step": 21016 - }, - { - "epoch": 3.384637062683683, - "grad_norm": 0.0028380025178194046, - "learning_rate": 0.0001999943526214584, - "loss": 46.0, - "step": 21017 - }, - { - "epoch": 3.3847980997624703, - "grad_norm": 0.003145261202007532, - "learning_rate": 0.00019999435208378432, - "loss": 46.0, - "step": 21018 - }, - { - "epoch": 3.3849591368412577, - "grad_norm": 0.006757540628314018, - "learning_rate": 0.00019999435154608467, - "loss": 46.0, - "step": 21019 - }, - { - "epoch": 3.385120173920045, - "grad_norm": 0.010384135879576206, - "learning_rate": 0.00019999435100835936, - "loss": 46.0, - "step": 21020 - }, - { - "epoch": 3.3852812109988326, - "grad_norm": 0.006183552090078592, - "learning_rate": 0.0001999943504706085, - "loss": 46.0, - "step": 21021 - }, - { - "epoch": 3.38544224807762, - "grad_norm": 0.0012846965109929442, - "learning_rate": 0.00019999434993283203, - "loss": 46.0, - "step": 21022 - }, - { - "epoch": 3.385603285156407, - "grad_norm": 0.00730533292517066, - "learning_rate": 0.00019999434939503, - "loss": 46.0, - "step": 21023 - }, - { - "epoch": 3.3857643222351945, - "grad_norm": 0.005410051438957453, - "learning_rate": 0.00019999434885720235, - "loss": 46.0, - "step": 21024 - }, - { - "epoch": 3.385925359313982, - "grad_norm": 0.0018060830188915133, - "learning_rate": 0.00019999434831934908, - "loss": 46.0, - "step": 21025 - }, - { - "epoch": 3.3860863963927694, - "grad_norm": 0.009844711050391197, - "learning_rate": 0.00019999434778147026, - "loss": 46.0, - "step": 21026 - }, - { - "epoch": 3.386247433471557, - "grad_norm": 0.0045937164686620235, - "learning_rate": 0.00019999434724356582, - "loss": 46.0, - "step": 21027 - }, - { - "epoch": 3.3864084705503443, - "grad_norm": 0.0038391142152249813, - "learning_rate": 0.00019999434670563582, - "loss": 46.0, - "step": 21028 - }, - { - "epoch": 3.386569507629132, - "grad_norm": 0.0021910294890403748, - "learning_rate": 0.00019999434616768018, - "loss": 46.0, - "step": 21029 - }, - { - "epoch": 3.386730544707919, - "grad_norm": 0.007108280900865793, - "learning_rate": 0.00019999434562969896, - "loss": 46.0, - "step": 21030 - }, - { - "epoch": 3.3868915817867062, - "grad_norm": 0.007247003726661205, - "learning_rate": 0.00019999434509169217, - "loss": 46.0, - "step": 21031 - }, - { - "epoch": 3.3870526188654937, - "grad_norm": 0.004079220816493034, - "learning_rate": 0.00019999434455365977, - "loss": 46.0, - "step": 21032 - }, - { - "epoch": 3.387213655944281, - "grad_norm": 0.0011850569862872362, - "learning_rate": 0.00019999434401560178, - "loss": 46.0, - "step": 21033 - }, - { - "epoch": 3.3873746930230686, - "grad_norm": 0.003189290640875697, - "learning_rate": 0.0001999943434775182, - "loss": 46.0, - "step": 21034 - }, - { - "epoch": 3.387535730101856, - "grad_norm": 0.0027853238862007856, - "learning_rate": 0.00019999434293940902, - "loss": 46.0, - "step": 21035 - }, - { - "epoch": 3.3876967671806435, - "grad_norm": 0.0017952574416995049, - "learning_rate": 0.00019999434240127424, - "loss": 46.0, - "step": 21036 - }, - { - "epoch": 3.387857804259431, - "grad_norm": 0.0017393914749845862, - "learning_rate": 0.00019999434186311388, - "loss": 46.0, - "step": 21037 - }, - { - "epoch": 3.388018841338218, - "grad_norm": 0.008094176650047302, - "learning_rate": 0.00019999434132492793, - "loss": 46.0, - "step": 21038 - }, - { - "epoch": 3.3881798784170054, - "grad_norm": 0.010397003963589668, - "learning_rate": 0.00019999434078671636, - "loss": 46.0, - "step": 21039 - }, - { - "epoch": 3.388340915495793, - "grad_norm": 0.001662988099269569, - "learning_rate": 0.00019999434024847924, - "loss": 46.0, - "step": 21040 - }, - { - "epoch": 3.3885019525745803, - "grad_norm": 0.0037286649458110332, - "learning_rate": 0.0001999943397102165, - "loss": 46.0, - "step": 21041 - }, - { - "epoch": 3.3886629896533678, - "grad_norm": 0.002169365994632244, - "learning_rate": 0.00019999433917192817, - "loss": 46.0, - "step": 21042 - }, - { - "epoch": 3.388824026732155, - "grad_norm": 0.010649395175278187, - "learning_rate": 0.00019999433863361423, - "loss": 46.0, - "step": 21043 - }, - { - "epoch": 3.3889850638109422, - "grad_norm": 0.004098736215382814, - "learning_rate": 0.00019999433809527473, - "loss": 46.0, - "step": 21044 - }, - { - "epoch": 3.3891461008897297, - "grad_norm": 0.002906162291765213, - "learning_rate": 0.00019999433755690959, - "loss": 46.0, - "step": 21045 - }, - { - "epoch": 3.389307137968517, - "grad_norm": 0.0025292462669312954, - "learning_rate": 0.0001999943370185189, - "loss": 46.0, - "step": 21046 - }, - { - "epoch": 3.3894681750473046, - "grad_norm": 0.0036772822495549917, - "learning_rate": 0.0001999943364801026, - "loss": 46.0, - "step": 21047 - }, - { - "epoch": 3.389629212126092, - "grad_norm": 0.0025302080903202295, - "learning_rate": 0.00019999433594166072, - "loss": 46.0, - "step": 21048 - }, - { - "epoch": 3.3897902492048795, - "grad_norm": 0.011087237857282162, - "learning_rate": 0.00019999433540319323, - "loss": 46.0, - "step": 21049 - }, - { - "epoch": 3.389951286283667, - "grad_norm": 0.003570837900042534, - "learning_rate": 0.00019999433486470017, - "loss": 46.0, - "step": 21050 - }, - { - "epoch": 3.3901123233624544, - "grad_norm": 0.005096086300909519, - "learning_rate": 0.00019999433432618145, - "loss": 46.0, - "step": 21051 - }, - { - "epoch": 3.390273360441242, - "grad_norm": 0.011205860413610935, - "learning_rate": 0.0001999943337876372, - "loss": 46.0, - "step": 21052 - }, - { - "epoch": 3.390434397520029, - "grad_norm": 0.0014744886429980397, - "learning_rate": 0.00019999433324906736, - "loss": 46.0, - "step": 21053 - }, - { - "epoch": 3.3905954345988163, - "grad_norm": 0.0060256351716816425, - "learning_rate": 0.0001999943327104719, - "loss": 46.0, - "step": 21054 - }, - { - "epoch": 3.3907564716776037, - "grad_norm": 0.0015035916585475206, - "learning_rate": 0.00019999433217185084, - "loss": 46.0, - "step": 21055 - }, - { - "epoch": 3.390917508756391, - "grad_norm": 0.0009834583615884185, - "learning_rate": 0.0001999943316332042, - "loss": 46.0, - "step": 21056 - }, - { - "epoch": 3.3910785458351786, - "grad_norm": 0.0048959203995764256, - "learning_rate": 0.000199994331094532, - "loss": 46.0, - "step": 21057 - }, - { - "epoch": 3.391239582913966, - "grad_norm": 0.002037177560850978, - "learning_rate": 0.00019999433055583414, - "loss": 46.0, - "step": 21058 - }, - { - "epoch": 3.391400619992753, - "grad_norm": 0.0025556283071637154, - "learning_rate": 0.00019999433001711072, - "loss": 46.0, - "step": 21059 - }, - { - "epoch": 3.3915616570715406, - "grad_norm": 0.012726373039186, - "learning_rate": 0.00019999432947836172, - "loss": 46.0, - "step": 21060 - }, - { - "epoch": 3.391722694150328, - "grad_norm": 0.0016494044102728367, - "learning_rate": 0.00019999432893958713, - "loss": 46.0, - "step": 21061 - }, - { - "epoch": 3.3918837312291155, - "grad_norm": 0.006000559311360121, - "learning_rate": 0.00019999432840078692, - "loss": 46.0, - "step": 21062 - }, - { - "epoch": 3.392044768307903, - "grad_norm": 0.003741178661584854, - "learning_rate": 0.0001999943278619611, - "loss": 46.0, - "step": 21063 - }, - { - "epoch": 3.3922058053866904, - "grad_norm": 0.002221198519691825, - "learning_rate": 0.00019999432732310975, - "loss": 46.0, - "step": 21064 - }, - { - "epoch": 3.392366842465478, - "grad_norm": 0.0015539692249149084, - "learning_rate": 0.00019999432678423275, - "loss": 46.0, - "step": 21065 - }, - { - "epoch": 3.3925278795442653, - "grad_norm": 0.0028189888689666986, - "learning_rate": 0.0001999943262453302, - "loss": 46.0, - "step": 21066 - }, - { - "epoch": 3.3926889166230523, - "grad_norm": 0.004286462441086769, - "learning_rate": 0.00019999432570640203, - "loss": 46.0, - "step": 21067 - }, - { - "epoch": 3.3928499537018397, - "grad_norm": 0.002745330799371004, - "learning_rate": 0.00019999432516744828, - "loss": 46.0, - "step": 21068 - }, - { - "epoch": 3.393010990780627, - "grad_norm": 0.009761913679540157, - "learning_rate": 0.0001999943246284689, - "loss": 46.0, - "step": 21069 - }, - { - "epoch": 3.3931720278594146, - "grad_norm": 0.009391088038682938, - "learning_rate": 0.00019999432408946395, - "loss": 46.0, - "step": 21070 - }, - { - "epoch": 3.393333064938202, - "grad_norm": 0.005231519229710102, - "learning_rate": 0.0001999943235504334, - "loss": 46.0, - "step": 21071 - }, - { - "epoch": 3.3934941020169895, - "grad_norm": 0.0017369450069963932, - "learning_rate": 0.00019999432301137727, - "loss": 46.0, - "step": 21072 - }, - { - "epoch": 3.393655139095777, - "grad_norm": 0.0015287279384210706, - "learning_rate": 0.00019999432247229555, - "loss": 46.0, - "step": 21073 - }, - { - "epoch": 3.393816176174564, - "grad_norm": 0.005664782132953405, - "learning_rate": 0.00019999432193318822, - "loss": 46.0, - "step": 21074 - }, - { - "epoch": 3.3939772132533514, - "grad_norm": 0.00287757464684546, - "learning_rate": 0.00019999432139405533, - "loss": 46.0, - "step": 21075 - }, - { - "epoch": 3.394138250332139, - "grad_norm": 0.00043558908510021865, - "learning_rate": 0.0001999943208548968, - "loss": 46.0, - "step": 21076 - }, - { - "epoch": 3.3942992874109263, - "grad_norm": 0.006284990813583136, - "learning_rate": 0.00019999432031571273, - "loss": 46.0, - "step": 21077 - }, - { - "epoch": 3.394460324489714, - "grad_norm": 0.0016005446668714285, - "learning_rate": 0.00019999431977650302, - "loss": 46.0, - "step": 21078 - }, - { - "epoch": 3.3946213615685012, - "grad_norm": 0.005038704257458448, - "learning_rate": 0.00019999431923726772, - "loss": 46.0, - "step": 21079 - }, - { - "epoch": 3.3947823986472887, - "grad_norm": 0.0013049077242612839, - "learning_rate": 0.00019999431869800684, - "loss": 46.0, - "step": 21080 - }, - { - "epoch": 3.394943435726076, - "grad_norm": 0.0030376403592526913, - "learning_rate": 0.00019999431815872037, - "loss": 46.0, - "step": 21081 - }, - { - "epoch": 3.395104472804863, - "grad_norm": 0.001290869782678783, - "learning_rate": 0.0001999943176194083, - "loss": 46.0, - "step": 21082 - }, - { - "epoch": 3.3952655098836506, - "grad_norm": 0.0034194362815469503, - "learning_rate": 0.00019999431708007064, - "loss": 46.0, - "step": 21083 - }, - { - "epoch": 3.395426546962438, - "grad_norm": 0.0015584477223455906, - "learning_rate": 0.0001999943165407074, - "loss": 46.0, - "step": 21084 - }, - { - "epoch": 3.3955875840412255, - "grad_norm": 0.008205071091651917, - "learning_rate": 0.00019999431600131856, - "loss": 46.0, - "step": 21085 - }, - { - "epoch": 3.395748621120013, - "grad_norm": 0.005005842074751854, - "learning_rate": 0.00019999431546190407, - "loss": 46.0, - "step": 21086 - }, - { - "epoch": 3.3959096581988004, - "grad_norm": 0.00037571846041828394, - "learning_rate": 0.00019999431492246408, - "loss": 46.0, - "step": 21087 - }, - { - "epoch": 3.3960706952775874, - "grad_norm": 0.0031257993541657925, - "learning_rate": 0.00019999431438299844, - "loss": 46.0, - "step": 21088 - }, - { - "epoch": 3.396231732356375, - "grad_norm": 0.002363610314205289, - "learning_rate": 0.0001999943138435072, - "loss": 46.0, - "step": 21089 - }, - { - "epoch": 3.3963927694351623, - "grad_norm": 0.004282078705728054, - "learning_rate": 0.00019999431330399041, - "loss": 46.0, - "step": 21090 - }, - { - "epoch": 3.39655380651395, - "grad_norm": 0.004170003812760115, - "learning_rate": 0.000199994312764448, - "loss": 46.0, - "step": 21091 - }, - { - "epoch": 3.3967148435927372, - "grad_norm": 0.004156829323619604, - "learning_rate": 0.00019999431222487998, - "loss": 46.0, - "step": 21092 - }, - { - "epoch": 3.3968758806715247, - "grad_norm": 0.0017569194315001369, - "learning_rate": 0.00019999431168528638, - "loss": 46.0, - "step": 21093 - }, - { - "epoch": 3.397036917750312, - "grad_norm": 0.009720665402710438, - "learning_rate": 0.0001999943111456672, - "loss": 46.0, - "step": 21094 - }, - { - "epoch": 3.3971979548290996, - "grad_norm": 0.0007093818858265877, - "learning_rate": 0.00019999431060602243, - "loss": 46.0, - "step": 21095 - }, - { - "epoch": 3.397358991907887, - "grad_norm": 0.002441916149109602, - "learning_rate": 0.00019999431006635204, - "loss": 46.0, - "step": 21096 - }, - { - "epoch": 3.397520028986674, - "grad_norm": 0.003565569408237934, - "learning_rate": 0.00019999430952665607, - "loss": 46.0, - "step": 21097 - }, - { - "epoch": 3.3976810660654615, - "grad_norm": 0.0024843630380928516, - "learning_rate": 0.0001999943089869345, - "loss": 46.0, - "step": 21098 - }, - { - "epoch": 3.397842103144249, - "grad_norm": 0.0024970900267362595, - "learning_rate": 0.00019999430844718736, - "loss": 46.0, - "step": 21099 - }, - { - "epoch": 3.3980031402230364, - "grad_norm": 0.00679752416908741, - "learning_rate": 0.0001999943079074146, - "loss": 46.0, - "step": 21100 - }, - { - "epoch": 3.398164177301824, - "grad_norm": 0.0013325853506103158, - "learning_rate": 0.00019999430736761627, - "loss": 46.0, - "step": 21101 - }, - { - "epoch": 3.3983252143806113, - "grad_norm": 0.0015867049805819988, - "learning_rate": 0.0001999943068277923, - "loss": 46.0, - "step": 21102 - }, - { - "epoch": 3.3984862514593983, - "grad_norm": 0.002462285803630948, - "learning_rate": 0.0001999943062879428, - "loss": 46.0, - "step": 21103 - }, - { - "epoch": 3.3986472885381858, - "grad_norm": 0.0008760459604673088, - "learning_rate": 0.00019999430574806768, - "loss": 46.0, - "step": 21104 - }, - { - "epoch": 3.398808325616973, - "grad_norm": 0.008111543022096157, - "learning_rate": 0.00019999430520816695, - "loss": 46.0, - "step": 21105 - }, - { - "epoch": 3.3989693626957607, - "grad_norm": 0.0011252565309405327, - "learning_rate": 0.00019999430466824064, - "loss": 46.0, - "step": 21106 - }, - { - "epoch": 3.399130399774548, - "grad_norm": 0.0054007177241146564, - "learning_rate": 0.00019999430412828874, - "loss": 46.0, - "step": 21107 - }, - { - "epoch": 3.3992914368533356, - "grad_norm": 0.0022789279464632273, - "learning_rate": 0.00019999430358831125, - "loss": 46.0, - "step": 21108 - }, - { - "epoch": 3.399452473932123, - "grad_norm": 0.0034397232811897993, - "learning_rate": 0.00019999430304830815, - "loss": 46.0, - "step": 21109 - }, - { - "epoch": 3.3996135110109105, - "grad_norm": 0.002536230254918337, - "learning_rate": 0.00019999430250827946, - "loss": 46.0, - "step": 21110 - }, - { - "epoch": 3.3997745480896975, - "grad_norm": 0.0027080022264271975, - "learning_rate": 0.0001999943019682252, - "loss": 46.0, - "step": 21111 - }, - { - "epoch": 3.399935585168485, - "grad_norm": 0.002194308442994952, - "learning_rate": 0.00019999430142814533, - "loss": 46.0, - "step": 21112 - }, - { - "epoch": 3.4000966222472724, - "grad_norm": 0.0021683378145098686, - "learning_rate": 0.00019999430088803988, - "loss": 46.0, - "step": 21113 - }, - { - "epoch": 3.40025765932606, - "grad_norm": 0.0029717704746872187, - "learning_rate": 0.0001999943003479088, - "loss": 46.0, - "step": 21114 - }, - { - "epoch": 3.4004186964048473, - "grad_norm": 0.001527405926026404, - "learning_rate": 0.00019999429980775216, - "loss": 46.0, - "step": 21115 - }, - { - "epoch": 3.4005797334836347, - "grad_norm": 0.002910199109464884, - "learning_rate": 0.0001999942992675699, - "loss": 46.0, - "step": 21116 - }, - { - "epoch": 3.400740770562422, - "grad_norm": 0.004136513452976942, - "learning_rate": 0.00019999429872736207, - "loss": 46.0, - "step": 21117 - }, - { - "epoch": 3.400901807641209, - "grad_norm": 0.007301343604922295, - "learning_rate": 0.00019999429818712866, - "loss": 46.0, - "step": 21118 - }, - { - "epoch": 3.4010628447199966, - "grad_norm": 0.008149239234626293, - "learning_rate": 0.0001999942976468696, - "loss": 46.0, - "step": 21119 - }, - { - "epoch": 3.401223881798784, - "grad_norm": 0.0015666043618693948, - "learning_rate": 0.000199994297106585, - "loss": 46.0, - "step": 21120 - }, - { - "epoch": 3.4013849188775716, - "grad_norm": 0.019910618662834167, - "learning_rate": 0.0001999942965662748, - "loss": 46.0, - "step": 21121 - }, - { - "epoch": 3.401545955956359, - "grad_norm": 0.0013587672729045153, - "learning_rate": 0.00019999429602593897, - "loss": 46.0, - "step": 21122 - }, - { - "epoch": 3.4017069930351465, - "grad_norm": 0.0022173745092004538, - "learning_rate": 0.00019999429548557757, - "loss": 46.0, - "step": 21123 - }, - { - "epoch": 3.401868030113934, - "grad_norm": 0.008212946355342865, - "learning_rate": 0.0001999942949451906, - "loss": 46.0, - "step": 21124 - }, - { - "epoch": 3.4020290671927214, - "grad_norm": 0.0037013136316090822, - "learning_rate": 0.000199994294404778, - "loss": 46.0, - "step": 21125 - }, - { - "epoch": 3.4021901042715084, - "grad_norm": 0.0040328605100512505, - "learning_rate": 0.00019999429386433984, - "loss": 46.0, - "step": 21126 - }, - { - "epoch": 3.402351141350296, - "grad_norm": 0.0009341384284198284, - "learning_rate": 0.00019999429332387604, - "loss": 46.0, - "step": 21127 - }, - { - "epoch": 3.4025121784290833, - "grad_norm": 0.006971676833927631, - "learning_rate": 0.0001999942927833867, - "loss": 46.0, - "step": 21128 - }, - { - "epoch": 3.4026732155078707, - "grad_norm": 0.007393034175038338, - "learning_rate": 0.00019999429224287174, - "loss": 46.0, - "step": 21129 - }, - { - "epoch": 3.402834252586658, - "grad_norm": 0.006465106271207333, - "learning_rate": 0.0001999942917023312, - "loss": 46.0, - "step": 21130 - }, - { - "epoch": 3.4029952896654456, - "grad_norm": 0.001826530322432518, - "learning_rate": 0.00019999429116176502, - "loss": 46.0, - "step": 21131 - }, - { - "epoch": 3.4031563267442326, - "grad_norm": 0.0026242146268486977, - "learning_rate": 0.0001999942906211733, - "loss": 46.0, - "step": 21132 - }, - { - "epoch": 3.40331736382302, - "grad_norm": 0.0050507173873484135, - "learning_rate": 0.00019999429008055598, - "loss": 46.0, - "step": 21133 - }, - { - "epoch": 3.4034784009018075, - "grad_norm": 0.00919990986585617, - "learning_rate": 0.00019999428953991304, - "loss": 46.0, - "step": 21134 - }, - { - "epoch": 3.403639437980595, - "grad_norm": 0.010222394950687885, - "learning_rate": 0.0001999942889992445, - "loss": 46.0, - "step": 21135 - }, - { - "epoch": 3.4038004750593824, - "grad_norm": 0.0015832679346203804, - "learning_rate": 0.0001999942884585504, - "loss": 46.0, - "step": 21136 - }, - { - "epoch": 3.40396151213817, - "grad_norm": 0.00965300016105175, - "learning_rate": 0.00019999428791783071, - "loss": 46.0, - "step": 21137 - }, - { - "epoch": 3.4041225492169573, - "grad_norm": 0.001704067224636674, - "learning_rate": 0.00019999428737708542, - "loss": 46.0, - "step": 21138 - }, - { - "epoch": 3.404283586295745, - "grad_norm": 0.0012747389264404774, - "learning_rate": 0.00019999428683631452, - "loss": 46.0, - "step": 21139 - }, - { - "epoch": 3.404444623374532, - "grad_norm": 0.0014449880691245198, - "learning_rate": 0.00019999428629551802, - "loss": 46.0, - "step": 21140 - }, - { - "epoch": 3.4046056604533192, - "grad_norm": 0.0006542811752296984, - "learning_rate": 0.00019999428575469594, - "loss": 46.0, - "step": 21141 - }, - { - "epoch": 3.4047666975321067, - "grad_norm": 0.0023396213073283434, - "learning_rate": 0.00019999428521384828, - "loss": 46.0, - "step": 21142 - }, - { - "epoch": 3.404927734610894, - "grad_norm": 0.001497275079600513, - "learning_rate": 0.000199994284672975, - "loss": 46.0, - "step": 21143 - }, - { - "epoch": 3.4050887716896816, - "grad_norm": 0.0011699807364493608, - "learning_rate": 0.00019999428413207615, - "loss": 46.0, - "step": 21144 - }, - { - "epoch": 3.405249808768469, - "grad_norm": 0.005118026863783598, - "learning_rate": 0.0001999942835911517, - "loss": 46.0, - "step": 21145 - }, - { - "epoch": 3.4054108458472565, - "grad_norm": 0.0187362190335989, - "learning_rate": 0.00019999428305020165, - "loss": 46.0, - "step": 21146 - }, - { - "epoch": 3.4055718829260435, - "grad_norm": 0.008424741216003895, - "learning_rate": 0.000199994282509226, - "loss": 46.0, - "step": 21147 - }, - { - "epoch": 3.405732920004831, - "grad_norm": 0.0019717966206371784, - "learning_rate": 0.00019999428196822478, - "loss": 46.0, - "step": 21148 - }, - { - "epoch": 3.4058939570836184, - "grad_norm": 0.010692927986383438, - "learning_rate": 0.00019999428142719795, - "loss": 46.0, - "step": 21149 - }, - { - "epoch": 3.406054994162406, - "grad_norm": 0.0025357073172926903, - "learning_rate": 0.00019999428088614553, - "loss": 46.0, - "step": 21150 - }, - { - "epoch": 3.4062160312411933, - "grad_norm": 0.0007546897977590561, - "learning_rate": 0.00019999428034506752, - "loss": 46.0, - "step": 21151 - }, - { - "epoch": 3.4063770683199808, - "grad_norm": 0.0032554753124713898, - "learning_rate": 0.00019999427980396393, - "loss": 46.0, - "step": 21152 - }, - { - "epoch": 3.4065381053987682, - "grad_norm": 0.002403033198788762, - "learning_rate": 0.0001999942792628347, - "loss": 46.0, - "step": 21153 - }, - { - "epoch": 3.4066991424775557, - "grad_norm": 0.007734985090792179, - "learning_rate": 0.00019999427872167993, - "loss": 46.0, - "step": 21154 - }, - { - "epoch": 3.4068601795563427, - "grad_norm": 0.006565115414559841, - "learning_rate": 0.00019999427818049955, - "loss": 46.0, - "step": 21155 - }, - { - "epoch": 3.40702121663513, - "grad_norm": 0.005572181195020676, - "learning_rate": 0.00019999427763929355, - "loss": 46.0, - "step": 21156 - }, - { - "epoch": 3.4071822537139176, - "grad_norm": 0.0022491810377687216, - "learning_rate": 0.00019999427709806197, - "loss": 46.0, - "step": 21157 - }, - { - "epoch": 3.407343290792705, - "grad_norm": 0.0040094442665576935, - "learning_rate": 0.0001999942765568048, - "loss": 46.0, - "step": 21158 - }, - { - "epoch": 3.4075043278714925, - "grad_norm": 0.012897714972496033, - "learning_rate": 0.00019999427601552204, - "loss": 46.0, - "step": 21159 - }, - { - "epoch": 3.40766536495028, - "grad_norm": 0.004199519287794828, - "learning_rate": 0.0001999942754742137, - "loss": 46.0, - "step": 21160 - }, - { - "epoch": 3.407826402029067, - "grad_norm": 0.0031029866077005863, - "learning_rate": 0.00019999427493287974, - "loss": 46.0, - "step": 21161 - }, - { - "epoch": 3.4079874391078544, - "grad_norm": 0.003604508936405182, - "learning_rate": 0.00019999427439152022, - "loss": 46.0, - "step": 21162 - }, - { - "epoch": 3.408148476186642, - "grad_norm": 0.0038238721899688244, - "learning_rate": 0.00019999427385013508, - "loss": 46.0, - "step": 21163 - }, - { - "epoch": 3.4083095132654293, - "grad_norm": 0.0029934379272162914, - "learning_rate": 0.00019999427330872434, - "loss": 46.0, - "step": 21164 - }, - { - "epoch": 3.4084705503442168, - "grad_norm": 0.004953837022185326, - "learning_rate": 0.00019999427276728803, - "loss": 46.0, - "step": 21165 - }, - { - "epoch": 3.408631587423004, - "grad_norm": 0.00626890454441309, - "learning_rate": 0.0001999942722258261, - "loss": 46.0, - "step": 21166 - }, - { - "epoch": 3.4087926245017917, - "grad_norm": 0.0042636338621377945, - "learning_rate": 0.0001999942716843386, - "loss": 46.0, - "step": 21167 - }, - { - "epoch": 3.408953661580579, - "grad_norm": 0.0049273790791630745, - "learning_rate": 0.0001999942711428255, - "loss": 46.0, - "step": 21168 - }, - { - "epoch": 3.4091146986593666, - "grad_norm": 0.007492925971746445, - "learning_rate": 0.00019999427060128682, - "loss": 46.0, - "step": 21169 - }, - { - "epoch": 3.4092757357381536, - "grad_norm": 0.005819031503051519, - "learning_rate": 0.00019999427005972252, - "loss": 46.0, - "step": 21170 - }, - { - "epoch": 3.409436772816941, - "grad_norm": 0.0044487239792943, - "learning_rate": 0.00019999426951813263, - "loss": 46.0, - "step": 21171 - }, - { - "epoch": 3.4095978098957285, - "grad_norm": 0.0006704427069053054, - "learning_rate": 0.00019999426897651716, - "loss": 46.0, - "step": 21172 - }, - { - "epoch": 3.409758846974516, - "grad_norm": 0.0037129633128643036, - "learning_rate": 0.0001999942684348761, - "loss": 46.0, - "step": 21173 - }, - { - "epoch": 3.4099198840533034, - "grad_norm": 0.004275023937225342, - "learning_rate": 0.00019999426789320943, - "loss": 46.0, - "step": 21174 - }, - { - "epoch": 3.410080921132091, - "grad_norm": 0.0008190679363906384, - "learning_rate": 0.00019999426735151717, - "loss": 46.0, - "step": 21175 - }, - { - "epoch": 3.410241958210878, - "grad_norm": 0.0014929737662896514, - "learning_rate": 0.00019999426680979935, - "loss": 46.0, - "step": 21176 - }, - { - "epoch": 3.4104029952896653, - "grad_norm": 0.008549555204808712, - "learning_rate": 0.00019999426626805589, - "loss": 46.0, - "step": 21177 - }, - { - "epoch": 3.4105640323684527, - "grad_norm": 0.0017408184939995408, - "learning_rate": 0.00019999426572628686, - "loss": 46.0, - "step": 21178 - }, - { - "epoch": 3.41072506944724, - "grad_norm": 0.0015750967431813478, - "learning_rate": 0.00019999426518449223, - "loss": 46.0, - "step": 21179 - }, - { - "epoch": 3.4108861065260276, - "grad_norm": 0.0037548651453107595, - "learning_rate": 0.000199994264642672, - "loss": 46.0, - "step": 21180 - }, - { - "epoch": 3.411047143604815, - "grad_norm": 0.0021783686242997646, - "learning_rate": 0.00019999426410082617, - "loss": 46.0, - "step": 21181 - }, - { - "epoch": 3.4112081806836025, - "grad_norm": 0.013517848215997219, - "learning_rate": 0.00019999426355895477, - "loss": 46.0, - "step": 21182 - }, - { - "epoch": 3.41136921776239, - "grad_norm": 0.008675516583025455, - "learning_rate": 0.00019999426301705778, - "loss": 46.0, - "step": 21183 - }, - { - "epoch": 3.411530254841177, - "grad_norm": 0.006291860248893499, - "learning_rate": 0.00019999426247513518, - "loss": 46.0, - "step": 21184 - }, - { - "epoch": 3.4116912919199645, - "grad_norm": 0.002247094875201583, - "learning_rate": 0.00019999426193318697, - "loss": 46.0, - "step": 21185 - }, - { - "epoch": 3.411852328998752, - "grad_norm": 0.00042392892646603286, - "learning_rate": 0.0001999942613912132, - "loss": 46.0, - "step": 21186 - }, - { - "epoch": 3.4120133660775394, - "grad_norm": 0.003107220632955432, - "learning_rate": 0.00019999426084921383, - "loss": 46.0, - "step": 21187 - }, - { - "epoch": 3.412174403156327, - "grad_norm": 0.008849323727190495, - "learning_rate": 0.00019999426030718886, - "loss": 46.0, - "step": 21188 - }, - { - "epoch": 3.4123354402351143, - "grad_norm": 0.006033473648130894, - "learning_rate": 0.0001999942597651383, - "loss": 46.0, - "step": 21189 - }, - { - "epoch": 3.4124964773139017, - "grad_norm": 0.0020630371291190386, - "learning_rate": 0.00019999425922306215, - "loss": 46.0, - "step": 21190 - }, - { - "epoch": 3.4126575143926887, - "grad_norm": 0.0033785656560212374, - "learning_rate": 0.0001999942586809604, - "loss": 46.0, - "step": 21191 - }, - { - "epoch": 3.412818551471476, - "grad_norm": 0.013755476102232933, - "learning_rate": 0.00019999425813883303, - "loss": 46.0, - "step": 21192 - }, - { - "epoch": 3.4129795885502636, - "grad_norm": 0.006385887041687965, - "learning_rate": 0.00019999425759668012, - "loss": 46.0, - "step": 21193 - }, - { - "epoch": 3.413140625629051, - "grad_norm": 0.002654741518199444, - "learning_rate": 0.0001999942570545016, - "loss": 46.0, - "step": 21194 - }, - { - "epoch": 3.4133016627078385, - "grad_norm": 0.003198145655915141, - "learning_rate": 0.00019999425651229746, - "loss": 46.0, - "step": 21195 - }, - { - "epoch": 3.413462699786626, - "grad_norm": 0.0029647655319422483, - "learning_rate": 0.00019999425597006773, - "loss": 46.0, - "step": 21196 - }, - { - "epoch": 3.4136237368654134, - "grad_norm": 0.013421309180557728, - "learning_rate": 0.00019999425542781242, - "loss": 46.0, - "step": 21197 - }, - { - "epoch": 3.413784773944201, - "grad_norm": 0.0011526071466505527, - "learning_rate": 0.00019999425488553154, - "loss": 46.0, - "step": 21198 - }, - { - "epoch": 3.413945811022988, - "grad_norm": 0.015555518679320812, - "learning_rate": 0.00019999425434322503, - "loss": 46.0, - "step": 21199 - }, - { - "epoch": 3.4141068481017753, - "grad_norm": 0.002551355864852667, - "learning_rate": 0.00019999425380089295, - "loss": 46.0, - "step": 21200 - }, - { - "epoch": 3.414267885180563, - "grad_norm": 0.0026627839542925358, - "learning_rate": 0.00019999425325853526, - "loss": 46.0, - "step": 21201 - }, - { - "epoch": 3.4144289222593502, - "grad_norm": 0.0009539287420921028, - "learning_rate": 0.00019999425271615198, - "loss": 46.0, - "step": 21202 - }, - { - "epoch": 3.4145899593381377, - "grad_norm": 0.0022598677314817905, - "learning_rate": 0.0001999942521737431, - "loss": 46.0, - "step": 21203 - }, - { - "epoch": 3.414750996416925, - "grad_norm": 0.0080317547544837, - "learning_rate": 0.00019999425163130864, - "loss": 46.0, - "step": 21204 - }, - { - "epoch": 3.414912033495712, - "grad_norm": 0.011915184557437897, - "learning_rate": 0.0001999942510888486, - "loss": 46.0, - "step": 21205 - }, - { - "epoch": 3.4150730705744996, - "grad_norm": 0.005996193736791611, - "learning_rate": 0.00019999425054636295, - "loss": 46.0, - "step": 21206 - }, - { - "epoch": 3.415234107653287, - "grad_norm": 0.004526139236986637, - "learning_rate": 0.0001999942500038517, - "loss": 46.0, - "step": 21207 - }, - { - "epoch": 3.4153951447320745, - "grad_norm": 0.004340293817222118, - "learning_rate": 0.00019999424946131486, - "loss": 46.0, - "step": 21208 - }, - { - "epoch": 3.415556181810862, - "grad_norm": 0.02035890333354473, - "learning_rate": 0.00019999424891875241, - "loss": 46.0, - "step": 21209 - }, - { - "epoch": 3.4157172188896494, - "grad_norm": 0.0010484060039743781, - "learning_rate": 0.0001999942483761644, - "loss": 46.0, - "step": 21210 - }, - { - "epoch": 3.415878255968437, - "grad_norm": 0.0016059390036389232, - "learning_rate": 0.0001999942478335508, - "loss": 46.0, - "step": 21211 - }, - { - "epoch": 3.4160392930472243, - "grad_norm": 0.009394737891852856, - "learning_rate": 0.0001999942472909116, - "loss": 46.0, - "step": 21212 - }, - { - "epoch": 3.4162003301260118, - "grad_norm": 0.0021858173422515392, - "learning_rate": 0.00019999424674824678, - "loss": 46.0, - "step": 21213 - }, - { - "epoch": 3.4163613672047988, - "grad_norm": 0.0031701961997896433, - "learning_rate": 0.00019999424620555637, - "loss": 46.0, - "step": 21214 - }, - { - "epoch": 3.4165224042835862, - "grad_norm": 0.00033398676896467805, - "learning_rate": 0.0001999942456628404, - "loss": 46.0, - "step": 21215 - }, - { - "epoch": 3.4166834413623737, - "grad_norm": 0.0027331404853612185, - "learning_rate": 0.0001999942451200988, - "loss": 46.0, - "step": 21216 - }, - { - "epoch": 3.416844478441161, - "grad_norm": 0.0019151372835040092, - "learning_rate": 0.00019999424457733164, - "loss": 46.0, - "step": 21217 - }, - { - "epoch": 3.4170055155199486, - "grad_norm": 0.006675058975815773, - "learning_rate": 0.00019999424403453886, - "loss": 46.0, - "step": 21218 - }, - { - "epoch": 3.417166552598736, - "grad_norm": 0.002675826195627451, - "learning_rate": 0.0001999942434917205, - "loss": 46.0, - "step": 21219 - }, - { - "epoch": 3.417327589677523, - "grad_norm": 0.005070195533335209, - "learning_rate": 0.00019999424294887653, - "loss": 46.0, - "step": 21220 - }, - { - "epoch": 3.4174886267563105, - "grad_norm": 0.0016517506446689367, - "learning_rate": 0.00019999424240600697, - "loss": 46.0, - "step": 21221 - }, - { - "epoch": 3.417649663835098, - "grad_norm": 0.0018827365711331367, - "learning_rate": 0.0001999942418631118, - "loss": 46.0, - "step": 21222 - }, - { - "epoch": 3.4178107009138854, - "grad_norm": 0.002622930333018303, - "learning_rate": 0.0001999942413201911, - "loss": 46.0, - "step": 21223 - }, - { - "epoch": 3.417971737992673, - "grad_norm": 0.006782460492104292, - "learning_rate": 0.0001999942407772448, - "loss": 46.0, - "step": 21224 - }, - { - "epoch": 3.4181327750714603, - "grad_norm": 0.0010855470318347216, - "learning_rate": 0.00019999424023427285, - "loss": 46.0, - "step": 21225 - }, - { - "epoch": 3.4182938121502477, - "grad_norm": 0.0012147071538493037, - "learning_rate": 0.00019999423969127534, - "loss": 46.0, - "step": 21226 - }, - { - "epoch": 3.418454849229035, - "grad_norm": 0.010797152295708656, - "learning_rate": 0.0001999942391482522, - "loss": 46.0, - "step": 21227 - }, - { - "epoch": 3.418615886307822, - "grad_norm": 0.002079980680719018, - "learning_rate": 0.0001999942386052035, - "loss": 46.0, - "step": 21228 - }, - { - "epoch": 3.4187769233866097, - "grad_norm": 0.0034605904947966337, - "learning_rate": 0.0001999942380621292, - "loss": 46.0, - "step": 21229 - }, - { - "epoch": 3.418937960465397, - "grad_norm": 0.008832719177007675, - "learning_rate": 0.00019999423751902932, - "loss": 46.0, - "step": 21230 - }, - { - "epoch": 3.4190989975441846, - "grad_norm": 0.0039038187824189663, - "learning_rate": 0.00019999423697590382, - "loss": 46.0, - "step": 21231 - }, - { - "epoch": 3.419260034622972, - "grad_norm": 0.0018081951420754194, - "learning_rate": 0.00019999423643275274, - "loss": 46.0, - "step": 21232 - }, - { - "epoch": 3.4194210717017595, - "grad_norm": 0.0006106903892941773, - "learning_rate": 0.00019999423588957607, - "loss": 46.0, - "step": 21233 - }, - { - "epoch": 3.419582108780547, - "grad_norm": 0.001498400466516614, - "learning_rate": 0.00019999423534637381, - "loss": 46.0, - "step": 21234 - }, - { - "epoch": 3.419743145859334, - "grad_norm": 0.0012723600957542658, - "learning_rate": 0.00019999423480314597, - "loss": 46.0, - "step": 21235 - }, - { - "epoch": 3.4199041829381214, - "grad_norm": 0.004716400522738695, - "learning_rate": 0.0001999942342598925, - "loss": 46.0, - "step": 21236 - }, - { - "epoch": 3.420065220016909, - "grad_norm": 0.014160579070448875, - "learning_rate": 0.00019999423371661344, - "loss": 46.0, - "step": 21237 - }, - { - "epoch": 3.4202262570956963, - "grad_norm": 0.014337912201881409, - "learning_rate": 0.0001999942331733088, - "loss": 46.0, - "step": 21238 - }, - { - "epoch": 3.4203872941744837, - "grad_norm": 0.0017312603304162621, - "learning_rate": 0.0001999942326299786, - "loss": 46.0, - "step": 21239 - }, - { - "epoch": 3.420548331253271, - "grad_norm": 0.0028110118582844734, - "learning_rate": 0.00019999423208662276, - "loss": 46.0, - "step": 21240 - }, - { - "epoch": 3.4207093683320586, - "grad_norm": 0.000721524003893137, - "learning_rate": 0.00019999423154324136, - "loss": 46.0, - "step": 21241 - }, - { - "epoch": 3.420870405410846, - "grad_norm": 0.007434132043272257, - "learning_rate": 0.00019999423099983433, - "loss": 46.0, - "step": 21242 - }, - { - "epoch": 3.421031442489633, - "grad_norm": 0.00933587271720171, - "learning_rate": 0.0001999942304564017, - "loss": 46.0, - "step": 21243 - }, - { - "epoch": 3.4211924795684205, - "grad_norm": 0.014096463099122047, - "learning_rate": 0.00019999422991294352, - "loss": 46.0, - "step": 21244 - }, - { - "epoch": 3.421353516647208, - "grad_norm": 0.003654714673757553, - "learning_rate": 0.00019999422936945973, - "loss": 46.0, - "step": 21245 - }, - { - "epoch": 3.4215145537259954, - "grad_norm": 0.004328264854848385, - "learning_rate": 0.00019999422882595034, - "loss": 46.0, - "step": 21246 - }, - { - "epoch": 3.421675590804783, - "grad_norm": 0.005616902373731136, - "learning_rate": 0.00019999422828241537, - "loss": 46.0, - "step": 21247 - }, - { - "epoch": 3.4218366278835703, - "grad_norm": 0.0061208028346300125, - "learning_rate": 0.0001999942277388548, - "loss": 46.0, - "step": 21248 - }, - { - "epoch": 3.4219976649623574, - "grad_norm": 0.008044317364692688, - "learning_rate": 0.00019999422719526862, - "loss": 46.0, - "step": 21249 - }, - { - "epoch": 3.422158702041145, - "grad_norm": 0.006132552865892649, - "learning_rate": 0.00019999422665165686, - "loss": 46.0, - "step": 21250 - }, - { - "epoch": 3.4223197391199323, - "grad_norm": 0.009378168731927872, - "learning_rate": 0.0001999942261080195, - "loss": 46.0, - "step": 21251 - }, - { - "epoch": 3.4224807761987197, - "grad_norm": 0.003203070955350995, - "learning_rate": 0.00019999422556435655, - "loss": 46.0, - "step": 21252 - }, - { - "epoch": 3.422641813277507, - "grad_norm": 0.0010591407772153616, - "learning_rate": 0.00019999422502066803, - "loss": 46.0, - "step": 21253 - }, - { - "epoch": 3.4228028503562946, - "grad_norm": 0.003611624240875244, - "learning_rate": 0.0001999942244769539, - "loss": 46.0, - "step": 21254 - }, - { - "epoch": 3.422963887435082, - "grad_norm": 0.0016722215805202723, - "learning_rate": 0.00019999422393321417, - "loss": 46.0, - "step": 21255 - }, - { - "epoch": 3.4231249245138695, - "grad_norm": 0.0010098578641191125, - "learning_rate": 0.00019999422338944884, - "loss": 46.0, - "step": 21256 - }, - { - "epoch": 3.4232859615926565, - "grad_norm": 0.003147292183712125, - "learning_rate": 0.00019999422284565794, - "loss": 46.0, - "step": 21257 - }, - { - "epoch": 3.423446998671444, - "grad_norm": 0.0037701742257922888, - "learning_rate": 0.00019999422230184143, - "loss": 46.0, - "step": 21258 - }, - { - "epoch": 3.4236080357502314, - "grad_norm": 0.0050891381688416, - "learning_rate": 0.0001999942217579993, - "loss": 46.0, - "step": 21259 - }, - { - "epoch": 3.423769072829019, - "grad_norm": 0.004334925673902035, - "learning_rate": 0.00019999422121413162, - "loss": 46.0, - "step": 21260 - }, - { - "epoch": 3.4239301099078063, - "grad_norm": 0.0013067360268905759, - "learning_rate": 0.00019999422067023834, - "loss": 46.0, - "step": 21261 - }, - { - "epoch": 3.424091146986594, - "grad_norm": 0.010193086229264736, - "learning_rate": 0.00019999422012631946, - "loss": 46.0, - "step": 21262 - }, - { - "epoch": 3.4242521840653812, - "grad_norm": 0.009990700520575047, - "learning_rate": 0.00019999421958237498, - "loss": 46.0, - "step": 21263 - }, - { - "epoch": 3.4244132211441682, - "grad_norm": 0.0032884918618947268, - "learning_rate": 0.00019999421903840492, - "loss": 46.0, - "step": 21264 - }, - { - "epoch": 3.4245742582229557, - "grad_norm": 0.0024800316896289587, - "learning_rate": 0.00019999421849440925, - "loss": 46.0, - "step": 21265 - }, - { - "epoch": 3.424735295301743, - "grad_norm": 0.0038425729144364595, - "learning_rate": 0.000199994217950388, - "loss": 46.0, - "step": 21266 - }, - { - "epoch": 3.4248963323805306, - "grad_norm": 0.000754939392209053, - "learning_rate": 0.00019999421740634116, - "loss": 46.0, - "step": 21267 - }, - { - "epoch": 3.425057369459318, - "grad_norm": 0.0014265953795984387, - "learning_rate": 0.00019999421686226873, - "loss": 46.0, - "step": 21268 - }, - { - "epoch": 3.4252184065381055, - "grad_norm": 0.0008738969336263835, - "learning_rate": 0.00019999421631817067, - "loss": 46.0, - "step": 21269 - }, - { - "epoch": 3.425379443616893, - "grad_norm": 0.0017767586978152394, - "learning_rate": 0.00019999421577404704, - "loss": 46.0, - "step": 21270 - }, - { - "epoch": 3.4255404806956804, - "grad_norm": 0.0005299150943756104, - "learning_rate": 0.00019999421522989784, - "loss": 46.0, - "step": 21271 - }, - { - "epoch": 3.4257015177744674, - "grad_norm": 0.005189400166273117, - "learning_rate": 0.00019999421468572303, - "loss": 46.0, - "step": 21272 - }, - { - "epoch": 3.425862554853255, - "grad_norm": 0.008974839001893997, - "learning_rate": 0.00019999421414152263, - "loss": 46.0, - "step": 21273 - }, - { - "epoch": 3.4260235919320423, - "grad_norm": 0.00281995115801692, - "learning_rate": 0.0001999942135972966, - "loss": 46.0, - "step": 21274 - }, - { - "epoch": 3.4261846290108298, - "grad_norm": 0.0035095063503831625, - "learning_rate": 0.000199994213053045, - "loss": 46.0, - "step": 21275 - }, - { - "epoch": 3.426345666089617, - "grad_norm": 0.002488999627530575, - "learning_rate": 0.00019999421250876785, - "loss": 46.0, - "step": 21276 - }, - { - "epoch": 3.4265067031684047, - "grad_norm": 0.00954401958733797, - "learning_rate": 0.00019999421196446507, - "loss": 46.0, - "step": 21277 - }, - { - "epoch": 3.4266677402471917, - "grad_norm": 0.006119216792285442, - "learning_rate": 0.00019999421142013666, - "loss": 46.0, - "step": 21278 - }, - { - "epoch": 3.426828777325979, - "grad_norm": 0.001271423534490168, - "learning_rate": 0.0001999942108757827, - "loss": 46.0, - "step": 21279 - }, - { - "epoch": 3.4269898144047666, - "grad_norm": 0.0037192278541624546, - "learning_rate": 0.00019999421033140314, - "loss": 46.0, - "step": 21280 - }, - { - "epoch": 3.427150851483554, - "grad_norm": 0.008798657916486263, - "learning_rate": 0.000199994209786998, - "loss": 46.0, - "step": 21281 - }, - { - "epoch": 3.4273118885623415, - "grad_norm": 0.0012121403124183416, - "learning_rate": 0.00019999420924256725, - "loss": 46.0, - "step": 21282 - }, - { - "epoch": 3.427472925641129, - "grad_norm": 0.002615212230011821, - "learning_rate": 0.0001999942086981109, - "loss": 46.0, - "step": 21283 - }, - { - "epoch": 3.4276339627199164, - "grad_norm": 0.005895392037928104, - "learning_rate": 0.00019999420815362896, - "loss": 46.0, - "step": 21284 - }, - { - "epoch": 3.427794999798704, - "grad_norm": 0.0015287752030417323, - "learning_rate": 0.00019999420760912143, - "loss": 46.0, - "step": 21285 - }, - { - "epoch": 3.4279560368774913, - "grad_norm": 0.0014868566067889333, - "learning_rate": 0.00019999420706458832, - "loss": 46.0, - "step": 21286 - }, - { - "epoch": 3.4281170739562783, - "grad_norm": 0.0016907043755054474, - "learning_rate": 0.0001999942065200296, - "loss": 46.0, - "step": 21287 - }, - { - "epoch": 3.4282781110350657, - "grad_norm": 0.007533952593803406, - "learning_rate": 0.0001999942059754453, - "loss": 46.0, - "step": 21288 - }, - { - "epoch": 3.428439148113853, - "grad_norm": 0.0011690924875438213, - "learning_rate": 0.00019999420543083537, - "loss": 46.0, - "step": 21289 - }, - { - "epoch": 3.4286001851926406, - "grad_norm": 0.007620951626449823, - "learning_rate": 0.00019999420488619988, - "loss": 46.0, - "step": 21290 - }, - { - "epoch": 3.428761222271428, - "grad_norm": 0.005403009708970785, - "learning_rate": 0.0001999942043415388, - "loss": 46.0, - "step": 21291 - }, - { - "epoch": 3.4289222593502156, - "grad_norm": 0.005006282590329647, - "learning_rate": 0.0001999942037968521, - "loss": 46.0, - "step": 21292 - }, - { - "epoch": 3.4290832964290026, - "grad_norm": 0.00840732455253601, - "learning_rate": 0.00019999420325213983, - "loss": 46.0, - "step": 21293 - }, - { - "epoch": 3.42924433350779, - "grad_norm": 0.004210444167256355, - "learning_rate": 0.000199994202707402, - "loss": 46.0, - "step": 21294 - }, - { - "epoch": 3.4294053705865775, - "grad_norm": 0.00911111943423748, - "learning_rate": 0.00019999420216263851, - "loss": 46.0, - "step": 21295 - }, - { - "epoch": 3.429566407665365, - "grad_norm": 0.0019327015615999699, - "learning_rate": 0.00019999420161784945, - "loss": 46.0, - "step": 21296 - }, - { - "epoch": 3.4297274447441524, - "grad_norm": 0.002551912795752287, - "learning_rate": 0.00019999420107303482, - "loss": 46.0, - "step": 21297 - }, - { - "epoch": 3.42988848182294, - "grad_norm": 0.0005681714392267168, - "learning_rate": 0.00019999420052819455, - "loss": 46.0, - "step": 21298 - }, - { - "epoch": 3.4300495189017273, - "grad_norm": 0.004439387936145067, - "learning_rate": 0.00019999419998332872, - "loss": 46.0, - "step": 21299 - }, - { - "epoch": 3.4302105559805147, - "grad_norm": 0.002708984771743417, - "learning_rate": 0.00019999419943843728, - "loss": 46.0, - "step": 21300 - }, - { - "epoch": 3.4303715930593017, - "grad_norm": 0.0018625356024131179, - "learning_rate": 0.0001999941988935203, - "loss": 46.0, - "step": 21301 - }, - { - "epoch": 3.430532630138089, - "grad_norm": 0.007309530396014452, - "learning_rate": 0.00019999419834857766, - "loss": 46.0, - "step": 21302 - }, - { - "epoch": 3.4306936672168766, - "grad_norm": 0.001809528679586947, - "learning_rate": 0.00019999419780360945, - "loss": 46.0, - "step": 21303 - }, - { - "epoch": 3.430854704295664, - "grad_norm": 0.00235823355615139, - "learning_rate": 0.00019999419725861563, - "loss": 46.0, - "step": 21304 - }, - { - "epoch": 3.4310157413744515, - "grad_norm": 0.003477443940937519, - "learning_rate": 0.00019999419671359625, - "loss": 46.0, - "step": 21305 - }, - { - "epoch": 3.431176778453239, - "grad_norm": 0.005481565371155739, - "learning_rate": 0.00019999419616855126, - "loss": 46.0, - "step": 21306 - }, - { - "epoch": 3.4313378155320264, - "grad_norm": 0.003387550823390484, - "learning_rate": 0.00019999419562348068, - "loss": 46.0, - "step": 21307 - }, - { - "epoch": 3.4314988526108134, - "grad_norm": 0.001911480096168816, - "learning_rate": 0.0001999941950783845, - "loss": 46.0, - "step": 21308 - }, - { - "epoch": 3.431659889689601, - "grad_norm": 0.0013350839726626873, - "learning_rate": 0.00019999419453326273, - "loss": 46.0, - "step": 21309 - }, - { - "epoch": 3.4318209267683883, - "grad_norm": 0.004663375671952963, - "learning_rate": 0.00019999419398811536, - "loss": 46.0, - "step": 21310 - }, - { - "epoch": 3.431981963847176, - "grad_norm": 0.008582983165979385, - "learning_rate": 0.0001999941934429424, - "loss": 46.0, - "step": 21311 - }, - { - "epoch": 3.4321430009259632, - "grad_norm": 0.00385684659704566, - "learning_rate": 0.0001999941928977439, - "loss": 46.0, - "step": 21312 - }, - { - "epoch": 3.4323040380047507, - "grad_norm": 0.0014047992881387472, - "learning_rate": 0.00019999419235251973, - "loss": 46.0, - "step": 21313 - }, - { - "epoch": 3.432465075083538, - "grad_norm": 0.005968023557215929, - "learning_rate": 0.00019999419180727, - "loss": 46.0, - "step": 21314 - }, - { - "epoch": 3.4326261121623256, - "grad_norm": 0.004191408399492502, - "learning_rate": 0.00019999419126199466, - "loss": 46.0, - "step": 21315 - }, - { - "epoch": 3.4327871492411126, - "grad_norm": 0.003994282800704241, - "learning_rate": 0.00019999419071669374, - "loss": 46.0, - "step": 21316 - }, - { - "epoch": 3.4329481863199, - "grad_norm": 0.011227629147469997, - "learning_rate": 0.0001999941901713672, - "loss": 46.0, - "step": 21317 - }, - { - "epoch": 3.4331092233986875, - "grad_norm": 0.005359116476029158, - "learning_rate": 0.0001999941896260151, - "loss": 46.0, - "step": 21318 - }, - { - "epoch": 3.433270260477475, - "grad_norm": 0.0013284857850521803, - "learning_rate": 0.0001999941890806374, - "loss": 46.0, - "step": 21319 - }, - { - "epoch": 3.4334312975562624, - "grad_norm": 0.00040370863280259073, - "learning_rate": 0.0001999941885352341, - "loss": 46.0, - "step": 21320 - }, - { - "epoch": 3.43359233463505, - "grad_norm": 0.009429258294403553, - "learning_rate": 0.0001999941879898052, - "loss": 46.0, - "step": 21321 - }, - { - "epoch": 3.433753371713837, - "grad_norm": 0.0114449979737401, - "learning_rate": 0.00019999418744435073, - "loss": 46.0, - "step": 21322 - }, - { - "epoch": 3.4339144087926243, - "grad_norm": 0.0015785489231348038, - "learning_rate": 0.00019999418689887067, - "loss": 46.0, - "step": 21323 - }, - { - "epoch": 3.434075445871412, - "grad_norm": 0.0022865927312523127, - "learning_rate": 0.00019999418635336497, - "loss": 46.0, - "step": 21324 - }, - { - "epoch": 3.4342364829501992, - "grad_norm": 0.004068242851644754, - "learning_rate": 0.00019999418580783372, - "loss": 46.0, - "step": 21325 - }, - { - "epoch": 3.4343975200289867, - "grad_norm": 0.006889275275170803, - "learning_rate": 0.00019999418526227687, - "loss": 46.0, - "step": 21326 - }, - { - "epoch": 3.434558557107774, - "grad_norm": 0.005282598081976175, - "learning_rate": 0.0001999941847166944, - "loss": 46.0, - "step": 21327 - }, - { - "epoch": 3.4347195941865616, - "grad_norm": 0.011786254122853279, - "learning_rate": 0.00019999418417108636, - "loss": 46.0, - "step": 21328 - }, - { - "epoch": 3.434880631265349, - "grad_norm": 0.0010525865945965052, - "learning_rate": 0.00019999418362545273, - "loss": 46.0, - "step": 21329 - }, - { - "epoch": 3.435041668344136, - "grad_norm": 0.01248264592140913, - "learning_rate": 0.0001999941830797935, - "loss": 46.0, - "step": 21330 - }, - { - "epoch": 3.4352027054229235, - "grad_norm": 0.0005735012236982584, - "learning_rate": 0.00019999418253410867, - "loss": 46.0, - "step": 21331 - }, - { - "epoch": 3.435363742501711, - "grad_norm": 0.00212243665009737, - "learning_rate": 0.00019999418198839828, - "loss": 46.0, - "step": 21332 - }, - { - "epoch": 3.4355247795804984, - "grad_norm": 0.0075389849953353405, - "learning_rate": 0.00019999418144266224, - "loss": 46.0, - "step": 21333 - }, - { - "epoch": 3.435685816659286, - "grad_norm": 0.002500149654224515, - "learning_rate": 0.00019999418089690062, - "loss": 46.0, - "step": 21334 - }, - { - "epoch": 3.4358468537380733, - "grad_norm": 0.008484416641294956, - "learning_rate": 0.00019999418035111343, - "loss": 46.0, - "step": 21335 - }, - { - "epoch": 3.4360078908168608, - "grad_norm": 0.0014663385227322578, - "learning_rate": 0.00019999417980530066, - "loss": 46.0, - "step": 21336 - }, - { - "epoch": 3.4361689278956478, - "grad_norm": 0.0013378283474594355, - "learning_rate": 0.00019999417925946228, - "loss": 46.0, - "step": 21337 - }, - { - "epoch": 3.436329964974435, - "grad_norm": 0.008930770680308342, - "learning_rate": 0.00019999417871359828, - "loss": 46.0, - "step": 21338 - }, - { - "epoch": 3.4364910020532227, - "grad_norm": 0.0026706643402576447, - "learning_rate": 0.00019999417816770875, - "loss": 46.0, - "step": 21339 - }, - { - "epoch": 3.43665203913201, - "grad_norm": 0.0024902811273932457, - "learning_rate": 0.00019999417762179354, - "loss": 46.0, - "step": 21340 - }, - { - "epoch": 3.4368130762107976, - "grad_norm": 0.005879633594304323, - "learning_rate": 0.00019999417707585278, - "loss": 46.0, - "step": 21341 - }, - { - "epoch": 3.436974113289585, - "grad_norm": 0.003570025088265538, - "learning_rate": 0.00019999417652988646, - "loss": 46.0, - "step": 21342 - }, - { - "epoch": 3.4371351503683725, - "grad_norm": 0.00975044071674347, - "learning_rate": 0.0001999941759838945, - "loss": 46.0, - "step": 21343 - }, - { - "epoch": 3.43729618744716, - "grad_norm": 0.0015033313538879156, - "learning_rate": 0.00019999417543787695, - "loss": 46.0, - "step": 21344 - }, - { - "epoch": 3.437457224525947, - "grad_norm": 0.0007017470779828727, - "learning_rate": 0.00019999417489183384, - "loss": 46.0, - "step": 21345 - }, - { - "epoch": 3.4376182616047344, - "grad_norm": 0.0010633855126798153, - "learning_rate": 0.00019999417434576512, - "loss": 46.0, - "step": 21346 - }, - { - "epoch": 3.437779298683522, - "grad_norm": 0.001470943447202444, - "learning_rate": 0.0001999941737996708, - "loss": 46.0, - "step": 21347 - }, - { - "epoch": 3.4379403357623093, - "grad_norm": 0.010627185925841331, - "learning_rate": 0.0001999941732535509, - "loss": 46.0, - "step": 21348 - }, - { - "epoch": 3.4381013728410967, - "grad_norm": 0.0013186817523092031, - "learning_rate": 0.00019999417270740537, - "loss": 46.0, - "step": 21349 - }, - { - "epoch": 3.438262409919884, - "grad_norm": 0.0008128781919367611, - "learning_rate": 0.0001999941721612343, - "loss": 46.0, - "step": 21350 - }, - { - "epoch": 3.438423446998671, - "grad_norm": 0.0015869452618062496, - "learning_rate": 0.00019999417161503758, - "loss": 46.0, - "step": 21351 - }, - { - "epoch": 3.4385844840774586, - "grad_norm": 0.006966514978557825, - "learning_rate": 0.0001999941710688153, - "loss": 46.0, - "step": 21352 - }, - { - "epoch": 3.438745521156246, - "grad_norm": 0.0005937858950346708, - "learning_rate": 0.00019999417052256742, - "loss": 46.0, - "step": 21353 - }, - { - "epoch": 3.4389065582350336, - "grad_norm": 0.006463267374783754, - "learning_rate": 0.00019999416997629394, - "loss": 46.0, - "step": 21354 - }, - { - "epoch": 3.439067595313821, - "grad_norm": 0.002489438047632575, - "learning_rate": 0.0001999941694299949, - "loss": 46.0, - "step": 21355 - }, - { - "epoch": 3.4392286323926085, - "grad_norm": 0.007998412474989891, - "learning_rate": 0.00019999416888367023, - "loss": 46.0, - "step": 21356 - }, - { - "epoch": 3.439389669471396, - "grad_norm": 0.006187098566442728, - "learning_rate": 0.00019999416833731996, - "loss": 46.0, - "step": 21357 - }, - { - "epoch": 3.4395507065501834, - "grad_norm": 0.012148981913924217, - "learning_rate": 0.00019999416779094414, - "loss": 46.0, - "step": 21358 - }, - { - "epoch": 3.439711743628971, - "grad_norm": 0.02225356362760067, - "learning_rate": 0.0001999941672445427, - "loss": 46.0, - "step": 21359 - }, - { - "epoch": 3.439872780707758, - "grad_norm": 0.0018617149908095598, - "learning_rate": 0.00019999416669811565, - "loss": 46.0, - "step": 21360 - }, - { - "epoch": 3.4400338177865453, - "grad_norm": 0.004765899386256933, - "learning_rate": 0.000199994166151663, - "loss": 46.0, - "step": 21361 - }, - { - "epoch": 3.4401948548653327, - "grad_norm": 0.0018667754484340549, - "learning_rate": 0.00019999416560518484, - "loss": 46.0, - "step": 21362 - }, - { - "epoch": 3.44035589194412, - "grad_norm": 0.004824182949960232, - "learning_rate": 0.000199994165058681, - "loss": 46.0, - "step": 21363 - }, - { - "epoch": 3.4405169290229076, - "grad_norm": 0.0029847051482647657, - "learning_rate": 0.0001999941645121516, - "loss": 46.0, - "step": 21364 - }, - { - "epoch": 3.440677966101695, - "grad_norm": 0.004837886895984411, - "learning_rate": 0.0001999941639655966, - "loss": 46.0, - "step": 21365 - }, - { - "epoch": 3.440839003180482, - "grad_norm": 0.005056528374552727, - "learning_rate": 0.00019999416341901603, - "loss": 46.0, - "step": 21366 - }, - { - "epoch": 3.4410000402592695, - "grad_norm": 0.007738937623798847, - "learning_rate": 0.0001999941628724098, - "loss": 46.0, - "step": 21367 - }, - { - "epoch": 3.441161077338057, - "grad_norm": 0.0034253697376698256, - "learning_rate": 0.00019999416232577803, - "loss": 46.0, - "step": 21368 - }, - { - "epoch": 3.4413221144168444, - "grad_norm": 0.0038828575052320957, - "learning_rate": 0.00019999416177912067, - "loss": 46.0, - "step": 21369 - }, - { - "epoch": 3.441483151495632, - "grad_norm": 0.003389406716451049, - "learning_rate": 0.00019999416123243772, - "loss": 46.0, - "step": 21370 - }, - { - "epoch": 3.4416441885744193, - "grad_norm": 0.0033943778835237026, - "learning_rate": 0.00019999416068572915, - "loss": 46.0, - "step": 21371 - }, - { - "epoch": 3.441805225653207, - "grad_norm": 0.002610705094411969, - "learning_rate": 0.000199994160138995, - "loss": 46.0, - "step": 21372 - }, - { - "epoch": 3.4419662627319942, - "grad_norm": 0.0014638545690104365, - "learning_rate": 0.00019999415959223526, - "loss": 46.0, - "step": 21373 - }, - { - "epoch": 3.4421272998107812, - "grad_norm": 0.0014145808527246118, - "learning_rate": 0.0001999941590454499, - "loss": 46.0, - "step": 21374 - }, - { - "epoch": 3.4422883368895687, - "grad_norm": 0.010198313742876053, - "learning_rate": 0.000199994158498639, - "loss": 46.0, - "step": 21375 - }, - { - "epoch": 3.442449373968356, - "grad_norm": 0.011615744791924953, - "learning_rate": 0.00019999415795180246, - "loss": 46.0, - "step": 21376 - }, - { - "epoch": 3.4426104110471436, - "grad_norm": 0.008249531500041485, - "learning_rate": 0.00019999415740494034, - "loss": 46.0, - "step": 21377 - }, - { - "epoch": 3.442771448125931, - "grad_norm": 0.0031641407404094934, - "learning_rate": 0.0001999941568580526, - "loss": 46.0, - "step": 21378 - }, - { - "epoch": 3.4429324852047185, - "grad_norm": 0.008500061929225922, - "learning_rate": 0.00019999415631113932, - "loss": 46.0, - "step": 21379 - }, - { - "epoch": 3.443093522283506, - "grad_norm": 0.006334215868264437, - "learning_rate": 0.0001999941557642004, - "loss": 46.0, - "step": 21380 - }, - { - "epoch": 3.443254559362293, - "grad_norm": 0.0016233554342761636, - "learning_rate": 0.00019999415521723592, - "loss": 46.0, - "step": 21381 - }, - { - "epoch": 3.4434155964410804, - "grad_norm": 0.002812237711623311, - "learning_rate": 0.00019999415467024582, - "loss": 46.0, - "step": 21382 - }, - { - "epoch": 3.443576633519868, - "grad_norm": 0.0022897920571267605, - "learning_rate": 0.00019999415412323015, - "loss": 46.0, - "step": 21383 - }, - { - "epoch": 3.4437376705986553, - "grad_norm": 0.003015248803421855, - "learning_rate": 0.0001999941535761889, - "loss": 46.0, - "step": 21384 - }, - { - "epoch": 3.4438987076774428, - "grad_norm": 0.0016156338388100266, - "learning_rate": 0.000199994153029122, - "loss": 46.0, - "step": 21385 - }, - { - "epoch": 3.4440597447562302, - "grad_norm": 0.003549477318301797, - "learning_rate": 0.00019999415248202955, - "loss": 46.0, - "step": 21386 - }, - { - "epoch": 3.4442207818350177, - "grad_norm": 0.001294828369282186, - "learning_rate": 0.0001999941519349115, - "loss": 46.0, - "step": 21387 - }, - { - "epoch": 3.444381818913805, - "grad_norm": 0.0013177813962101936, - "learning_rate": 0.00019999415138776785, - "loss": 46.0, - "step": 21388 - }, - { - "epoch": 3.444542855992592, - "grad_norm": 0.001766753033734858, - "learning_rate": 0.0001999941508405986, - "loss": 46.0, - "step": 21389 - }, - { - "epoch": 3.4447038930713796, - "grad_norm": 0.004019097425043583, - "learning_rate": 0.00019999415029340378, - "loss": 46.0, - "step": 21390 - }, - { - "epoch": 3.444864930150167, - "grad_norm": 0.004501206334680319, - "learning_rate": 0.00019999414974618336, - "loss": 46.0, - "step": 21391 - }, - { - "epoch": 3.4450259672289545, - "grad_norm": 0.0019878707826137543, - "learning_rate": 0.00019999414919893733, - "loss": 46.0, - "step": 21392 - }, - { - "epoch": 3.445187004307742, - "grad_norm": 0.011708315461874008, - "learning_rate": 0.00019999414865166573, - "loss": 46.0, - "step": 21393 - }, - { - "epoch": 3.4453480413865294, - "grad_norm": 0.0007050794083625078, - "learning_rate": 0.00019999414810436853, - "loss": 46.0, - "step": 21394 - }, - { - "epoch": 3.4455090784653164, - "grad_norm": 0.0035162714775651693, - "learning_rate": 0.0001999941475570457, - "loss": 46.0, - "step": 21395 - }, - { - "epoch": 3.445670115544104, - "grad_norm": 0.002092215232551098, - "learning_rate": 0.00019999414700969733, - "loss": 46.0, - "step": 21396 - }, - { - "epoch": 3.4458311526228913, - "grad_norm": 0.004627359099686146, - "learning_rate": 0.00019999414646232336, - "loss": 46.0, - "step": 21397 - }, - { - "epoch": 3.4459921897016788, - "grad_norm": 0.0012548464583232999, - "learning_rate": 0.00019999414591492375, - "loss": 46.0, - "step": 21398 - }, - { - "epoch": 3.446153226780466, - "grad_norm": 0.0010451386915519834, - "learning_rate": 0.00019999414536749858, - "loss": 46.0, - "step": 21399 - }, - { - "epoch": 3.4463142638592537, - "grad_norm": 0.001638429588638246, - "learning_rate": 0.00019999414482004782, - "loss": 46.0, - "step": 21400 - }, - { - "epoch": 3.446475300938041, - "grad_norm": 0.0039406828582286835, - "learning_rate": 0.00019999414427257145, - "loss": 46.0, - "step": 21401 - }, - { - "epoch": 3.4466363380168286, - "grad_norm": 0.0031661326065659523, - "learning_rate": 0.0001999941437250695, - "loss": 46.0, - "step": 21402 - }, - { - "epoch": 3.446797375095616, - "grad_norm": 0.008524918928742409, - "learning_rate": 0.00019999414317754195, - "loss": 46.0, - "step": 21403 - }, - { - "epoch": 3.446958412174403, - "grad_norm": 0.00435027526691556, - "learning_rate": 0.00019999414262998882, - "loss": 46.0, - "step": 21404 - }, - { - "epoch": 3.4471194492531905, - "grad_norm": 0.001814637565985322, - "learning_rate": 0.00019999414208241007, - "loss": 46.0, - "step": 21405 - }, - { - "epoch": 3.447280486331978, - "grad_norm": 0.0016347779892385006, - "learning_rate": 0.00019999414153480576, - "loss": 46.0, - "step": 21406 - }, - { - "epoch": 3.4474415234107654, - "grad_norm": 0.0037243920378386974, - "learning_rate": 0.00019999414098717581, - "loss": 46.0, - "step": 21407 - }, - { - "epoch": 3.447602560489553, - "grad_norm": 0.003347005695104599, - "learning_rate": 0.0001999941404395203, - "loss": 46.0, - "step": 21408 - }, - { - "epoch": 3.4477635975683403, - "grad_norm": 0.0016809668159112334, - "learning_rate": 0.00019999413989183918, - "loss": 46.0, - "step": 21409 - }, - { - "epoch": 3.4479246346471273, - "grad_norm": 0.006221352610737085, - "learning_rate": 0.0001999941393441325, - "loss": 46.0, - "step": 21410 - }, - { - "epoch": 3.4480856717259147, - "grad_norm": 0.001083912793546915, - "learning_rate": 0.0001999941387964002, - "loss": 46.0, - "step": 21411 - }, - { - "epoch": 3.448246708804702, - "grad_norm": 0.012469722889363766, - "learning_rate": 0.00019999413824864232, - "loss": 46.0, - "step": 21412 - }, - { - "epoch": 3.4484077458834896, - "grad_norm": 0.002171517815440893, - "learning_rate": 0.00019999413770085885, - "loss": 46.0, - "step": 21413 - }, - { - "epoch": 3.448568782962277, - "grad_norm": 0.005600192584097385, - "learning_rate": 0.00019999413715304976, - "loss": 46.0, - "step": 21414 - }, - { - "epoch": 3.4487298200410645, - "grad_norm": 0.0004889547126367688, - "learning_rate": 0.00019999413660521512, - "loss": 46.0, - "step": 21415 - }, - { - "epoch": 3.448890857119852, - "grad_norm": 0.0048583378084003925, - "learning_rate": 0.00019999413605735483, - "loss": 46.0, - "step": 21416 - }, - { - "epoch": 3.4490518941986394, - "grad_norm": 0.0009475324186496437, - "learning_rate": 0.00019999413550946899, - "loss": 46.0, - "step": 21417 - }, - { - "epoch": 3.4492129312774265, - "grad_norm": 0.0048226118087768555, - "learning_rate": 0.00019999413496155755, - "loss": 46.0, - "step": 21418 - }, - { - "epoch": 3.449373968356214, - "grad_norm": 0.0025488710962235928, - "learning_rate": 0.0001999941344136205, - "loss": 46.0, - "step": 21419 - }, - { - "epoch": 3.4495350054350014, - "grad_norm": 0.004803691990673542, - "learning_rate": 0.00019999413386565787, - "loss": 46.0, - "step": 21420 - }, - { - "epoch": 3.449696042513789, - "grad_norm": 0.000948400585912168, - "learning_rate": 0.00019999413331766962, - "loss": 46.0, - "step": 21421 - }, - { - "epoch": 3.4498570795925763, - "grad_norm": 0.0008942551212385297, - "learning_rate": 0.0001999941327696558, - "loss": 46.0, - "step": 21422 - }, - { - "epoch": 3.4500181166713637, - "grad_norm": 0.0011097368551418185, - "learning_rate": 0.0001999941322216164, - "loss": 46.0, - "step": 21423 - }, - { - "epoch": 3.450179153750151, - "grad_norm": 0.0043246932327747345, - "learning_rate": 0.0001999941316735514, - "loss": 46.0, - "step": 21424 - }, - { - "epoch": 3.450340190828938, - "grad_norm": 0.004683345090597868, - "learning_rate": 0.0001999941311254608, - "loss": 46.0, - "step": 21425 - }, - { - "epoch": 3.4505012279077256, - "grad_norm": 0.0017776319291442633, - "learning_rate": 0.0001999941305773446, - "loss": 46.0, - "step": 21426 - }, - { - "epoch": 3.450662264986513, - "grad_norm": 0.008514122106134892, - "learning_rate": 0.0001999941300292028, - "loss": 46.0, - "step": 21427 - }, - { - "epoch": 3.4508233020653005, - "grad_norm": 0.0015808396274223924, - "learning_rate": 0.00019999412948103543, - "loss": 46.0, - "step": 21428 - }, - { - "epoch": 3.450984339144088, - "grad_norm": 0.0011398581555113196, - "learning_rate": 0.00019999412893284245, - "loss": 46.0, - "step": 21429 - }, - { - "epoch": 3.4511453762228754, - "grad_norm": 0.013294342905282974, - "learning_rate": 0.0001999941283846239, - "loss": 46.0, - "step": 21430 - }, - { - "epoch": 3.451306413301663, - "grad_norm": 0.005192117765545845, - "learning_rate": 0.00019999412783637972, - "loss": 46.0, - "step": 21431 - }, - { - "epoch": 3.4514674503804503, - "grad_norm": 0.0011622088495641947, - "learning_rate": 0.00019999412728810998, - "loss": 46.0, - "step": 21432 - }, - { - "epoch": 3.4516284874592373, - "grad_norm": 0.0026824085507541895, - "learning_rate": 0.0001999941267398146, - "loss": 46.0, - "step": 21433 - }, - { - "epoch": 3.451789524538025, - "grad_norm": 0.004787949845194817, - "learning_rate": 0.00019999412619149367, - "loss": 46.0, - "step": 21434 - }, - { - "epoch": 3.4519505616168122, - "grad_norm": 0.0019320844439789653, - "learning_rate": 0.00019999412564314714, - "loss": 46.0, - "step": 21435 - }, - { - "epoch": 3.4521115986955997, - "grad_norm": 0.0012487316271290183, - "learning_rate": 0.00019999412509477503, - "loss": 46.0, - "step": 21436 - }, - { - "epoch": 3.452272635774387, - "grad_norm": 0.002958685392513871, - "learning_rate": 0.0001999941245463773, - "loss": 46.0, - "step": 21437 - }, - { - "epoch": 3.4524336728531746, - "grad_norm": 0.001750450930558145, - "learning_rate": 0.000199994123997954, - "loss": 46.0, - "step": 21438 - }, - { - "epoch": 3.4525947099319616, - "grad_norm": 0.0037361423019319773, - "learning_rate": 0.0001999941234495051, - "loss": 46.0, - "step": 21439 - }, - { - "epoch": 3.452755747010749, - "grad_norm": 0.0035370357800275087, - "learning_rate": 0.00019999412290103058, - "loss": 46.0, - "step": 21440 - }, - { - "epoch": 3.4529167840895365, - "grad_norm": 0.008352371864020824, - "learning_rate": 0.00019999412235253047, - "loss": 46.0, - "step": 21441 - }, - { - "epoch": 3.453077821168324, - "grad_norm": 0.005913614295423031, - "learning_rate": 0.00019999412180400479, - "loss": 46.0, - "step": 21442 - }, - { - "epoch": 3.4532388582471114, - "grad_norm": 0.01322778407484293, - "learning_rate": 0.0001999941212554535, - "loss": 46.0, - "step": 21443 - }, - { - "epoch": 3.453399895325899, - "grad_norm": 0.003756898920983076, - "learning_rate": 0.00019999412070687665, - "loss": 46.0, - "step": 21444 - }, - { - "epoch": 3.4535609324046863, - "grad_norm": 0.009156583808362484, - "learning_rate": 0.00019999412015827417, - "loss": 46.0, - "step": 21445 - }, - { - "epoch": 3.4537219694834738, - "grad_norm": 0.005296287126839161, - "learning_rate": 0.0001999941196096461, - "loss": 46.0, - "step": 21446 - }, - { - "epoch": 3.4538830065622608, - "grad_norm": 0.0010049953125417233, - "learning_rate": 0.00019999411906099245, - "loss": 46.0, - "step": 21447 - }, - { - "epoch": 3.4540440436410482, - "grad_norm": 0.001782875508069992, - "learning_rate": 0.0001999941185123132, - "loss": 46.0, - "step": 21448 - }, - { - "epoch": 3.4542050807198357, - "grad_norm": 0.005515060853213072, - "learning_rate": 0.00019999411796360836, - "loss": 46.0, - "step": 21449 - }, - { - "epoch": 3.454366117798623, - "grad_norm": 0.0038124844431877136, - "learning_rate": 0.00019999411741487792, - "loss": 46.0, - "step": 21450 - }, - { - "epoch": 3.4545271548774106, - "grad_norm": 0.011111240833997726, - "learning_rate": 0.0001999941168661219, - "loss": 46.0, - "step": 21451 - }, - { - "epoch": 3.454688191956198, - "grad_norm": 0.013927358202636242, - "learning_rate": 0.00019999411631734025, - "loss": 46.0, - "step": 21452 - }, - { - "epoch": 3.4548492290349855, - "grad_norm": 0.003935371525585651, - "learning_rate": 0.00019999411576853305, - "loss": 46.0, - "step": 21453 - }, - { - "epoch": 3.4550102661137725, - "grad_norm": 0.01726355217397213, - "learning_rate": 0.00019999411521970023, - "loss": 46.0, - "step": 21454 - }, - { - "epoch": 3.45517130319256, - "grad_norm": 0.005137636326253414, - "learning_rate": 0.00019999411467084186, - "loss": 46.0, - "step": 21455 - }, - { - "epoch": 3.4553323402713474, - "grad_norm": 0.0037466150242835283, - "learning_rate": 0.00019999411412195787, - "loss": 46.0, - "step": 21456 - }, - { - "epoch": 3.455493377350135, - "grad_norm": 0.0023211603984236717, - "learning_rate": 0.00019999411357304826, - "loss": 46.0, - "step": 21457 - }, - { - "epoch": 3.4556544144289223, - "grad_norm": 0.011249782517552376, - "learning_rate": 0.0001999941130241131, - "loss": 46.0, - "step": 21458 - }, - { - "epoch": 3.4558154515077097, - "grad_norm": 0.007194583769887686, - "learning_rate": 0.00019999411247515232, - "loss": 46.0, - "step": 21459 - }, - { - "epoch": 3.455976488586497, - "grad_norm": 0.0006801488343626261, - "learning_rate": 0.00019999411192616595, - "loss": 46.0, - "step": 21460 - }, - { - "epoch": 3.4561375256652846, - "grad_norm": 0.004215317778289318, - "learning_rate": 0.00019999411137715397, - "loss": 46.0, - "step": 21461 - }, - { - "epoch": 3.4562985627440717, - "grad_norm": 0.00459174532443285, - "learning_rate": 0.00019999411082811643, - "loss": 46.0, - "step": 21462 - }, - { - "epoch": 3.456459599822859, - "grad_norm": 0.0021795067004859447, - "learning_rate": 0.00019999411027905327, - "loss": 46.0, - "step": 21463 - }, - { - "epoch": 3.4566206369016466, - "grad_norm": 0.004822002723813057, - "learning_rate": 0.00019999410972996453, - "loss": 46.0, - "step": 21464 - }, - { - "epoch": 3.456781673980434, - "grad_norm": 0.006667776964604855, - "learning_rate": 0.0001999941091808502, - "loss": 46.0, - "step": 21465 - }, - { - "epoch": 3.4569427110592215, - "grad_norm": 0.012152226641774178, - "learning_rate": 0.00019999410863171026, - "loss": 46.0, - "step": 21466 - }, - { - "epoch": 3.457103748138009, - "grad_norm": 0.0074287299066782, - "learning_rate": 0.00019999410808254476, - "loss": 46.0, - "step": 21467 - }, - { - "epoch": 3.457264785216796, - "grad_norm": 0.0028173604514449835, - "learning_rate": 0.0001999941075333536, - "loss": 46.0, - "step": 21468 - }, - { - "epoch": 3.4574258222955834, - "grad_norm": 0.0025672900956124067, - "learning_rate": 0.0001999941069841369, - "loss": 46.0, - "step": 21469 - }, - { - "epoch": 3.457586859374371, - "grad_norm": 0.002528794575482607, - "learning_rate": 0.0001999941064348946, - "loss": 46.0, - "step": 21470 - }, - { - "epoch": 3.4577478964531583, - "grad_norm": 0.0007392846746370196, - "learning_rate": 0.00019999410588562673, - "loss": 46.0, - "step": 21471 - }, - { - "epoch": 3.4579089335319457, - "grad_norm": 0.0010529011487960815, - "learning_rate": 0.00019999410533633324, - "loss": 46.0, - "step": 21472 - }, - { - "epoch": 3.458069970610733, - "grad_norm": 0.00391079718247056, - "learning_rate": 0.00019999410478701416, - "loss": 46.0, - "step": 21473 - }, - { - "epoch": 3.4582310076895206, - "grad_norm": 0.001630091923289001, - "learning_rate": 0.00019999410423766946, - "loss": 46.0, - "step": 21474 - }, - { - "epoch": 3.458392044768308, - "grad_norm": 0.0015246031107380986, - "learning_rate": 0.0001999941036882992, - "loss": 46.0, - "step": 21475 - }, - { - "epoch": 3.4585530818470955, - "grad_norm": 0.0031663798727095127, - "learning_rate": 0.00019999410313890334, - "loss": 46.0, - "step": 21476 - }, - { - "epoch": 3.4587141189258825, - "grad_norm": 0.012410926632583141, - "learning_rate": 0.00019999410258948188, - "loss": 46.0, - "step": 21477 - }, - { - "epoch": 3.45887515600467, - "grad_norm": 0.007828181609511375, - "learning_rate": 0.00019999410204003484, - "loss": 46.0, - "step": 21478 - }, - { - "epoch": 3.4590361930834574, - "grad_norm": 0.0018423181027173996, - "learning_rate": 0.00019999410149056218, - "loss": 46.0, - "step": 21479 - }, - { - "epoch": 3.459197230162245, - "grad_norm": 0.0009254949982278049, - "learning_rate": 0.00019999410094106396, - "loss": 46.0, - "step": 21480 - }, - { - "epoch": 3.4593582672410323, - "grad_norm": 0.0020012175664305687, - "learning_rate": 0.00019999410039154013, - "loss": 46.0, - "step": 21481 - }, - { - "epoch": 3.45951930431982, - "grad_norm": 0.000831014767754823, - "learning_rate": 0.00019999409984199068, - "loss": 46.0, - "step": 21482 - }, - { - "epoch": 3.459680341398607, - "grad_norm": 0.004109567031264305, - "learning_rate": 0.00019999409929241568, - "loss": 46.0, - "step": 21483 - }, - { - "epoch": 3.4598413784773943, - "grad_norm": 0.0009089052909985185, - "learning_rate": 0.00019999409874281508, - "loss": 46.0, - "step": 21484 - }, - { - "epoch": 3.4600024155561817, - "grad_norm": 0.011840258724987507, - "learning_rate": 0.00019999409819318887, - "loss": 46.0, - "step": 21485 - }, - { - "epoch": 3.460163452634969, - "grad_norm": 0.0074869440868496895, - "learning_rate": 0.00019999409764353708, - "loss": 46.0, - "step": 21486 - }, - { - "epoch": 3.4603244897137566, - "grad_norm": 0.00601461436599493, - "learning_rate": 0.00019999409709385967, - "loss": 46.0, - "step": 21487 - }, - { - "epoch": 3.460485526792544, - "grad_norm": 0.001918905065394938, - "learning_rate": 0.00019999409654415667, - "loss": 46.0, - "step": 21488 - }, - { - "epoch": 3.4606465638713315, - "grad_norm": 0.010926822200417519, - "learning_rate": 0.00019999409599442811, - "loss": 46.0, - "step": 21489 - }, - { - "epoch": 3.460807600950119, - "grad_norm": 0.0026856965851038694, - "learning_rate": 0.00019999409544467394, - "loss": 46.0, - "step": 21490 - }, - { - "epoch": 3.460968638028906, - "grad_norm": 0.004998111166059971, - "learning_rate": 0.00019999409489489419, - "loss": 46.0, - "step": 21491 - }, - { - "epoch": 3.4611296751076934, - "grad_norm": 0.005897205322980881, - "learning_rate": 0.0001999940943450888, - "loss": 46.0, - "step": 21492 - }, - { - "epoch": 3.461290712186481, - "grad_norm": 0.008031473495066166, - "learning_rate": 0.00019999409379525788, - "loss": 46.0, - "step": 21493 - }, - { - "epoch": 3.4614517492652683, - "grad_norm": 0.0031186360865831375, - "learning_rate": 0.00019999409324540133, - "loss": 46.0, - "step": 21494 - }, - { - "epoch": 3.461612786344056, - "grad_norm": 0.004655584692955017, - "learning_rate": 0.0001999940926955192, - "loss": 46.0, - "step": 21495 - }, - { - "epoch": 3.4617738234228432, - "grad_norm": 0.0033942044246941805, - "learning_rate": 0.00019999409214561148, - "loss": 46.0, - "step": 21496 - }, - { - "epoch": 3.4619348605016307, - "grad_norm": 0.00680797453969717, - "learning_rate": 0.00019999409159567817, - "loss": 46.0, - "step": 21497 - }, - { - "epoch": 3.4620958975804177, - "grad_norm": 0.00296114943921566, - "learning_rate": 0.00019999409104571925, - "loss": 46.0, - "step": 21498 - }, - { - "epoch": 3.462256934659205, - "grad_norm": 0.0053552561439573765, - "learning_rate": 0.0001999940904957347, - "loss": 46.0, - "step": 21499 - }, - { - "epoch": 3.4624179717379926, - "grad_norm": 0.0022706228774040937, - "learning_rate": 0.0001999940899457246, - "loss": 46.0, - "step": 21500 - }, - { - "epoch": 3.46257900881678, - "grad_norm": 0.004283626563847065, - "learning_rate": 0.0001999940893956889, - "loss": 46.0, - "step": 21501 - }, - { - "epoch": 3.4627400458955675, - "grad_norm": 0.003928447607904673, - "learning_rate": 0.00019999408884562763, - "loss": 46.0, - "step": 21502 - }, - { - "epoch": 3.462901082974355, - "grad_norm": 0.002243251074105501, - "learning_rate": 0.00019999408829554077, - "loss": 46.0, - "step": 21503 - }, - { - "epoch": 3.4630621200531424, - "grad_norm": 0.0020860950462520123, - "learning_rate": 0.00019999408774542827, - "loss": 46.0, - "step": 21504 - }, - { - "epoch": 3.46322315713193, - "grad_norm": 0.0032327515073120594, - "learning_rate": 0.0001999940871952902, - "loss": 46.0, - "step": 21505 - }, - { - "epoch": 3.463384194210717, - "grad_norm": 0.0012006328906863928, - "learning_rate": 0.00019999408664512656, - "loss": 46.0, - "step": 21506 - }, - { - "epoch": 3.4635452312895043, - "grad_norm": 0.005046685691922903, - "learning_rate": 0.00019999408609493727, - "loss": 46.0, - "step": 21507 - }, - { - "epoch": 3.4637062683682918, - "grad_norm": 0.000724576530046761, - "learning_rate": 0.00019999408554472243, - "loss": 46.0, - "step": 21508 - }, - { - "epoch": 3.463867305447079, - "grad_norm": 0.0018612323328852654, - "learning_rate": 0.000199994084994482, - "loss": 46.0, - "step": 21509 - }, - { - "epoch": 3.4640283425258667, - "grad_norm": 0.0018015929963439703, - "learning_rate": 0.00019999408444421594, - "loss": 46.0, - "step": 21510 - }, - { - "epoch": 3.464189379604654, - "grad_norm": 0.003694532671943307, - "learning_rate": 0.0001999940838939243, - "loss": 46.0, - "step": 21511 - }, - { - "epoch": 3.464350416683441, - "grad_norm": 0.0059257992543280125, - "learning_rate": 0.0001999940833436071, - "loss": 46.0, - "step": 21512 - }, - { - "epoch": 3.4645114537622286, - "grad_norm": 0.006660667713731527, - "learning_rate": 0.0001999940827932643, - "loss": 46.0, - "step": 21513 - }, - { - "epoch": 3.464672490841016, - "grad_norm": 0.010414987802505493, - "learning_rate": 0.0001999940822428959, - "loss": 46.0, - "step": 21514 - }, - { - "epoch": 3.4648335279198035, - "grad_norm": 0.002524890238419175, - "learning_rate": 0.00019999408169250185, - "loss": 46.0, - "step": 21515 - }, - { - "epoch": 3.464994564998591, - "grad_norm": 0.0014486188301816583, - "learning_rate": 0.00019999408114208228, - "loss": 46.0, - "step": 21516 - }, - { - "epoch": 3.4651556020773784, - "grad_norm": 0.002709099790081382, - "learning_rate": 0.00019999408059163707, - "loss": 46.0, - "step": 21517 - }, - { - "epoch": 3.465316639156166, - "grad_norm": 0.0012349470052868128, - "learning_rate": 0.0001999940800411663, - "loss": 46.0, - "step": 21518 - }, - { - "epoch": 3.4654776762349533, - "grad_norm": 0.003846916602924466, - "learning_rate": 0.00019999407949066993, - "loss": 46.0, - "step": 21519 - }, - { - "epoch": 3.4656387133137407, - "grad_norm": 0.0047247339971363544, - "learning_rate": 0.00019999407894014795, - "loss": 46.0, - "step": 21520 - }, - { - "epoch": 3.4657997503925277, - "grad_norm": 0.00406165886670351, - "learning_rate": 0.0001999940783896004, - "loss": 46.0, - "step": 21521 - }, - { - "epoch": 3.465960787471315, - "grad_norm": 0.0019498143810778856, - "learning_rate": 0.00019999407783902724, - "loss": 46.0, - "step": 21522 - }, - { - "epoch": 3.4661218245501026, - "grad_norm": 0.0012497896095737815, - "learning_rate": 0.00019999407728842847, - "loss": 46.0, - "step": 21523 - }, - { - "epoch": 3.46628286162889, - "grad_norm": 0.003848744323477149, - "learning_rate": 0.00019999407673780412, - "loss": 46.0, - "step": 21524 - }, - { - "epoch": 3.4664438987076776, - "grad_norm": 0.0031660227105021477, - "learning_rate": 0.0001999940761871542, - "loss": 46.0, - "step": 21525 - }, - { - "epoch": 3.466604935786465, - "grad_norm": 0.001507553388364613, - "learning_rate": 0.00019999407563647868, - "loss": 46.0, - "step": 21526 - }, - { - "epoch": 3.466765972865252, - "grad_norm": 0.002985989674925804, - "learning_rate": 0.00019999407508577754, - "loss": 46.0, - "step": 21527 - }, - { - "epoch": 3.4669270099440395, - "grad_norm": 0.003641576273366809, - "learning_rate": 0.00019999407453505082, - "loss": 46.0, - "step": 21528 - }, - { - "epoch": 3.467088047022827, - "grad_norm": 0.00188413355499506, - "learning_rate": 0.00019999407398429853, - "loss": 46.0, - "step": 21529 - }, - { - "epoch": 3.4672490841016144, - "grad_norm": 0.0012499837903305888, - "learning_rate": 0.0001999940734335206, - "loss": 46.0, - "step": 21530 - }, - { - "epoch": 3.467410121180402, - "grad_norm": 0.00933252926915884, - "learning_rate": 0.0001999940728827171, - "loss": 46.0, - "step": 21531 - }, - { - "epoch": 3.4675711582591893, - "grad_norm": 0.006399719975888729, - "learning_rate": 0.00019999407233188803, - "loss": 46.0, - "step": 21532 - }, - { - "epoch": 3.4677321953379767, - "grad_norm": 0.0053024268709123135, - "learning_rate": 0.00019999407178103334, - "loss": 46.0, - "step": 21533 - }, - { - "epoch": 3.467893232416764, - "grad_norm": 0.007915375754237175, - "learning_rate": 0.0001999940712301531, - "loss": 46.0, - "step": 21534 - }, - { - "epoch": 3.468054269495551, - "grad_norm": 0.0030044482555240393, - "learning_rate": 0.00019999407067924717, - "loss": 46.0, - "step": 21535 - }, - { - "epoch": 3.4682153065743386, - "grad_norm": 0.01124865934252739, - "learning_rate": 0.00019999407012831572, - "loss": 46.0, - "step": 21536 - }, - { - "epoch": 3.468376343653126, - "grad_norm": 0.00439730379730463, - "learning_rate": 0.00019999406957735866, - "loss": 46.0, - "step": 21537 - }, - { - "epoch": 3.4685373807319135, - "grad_norm": 0.0021364956628531218, - "learning_rate": 0.000199994069026376, - "loss": 46.0, - "step": 21538 - }, - { - "epoch": 3.468698417810701, - "grad_norm": 0.009805376641452312, - "learning_rate": 0.0001999940684753678, - "loss": 46.0, - "step": 21539 - }, - { - "epoch": 3.4688594548894884, - "grad_norm": 0.002298088278621435, - "learning_rate": 0.00019999406792433396, - "loss": 46.0, - "step": 21540 - }, - { - "epoch": 3.469020491968276, - "grad_norm": 0.0025894942227751017, - "learning_rate": 0.00019999406737327452, - "loss": 46.0, - "step": 21541 - }, - { - "epoch": 3.469181529047063, - "grad_norm": 0.006048859562724829, - "learning_rate": 0.0001999940668221895, - "loss": 46.0, - "step": 21542 - }, - { - "epoch": 3.4693425661258503, - "grad_norm": 0.004307756666094065, - "learning_rate": 0.00019999406627107887, - "loss": 46.0, - "step": 21543 - }, - { - "epoch": 3.469503603204638, - "grad_norm": 0.001994300400838256, - "learning_rate": 0.00019999406571994267, - "loss": 46.0, - "step": 21544 - }, - { - "epoch": 3.4696646402834252, - "grad_norm": 0.003949034959077835, - "learning_rate": 0.00019999406516878085, - "loss": 46.0, - "step": 21545 - }, - { - "epoch": 3.4698256773622127, - "grad_norm": 0.0018495999975129962, - "learning_rate": 0.00019999406461759347, - "loss": 46.0, - "step": 21546 - }, - { - "epoch": 3.469986714441, - "grad_norm": 0.00537153659388423, - "learning_rate": 0.00019999406406638045, - "loss": 46.0, - "step": 21547 - }, - { - "epoch": 3.4701477515197876, - "grad_norm": 0.014439959079027176, - "learning_rate": 0.00019999406351514187, - "loss": 46.0, - "step": 21548 - }, - { - "epoch": 3.470308788598575, - "grad_norm": 0.0014400511281564832, - "learning_rate": 0.0001999940629638777, - "loss": 46.0, - "step": 21549 - }, - { - "epoch": 3.470469825677362, - "grad_norm": 0.003036829177290201, - "learning_rate": 0.00019999406241258793, - "loss": 46.0, - "step": 21550 - }, - { - "epoch": 3.4706308627561495, - "grad_norm": 0.01739700324833393, - "learning_rate": 0.00019999406186127256, - "loss": 46.0, - "step": 21551 - }, - { - "epoch": 3.470791899834937, - "grad_norm": 0.0022692920174449682, - "learning_rate": 0.0001999940613099316, - "loss": 46.0, - "step": 21552 - }, - { - "epoch": 3.4709529369137244, - "grad_norm": 0.007472389843314886, - "learning_rate": 0.00019999406075856506, - "loss": 46.0, - "step": 21553 - }, - { - "epoch": 3.471113973992512, - "grad_norm": 0.010819156654179096, - "learning_rate": 0.0001999940602071729, - "loss": 46.0, - "step": 21554 - }, - { - "epoch": 3.4712750110712993, - "grad_norm": 0.010931729339063168, - "learning_rate": 0.00019999405965575519, - "loss": 46.0, - "step": 21555 - }, - { - "epoch": 3.4714360481500863, - "grad_norm": 0.0035244664177298546, - "learning_rate": 0.00019999405910431185, - "loss": 46.0, - "step": 21556 - }, - { - "epoch": 3.471597085228874, - "grad_norm": 0.016931017860770226, - "learning_rate": 0.0001999940585528429, - "loss": 46.0, - "step": 21557 - }, - { - "epoch": 3.4717581223076612, - "grad_norm": 0.001266232691705227, - "learning_rate": 0.0001999940580013484, - "loss": 46.0, - "step": 21558 - }, - { - "epoch": 3.4719191593864487, - "grad_norm": 0.006256175227463245, - "learning_rate": 0.00019999405744982828, - "loss": 46.0, - "step": 21559 - }, - { - "epoch": 3.472080196465236, - "grad_norm": 0.0016495035961270332, - "learning_rate": 0.00019999405689828258, - "loss": 46.0, - "step": 21560 - }, - { - "epoch": 3.4722412335440236, - "grad_norm": 0.0034828991629183292, - "learning_rate": 0.00019999405634671128, - "loss": 46.0, - "step": 21561 - }, - { - "epoch": 3.472402270622811, - "grad_norm": 0.0018091370584443212, - "learning_rate": 0.0001999940557951144, - "loss": 46.0, - "step": 21562 - }, - { - "epoch": 3.4725633077015985, - "grad_norm": 0.0017083127750083804, - "learning_rate": 0.00019999405524349193, - "loss": 46.0, - "step": 21563 - }, - { - "epoch": 3.4727243447803855, - "grad_norm": 0.0019094264134764671, - "learning_rate": 0.00019999405469184382, - "loss": 46.0, - "step": 21564 - }, - { - "epoch": 3.472885381859173, - "grad_norm": 0.011336550116539001, - "learning_rate": 0.00019999405414017015, - "loss": 46.0, - "step": 21565 - }, - { - "epoch": 3.4730464189379604, - "grad_norm": 0.004751401487737894, - "learning_rate": 0.0001999940535884709, - "loss": 46.0, - "step": 21566 - }, - { - "epoch": 3.473207456016748, - "grad_norm": 0.001113905687816441, - "learning_rate": 0.00019999405303674603, - "loss": 46.0, - "step": 21567 - }, - { - "epoch": 3.4733684930955353, - "grad_norm": 0.0026605126913636923, - "learning_rate": 0.00019999405248499562, - "loss": 46.0, - "step": 21568 - }, - { - "epoch": 3.4735295301743228, - "grad_norm": 0.013115840964019299, - "learning_rate": 0.00019999405193321955, - "loss": 46.0, - "step": 21569 - }, - { - "epoch": 3.47369056725311, - "grad_norm": 0.0037995046004652977, - "learning_rate": 0.00019999405138141792, - "loss": 46.0, - "step": 21570 - }, - { - "epoch": 3.473851604331897, - "grad_norm": 0.004185269121080637, - "learning_rate": 0.0001999940508295907, - "loss": 46.0, - "step": 21571 - }, - { - "epoch": 3.4740126414106847, - "grad_norm": 0.00208605220541358, - "learning_rate": 0.00019999405027773786, - "loss": 46.0, - "step": 21572 - }, - { - "epoch": 3.474173678489472, - "grad_norm": 0.01016261987388134, - "learning_rate": 0.00019999404972585944, - "loss": 46.0, - "step": 21573 - }, - { - "epoch": 3.4743347155682596, - "grad_norm": 0.003020795062184334, - "learning_rate": 0.0001999940491739554, - "loss": 46.0, - "step": 21574 - }, - { - "epoch": 3.474495752647047, - "grad_norm": 0.012632275000214577, - "learning_rate": 0.0001999940486220258, - "loss": 46.0, - "step": 21575 - }, - { - "epoch": 3.4746567897258345, - "grad_norm": 0.007691246923059225, - "learning_rate": 0.00019999404807007063, - "loss": 46.0, - "step": 21576 - }, - { - "epoch": 3.474817826804622, - "grad_norm": 0.007989048957824707, - "learning_rate": 0.00019999404751808983, - "loss": 46.0, - "step": 21577 - }, - { - "epoch": 3.4749788638834094, - "grad_norm": 0.003225174266844988, - "learning_rate": 0.00019999404696608344, - "loss": 46.0, - "step": 21578 - }, - { - "epoch": 3.4751399009621964, - "grad_norm": 0.0073609910905361176, - "learning_rate": 0.00019999404641405147, - "loss": 46.0, - "step": 21579 - }, - { - "epoch": 3.475300938040984, - "grad_norm": 0.00303995655849576, - "learning_rate": 0.00019999404586199391, - "loss": 46.0, - "step": 21580 - }, - { - "epoch": 3.4754619751197713, - "grad_norm": 0.0037534211296588182, - "learning_rate": 0.0001999940453099107, - "loss": 46.0, - "step": 21581 - }, - { - "epoch": 3.4756230121985587, - "grad_norm": 0.004336799029260874, - "learning_rate": 0.00019999404475780198, - "loss": 46.0, - "step": 21582 - }, - { - "epoch": 3.475784049277346, - "grad_norm": 0.002320753177627921, - "learning_rate": 0.00019999404420566763, - "loss": 46.0, - "step": 21583 - }, - { - "epoch": 3.4759450863561336, - "grad_norm": 0.004559011198580265, - "learning_rate": 0.00019999404365350767, - "loss": 46.0, - "step": 21584 - }, - { - "epoch": 3.4761061234349206, - "grad_norm": 0.016857832670211792, - "learning_rate": 0.00019999404310132212, - "loss": 46.0, - "step": 21585 - }, - { - "epoch": 3.476267160513708, - "grad_norm": 0.001042317831888795, - "learning_rate": 0.00019999404254911101, - "loss": 46.0, - "step": 21586 - }, - { - "epoch": 3.4764281975924956, - "grad_norm": 0.015265410766005516, - "learning_rate": 0.0001999940419968743, - "loss": 46.0, - "step": 21587 - }, - { - "epoch": 3.476589234671283, - "grad_norm": 0.0011762077920138836, - "learning_rate": 0.00019999404144461198, - "loss": 46.0, - "step": 21588 - }, - { - "epoch": 3.4767502717500705, - "grad_norm": 0.0070272888988256454, - "learning_rate": 0.00019999404089232406, - "loss": 46.0, - "step": 21589 - }, - { - "epoch": 3.476911308828858, - "grad_norm": 0.0014839920913800597, - "learning_rate": 0.00019999404034001054, - "loss": 46.0, - "step": 21590 - }, - { - "epoch": 3.4770723459076454, - "grad_norm": 0.0043205576948821545, - "learning_rate": 0.00019999403978767144, - "loss": 46.0, - "step": 21591 - }, - { - "epoch": 3.477233382986433, - "grad_norm": 0.0053464495576918125, - "learning_rate": 0.00019999403923530676, - "loss": 46.0, - "step": 21592 - }, - { - "epoch": 3.4773944200652203, - "grad_norm": 0.00060122279683128, - "learning_rate": 0.00019999403868291649, - "loss": 46.0, - "step": 21593 - }, - { - "epoch": 3.4775554571440073, - "grad_norm": 0.007110511884093285, - "learning_rate": 0.0001999940381305006, - "loss": 46.0, - "step": 21594 - }, - { - "epoch": 3.4777164942227947, - "grad_norm": 0.002072003437206149, - "learning_rate": 0.00019999403757805912, - "loss": 46.0, - "step": 21595 - }, - { - "epoch": 3.477877531301582, - "grad_norm": 0.004132483154535294, - "learning_rate": 0.00019999403702559206, - "loss": 46.0, - "step": 21596 - }, - { - "epoch": 3.4780385683803696, - "grad_norm": 0.0025334444362670183, - "learning_rate": 0.0001999940364730994, - "loss": 46.0, - "step": 21597 - }, - { - "epoch": 3.478199605459157, - "grad_norm": 0.0011895459610968828, - "learning_rate": 0.00019999403592058115, - "loss": 46.0, - "step": 21598 - }, - { - "epoch": 3.4783606425379445, - "grad_norm": 0.005745948292315006, - "learning_rate": 0.00019999403536803733, - "loss": 46.0, - "step": 21599 - }, - { - "epoch": 3.4785216796167315, - "grad_norm": 0.0020802633371204138, - "learning_rate": 0.00019999403481546786, - "loss": 46.0, - "step": 21600 - }, - { - "epoch": 3.478682716695519, - "grad_norm": 0.003612247761338949, - "learning_rate": 0.0001999940342628728, - "loss": 46.0, - "step": 21601 - }, - { - "epoch": 3.4788437537743064, - "grad_norm": 0.0027700415812432766, - "learning_rate": 0.0001999940337102522, - "loss": 46.0, - "step": 21602 - }, - { - "epoch": 3.479004790853094, - "grad_norm": 0.0009137579472735524, - "learning_rate": 0.000199994033157606, - "loss": 46.0, - "step": 21603 - }, - { - "epoch": 3.4791658279318813, - "grad_norm": 0.002333424286916852, - "learning_rate": 0.00019999403260493418, - "loss": 46.0, - "step": 21604 - }, - { - "epoch": 3.479326865010669, - "grad_norm": 0.0031451343093067408, - "learning_rate": 0.00019999403205223678, - "loss": 46.0, - "step": 21605 - }, - { - "epoch": 3.4794879020894562, - "grad_norm": 0.0020351496059447527, - "learning_rate": 0.00019999403149951377, - "loss": 46.0, - "step": 21606 - }, - { - "epoch": 3.4796489391682437, - "grad_norm": 0.011380152776837349, - "learning_rate": 0.0001999940309467652, - "loss": 46.0, - "step": 21607 - }, - { - "epoch": 3.4798099762470307, - "grad_norm": 0.01850689947605133, - "learning_rate": 0.000199994030393991, - "loss": 46.0, - "step": 21608 - }, - { - "epoch": 3.479971013325818, - "grad_norm": 0.003453419543802738, - "learning_rate": 0.00019999402984119123, - "loss": 46.0, - "step": 21609 - }, - { - "epoch": 3.4801320504046056, - "grad_norm": 0.000979285454377532, - "learning_rate": 0.00019999402928836584, - "loss": 46.0, - "step": 21610 - }, - { - "epoch": 3.480293087483393, - "grad_norm": 0.0060765682719647884, - "learning_rate": 0.00019999402873551486, - "loss": 46.0, - "step": 21611 - }, - { - "epoch": 3.4804541245621805, - "grad_norm": 0.005047384649515152, - "learning_rate": 0.00019999402818263832, - "loss": 46.0, - "step": 21612 - }, - { - "epoch": 3.480615161640968, - "grad_norm": 0.012718395330011845, - "learning_rate": 0.00019999402762973617, - "loss": 46.0, - "step": 21613 - }, - { - "epoch": 3.4807761987197554, - "grad_norm": 0.006007462739944458, - "learning_rate": 0.00019999402707680843, - "loss": 46.0, - "step": 21614 - }, - { - "epoch": 3.4809372357985424, - "grad_norm": 0.004431604873389006, - "learning_rate": 0.00019999402652385508, - "loss": 46.0, - "step": 21615 - }, - { - "epoch": 3.48109827287733, - "grad_norm": 0.004615690093487501, - "learning_rate": 0.00019999402597087616, - "loss": 46.0, - "step": 21616 - }, - { - "epoch": 3.4812593099561173, - "grad_norm": 0.00464807590469718, - "learning_rate": 0.00019999402541787163, - "loss": 46.0, - "step": 21617 - }, - { - "epoch": 3.4814203470349048, - "grad_norm": 0.00957720447331667, - "learning_rate": 0.00019999402486484155, - "loss": 46.0, - "step": 21618 - }, - { - "epoch": 3.4815813841136922, - "grad_norm": 0.010536537505686283, - "learning_rate": 0.0001999940243117858, - "loss": 46.0, - "step": 21619 - }, - { - "epoch": 3.4817424211924797, - "grad_norm": 0.007567035965621471, - "learning_rate": 0.00019999402375870447, - "loss": 46.0, - "step": 21620 - }, - { - "epoch": 3.481903458271267, - "grad_norm": 0.003694914048537612, - "learning_rate": 0.0001999940232055976, - "loss": 46.0, - "step": 21621 - }, - { - "epoch": 3.4820644953500546, - "grad_norm": 0.013131159357726574, - "learning_rate": 0.0001999940226524651, - "loss": 46.0, - "step": 21622 - }, - { - "epoch": 3.4822255324288416, - "grad_norm": 0.0056958869099617004, - "learning_rate": 0.00019999402209930703, - "loss": 46.0, - "step": 21623 - }, - { - "epoch": 3.482386569507629, - "grad_norm": 0.0023376031313091516, - "learning_rate": 0.00019999402154612333, - "loss": 46.0, - "step": 21624 - }, - { - "epoch": 3.4825476065864165, - "grad_norm": 0.0026437039487063885, - "learning_rate": 0.00019999402099291408, - "loss": 46.0, - "step": 21625 - }, - { - "epoch": 3.482708643665204, - "grad_norm": 0.0037356445100158453, - "learning_rate": 0.00019999402043967922, - "loss": 46.0, - "step": 21626 - }, - { - "epoch": 3.4828696807439914, - "grad_norm": 0.0034827461931854486, - "learning_rate": 0.00019999401988641876, - "loss": 46.0, - "step": 21627 - }, - { - "epoch": 3.483030717822779, - "grad_norm": 0.0021162594202905893, - "learning_rate": 0.00019999401933313272, - "loss": 46.0, - "step": 21628 - }, - { - "epoch": 3.483191754901566, - "grad_norm": 0.0013706154422834516, - "learning_rate": 0.00019999401877982104, - "loss": 46.0, - "step": 21629 - }, - { - "epoch": 3.4833527919803533, - "grad_norm": 0.0022472376003861427, - "learning_rate": 0.0001999940182264838, - "loss": 46.0, - "step": 21630 - }, - { - "epoch": 3.4835138290591408, - "grad_norm": 0.005469535477459431, - "learning_rate": 0.000199994017673121, - "loss": 46.0, - "step": 21631 - }, - { - "epoch": 3.483674866137928, - "grad_norm": 0.00773253757506609, - "learning_rate": 0.00019999401711973255, - "loss": 46.0, - "step": 21632 - }, - { - "epoch": 3.4838359032167157, - "grad_norm": 0.003926425240933895, - "learning_rate": 0.00019999401656631854, - "loss": 46.0, - "step": 21633 - }, - { - "epoch": 3.483996940295503, - "grad_norm": 0.001304979552514851, - "learning_rate": 0.00019999401601287893, - "loss": 46.0, - "step": 21634 - }, - { - "epoch": 3.4841579773742906, - "grad_norm": 0.004565785638988018, - "learning_rate": 0.00019999401545941372, - "loss": 46.0, - "step": 21635 - }, - { - "epoch": 3.484319014453078, - "grad_norm": 0.003365050535649061, - "learning_rate": 0.0001999940149059229, - "loss": 46.0, - "step": 21636 - }, - { - "epoch": 3.484480051531865, - "grad_norm": 0.004340991843491793, - "learning_rate": 0.00019999401435240652, - "loss": 46.0, - "step": 21637 - }, - { - "epoch": 3.4846410886106525, - "grad_norm": 0.002922011073678732, - "learning_rate": 0.00019999401379886455, - "loss": 46.0, - "step": 21638 - }, - { - "epoch": 3.48480212568944, - "grad_norm": 0.002363980980589986, - "learning_rate": 0.00019999401324529697, - "loss": 46.0, - "step": 21639 - }, - { - "epoch": 3.4849631627682274, - "grad_norm": 0.0019061536295339465, - "learning_rate": 0.0001999940126917038, - "loss": 46.0, - "step": 21640 - }, - { - "epoch": 3.485124199847015, - "grad_norm": 0.007439350243657827, - "learning_rate": 0.00019999401213808505, - "loss": 46.0, - "step": 21641 - }, - { - "epoch": 3.4852852369258023, - "grad_norm": 0.0047789039090275764, - "learning_rate": 0.00019999401158444068, - "loss": 46.0, - "step": 21642 - }, - { - "epoch": 3.4854462740045897, - "grad_norm": 0.0035275130067020655, - "learning_rate": 0.00019999401103077072, - "loss": 46.0, - "step": 21643 - }, - { - "epoch": 3.4856073110833767, - "grad_norm": 0.0026339481119066477, - "learning_rate": 0.00019999401047707518, - "loss": 46.0, - "step": 21644 - }, - { - "epoch": 3.485768348162164, - "grad_norm": 0.00210524071007967, - "learning_rate": 0.00019999400992335405, - "loss": 46.0, - "step": 21645 - }, - { - "epoch": 3.4859293852409516, - "grad_norm": 0.015096982009708881, - "learning_rate": 0.0001999940093696073, - "loss": 46.0, - "step": 21646 - }, - { - "epoch": 3.486090422319739, - "grad_norm": 0.0012922576861456037, - "learning_rate": 0.000199994008815835, - "loss": 46.0, - "step": 21647 - }, - { - "epoch": 3.4862514593985265, - "grad_norm": 0.009506190195679665, - "learning_rate": 0.00019999400826203707, - "loss": 46.0, - "step": 21648 - }, - { - "epoch": 3.486412496477314, - "grad_norm": 0.004630511626601219, - "learning_rate": 0.00019999400770821354, - "loss": 46.0, - "step": 21649 - }, - { - "epoch": 3.4865735335561014, - "grad_norm": 0.007189747411757708, - "learning_rate": 0.00019999400715436445, - "loss": 46.0, - "step": 21650 - }, - { - "epoch": 3.486734570634889, - "grad_norm": 0.004328939598053694, - "learning_rate": 0.00019999400660048974, - "loss": 46.0, - "step": 21651 - }, - { - "epoch": 3.486895607713676, - "grad_norm": 0.0012114219134673476, - "learning_rate": 0.00019999400604658947, - "loss": 46.0, - "step": 21652 - }, - { - "epoch": 3.4870566447924634, - "grad_norm": 0.009022900834679604, - "learning_rate": 0.00019999400549266358, - "loss": 46.0, - "step": 21653 - }, - { - "epoch": 3.487217681871251, - "grad_norm": 0.002417529234662652, - "learning_rate": 0.0001999940049387121, - "loss": 46.0, - "step": 21654 - }, - { - "epoch": 3.4873787189500383, - "grad_norm": 0.0017306219087913632, - "learning_rate": 0.00019999400438473503, - "loss": 46.0, - "step": 21655 - }, - { - "epoch": 3.4875397560288257, - "grad_norm": 0.005544361192733049, - "learning_rate": 0.00019999400383073236, - "loss": 46.0, - "step": 21656 - }, - { - "epoch": 3.487700793107613, - "grad_norm": 0.005513602867722511, - "learning_rate": 0.0001999940032767041, - "loss": 46.0, - "step": 21657 - }, - { - "epoch": 3.4878618301864, - "grad_norm": 0.008863819763064384, - "learning_rate": 0.00019999400272265025, - "loss": 46.0, - "step": 21658 - }, - { - "epoch": 3.4880228672651876, - "grad_norm": 0.0014070312026888132, - "learning_rate": 0.0001999940021685708, - "loss": 46.0, - "step": 21659 - }, - { - "epoch": 3.488183904343975, - "grad_norm": 0.0028806105256080627, - "learning_rate": 0.00019999400161446577, - "loss": 46.0, - "step": 21660 - }, - { - "epoch": 3.4883449414227625, - "grad_norm": 0.0046287281438708305, - "learning_rate": 0.0001999940010603351, - "loss": 46.0, - "step": 21661 - }, - { - "epoch": 3.48850597850155, - "grad_norm": 0.0024114635307341814, - "learning_rate": 0.0001999940005061789, - "loss": 46.0, - "step": 21662 - }, - { - "epoch": 3.4886670155803374, - "grad_norm": 0.002221218543127179, - "learning_rate": 0.00019999399995199708, - "loss": 46.0, - "step": 21663 - }, - { - "epoch": 3.488828052659125, - "grad_norm": 0.004017350729554892, - "learning_rate": 0.00019999399939778968, - "loss": 46.0, - "step": 21664 - }, - { - "epoch": 3.4889890897379123, - "grad_norm": 0.0010599031811580062, - "learning_rate": 0.00019999399884355667, - "loss": 46.0, - "step": 21665 - }, - { - "epoch": 3.4891501268167, - "grad_norm": 0.005866139195859432, - "learning_rate": 0.00019999399828929808, - "loss": 46.0, - "step": 21666 - }, - { - "epoch": 3.489311163895487, - "grad_norm": 0.006685503292828798, - "learning_rate": 0.0001999939977350139, - "loss": 46.0, - "step": 21667 - }, - { - "epoch": 3.4894722009742742, - "grad_norm": 0.0009195986785925925, - "learning_rate": 0.00019999399718070412, - "loss": 46.0, - "step": 21668 - }, - { - "epoch": 3.4896332380530617, - "grad_norm": 0.002200279850512743, - "learning_rate": 0.00019999399662636873, - "loss": 46.0, - "step": 21669 - }, - { - "epoch": 3.489794275131849, - "grad_norm": 0.005131826736032963, - "learning_rate": 0.00019999399607200773, - "loss": 46.0, - "step": 21670 - }, - { - "epoch": 3.4899553122106366, - "grad_norm": 0.0012679877690970898, - "learning_rate": 0.0001999939955176212, - "loss": 46.0, - "step": 21671 - }, - { - "epoch": 3.490116349289424, - "grad_norm": 0.00447316188365221, - "learning_rate": 0.00019999399496320903, - "loss": 46.0, - "step": 21672 - }, - { - "epoch": 3.490277386368211, - "grad_norm": 0.0012581481132656336, - "learning_rate": 0.00019999399440877126, - "loss": 46.0, - "step": 21673 - }, - { - "epoch": 3.4904384234469985, - "grad_norm": 0.005315429996699095, - "learning_rate": 0.00019999399385430791, - "loss": 46.0, - "step": 21674 - }, - { - "epoch": 3.490599460525786, - "grad_norm": 0.007032371591776609, - "learning_rate": 0.00019999399329981898, - "loss": 46.0, - "step": 21675 - }, - { - "epoch": 3.4907604976045734, - "grad_norm": 0.0013241710839793086, - "learning_rate": 0.00019999399274530445, - "loss": 46.0, - "step": 21676 - }, - { - "epoch": 3.490921534683361, - "grad_norm": 0.01054837740957737, - "learning_rate": 0.00019999399219076432, - "loss": 46.0, - "step": 21677 - }, - { - "epoch": 3.4910825717621483, - "grad_norm": 0.012286514975130558, - "learning_rate": 0.00019999399163619862, - "loss": 46.0, - "step": 21678 - }, - { - "epoch": 3.4912436088409358, - "grad_norm": 0.001990196295082569, - "learning_rate": 0.0001999939910816073, - "loss": 46.0, - "step": 21679 - }, - { - "epoch": 3.491404645919723, - "grad_norm": 0.009741852059960365, - "learning_rate": 0.00019999399052699038, - "loss": 46.0, - "step": 21680 - }, - { - "epoch": 3.4915656829985102, - "grad_norm": 0.0013365488266572356, - "learning_rate": 0.0001999939899723479, - "loss": 46.0, - "step": 21681 - }, - { - "epoch": 3.4917267200772977, - "grad_norm": 0.0032115608919411898, - "learning_rate": 0.0001999939894176798, - "loss": 46.0, - "step": 21682 - }, - { - "epoch": 3.491887757156085, - "grad_norm": 0.006293166894465685, - "learning_rate": 0.00019999398886298613, - "loss": 46.0, - "step": 21683 - }, - { - "epoch": 3.4920487942348726, - "grad_norm": 0.00734447967261076, - "learning_rate": 0.00019999398830826683, - "loss": 46.0, - "step": 21684 - }, - { - "epoch": 3.49220983131366, - "grad_norm": 0.0027137629222124815, - "learning_rate": 0.00019999398775352197, - "loss": 46.0, - "step": 21685 - }, - { - "epoch": 3.4923708683924475, - "grad_norm": 0.006050765514373779, - "learning_rate": 0.00019999398719875152, - "loss": 46.0, - "step": 21686 - }, - { - "epoch": 3.492531905471235, - "grad_norm": 0.002486526034772396, - "learning_rate": 0.00019999398664395543, - "loss": 46.0, - "step": 21687 - }, - { - "epoch": 3.492692942550022, - "grad_norm": 0.005964949261397123, - "learning_rate": 0.00019999398608913378, - "loss": 46.0, - "step": 21688 - }, - { - "epoch": 3.4928539796288094, - "grad_norm": 0.003359105670824647, - "learning_rate": 0.00019999398553428654, - "loss": 46.0, - "step": 21689 - }, - { - "epoch": 3.493015016707597, - "grad_norm": 0.0008333980804309249, - "learning_rate": 0.0001999939849794137, - "loss": 46.0, - "step": 21690 - }, - { - "epoch": 3.4931760537863843, - "grad_norm": 0.00577760674059391, - "learning_rate": 0.0001999939844245153, - "loss": 46.0, - "step": 21691 - }, - { - "epoch": 3.4933370908651717, - "grad_norm": 0.008563734591007233, - "learning_rate": 0.00019999398386959125, - "loss": 46.0, - "step": 21692 - }, - { - "epoch": 3.493498127943959, - "grad_norm": 0.0029687120113521814, - "learning_rate": 0.00019999398331464163, - "loss": 46.0, - "step": 21693 - }, - { - "epoch": 3.4936591650227466, - "grad_norm": 0.020417695865035057, - "learning_rate": 0.0001999939827596664, - "loss": 46.0, - "step": 21694 - }, - { - "epoch": 3.493820202101534, - "grad_norm": 0.007189034018665552, - "learning_rate": 0.00019999398220466562, - "loss": 46.0, - "step": 21695 - }, - { - "epoch": 3.493981239180321, - "grad_norm": 0.0016737758414819837, - "learning_rate": 0.00019999398164963921, - "loss": 46.0, - "step": 21696 - }, - { - "epoch": 3.4941422762591086, - "grad_norm": 0.004586674738675356, - "learning_rate": 0.00019999398109458723, - "loss": 46.0, - "step": 21697 - }, - { - "epoch": 3.494303313337896, - "grad_norm": 0.0012549713719636202, - "learning_rate": 0.00019999398053950965, - "loss": 46.0, - "step": 21698 - }, - { - "epoch": 3.4944643504166835, - "grad_norm": 0.0020423668902367353, - "learning_rate": 0.00019999397998440648, - "loss": 46.0, - "step": 21699 - }, - { - "epoch": 3.494625387495471, - "grad_norm": 0.004017159342765808, - "learning_rate": 0.00019999397942927768, - "loss": 46.0, - "step": 21700 - }, - { - "epoch": 3.4947864245742584, - "grad_norm": 0.009589380584657192, - "learning_rate": 0.00019999397887412331, - "loss": 46.0, - "step": 21701 - }, - { - "epoch": 3.4949474616530454, - "grad_norm": 0.006555621977895498, - "learning_rate": 0.0001999939783189434, - "loss": 46.0, - "step": 21702 - }, - { - "epoch": 3.495108498731833, - "grad_norm": 0.0013683857396245003, - "learning_rate": 0.0001999939777637378, - "loss": 46.0, - "step": 21703 - }, - { - "epoch": 3.4952695358106203, - "grad_norm": 0.0011143877636641264, - "learning_rate": 0.00019999397720850667, - "loss": 46.0, - "step": 21704 - }, - { - "epoch": 3.4954305728894077, - "grad_norm": 0.017129722982645035, - "learning_rate": 0.00019999397665324993, - "loss": 46.0, - "step": 21705 - }, - { - "epoch": 3.495591609968195, - "grad_norm": 0.0059161619283258915, - "learning_rate": 0.0001999939760979676, - "loss": 46.0, - "step": 21706 - }, - { - "epoch": 3.4957526470469826, - "grad_norm": 0.004295977298170328, - "learning_rate": 0.00019999397554265968, - "loss": 46.0, - "step": 21707 - }, - { - "epoch": 3.49591368412577, - "grad_norm": 0.004680391866713762, - "learning_rate": 0.00019999397498732618, - "loss": 46.0, - "step": 21708 - }, - { - "epoch": 3.4960747212045575, - "grad_norm": 0.005192800424993038, - "learning_rate": 0.00019999397443196704, - "loss": 46.0, - "step": 21709 - }, - { - "epoch": 3.496235758283345, - "grad_norm": 0.0005860895616933703, - "learning_rate": 0.00019999397387658233, - "loss": 46.0, - "step": 21710 - }, - { - "epoch": 3.496396795362132, - "grad_norm": 0.007153054233640432, - "learning_rate": 0.00019999397332117204, - "loss": 46.0, - "step": 21711 - }, - { - "epoch": 3.4965578324409194, - "grad_norm": 0.005331411026418209, - "learning_rate": 0.00019999397276573616, - "loss": 46.0, - "step": 21712 - }, - { - "epoch": 3.496718869519707, - "grad_norm": 0.008422144688665867, - "learning_rate": 0.00019999397221027464, - "loss": 46.0, - "step": 21713 - }, - { - "epoch": 3.4968799065984943, - "grad_norm": 0.006241340655833483, - "learning_rate": 0.00019999397165478756, - "loss": 46.0, - "step": 21714 - }, - { - "epoch": 3.497040943677282, - "grad_norm": 0.014558670111000538, - "learning_rate": 0.00019999397109927492, - "loss": 46.0, - "step": 21715 - }, - { - "epoch": 3.4972019807560693, - "grad_norm": 0.0011415955377742648, - "learning_rate": 0.00019999397054373664, - "loss": 46.0, - "step": 21716 - }, - { - "epoch": 3.4973630178348563, - "grad_norm": 0.010599583387374878, - "learning_rate": 0.0001999939699881728, - "loss": 46.0, - "step": 21717 - }, - { - "epoch": 3.4975240549136437, - "grad_norm": 0.003147561103105545, - "learning_rate": 0.00019999396943258332, - "loss": 46.0, - "step": 21718 - }, - { - "epoch": 3.497685091992431, - "grad_norm": 0.007108535151928663, - "learning_rate": 0.0001999939688769683, - "loss": 46.0, - "step": 21719 - }, - { - "epoch": 3.4978461290712186, - "grad_norm": 0.0010863060597330332, - "learning_rate": 0.00019999396832132767, - "loss": 46.0, - "step": 21720 - }, - { - "epoch": 3.498007166150006, - "grad_norm": 0.002283665118739009, - "learning_rate": 0.00019999396776566143, - "loss": 46.0, - "step": 21721 - }, - { - "epoch": 3.4981682032287935, - "grad_norm": 0.004716191906481981, - "learning_rate": 0.0001999939672099696, - "loss": 46.0, - "step": 21722 - }, - { - "epoch": 3.498329240307581, - "grad_norm": 0.001842303667217493, - "learning_rate": 0.00019999396665425218, - "loss": 46.0, - "step": 21723 - }, - { - "epoch": 3.4984902773863684, - "grad_norm": 0.004425540566444397, - "learning_rate": 0.00019999396609850914, - "loss": 46.0, - "step": 21724 - }, - { - "epoch": 3.4986513144651554, - "grad_norm": 0.006759363692253828, - "learning_rate": 0.00019999396554274058, - "loss": 46.0, - "step": 21725 - }, - { - "epoch": 3.498812351543943, - "grad_norm": 0.00343524431809783, - "learning_rate": 0.00019999396498694637, - "loss": 46.0, - "step": 21726 - }, - { - "epoch": 3.4989733886227303, - "grad_norm": 0.010361671447753906, - "learning_rate": 0.00019999396443112657, - "loss": 46.0, - "step": 21727 - }, - { - "epoch": 3.499134425701518, - "grad_norm": 0.004125670995563269, - "learning_rate": 0.0001999939638752812, - "loss": 46.0, - "step": 21728 - }, - { - "epoch": 3.4992954627803052, - "grad_norm": 0.004922444466501474, - "learning_rate": 0.0001999939633194102, - "loss": 46.0, - "step": 21729 - }, - { - "epoch": 3.4994564998590927, - "grad_norm": 0.003544427454471588, - "learning_rate": 0.00019999396276351364, - "loss": 46.0, - "step": 21730 - }, - { - "epoch": 3.49961753693788, - "grad_norm": 0.001314751454629004, - "learning_rate": 0.00019999396220759147, - "loss": 46.0, - "step": 21731 - }, - { - "epoch": 3.499778574016667, - "grad_norm": 0.0017822623485699296, - "learning_rate": 0.0001999939616516437, - "loss": 46.0, - "step": 21732 - }, - { - "epoch": 3.4999396110954546, - "grad_norm": 0.009229912422597408, - "learning_rate": 0.00019999396109567037, - "loss": 46.0, - "step": 21733 - }, - { - "epoch": 3.500100648174242, - "grad_norm": 0.016182197257876396, - "learning_rate": 0.0001999939605396714, - "loss": 46.0, - "step": 21734 - }, - { - "epoch": 3.5002616852530295, - "grad_norm": 0.0021189632825553417, - "learning_rate": 0.0001999939599836469, - "loss": 46.0, - "step": 21735 - }, - { - "epoch": 3.500422722331817, - "grad_norm": 0.002374927746132016, - "learning_rate": 0.00019999395942759676, - "loss": 46.0, - "step": 21736 - }, - { - "epoch": 3.5005837594106044, - "grad_norm": 0.00620607566088438, - "learning_rate": 0.00019999395887152104, - "loss": 46.0, - "step": 21737 - }, - { - "epoch": 3.500744796489392, - "grad_norm": 0.006021073553711176, - "learning_rate": 0.0001999939583154197, - "loss": 46.0, - "step": 21738 - }, - { - "epoch": 3.5009058335681793, - "grad_norm": 0.007702481932938099, - "learning_rate": 0.00019999395775929278, - "loss": 46.0, - "step": 21739 - }, - { - "epoch": 3.5010668706469663, - "grad_norm": 0.0007809145608916879, - "learning_rate": 0.0001999939572031403, - "loss": 46.0, - "step": 21740 - }, - { - "epoch": 3.5012279077257538, - "grad_norm": 0.002246558666229248, - "learning_rate": 0.00019999395664696218, - "loss": 46.0, - "step": 21741 - }, - { - "epoch": 3.501388944804541, - "grad_norm": 0.0044090342707931995, - "learning_rate": 0.0001999939560907585, - "loss": 46.0, - "step": 21742 - }, - { - "epoch": 3.5015499818833287, - "grad_norm": 0.004719299264252186, - "learning_rate": 0.00019999395553452922, - "loss": 46.0, - "step": 21743 - }, - { - "epoch": 3.501711018962116, - "grad_norm": 0.0011664681369438767, - "learning_rate": 0.00019999395497827434, - "loss": 46.0, - "step": 21744 - }, - { - "epoch": 3.5018720560409036, - "grad_norm": 0.0008961429120972753, - "learning_rate": 0.00019999395442199386, - "loss": 46.0, - "step": 21745 - }, - { - "epoch": 3.5020330931196906, - "grad_norm": 0.004494569730013609, - "learning_rate": 0.0001999939538656878, - "loss": 46.0, - "step": 21746 - }, - { - "epoch": 3.502194130198478, - "grad_norm": 0.01446344144642353, - "learning_rate": 0.00019999395330935613, - "loss": 46.0, - "step": 21747 - }, - { - "epoch": 3.5023551672772655, - "grad_norm": 0.0036420905962586403, - "learning_rate": 0.0001999939527529989, - "loss": 46.0, - "step": 21748 - }, - { - "epoch": 3.502516204356053, - "grad_norm": 0.0023566405288875103, - "learning_rate": 0.00019999395219661605, - "loss": 46.0, - "step": 21749 - }, - { - "epoch": 3.5026772414348404, - "grad_norm": 0.001361611532047391, - "learning_rate": 0.00019999395164020762, - "loss": 46.0, - "step": 21750 - }, - { - "epoch": 3.502838278513628, - "grad_norm": 0.009291104972362518, - "learning_rate": 0.00019999395108377357, - "loss": 46.0, - "step": 21751 - }, - { - "epoch": 3.5029993155924153, - "grad_norm": 0.009813607670366764, - "learning_rate": 0.00019999395052731396, - "loss": 46.0, - "step": 21752 - }, - { - "epoch": 3.5031603526712027, - "grad_norm": 0.0020451825112104416, - "learning_rate": 0.00019999394997082873, - "loss": 46.0, - "step": 21753 - }, - { - "epoch": 3.50332138974999, - "grad_norm": 0.005616406444460154, - "learning_rate": 0.00019999394941431792, - "loss": 46.0, - "step": 21754 - }, - { - "epoch": 3.503482426828777, - "grad_norm": 0.002111807931214571, - "learning_rate": 0.0001999939488577815, - "loss": 46.0, - "step": 21755 - }, - { - "epoch": 3.5036434639075646, - "grad_norm": 0.001281952252611518, - "learning_rate": 0.0001999939483012195, - "loss": 46.0, - "step": 21756 - }, - { - "epoch": 3.503804500986352, - "grad_norm": 0.0019127260893583298, - "learning_rate": 0.00019999394774463194, - "loss": 46.0, - "step": 21757 - }, - { - "epoch": 3.5039655380651396, - "grad_norm": 0.0012628089170902967, - "learning_rate": 0.00019999394718801873, - "loss": 46.0, - "step": 21758 - }, - { - "epoch": 3.504126575143927, - "grad_norm": 0.001724792760796845, - "learning_rate": 0.00019999394663137995, - "loss": 46.0, - "step": 21759 - }, - { - "epoch": 3.504287612222714, - "grad_norm": 0.003039327682927251, - "learning_rate": 0.0001999939460747156, - "loss": 46.0, - "step": 21760 - }, - { - "epoch": 3.5044486493015015, - "grad_norm": 0.0009637277689762414, - "learning_rate": 0.00019999394551802564, - "loss": 46.0, - "step": 21761 - }, - { - "epoch": 3.504609686380289, - "grad_norm": 0.0008546972530893981, - "learning_rate": 0.00019999394496131008, - "loss": 46.0, - "step": 21762 - }, - { - "epoch": 3.5047707234590764, - "grad_norm": 0.008093750104308128, - "learning_rate": 0.0001999939444045689, - "loss": 46.0, - "step": 21763 - }, - { - "epoch": 3.504931760537864, - "grad_norm": 0.006316937506198883, - "learning_rate": 0.0001999939438478022, - "loss": 46.0, - "step": 21764 - }, - { - "epoch": 3.5050927976166513, - "grad_norm": 0.003119508270174265, - "learning_rate": 0.00019999394329100985, - "loss": 46.0, - "step": 21765 - }, - { - "epoch": 3.5052538346954387, - "grad_norm": 0.004355509765446186, - "learning_rate": 0.0001999939427341919, - "loss": 46.0, - "step": 21766 - }, - { - "epoch": 3.505414871774226, - "grad_norm": 0.002460635267198086, - "learning_rate": 0.00019999394217734838, - "loss": 46.0, - "step": 21767 - }, - { - "epoch": 3.5055759088530136, - "grad_norm": 0.003075494896620512, - "learning_rate": 0.0001999939416204793, - "loss": 46.0, - "step": 21768 - }, - { - "epoch": 3.505736945931801, - "grad_norm": 0.003784006228670478, - "learning_rate": 0.00019999394106358454, - "loss": 46.0, - "step": 21769 - }, - { - "epoch": 3.505897983010588, - "grad_norm": 0.005429904907941818, - "learning_rate": 0.00019999394050666426, - "loss": 46.0, - "step": 21770 - }, - { - "epoch": 3.5060590200893755, - "grad_norm": 0.006247547920793295, - "learning_rate": 0.00019999393994971835, - "loss": 46.0, - "step": 21771 - }, - { - "epoch": 3.506220057168163, - "grad_norm": 0.00142267148476094, - "learning_rate": 0.00019999393939274687, - "loss": 46.0, - "step": 21772 - }, - { - "epoch": 3.5063810942469504, - "grad_norm": 0.010881015099585056, - "learning_rate": 0.00019999393883574976, - "loss": 46.0, - "step": 21773 - }, - { - "epoch": 3.506542131325738, - "grad_norm": 0.006131685804575682, - "learning_rate": 0.0001999939382787271, - "loss": 46.0, - "step": 21774 - }, - { - "epoch": 3.506703168404525, - "grad_norm": 0.0019825256895273924, - "learning_rate": 0.00019999393772167885, - "loss": 46.0, - "step": 21775 - }, - { - "epoch": 3.5068642054833123, - "grad_norm": 0.002985789906233549, - "learning_rate": 0.00019999393716460496, - "loss": 46.0, - "step": 21776 - }, - { - "epoch": 3.5070252425621, - "grad_norm": 0.0010978684294968843, - "learning_rate": 0.0001999939366075055, - "loss": 46.0, - "step": 21777 - }, - { - "epoch": 3.5071862796408872, - "grad_norm": 0.0014760037884116173, - "learning_rate": 0.00019999393605038047, - "loss": 46.0, - "step": 21778 - }, - { - "epoch": 3.5073473167196747, - "grad_norm": 0.0018413078505545855, - "learning_rate": 0.00019999393549322982, - "loss": 46.0, - "step": 21779 - }, - { - "epoch": 3.507508353798462, - "grad_norm": 0.0013160889502614737, - "learning_rate": 0.00019999393493605358, - "loss": 46.0, - "step": 21780 - }, - { - "epoch": 3.5076693908772496, - "grad_norm": 0.004745855461806059, - "learning_rate": 0.00019999393437885175, - "loss": 46.0, - "step": 21781 - }, - { - "epoch": 3.507830427956037, - "grad_norm": 0.00353249697946012, - "learning_rate": 0.00019999393382162434, - "loss": 46.0, - "step": 21782 - }, - { - "epoch": 3.5079914650348245, - "grad_norm": 0.0032081259414553642, - "learning_rate": 0.0001999939332643713, - "loss": 46.0, - "step": 21783 - }, - { - "epoch": 3.5081525021136115, - "grad_norm": 0.008396645076572895, - "learning_rate": 0.00019999393270709267, - "loss": 46.0, - "step": 21784 - }, - { - "epoch": 3.508313539192399, - "grad_norm": 0.002217477886006236, - "learning_rate": 0.00019999393214978846, - "loss": 46.0, - "step": 21785 - }, - { - "epoch": 3.5084745762711864, - "grad_norm": 0.0020149927586317062, - "learning_rate": 0.0001999939315924587, - "loss": 46.0, - "step": 21786 - }, - { - "epoch": 3.508635613349974, - "grad_norm": 0.0017513299826532602, - "learning_rate": 0.0001999939310351033, - "loss": 46.0, - "step": 21787 - }, - { - "epoch": 3.5087966504287613, - "grad_norm": 0.0013605781132355332, - "learning_rate": 0.00019999393047772233, - "loss": 46.0, - "step": 21788 - }, - { - "epoch": 3.5089576875075488, - "grad_norm": 0.0031284557189792395, - "learning_rate": 0.00019999392992031573, - "loss": 46.0, - "step": 21789 - }, - { - "epoch": 3.509118724586336, - "grad_norm": 0.0033485747408121824, - "learning_rate": 0.00019999392936288356, - "loss": 46.0, - "step": 21790 - }, - { - "epoch": 3.5092797616651232, - "grad_norm": 0.0019145788392052054, - "learning_rate": 0.00019999392880542583, - "loss": 46.0, - "step": 21791 - }, - { - "epoch": 3.5094407987439107, - "grad_norm": 0.0011674072593450546, - "learning_rate": 0.00019999392824794244, - "loss": 46.0, - "step": 21792 - }, - { - "epoch": 3.509601835822698, - "grad_norm": 0.0018394498620182276, - "learning_rate": 0.0001999939276904335, - "loss": 46.0, - "step": 21793 - }, - { - "epoch": 3.5097628729014856, - "grad_norm": 0.002685382729396224, - "learning_rate": 0.00019999392713289894, - "loss": 46.0, - "step": 21794 - }, - { - "epoch": 3.509923909980273, - "grad_norm": 0.003416551975533366, - "learning_rate": 0.00019999392657533882, - "loss": 46.0, - "step": 21795 - }, - { - "epoch": 3.5100849470590605, - "grad_norm": 0.0033724710810929537, - "learning_rate": 0.00019999392601775307, - "loss": 46.0, - "step": 21796 - }, - { - "epoch": 3.510245984137848, - "grad_norm": 0.004365818575024605, - "learning_rate": 0.00019999392546014177, - "loss": 46.0, - "step": 21797 - }, - { - "epoch": 3.5104070212166354, - "grad_norm": 0.018542608246207237, - "learning_rate": 0.00019999392490250485, - "loss": 46.0, - "step": 21798 - }, - { - "epoch": 3.5105680582954224, - "grad_norm": 0.0024413387291133404, - "learning_rate": 0.00019999392434484232, - "loss": 46.0, - "step": 21799 - }, - { - "epoch": 3.51072909537421, - "grad_norm": 0.0019974494352936745, - "learning_rate": 0.00019999392378715426, - "loss": 46.0, - "step": 21800 - }, - { - "epoch": 3.5108901324529973, - "grad_norm": 0.0056679206900298595, - "learning_rate": 0.00019999392322944055, - "loss": 46.0, - "step": 21801 - }, - { - "epoch": 3.5110511695317848, - "grad_norm": 0.0060379174537956715, - "learning_rate": 0.00019999392267170123, - "loss": 46.0, - "step": 21802 - }, - { - "epoch": 3.511212206610572, - "grad_norm": 0.0011425487464293838, - "learning_rate": 0.00019999392211393638, - "loss": 46.0, - "step": 21803 - }, - { - "epoch": 3.511373243689359, - "grad_norm": 0.001393473008647561, - "learning_rate": 0.00019999392155614588, - "loss": 46.0, - "step": 21804 - }, - { - "epoch": 3.5115342807681467, - "grad_norm": 0.01002795435488224, - "learning_rate": 0.0001999939209983298, - "loss": 46.0, - "step": 21805 - }, - { - "epoch": 3.511695317846934, - "grad_norm": 0.006619532126933336, - "learning_rate": 0.00019999392044048816, - "loss": 46.0, - "step": 21806 - }, - { - "epoch": 3.5118563549257216, - "grad_norm": 0.0060730138793587685, - "learning_rate": 0.00019999391988262088, - "loss": 46.0, - "step": 21807 - }, - { - "epoch": 3.512017392004509, - "grad_norm": 0.006986928638070822, - "learning_rate": 0.00019999391932472803, - "loss": 46.0, - "step": 21808 - }, - { - "epoch": 3.5121784290832965, - "grad_norm": 0.0018528768559917808, - "learning_rate": 0.0001999939187668096, - "loss": 46.0, - "step": 21809 - }, - { - "epoch": 3.512339466162084, - "grad_norm": 0.001747790607623756, - "learning_rate": 0.00019999391820886558, - "loss": 46.0, - "step": 21810 - }, - { - "epoch": 3.5125005032408714, - "grad_norm": 0.00402861600741744, - "learning_rate": 0.00019999391765089595, - "loss": 46.0, - "step": 21811 - }, - { - "epoch": 3.512661540319659, - "grad_norm": 0.008654254488646984, - "learning_rate": 0.0001999939170929007, - "loss": 46.0, - "step": 21812 - }, - { - "epoch": 3.512822577398446, - "grad_norm": 0.004390066955238581, - "learning_rate": 0.0001999939165348799, - "loss": 46.0, - "step": 21813 - }, - { - "epoch": 3.5129836144772333, - "grad_norm": 0.005463108420372009, - "learning_rate": 0.00019999391597683348, - "loss": 46.0, - "step": 21814 - }, - { - "epoch": 3.5131446515560207, - "grad_norm": 0.0008358869818039238, - "learning_rate": 0.00019999391541876147, - "loss": 46.0, - "step": 21815 - }, - { - "epoch": 3.513305688634808, - "grad_norm": 0.002591717289760709, - "learning_rate": 0.00019999391486066388, - "loss": 46.0, - "step": 21816 - }, - { - "epoch": 3.5134667257135956, - "grad_norm": 0.009204046800732613, - "learning_rate": 0.0001999939143025407, - "loss": 46.0, - "step": 21817 - }, - { - "epoch": 3.513627762792383, - "grad_norm": 0.0013155278284102678, - "learning_rate": 0.0001999939137443919, - "loss": 46.0, - "step": 21818 - }, - { - "epoch": 3.51378879987117, - "grad_norm": 0.00045975917601026595, - "learning_rate": 0.00019999391318621754, - "loss": 46.0, - "step": 21819 - }, - { - "epoch": 3.5139498369499576, - "grad_norm": 0.0005814654286950827, - "learning_rate": 0.00019999391262801757, - "loss": 46.0, - "step": 21820 - }, - { - "epoch": 3.514110874028745, - "grad_norm": 0.0014132732758298516, - "learning_rate": 0.00019999391206979201, - "loss": 46.0, - "step": 21821 - }, - { - "epoch": 3.5142719111075325, - "grad_norm": 0.003257524687796831, - "learning_rate": 0.00019999391151154084, - "loss": 46.0, - "step": 21822 - }, - { - "epoch": 3.51443294818632, - "grad_norm": 0.001962295500561595, - "learning_rate": 0.00019999391095326408, - "loss": 46.0, - "step": 21823 - }, - { - "epoch": 3.5145939852651074, - "grad_norm": 0.008457900024950504, - "learning_rate": 0.00019999391039496174, - "loss": 46.0, - "step": 21824 - }, - { - "epoch": 3.514755022343895, - "grad_norm": 0.005166944116353989, - "learning_rate": 0.00019999390983663383, - "loss": 46.0, - "step": 21825 - }, - { - "epoch": 3.5149160594226823, - "grad_norm": 0.004429202061146498, - "learning_rate": 0.00019999390927828028, - "loss": 46.0, - "step": 21826 - }, - { - "epoch": 3.5150770965014697, - "grad_norm": 0.008082548156380653, - "learning_rate": 0.00019999390871990115, - "loss": 46.0, - "step": 21827 - }, - { - "epoch": 3.5152381335802567, - "grad_norm": 0.004783708602190018, - "learning_rate": 0.00019999390816149642, - "loss": 46.0, - "step": 21828 - }, - { - "epoch": 3.515399170659044, - "grad_norm": 0.007234132383018732, - "learning_rate": 0.00019999390760306614, - "loss": 46.0, - "step": 21829 - }, - { - "epoch": 3.5155602077378316, - "grad_norm": 0.006134225986897945, - "learning_rate": 0.00019999390704461022, - "loss": 46.0, - "step": 21830 - }, - { - "epoch": 3.515721244816619, - "grad_norm": 0.007768153678625822, - "learning_rate": 0.00019999390648612874, - "loss": 46.0, - "step": 21831 - }, - { - "epoch": 3.5158822818954065, - "grad_norm": 0.016721682623028755, - "learning_rate": 0.00019999390592762164, - "loss": 46.0, - "step": 21832 - }, - { - "epoch": 3.5160433189741935, - "grad_norm": 0.005124760791659355, - "learning_rate": 0.00019999390536908895, - "loss": 46.0, - "step": 21833 - }, - { - "epoch": 3.516204356052981, - "grad_norm": 0.0076878671534359455, - "learning_rate": 0.00019999390481053068, - "loss": 46.0, - "step": 21834 - }, - { - "epoch": 3.5163653931317684, - "grad_norm": 0.00998994242399931, - "learning_rate": 0.0001999939042519468, - "loss": 46.0, - "step": 21835 - }, - { - "epoch": 3.516526430210556, - "grad_norm": 0.0011731629492715001, - "learning_rate": 0.00019999390369333735, - "loss": 46.0, - "step": 21836 - }, - { - "epoch": 3.5166874672893433, - "grad_norm": 0.007419189903885126, - "learning_rate": 0.00019999390313470229, - "loss": 46.0, - "step": 21837 - }, - { - "epoch": 3.516848504368131, - "grad_norm": 0.0008965274901129305, - "learning_rate": 0.00019999390257604164, - "loss": 46.0, - "step": 21838 - }, - { - "epoch": 3.5170095414469182, - "grad_norm": 0.0019052295247092843, - "learning_rate": 0.0001999939020173554, - "loss": 46.0, - "step": 21839 - }, - { - "epoch": 3.5171705785257057, - "grad_norm": 0.003496067365631461, - "learning_rate": 0.00019999390145864355, - "loss": 46.0, - "step": 21840 - }, - { - "epoch": 3.517331615604493, - "grad_norm": 0.0008139025303535163, - "learning_rate": 0.00019999390089990612, - "loss": 46.0, - "step": 21841 - }, - { - "epoch": 3.5174926526832806, - "grad_norm": 0.0031868871301412582, - "learning_rate": 0.00019999390034114312, - "loss": 46.0, - "step": 21842 - }, - { - "epoch": 3.5176536897620676, - "grad_norm": 0.0012927409261465073, - "learning_rate": 0.0001999938997823545, - "loss": 46.0, - "step": 21843 - }, - { - "epoch": 3.517814726840855, - "grad_norm": 0.004887499380856752, - "learning_rate": 0.00019999389922354028, - "loss": 46.0, - "step": 21844 - }, - { - "epoch": 3.5179757639196425, - "grad_norm": 0.004289343021810055, - "learning_rate": 0.0001999938986647005, - "loss": 46.0, - "step": 21845 - }, - { - "epoch": 3.51813680099843, - "grad_norm": 0.007844308391213417, - "learning_rate": 0.0001999938981058351, - "loss": 46.0, - "step": 21846 - }, - { - "epoch": 3.5182978380772174, - "grad_norm": 0.0016463998472318053, - "learning_rate": 0.0001999938975469441, - "loss": 46.0, - "step": 21847 - }, - { - "epoch": 3.5184588751560044, - "grad_norm": 0.0053892736323177814, - "learning_rate": 0.0001999938969880275, - "loss": 46.0, - "step": 21848 - }, - { - "epoch": 3.518619912234792, - "grad_norm": 0.006787822116166353, - "learning_rate": 0.00019999389642908532, - "loss": 46.0, - "step": 21849 - }, - { - "epoch": 3.5187809493135793, - "grad_norm": 0.002457831520587206, - "learning_rate": 0.00019999389587011757, - "loss": 46.0, - "step": 21850 - }, - { - "epoch": 3.5189419863923668, - "grad_norm": 0.013491563498973846, - "learning_rate": 0.0001999938953111242, - "loss": 46.0, - "step": 21851 - }, - { - "epoch": 3.5191030234711542, - "grad_norm": 0.009038117714226246, - "learning_rate": 0.00019999389475210526, - "loss": 46.0, - "step": 21852 - }, - { - "epoch": 3.5192640605499417, - "grad_norm": 0.0032559256069362164, - "learning_rate": 0.0001999938941930607, - "loss": 46.0, - "step": 21853 - }, - { - "epoch": 3.519425097628729, - "grad_norm": 0.002037127036601305, - "learning_rate": 0.00019999389363399054, - "loss": 46.0, - "step": 21854 - }, - { - "epoch": 3.5195861347075166, - "grad_norm": 0.0012325349962338805, - "learning_rate": 0.00019999389307489483, - "loss": 46.0, - "step": 21855 - }, - { - "epoch": 3.519747171786304, - "grad_norm": 0.009791783057153225, - "learning_rate": 0.0001999938925157735, - "loss": 46.0, - "step": 21856 - }, - { - "epoch": 3.519908208865091, - "grad_norm": 0.0015503528993576765, - "learning_rate": 0.00019999389195662657, - "loss": 46.0, - "step": 21857 - }, - { - "epoch": 3.5200692459438785, - "grad_norm": 0.01240500621497631, - "learning_rate": 0.00019999389139745404, - "loss": 46.0, - "step": 21858 - }, - { - "epoch": 3.520230283022666, - "grad_norm": 0.005723054055124521, - "learning_rate": 0.00019999389083825595, - "loss": 46.0, - "step": 21859 - }, - { - "epoch": 3.5203913201014534, - "grad_norm": 0.002513917861506343, - "learning_rate": 0.00019999389027903222, - "loss": 46.0, - "step": 21860 - }, - { - "epoch": 3.520552357180241, - "grad_norm": 0.009991351515054703, - "learning_rate": 0.00019999388971978294, - "loss": 46.0, - "step": 21861 - }, - { - "epoch": 3.5207133942590283, - "grad_norm": 0.00209894054569304, - "learning_rate": 0.00019999388916050806, - "loss": 46.0, - "step": 21862 - }, - { - "epoch": 3.5208744313378153, - "grad_norm": 0.004655070602893829, - "learning_rate": 0.00019999388860120757, - "loss": 46.0, - "step": 21863 - }, - { - "epoch": 3.5210354684166028, - "grad_norm": 0.0017953141359612346, - "learning_rate": 0.0001999938880418815, - "loss": 46.0, - "step": 21864 - }, - { - "epoch": 3.52119650549539, - "grad_norm": 0.008750016801059246, - "learning_rate": 0.00019999388748252983, - "loss": 46.0, - "step": 21865 - }, - { - "epoch": 3.5213575425741777, - "grad_norm": 0.0009777933591976762, - "learning_rate": 0.00019999388692315258, - "loss": 46.0, - "step": 21866 - }, - { - "epoch": 3.521518579652965, - "grad_norm": 0.0009155089501291513, - "learning_rate": 0.0001999938863637497, - "loss": 46.0, - "step": 21867 - }, - { - "epoch": 3.5216796167317526, - "grad_norm": 0.0011881996178999543, - "learning_rate": 0.00019999388580432126, - "loss": 46.0, - "step": 21868 - }, - { - "epoch": 3.52184065381054, - "grad_norm": 0.007826287299394608, - "learning_rate": 0.00019999388524486722, - "loss": 46.0, - "step": 21869 - }, - { - "epoch": 3.5220016908893275, - "grad_norm": 0.0018998130690306425, - "learning_rate": 0.0001999938846853876, - "loss": 46.0, - "step": 21870 - }, - { - "epoch": 3.522162727968115, - "grad_norm": 0.0011056291405111551, - "learning_rate": 0.00019999388412588235, - "loss": 46.0, - "step": 21871 - }, - { - "epoch": 3.522323765046902, - "grad_norm": 0.004786670673638582, - "learning_rate": 0.00019999388356635155, - "loss": 46.0, - "step": 21872 - }, - { - "epoch": 3.5224848021256894, - "grad_norm": 0.0033293634187430143, - "learning_rate": 0.0001999938830067951, - "loss": 46.0, - "step": 21873 - }, - { - "epoch": 3.522645839204477, - "grad_norm": 0.0009380434639751911, - "learning_rate": 0.0001999938824472131, - "loss": 46.0, - "step": 21874 - }, - { - "epoch": 3.5228068762832643, - "grad_norm": 0.005221309140324593, - "learning_rate": 0.0001999938818876055, - "loss": 46.0, - "step": 21875 - }, - { - "epoch": 3.5229679133620517, - "grad_norm": 0.0027566649951040745, - "learning_rate": 0.0001999938813279723, - "loss": 46.0, - "step": 21876 - }, - { - "epoch": 3.5231289504408387, - "grad_norm": 0.0045267026871442795, - "learning_rate": 0.00019999388076831354, - "loss": 46.0, - "step": 21877 - }, - { - "epoch": 3.523289987519626, - "grad_norm": 0.002671432215720415, - "learning_rate": 0.00019999388020862914, - "loss": 46.0, - "step": 21878 - }, - { - "epoch": 3.5234510245984136, - "grad_norm": 0.0039221071638166904, - "learning_rate": 0.00019999387964891917, - "loss": 46.0, - "step": 21879 - }, - { - "epoch": 3.523612061677201, - "grad_norm": 0.002746603451669216, - "learning_rate": 0.0001999938790891836, - "loss": 46.0, - "step": 21880 - }, - { - "epoch": 3.5237730987559885, - "grad_norm": 0.0025265049189329147, - "learning_rate": 0.00019999387852942245, - "loss": 46.0, - "step": 21881 - }, - { - "epoch": 3.523934135834776, - "grad_norm": 0.0025974467862397432, - "learning_rate": 0.00019999387796963567, - "loss": 46.0, - "step": 21882 - }, - { - "epoch": 3.5240951729135634, - "grad_norm": 0.015995929017663002, - "learning_rate": 0.00019999387740982333, - "loss": 46.0, - "step": 21883 - }, - { - "epoch": 3.524256209992351, - "grad_norm": 0.0018486761255189776, - "learning_rate": 0.0001999938768499854, - "loss": 46.0, - "step": 21884 - }, - { - "epoch": 3.5244172470711383, - "grad_norm": 0.0051537067629396915, - "learning_rate": 0.00019999387629012185, - "loss": 46.0, - "step": 21885 - }, - { - "epoch": 3.5245782841499254, - "grad_norm": 0.0029616758693009615, - "learning_rate": 0.00019999387573023275, - "loss": 46.0, - "step": 21886 - }, - { - "epoch": 3.524739321228713, - "grad_norm": 0.0038254093378782272, - "learning_rate": 0.000199993875170318, - "loss": 46.0, - "step": 21887 - }, - { - "epoch": 3.5249003583075003, - "grad_norm": 0.007253831252455711, - "learning_rate": 0.00019999387461037767, - "loss": 46.0, - "step": 21888 - }, - { - "epoch": 3.5250613953862877, - "grad_norm": 0.007263258099555969, - "learning_rate": 0.00019999387405041178, - "loss": 46.0, - "step": 21889 - }, - { - "epoch": 3.525222432465075, - "grad_norm": 0.0022202448453754187, - "learning_rate": 0.00019999387349042027, - "loss": 46.0, - "step": 21890 - }, - { - "epoch": 3.5253834695438626, - "grad_norm": 0.006503438111394644, - "learning_rate": 0.00019999387293040318, - "loss": 46.0, - "step": 21891 - }, - { - "epoch": 3.5255445066226496, - "grad_norm": 0.004652680829167366, - "learning_rate": 0.0001999938723703605, - "loss": 46.0, - "step": 21892 - }, - { - "epoch": 3.525705543701437, - "grad_norm": 0.0049568782560527325, - "learning_rate": 0.00019999387181029223, - "loss": 46.0, - "step": 21893 - }, - { - "epoch": 3.5258665807802245, - "grad_norm": 0.0015088096261024475, - "learning_rate": 0.00019999387125019835, - "loss": 46.0, - "step": 21894 - }, - { - "epoch": 3.526027617859012, - "grad_norm": 0.017559386789798737, - "learning_rate": 0.00019999387069007888, - "loss": 46.0, - "step": 21895 - }, - { - "epoch": 3.5261886549377994, - "grad_norm": 0.02956242486834526, - "learning_rate": 0.0001999938701299338, - "loss": 46.0, - "step": 21896 - }, - { - "epoch": 3.526349692016587, - "grad_norm": 0.001476361881941557, - "learning_rate": 0.00019999386956976315, - "loss": 46.0, - "step": 21897 - }, - { - "epoch": 3.5265107290953743, - "grad_norm": 0.0020461431704461575, - "learning_rate": 0.00019999386900956692, - "loss": 46.0, - "step": 21898 - }, - { - "epoch": 3.526671766174162, - "grad_norm": 0.001690125442110002, - "learning_rate": 0.00019999386844934508, - "loss": 46.0, - "step": 21899 - }, - { - "epoch": 3.5268328032529492, - "grad_norm": 0.0011963190045207739, - "learning_rate": 0.00019999386788909762, - "loss": 46.0, - "step": 21900 - }, - { - "epoch": 3.5269938403317362, - "grad_norm": 0.0076559740118682384, - "learning_rate": 0.0001999938673288246, - "loss": 46.0, - "step": 21901 - }, - { - "epoch": 3.5271548774105237, - "grad_norm": 0.005284005310386419, - "learning_rate": 0.000199993866768526, - "loss": 46.0, - "step": 21902 - }, - { - "epoch": 3.527315914489311, - "grad_norm": 0.012967171147465706, - "learning_rate": 0.00019999386620820177, - "loss": 46.0, - "step": 21903 - }, - { - "epoch": 3.5274769515680986, - "grad_norm": 0.0018241682555526495, - "learning_rate": 0.00019999386564785196, - "loss": 46.0, - "step": 21904 - }, - { - "epoch": 3.527637988646886, - "grad_norm": 0.002812751103192568, - "learning_rate": 0.00019999386508747654, - "loss": 46.0, - "step": 21905 - }, - { - "epoch": 3.5277990257256735, - "grad_norm": 0.003000043798238039, - "learning_rate": 0.0001999938645270756, - "loss": 46.0, - "step": 21906 - }, - { - "epoch": 3.5279600628044605, - "grad_norm": 0.001961148576810956, - "learning_rate": 0.000199993863966649, - "loss": 46.0, - "step": 21907 - }, - { - "epoch": 3.528121099883248, - "grad_norm": 0.009626796469092369, - "learning_rate": 0.0001999938634061968, - "loss": 46.0, - "step": 21908 - }, - { - "epoch": 3.5282821369620354, - "grad_norm": 0.0048358505591750145, - "learning_rate": 0.000199993862845719, - "loss": 46.0, - "step": 21909 - }, - { - "epoch": 3.528443174040823, - "grad_norm": 0.0034459978342056274, - "learning_rate": 0.00019999386228521565, - "loss": 46.0, - "step": 21910 - }, - { - "epoch": 3.5286042111196103, - "grad_norm": 0.0021168130915611982, - "learning_rate": 0.00019999386172468668, - "loss": 46.0, - "step": 21911 - }, - { - "epoch": 3.5287652481983978, - "grad_norm": 0.006441343575716019, - "learning_rate": 0.00019999386116413215, - "loss": 46.0, - "step": 21912 - }, - { - "epoch": 3.528926285277185, - "grad_norm": 0.0010449185501784086, - "learning_rate": 0.000199993860603552, - "loss": 46.0, - "step": 21913 - }, - { - "epoch": 3.5290873223559727, - "grad_norm": 0.0031315439846366644, - "learning_rate": 0.00019999386004294624, - "loss": 46.0, - "step": 21914 - }, - { - "epoch": 3.52924835943476, - "grad_norm": 0.002528954530134797, - "learning_rate": 0.00019999385948231492, - "loss": 46.0, - "step": 21915 - }, - { - "epoch": 3.529409396513547, - "grad_norm": 0.008319096639752388, - "learning_rate": 0.00019999385892165798, - "loss": 46.0, - "step": 21916 - }, - { - "epoch": 3.5295704335923346, - "grad_norm": 0.011010879650712013, - "learning_rate": 0.00019999385836097546, - "loss": 46.0, - "step": 21917 - }, - { - "epoch": 3.529731470671122, - "grad_norm": 0.002860456705093384, - "learning_rate": 0.00019999385780026735, - "loss": 46.0, - "step": 21918 - }, - { - "epoch": 3.5298925077499095, - "grad_norm": 0.0017834343016147614, - "learning_rate": 0.00019999385723953365, - "loss": 46.0, - "step": 21919 - }, - { - "epoch": 3.530053544828697, - "grad_norm": 0.0068252370692789555, - "learning_rate": 0.00019999385667877434, - "loss": 46.0, - "step": 21920 - }, - { - "epoch": 3.530214581907484, - "grad_norm": 0.002756329718977213, - "learning_rate": 0.00019999385611798944, - "loss": 46.0, - "step": 21921 - }, - { - "epoch": 3.5303756189862714, - "grad_norm": 0.001273614470846951, - "learning_rate": 0.00019999385555717898, - "loss": 46.0, - "step": 21922 - }, - { - "epoch": 3.530536656065059, - "grad_norm": 0.008705164305865765, - "learning_rate": 0.00019999385499634288, - "loss": 46.0, - "step": 21923 - }, - { - "epoch": 3.5306976931438463, - "grad_norm": 0.005385234951972961, - "learning_rate": 0.00019999385443548122, - "loss": 46.0, - "step": 21924 - }, - { - "epoch": 3.5308587302226337, - "grad_norm": 0.014770200476050377, - "learning_rate": 0.00019999385387459395, - "loss": 46.0, - "step": 21925 - }, - { - "epoch": 3.531019767301421, - "grad_norm": 0.0028039859607815742, - "learning_rate": 0.0001999938533136811, - "loss": 46.0, - "step": 21926 - }, - { - "epoch": 3.5311808043802086, - "grad_norm": 0.003740460379049182, - "learning_rate": 0.00019999385275274264, - "loss": 46.0, - "step": 21927 - }, - { - "epoch": 3.531341841458996, - "grad_norm": 0.008975562639534473, - "learning_rate": 0.00019999385219177858, - "loss": 46.0, - "step": 21928 - }, - { - "epoch": 3.5315028785377836, - "grad_norm": 0.0015580275794491172, - "learning_rate": 0.00019999385163078895, - "loss": 46.0, - "step": 21929 - }, - { - "epoch": 3.5316639156165706, - "grad_norm": 0.0022880041506141424, - "learning_rate": 0.00019999385106977372, - "loss": 46.0, - "step": 21930 - }, - { - "epoch": 3.531824952695358, - "grad_norm": 0.0011812779121100903, - "learning_rate": 0.0001999938505087329, - "loss": 46.0, - "step": 21931 - }, - { - "epoch": 3.5319859897741455, - "grad_norm": 0.0015835604863241315, - "learning_rate": 0.00019999384994766648, - "loss": 46.0, - "step": 21932 - }, - { - "epoch": 3.532147026852933, - "grad_norm": 0.004070988390594721, - "learning_rate": 0.00019999384938657446, - "loss": 46.0, - "step": 21933 - }, - { - "epoch": 3.5323080639317204, - "grad_norm": 0.01473455224186182, - "learning_rate": 0.00019999384882545687, - "loss": 46.0, - "step": 21934 - }, - { - "epoch": 3.532469101010508, - "grad_norm": 0.002096122596412897, - "learning_rate": 0.00019999384826431364, - "loss": 46.0, - "step": 21935 - }, - { - "epoch": 3.532630138089295, - "grad_norm": 0.0006884551839902997, - "learning_rate": 0.00019999384770314486, - "loss": 46.0, - "step": 21936 - }, - { - "epoch": 3.5327911751680823, - "grad_norm": 0.0013674602378159761, - "learning_rate": 0.00019999384714195045, - "loss": 46.0, - "step": 21937 - }, - { - "epoch": 3.5329522122468697, - "grad_norm": 0.012345785275101662, - "learning_rate": 0.0001999938465807305, - "loss": 46.0, - "step": 21938 - }, - { - "epoch": 3.533113249325657, - "grad_norm": 0.005417299922555685, - "learning_rate": 0.00019999384601948492, - "loss": 46.0, - "step": 21939 - }, - { - "epoch": 3.5332742864044446, - "grad_norm": 0.002332830335944891, - "learning_rate": 0.00019999384545821375, - "loss": 46.0, - "step": 21940 - }, - { - "epoch": 3.533435323483232, - "grad_norm": 0.0035465157125145197, - "learning_rate": 0.000199993844896917, - "loss": 46.0, - "step": 21941 - }, - { - "epoch": 3.5335963605620195, - "grad_norm": 0.003067759098485112, - "learning_rate": 0.00019999384433559464, - "loss": 46.0, - "step": 21942 - }, - { - "epoch": 3.533757397640807, - "grad_norm": 0.0008980606799013913, - "learning_rate": 0.00019999384377424672, - "loss": 46.0, - "step": 21943 - }, - { - "epoch": 3.5339184347195944, - "grad_norm": 0.0015847431495785713, - "learning_rate": 0.00019999384321287315, - "loss": 46.0, - "step": 21944 - }, - { - "epoch": 3.5340794717983814, - "grad_norm": 0.006590429227799177, - "learning_rate": 0.00019999384265147402, - "loss": 46.0, - "step": 21945 - }, - { - "epoch": 3.534240508877169, - "grad_norm": 0.0019933029543608427, - "learning_rate": 0.0001999938420900493, - "loss": 46.0, - "step": 21946 - }, - { - "epoch": 3.5344015459559563, - "grad_norm": 0.0012358695967122912, - "learning_rate": 0.00019999384152859896, - "loss": 46.0, - "step": 21947 - }, - { - "epoch": 3.534562583034744, - "grad_norm": 0.006818469613790512, - "learning_rate": 0.00019999384096712307, - "loss": 46.0, - "step": 21948 - }, - { - "epoch": 3.5347236201135313, - "grad_norm": 0.005868944805115461, - "learning_rate": 0.00019999384040562154, - "loss": 46.0, - "step": 21949 - }, - { - "epoch": 3.5348846571923183, - "grad_norm": 0.003276033792644739, - "learning_rate": 0.00019999383984409445, - "loss": 46.0, - "step": 21950 - }, - { - "epoch": 3.5350456942711057, - "grad_norm": 0.003228364046663046, - "learning_rate": 0.00019999383928254177, - "loss": 46.0, - "step": 21951 - }, - { - "epoch": 3.535206731349893, - "grad_norm": 0.0011344053782522678, - "learning_rate": 0.00019999383872096348, - "loss": 46.0, - "step": 21952 - }, - { - "epoch": 3.5353677684286806, - "grad_norm": 0.004558082204312086, - "learning_rate": 0.00019999383815935958, - "loss": 46.0, - "step": 21953 - }, - { - "epoch": 3.535528805507468, - "grad_norm": 0.004243944771587849, - "learning_rate": 0.00019999383759773011, - "loss": 46.0, - "step": 21954 - }, - { - "epoch": 3.5356898425862555, - "grad_norm": 0.008999931626021862, - "learning_rate": 0.00019999383703607506, - "loss": 46.0, - "step": 21955 - }, - { - "epoch": 3.535850879665043, - "grad_norm": 0.002882792381569743, - "learning_rate": 0.0001999938364743944, - "loss": 46.0, - "step": 21956 - }, - { - "epoch": 3.5360119167438304, - "grad_norm": 0.0042806644923985004, - "learning_rate": 0.00019999383591268814, - "loss": 46.0, - "step": 21957 - }, - { - "epoch": 3.536172953822618, - "grad_norm": 0.0050038970075547695, - "learning_rate": 0.0001999938353509563, - "loss": 46.0, - "step": 21958 - }, - { - "epoch": 3.5363339909014053, - "grad_norm": 0.0014869271544739604, - "learning_rate": 0.00019999383478919887, - "loss": 46.0, - "step": 21959 - }, - { - "epoch": 3.5364950279801923, - "grad_norm": 0.006352775730192661, - "learning_rate": 0.00019999383422741583, - "loss": 46.0, - "step": 21960 - }, - { - "epoch": 3.53665606505898, - "grad_norm": 0.011851463466882706, - "learning_rate": 0.0001999938336656072, - "loss": 46.0, - "step": 21961 - }, - { - "epoch": 3.5368171021377672, - "grad_norm": 0.0010939991334453225, - "learning_rate": 0.000199993833103773, - "loss": 46.0, - "step": 21962 - }, - { - "epoch": 3.5369781392165547, - "grad_norm": 0.0022448664531111717, - "learning_rate": 0.00019999383254191318, - "loss": 46.0, - "step": 21963 - }, - { - "epoch": 3.537139176295342, - "grad_norm": 0.0050621237605810165, - "learning_rate": 0.00019999383198002777, - "loss": 46.0, - "step": 21964 - }, - { - "epoch": 3.537300213374129, - "grad_norm": 0.002287766896188259, - "learning_rate": 0.00019999383141811676, - "loss": 46.0, - "step": 21965 - }, - { - "epoch": 3.5374612504529166, - "grad_norm": 0.004195712506771088, - "learning_rate": 0.00019999383085618017, - "loss": 46.0, - "step": 21966 - }, - { - "epoch": 3.537622287531704, - "grad_norm": 0.00612446665763855, - "learning_rate": 0.000199993830294218, - "loss": 46.0, - "step": 21967 - }, - { - "epoch": 3.5377833246104915, - "grad_norm": 0.004373784642666578, - "learning_rate": 0.00019999382973223022, - "loss": 46.0, - "step": 21968 - }, - { - "epoch": 3.537944361689279, - "grad_norm": 0.0027409158647060394, - "learning_rate": 0.00019999382917021684, - "loss": 46.0, - "step": 21969 - }, - { - "epoch": 3.5381053987680664, - "grad_norm": 0.006163121201097965, - "learning_rate": 0.00019999382860817788, - "loss": 46.0, - "step": 21970 - }, - { - "epoch": 3.538266435846854, - "grad_norm": 0.0014037862420082092, - "learning_rate": 0.00019999382804611332, - "loss": 46.0, - "step": 21971 - }, - { - "epoch": 3.5384274729256413, - "grad_norm": 0.00676178652793169, - "learning_rate": 0.00019999382748402318, - "loss": 46.0, - "step": 21972 - }, - { - "epoch": 3.5385885100044288, - "grad_norm": 0.002491881838068366, - "learning_rate": 0.00019999382692190742, - "loss": 46.0, - "step": 21973 - }, - { - "epoch": 3.5387495470832158, - "grad_norm": 0.004652262199670076, - "learning_rate": 0.00019999382635976608, - "loss": 46.0, - "step": 21974 - }, - { - "epoch": 3.538910584162003, - "grad_norm": 0.0011893954360857606, - "learning_rate": 0.00019999382579759915, - "loss": 46.0, - "step": 21975 - }, - { - "epoch": 3.5390716212407907, - "grad_norm": 0.0014562904834747314, - "learning_rate": 0.00019999382523540663, - "loss": 46.0, - "step": 21976 - }, - { - "epoch": 3.539232658319578, - "grad_norm": 0.006258485373109579, - "learning_rate": 0.0001999938246731885, - "loss": 46.0, - "step": 21977 - }, - { - "epoch": 3.5393936953983656, - "grad_norm": 0.006208564154803753, - "learning_rate": 0.00019999382411094478, - "loss": 46.0, - "step": 21978 - }, - { - "epoch": 3.539554732477153, - "grad_norm": 0.017545703798532486, - "learning_rate": 0.0001999938235486755, - "loss": 46.0, - "step": 21979 - }, - { - "epoch": 3.53971576955594, - "grad_norm": 0.0008426980930380523, - "learning_rate": 0.0001999938229863806, - "loss": 46.0, - "step": 21980 - }, - { - "epoch": 3.5398768066347275, - "grad_norm": 0.00999778974801302, - "learning_rate": 0.0001999938224240601, - "loss": 46.0, - "step": 21981 - }, - { - "epoch": 3.540037843713515, - "grad_norm": 0.004886379465460777, - "learning_rate": 0.00019999382186171403, - "loss": 46.0, - "step": 21982 - }, - { - "epoch": 3.5401988807923024, - "grad_norm": 0.0026424226816743612, - "learning_rate": 0.00019999382129934234, - "loss": 46.0, - "step": 21983 - }, - { - "epoch": 3.54035991787109, - "grad_norm": 0.010750465095043182, - "learning_rate": 0.00019999382073694508, - "loss": 46.0, - "step": 21984 - }, - { - "epoch": 3.5405209549498773, - "grad_norm": 0.014166489243507385, - "learning_rate": 0.00019999382017452222, - "loss": 46.0, - "step": 21985 - }, - { - "epoch": 3.5406819920286647, - "grad_norm": 0.0014444702537730336, - "learning_rate": 0.00019999381961207375, - "loss": 46.0, - "step": 21986 - }, - { - "epoch": 3.540843029107452, - "grad_norm": 0.002986549399793148, - "learning_rate": 0.00019999381904959972, - "loss": 46.0, - "step": 21987 - }, - { - "epoch": 3.5410040661862396, - "grad_norm": 0.0007509914576075971, - "learning_rate": 0.00019999381848710007, - "loss": 46.0, - "step": 21988 - }, - { - "epoch": 3.5411651032650266, - "grad_norm": 0.00562213035300374, - "learning_rate": 0.0001999938179245748, - "loss": 46.0, - "step": 21989 - }, - { - "epoch": 3.541326140343814, - "grad_norm": 0.009790195152163506, - "learning_rate": 0.000199993817362024, - "loss": 46.0, - "step": 21990 - }, - { - "epoch": 3.5414871774226016, - "grad_norm": 0.006605955772101879, - "learning_rate": 0.00019999381679944756, - "loss": 46.0, - "step": 21991 - }, - { - "epoch": 3.541648214501389, - "grad_norm": 0.003194431308656931, - "learning_rate": 0.00019999381623684554, - "loss": 46.0, - "step": 21992 - }, - { - "epoch": 3.5418092515801765, - "grad_norm": 0.0016847802326083183, - "learning_rate": 0.00019999381567421793, - "loss": 46.0, - "step": 21993 - }, - { - "epoch": 3.5419702886589635, - "grad_norm": 0.001483723521232605, - "learning_rate": 0.00019999381511156473, - "loss": 46.0, - "step": 21994 - }, - { - "epoch": 3.542131325737751, - "grad_norm": 0.005787108093500137, - "learning_rate": 0.00019999381454888592, - "loss": 46.0, - "step": 21995 - }, - { - "epoch": 3.5422923628165384, - "grad_norm": 0.0010338110150769353, - "learning_rate": 0.00019999381398618153, - "loss": 46.0, - "step": 21996 - }, - { - "epoch": 3.542453399895326, - "grad_norm": 0.006773020140826702, - "learning_rate": 0.00019999381342345157, - "loss": 46.0, - "step": 21997 - }, - { - "epoch": 3.5426144369741133, - "grad_norm": 0.0016948096454143524, - "learning_rate": 0.00019999381286069597, - "loss": 46.0, - "step": 21998 - }, - { - "epoch": 3.5427754740529007, - "grad_norm": 0.00602155365049839, - "learning_rate": 0.0001999938122979148, - "loss": 46.0, - "step": 21999 - }, - { - "epoch": 3.542936511131688, - "grad_norm": 0.0042361048981547356, - "learning_rate": 0.00019999381173510804, - "loss": 46.0, - "step": 22000 - }, - { - "epoch": 3.5430975482104756, - "grad_norm": 0.0024669792037457228, - "learning_rate": 0.00019999381117227568, - "loss": 46.0, - "step": 22001 - }, - { - "epoch": 3.543258585289263, - "grad_norm": 0.005744407419115305, - "learning_rate": 0.00019999381060941773, - "loss": 46.0, - "step": 22002 - }, - { - "epoch": 3.54341962236805, - "grad_norm": 0.0015349757159128785, - "learning_rate": 0.0001999938100465342, - "loss": 46.0, - "step": 22003 - }, - { - "epoch": 3.5435806594468375, - "grad_norm": 0.008937159553170204, - "learning_rate": 0.00019999380948362505, - "loss": 46.0, - "step": 22004 - }, - { - "epoch": 3.543741696525625, - "grad_norm": 0.0010599809465929866, - "learning_rate": 0.00019999380892069034, - "loss": 46.0, - "step": 22005 - }, - { - "epoch": 3.5439027336044124, - "grad_norm": 0.0013040232006460428, - "learning_rate": 0.00019999380835773002, - "loss": 46.0, - "step": 22006 - }, - { - "epoch": 3.5440637706832, - "grad_norm": 0.001095239887945354, - "learning_rate": 0.00019999380779474408, - "loss": 46.0, - "step": 22007 - }, - { - "epoch": 3.5442248077619873, - "grad_norm": 0.0011410439619794488, - "learning_rate": 0.00019999380723173258, - "loss": 46.0, - "step": 22008 - }, - { - "epoch": 3.5443858448407743, - "grad_norm": 0.004196349997073412, - "learning_rate": 0.00019999380666869547, - "loss": 46.0, - "step": 22009 - }, - { - "epoch": 3.544546881919562, - "grad_norm": 0.0010497071780264378, - "learning_rate": 0.00019999380610563277, - "loss": 46.0, - "step": 22010 - }, - { - "epoch": 3.5447079189983492, - "grad_norm": 0.001308976672589779, - "learning_rate": 0.0001999938055425445, - "loss": 46.0, - "step": 22011 - }, - { - "epoch": 3.5448689560771367, - "grad_norm": 0.0029437458142638206, - "learning_rate": 0.0001999938049794306, - "loss": 46.0, - "step": 22012 - }, - { - "epoch": 3.545029993155924, - "grad_norm": 0.007164867129176855, - "learning_rate": 0.00019999380441629113, - "loss": 46.0, - "step": 22013 - }, - { - "epoch": 3.5451910302347116, - "grad_norm": 0.0010761006269603968, - "learning_rate": 0.00019999380385312608, - "loss": 46.0, - "step": 22014 - }, - { - "epoch": 3.545352067313499, - "grad_norm": 0.003038262017071247, - "learning_rate": 0.0001999938032899354, - "loss": 46.0, - "step": 22015 - }, - { - "epoch": 3.5455131043922865, - "grad_norm": 0.004080723039805889, - "learning_rate": 0.00019999380272671914, - "loss": 46.0, - "step": 22016 - }, - { - "epoch": 3.545674141471074, - "grad_norm": 0.007721003144979477, - "learning_rate": 0.0001999938021634773, - "loss": 46.0, - "step": 22017 - }, - { - "epoch": 3.545835178549861, - "grad_norm": 0.00481992494314909, - "learning_rate": 0.00019999380160020983, - "loss": 46.0, - "step": 22018 - }, - { - "epoch": 3.5459962156286484, - "grad_norm": 0.0033730817958712578, - "learning_rate": 0.00019999380103691682, - "loss": 46.0, - "step": 22019 - }, - { - "epoch": 3.546157252707436, - "grad_norm": 0.00402312446385622, - "learning_rate": 0.00019999380047359817, - "loss": 46.0, - "step": 22020 - }, - { - "epoch": 3.5463182897862233, - "grad_norm": 0.0019284755690023303, - "learning_rate": 0.00019999379991025395, - "loss": 46.0, - "step": 22021 - }, - { - "epoch": 3.5464793268650108, - "grad_norm": 0.004927211441099644, - "learning_rate": 0.00019999379934688413, - "loss": 46.0, - "step": 22022 - }, - { - "epoch": 3.5466403639437982, - "grad_norm": 0.005154408048838377, - "learning_rate": 0.00019999379878348874, - "loss": 46.0, - "step": 22023 - }, - { - "epoch": 3.5468014010225852, - "grad_norm": 0.001954978331923485, - "learning_rate": 0.00019999379822006774, - "loss": 46.0, - "step": 22024 - }, - { - "epoch": 3.5469624381013727, - "grad_norm": 0.0043811374343931675, - "learning_rate": 0.00019999379765662113, - "loss": 46.0, - "step": 22025 - }, - { - "epoch": 3.54712347518016, - "grad_norm": 0.0025637864600867033, - "learning_rate": 0.00019999379709314893, - "loss": 46.0, - "step": 22026 - }, - { - "epoch": 3.5472845122589476, - "grad_norm": 0.002644615015015006, - "learning_rate": 0.00019999379652965116, - "loss": 46.0, - "step": 22027 - }, - { - "epoch": 3.547445549337735, - "grad_norm": 0.022094856947660446, - "learning_rate": 0.00019999379596612776, - "loss": 46.0, - "step": 22028 - }, - { - "epoch": 3.5476065864165225, - "grad_norm": 0.0016870190156623721, - "learning_rate": 0.0001999937954025788, - "loss": 46.0, - "step": 22029 - }, - { - "epoch": 3.54776762349531, - "grad_norm": 0.0030062077566981316, - "learning_rate": 0.00019999379483900427, - "loss": 46.0, - "step": 22030 - }, - { - "epoch": 3.5479286605740974, - "grad_norm": 0.0015344704734161496, - "learning_rate": 0.0001999937942754041, - "loss": 46.0, - "step": 22031 - }, - { - "epoch": 3.548089697652885, - "grad_norm": 0.0033449300099164248, - "learning_rate": 0.00019999379371177836, - "loss": 46.0, - "step": 22032 - }, - { - "epoch": 3.548250734731672, - "grad_norm": 0.005942798685282469, - "learning_rate": 0.00019999379314812702, - "loss": 46.0, - "step": 22033 - }, - { - "epoch": 3.5484117718104593, - "grad_norm": 0.0012079703155905008, - "learning_rate": 0.0001999937925844501, - "loss": 46.0, - "step": 22034 - }, - { - "epoch": 3.5485728088892468, - "grad_norm": 0.007327585481107235, - "learning_rate": 0.00019999379202074752, - "loss": 46.0, - "step": 22035 - }, - { - "epoch": 3.548733845968034, - "grad_norm": 0.0015873860102146864, - "learning_rate": 0.00019999379145701942, - "loss": 46.0, - "step": 22036 - }, - { - "epoch": 3.5488948830468217, - "grad_norm": 0.00800500251352787, - "learning_rate": 0.0001999937908932657, - "loss": 46.0, - "step": 22037 - }, - { - "epoch": 3.5490559201256087, - "grad_norm": 0.010370759293437004, - "learning_rate": 0.0001999937903294864, - "loss": 46.0, - "step": 22038 - }, - { - "epoch": 3.549216957204396, - "grad_norm": 0.01294117234647274, - "learning_rate": 0.0001999937897656815, - "loss": 46.0, - "step": 22039 - }, - { - "epoch": 3.5493779942831836, - "grad_norm": 0.006117288023233414, - "learning_rate": 0.00019999378920185099, - "loss": 46.0, - "step": 22040 - }, - { - "epoch": 3.549539031361971, - "grad_norm": 0.006722374353557825, - "learning_rate": 0.00019999378863799492, - "loss": 46.0, - "step": 22041 - }, - { - "epoch": 3.5497000684407585, - "grad_norm": 0.003587601939216256, - "learning_rate": 0.00019999378807411322, - "loss": 46.0, - "step": 22042 - }, - { - "epoch": 3.549861105519546, - "grad_norm": 0.0036452380008995533, - "learning_rate": 0.00019999378751020595, - "loss": 46.0, - "step": 22043 - }, - { - "epoch": 3.5500221425983334, - "grad_norm": 0.006890326738357544, - "learning_rate": 0.0001999937869462731, - "loss": 46.0, - "step": 22044 - }, - { - "epoch": 3.550183179677121, - "grad_norm": 0.003870619460940361, - "learning_rate": 0.00019999378638231463, - "loss": 46.0, - "step": 22045 - }, - { - "epoch": 3.5503442167559083, - "grad_norm": 0.015682950615882874, - "learning_rate": 0.00019999378581833058, - "loss": 46.0, - "step": 22046 - }, - { - "epoch": 3.5505052538346953, - "grad_norm": 0.0009960850002244115, - "learning_rate": 0.0001999937852543209, - "loss": 46.0, - "step": 22047 - }, - { - "epoch": 3.5506662909134827, - "grad_norm": 0.002020682441070676, - "learning_rate": 0.00019999378469028568, - "loss": 46.0, - "step": 22048 - }, - { - "epoch": 3.55082732799227, - "grad_norm": 0.00999528355896473, - "learning_rate": 0.00019999378412622484, - "loss": 46.0, - "step": 22049 - }, - { - "epoch": 3.5509883650710576, - "grad_norm": 0.0014531823107972741, - "learning_rate": 0.0001999937835621384, - "loss": 46.0, - "step": 22050 - }, - { - "epoch": 3.551149402149845, - "grad_norm": 0.0036034679505974054, - "learning_rate": 0.0001999937829980264, - "loss": 46.0, - "step": 22051 - }, - { - "epoch": 3.5513104392286325, - "grad_norm": 0.002651785733178258, - "learning_rate": 0.0001999937824338888, - "loss": 46.0, - "step": 22052 - }, - { - "epoch": 3.5514714763074196, - "grad_norm": 0.0015067094936966896, - "learning_rate": 0.00019999378186972557, - "loss": 46.0, - "step": 22053 - }, - { - "epoch": 3.551632513386207, - "grad_norm": 0.0019392030080780387, - "learning_rate": 0.00019999378130553677, - "loss": 46.0, - "step": 22054 - }, - { - "epoch": 3.5517935504649945, - "grad_norm": 0.0034423451870679855, - "learning_rate": 0.00019999378074132238, - "loss": 46.0, - "step": 22055 - }, - { - "epoch": 3.551954587543782, - "grad_norm": 0.002157733077183366, - "learning_rate": 0.0001999937801770824, - "loss": 46.0, - "step": 22056 - }, - { - "epoch": 3.5521156246225694, - "grad_norm": 0.008837435394525528, - "learning_rate": 0.0001999937796128168, - "loss": 46.0, - "step": 22057 - }, - { - "epoch": 3.552276661701357, - "grad_norm": 0.003991037141531706, - "learning_rate": 0.00019999377904852565, - "loss": 46.0, - "step": 22058 - }, - { - "epoch": 3.5524376987801443, - "grad_norm": 0.0008896852377802134, - "learning_rate": 0.00019999377848420888, - "loss": 46.0, - "step": 22059 - }, - { - "epoch": 3.5525987358589317, - "grad_norm": 0.0017367544351145625, - "learning_rate": 0.0001999937779198665, - "loss": 46.0, - "step": 22060 - }, - { - "epoch": 3.552759772937719, - "grad_norm": 0.0024984185583889484, - "learning_rate": 0.00019999377735549856, - "loss": 46.0, - "step": 22061 - }, - { - "epoch": 3.552920810016506, - "grad_norm": 0.002191744511947036, - "learning_rate": 0.000199993776791105, - "loss": 46.0, - "step": 22062 - }, - { - "epoch": 3.5530818470952936, - "grad_norm": 0.004819722380489111, - "learning_rate": 0.00019999377622668588, - "loss": 46.0, - "step": 22063 - }, - { - "epoch": 3.553242884174081, - "grad_norm": 0.00223218509927392, - "learning_rate": 0.00019999377566224112, - "loss": 46.0, - "step": 22064 - }, - { - "epoch": 3.5534039212528685, - "grad_norm": 0.0016874123830348253, - "learning_rate": 0.00019999377509777083, - "loss": 46.0, - "step": 22065 - }, - { - "epoch": 3.553564958331656, - "grad_norm": 0.0043662418611347675, - "learning_rate": 0.0001999937745332749, - "loss": 46.0, - "step": 22066 - }, - { - "epoch": 3.553725995410443, - "grad_norm": 0.008035285398364067, - "learning_rate": 0.00019999377396875335, - "loss": 46.0, - "step": 22067 - }, - { - "epoch": 3.5538870324892304, - "grad_norm": 0.000836613355204463, - "learning_rate": 0.00019999377340420627, - "loss": 46.0, - "step": 22068 - }, - { - "epoch": 3.554048069568018, - "grad_norm": 0.004797850735485554, - "learning_rate": 0.00019999377283963355, - "loss": 46.0, - "step": 22069 - }, - { - "epoch": 3.5542091066468053, - "grad_norm": 0.002900391351431608, - "learning_rate": 0.00019999377227503527, - "loss": 46.0, - "step": 22070 - }, - { - "epoch": 3.554370143725593, - "grad_norm": 0.0030640731565654278, - "learning_rate": 0.00019999377171041137, - "loss": 46.0, - "step": 22071 - }, - { - "epoch": 3.5545311808043802, - "grad_norm": 0.012759199365973473, - "learning_rate": 0.00019999377114576192, - "loss": 46.0, - "step": 22072 - }, - { - "epoch": 3.5546922178831677, - "grad_norm": 0.00236497912555933, - "learning_rate": 0.00019999377058108685, - "loss": 46.0, - "step": 22073 - }, - { - "epoch": 3.554853254961955, - "grad_norm": 0.006937025114893913, - "learning_rate": 0.00019999377001638616, - "loss": 46.0, - "step": 22074 - }, - { - "epoch": 3.5550142920407426, - "grad_norm": 0.005155367311090231, - "learning_rate": 0.00019999376945165992, - "loss": 46.0, - "step": 22075 - }, - { - "epoch": 3.55517532911953, - "grad_norm": 0.004127301275730133, - "learning_rate": 0.00019999376888690806, - "loss": 46.0, - "step": 22076 - }, - { - "epoch": 3.555336366198317, - "grad_norm": 0.007588563486933708, - "learning_rate": 0.0001999937683221306, - "loss": 46.0, - "step": 22077 - }, - { - "epoch": 3.5554974032771045, - "grad_norm": 0.00534937996417284, - "learning_rate": 0.00019999376775732756, - "loss": 46.0, - "step": 22078 - }, - { - "epoch": 3.555658440355892, - "grad_norm": 0.006973283365368843, - "learning_rate": 0.00019999376719249894, - "loss": 46.0, - "step": 22079 - }, - { - "epoch": 3.5558194774346794, - "grad_norm": 0.0032623731531202793, - "learning_rate": 0.0001999937666276447, - "loss": 46.0, - "step": 22080 - }, - { - "epoch": 3.555980514513467, - "grad_norm": 0.006116578821092844, - "learning_rate": 0.00019999376606276488, - "loss": 46.0, - "step": 22081 - }, - { - "epoch": 3.556141551592254, - "grad_norm": 0.007953274063766003, - "learning_rate": 0.00019999376549785947, - "loss": 46.0, - "step": 22082 - }, - { - "epoch": 3.5563025886710413, - "grad_norm": 0.005206270609050989, - "learning_rate": 0.00019999376493292848, - "loss": 46.0, - "step": 22083 - }, - { - "epoch": 3.5564636257498288, - "grad_norm": 0.004771303851157427, - "learning_rate": 0.00019999376436797187, - "loss": 46.0, - "step": 22084 - }, - { - "epoch": 3.5566246628286162, - "grad_norm": 0.0026180166751146317, - "learning_rate": 0.00019999376380298967, - "loss": 46.0, - "step": 22085 - }, - { - "epoch": 3.5567856999074037, - "grad_norm": 0.0017999495612457395, - "learning_rate": 0.00019999376323798188, - "loss": 46.0, - "step": 22086 - }, - { - "epoch": 3.556946736986191, - "grad_norm": 0.0015788378659635782, - "learning_rate": 0.0001999937626729485, - "loss": 46.0, - "step": 22087 - }, - { - "epoch": 3.5571077740649786, - "grad_norm": 0.00141175277531147, - "learning_rate": 0.00019999376210788953, - "loss": 46.0, - "step": 22088 - }, - { - "epoch": 3.557268811143766, - "grad_norm": 0.00064327463041991, - "learning_rate": 0.00019999376154280498, - "loss": 46.0, - "step": 22089 - }, - { - "epoch": 3.5574298482225535, - "grad_norm": 0.0036487989127635956, - "learning_rate": 0.00019999376097769482, - "loss": 46.0, - "step": 22090 - }, - { - "epoch": 3.5575908853013405, - "grad_norm": 0.00048203894402831793, - "learning_rate": 0.00019999376041255907, - "loss": 46.0, - "step": 22091 - }, - { - "epoch": 3.557751922380128, - "grad_norm": 0.0007147903670556843, - "learning_rate": 0.0001999937598473977, - "loss": 46.0, - "step": 22092 - }, - { - "epoch": 3.5579129594589154, - "grad_norm": 0.003537654411047697, - "learning_rate": 0.0001999937592822108, - "loss": 46.0, - "step": 22093 - }, - { - "epoch": 3.558073996537703, - "grad_norm": 0.01155766285955906, - "learning_rate": 0.00019999375871699825, - "loss": 46.0, - "step": 22094 - }, - { - "epoch": 3.5582350336164903, - "grad_norm": 0.0012491022935137153, - "learning_rate": 0.00019999375815176013, - "loss": 46.0, - "step": 22095 - }, - { - "epoch": 3.5583960706952777, - "grad_norm": 0.0009142942726612091, - "learning_rate": 0.00019999375758649642, - "loss": 46.0, - "step": 22096 - }, - { - "epoch": 3.5585571077740648, - "grad_norm": 0.000657429569400847, - "learning_rate": 0.0001999937570212071, - "loss": 46.0, - "step": 22097 - }, - { - "epoch": 3.558718144852852, - "grad_norm": 0.0018619636539369822, - "learning_rate": 0.0001999937564558922, - "loss": 46.0, - "step": 22098 - }, - { - "epoch": 3.5588791819316397, - "grad_norm": 0.0061980425380170345, - "learning_rate": 0.00019999375589055168, - "loss": 46.0, - "step": 22099 - }, - { - "epoch": 3.559040219010427, - "grad_norm": 0.001235340372659266, - "learning_rate": 0.0001999937553251856, - "loss": 46.0, - "step": 22100 - }, - { - "epoch": 3.5592012560892146, - "grad_norm": 0.002880545100197196, - "learning_rate": 0.00019999375475979392, - "loss": 46.0, - "step": 22101 - }, - { - "epoch": 3.559362293168002, - "grad_norm": 0.0030691258143633604, - "learning_rate": 0.00019999375419437664, - "loss": 46.0, - "step": 22102 - }, - { - "epoch": 3.5595233302467895, - "grad_norm": 0.007847237400710583, - "learning_rate": 0.00019999375362893376, - "loss": 46.0, - "step": 22103 - }, - { - "epoch": 3.559684367325577, - "grad_norm": 0.0024644576478749514, - "learning_rate": 0.0001999937530634653, - "loss": 46.0, - "step": 22104 - }, - { - "epoch": 3.5598454044043644, - "grad_norm": 0.005099670495837927, - "learning_rate": 0.00019999375249797122, - "loss": 46.0, - "step": 22105 - }, - { - "epoch": 3.5600064414831514, - "grad_norm": 0.0015072562964633107, - "learning_rate": 0.00019999375193245161, - "loss": 46.0, - "step": 22106 - }, - { - "epoch": 3.560167478561939, - "grad_norm": 0.010963588021695614, - "learning_rate": 0.00019999375136690634, - "loss": 46.0, - "step": 22107 - }, - { - "epoch": 3.5603285156407263, - "grad_norm": 0.0012547129299491644, - "learning_rate": 0.0001999937508013355, - "loss": 46.0, - "step": 22108 - }, - { - "epoch": 3.5604895527195137, - "grad_norm": 0.006265297997742891, - "learning_rate": 0.00019999375023573907, - "loss": 46.0, - "step": 22109 - }, - { - "epoch": 3.560650589798301, - "grad_norm": 0.0017919526435434818, - "learning_rate": 0.00019999374967011706, - "loss": 46.0, - "step": 22110 - }, - { - "epoch": 3.560811626877088, - "grad_norm": 0.003235555486753583, - "learning_rate": 0.00019999374910446944, - "loss": 46.0, - "step": 22111 - }, - { - "epoch": 3.5609726639558756, - "grad_norm": 0.0021480501163750887, - "learning_rate": 0.00019999374853879622, - "loss": 46.0, - "step": 22112 - }, - { - "epoch": 3.561133701034663, - "grad_norm": 0.0016245265724137425, - "learning_rate": 0.00019999374797309742, - "loss": 46.0, - "step": 22113 - }, - { - "epoch": 3.5612947381134505, - "grad_norm": 0.0018488435307517648, - "learning_rate": 0.00019999374740737303, - "loss": 46.0, - "step": 22114 - }, - { - "epoch": 3.561455775192238, - "grad_norm": 0.00484824925661087, - "learning_rate": 0.00019999374684162303, - "loss": 46.0, - "step": 22115 - }, - { - "epoch": 3.5616168122710254, - "grad_norm": 0.0064271786250174046, - "learning_rate": 0.00019999374627584744, - "loss": 46.0, - "step": 22116 - }, - { - "epoch": 3.561777849349813, - "grad_norm": 0.0033978682477027178, - "learning_rate": 0.00019999374571004626, - "loss": 46.0, - "step": 22117 - }, - { - "epoch": 3.5619388864286003, - "grad_norm": 0.00105980911757797, - "learning_rate": 0.0001999937451442195, - "loss": 46.0, - "step": 22118 - }, - { - "epoch": 3.562099923507388, - "grad_norm": 0.0012255714973434806, - "learning_rate": 0.00019999374457836715, - "loss": 46.0, - "step": 22119 - }, - { - "epoch": 3.562260960586175, - "grad_norm": 0.0011637681163847446, - "learning_rate": 0.00019999374401248919, - "loss": 46.0, - "step": 22120 - }, - { - "epoch": 3.5624219976649623, - "grad_norm": 0.0009744488052092493, - "learning_rate": 0.00019999374344658563, - "loss": 46.0, - "step": 22121 - }, - { - "epoch": 3.5625830347437497, - "grad_norm": 0.012898662127554417, - "learning_rate": 0.0001999937428806565, - "loss": 46.0, - "step": 22122 - }, - { - "epoch": 3.562744071822537, - "grad_norm": 0.0022565291728824377, - "learning_rate": 0.00019999374231470177, - "loss": 46.0, - "step": 22123 - }, - { - "epoch": 3.5629051089013246, - "grad_norm": 0.002504949225112796, - "learning_rate": 0.00019999374174872143, - "loss": 46.0, - "step": 22124 - }, - { - "epoch": 3.563066145980112, - "grad_norm": 0.0019123753299936652, - "learning_rate": 0.0001999937411827155, - "loss": 46.0, - "step": 22125 - }, - { - "epoch": 3.563227183058899, - "grad_norm": 0.0017348452238366008, - "learning_rate": 0.00019999374061668398, - "loss": 46.0, - "step": 22126 - }, - { - "epoch": 3.5633882201376865, - "grad_norm": 0.0009706579148769379, - "learning_rate": 0.00019999374005062688, - "loss": 46.0, - "step": 22127 - }, - { - "epoch": 3.563549257216474, - "grad_norm": 0.010791081003844738, - "learning_rate": 0.00019999373948454416, - "loss": 46.0, - "step": 22128 - }, - { - "epoch": 3.5637102942952614, - "grad_norm": 0.0021594117861241102, - "learning_rate": 0.00019999373891843586, - "loss": 46.0, - "step": 22129 - }, - { - "epoch": 3.563871331374049, - "grad_norm": 0.007558625657111406, - "learning_rate": 0.00019999373835230197, - "loss": 46.0, - "step": 22130 - }, - { - "epoch": 3.5640323684528363, - "grad_norm": 0.008450819179415703, - "learning_rate": 0.0001999937377861425, - "loss": 46.0, - "step": 22131 - }, - { - "epoch": 3.564193405531624, - "grad_norm": 0.00552174961194396, - "learning_rate": 0.00019999373721995742, - "loss": 46.0, - "step": 22132 - }, - { - "epoch": 3.5643544426104112, - "grad_norm": 0.004941765684634447, - "learning_rate": 0.00019999373665374677, - "loss": 46.0, - "step": 22133 - }, - { - "epoch": 3.5645154796891987, - "grad_norm": 0.004483547993004322, - "learning_rate": 0.00019999373608751048, - "loss": 46.0, - "step": 22134 - }, - { - "epoch": 3.5646765167679857, - "grad_norm": 0.00358788575977087, - "learning_rate": 0.00019999373552124862, - "loss": 46.0, - "step": 22135 - }, - { - "epoch": 3.564837553846773, - "grad_norm": 0.0013157286448404193, - "learning_rate": 0.00019999373495496118, - "loss": 46.0, - "step": 22136 - }, - { - "epoch": 3.5649985909255606, - "grad_norm": 0.007234329357743263, - "learning_rate": 0.00019999373438864815, - "loss": 46.0, - "step": 22137 - }, - { - "epoch": 3.565159628004348, - "grad_norm": 0.0037574535235762596, - "learning_rate": 0.0001999937338223095, - "loss": 46.0, - "step": 22138 - }, - { - "epoch": 3.5653206650831355, - "grad_norm": 0.0013072319561615586, - "learning_rate": 0.00019999373325594528, - "loss": 46.0, - "step": 22139 - }, - { - "epoch": 3.5654817021619225, - "grad_norm": 0.005404674913734198, - "learning_rate": 0.00019999373268955547, - "loss": 46.0, - "step": 22140 - }, - { - "epoch": 3.56564273924071, - "grad_norm": 0.003842002945020795, - "learning_rate": 0.00019999373212314003, - "loss": 46.0, - "step": 22141 - }, - { - "epoch": 3.5658037763194974, - "grad_norm": 0.004123489372432232, - "learning_rate": 0.00019999373155669902, - "loss": 46.0, - "step": 22142 - }, - { - "epoch": 3.565964813398285, - "grad_norm": 0.004591065458953381, - "learning_rate": 0.00019999373099023244, - "loss": 46.0, - "step": 22143 - }, - { - "epoch": 3.5661258504770723, - "grad_norm": 0.01184425875544548, - "learning_rate": 0.00019999373042374024, - "loss": 46.0, - "step": 22144 - }, - { - "epoch": 3.5662868875558598, - "grad_norm": 0.005107114091515541, - "learning_rate": 0.00019999372985722244, - "loss": 46.0, - "step": 22145 - }, - { - "epoch": 3.566447924634647, - "grad_norm": 0.0010000468464568257, - "learning_rate": 0.00019999372929067904, - "loss": 46.0, - "step": 22146 - }, - { - "epoch": 3.5666089617134347, - "grad_norm": 0.003872596425935626, - "learning_rate": 0.0001999937287241101, - "loss": 46.0, - "step": 22147 - }, - { - "epoch": 3.566769998792222, - "grad_norm": 0.0057190777733922005, - "learning_rate": 0.00019999372815751552, - "loss": 46.0, - "step": 22148 - }, - { - "epoch": 3.5669310358710096, - "grad_norm": 0.007215212564915419, - "learning_rate": 0.00019999372759089536, - "loss": 46.0, - "step": 22149 - }, - { - "epoch": 3.5670920729497966, - "grad_norm": 0.0023381332866847515, - "learning_rate": 0.00019999372702424962, - "loss": 46.0, - "step": 22150 - }, - { - "epoch": 3.567253110028584, - "grad_norm": 0.006481852848082781, - "learning_rate": 0.00019999372645757826, - "loss": 46.0, - "step": 22151 - }, - { - "epoch": 3.5674141471073715, - "grad_norm": 0.0016577077331021428, - "learning_rate": 0.00019999372589088132, - "loss": 46.0, - "step": 22152 - }, - { - "epoch": 3.567575184186159, - "grad_norm": 0.004699667915701866, - "learning_rate": 0.0001999937253241588, - "loss": 46.0, - "step": 22153 - }, - { - "epoch": 3.5677362212649464, - "grad_norm": 0.0026436829939484596, - "learning_rate": 0.00019999372475741064, - "loss": 46.0, - "step": 22154 - }, - { - "epoch": 3.5678972583437334, - "grad_norm": 0.0045671770349144936, - "learning_rate": 0.00019999372419063694, - "loss": 46.0, - "step": 22155 - }, - { - "epoch": 3.568058295422521, - "grad_norm": 0.0021484759636223316, - "learning_rate": 0.00019999372362383762, - "loss": 46.0, - "step": 22156 - }, - { - "epoch": 3.5682193325013083, - "grad_norm": 0.0014306337106972933, - "learning_rate": 0.0001999937230570127, - "loss": 46.0, - "step": 22157 - }, - { - "epoch": 3.5683803695800957, - "grad_norm": 0.00875294953584671, - "learning_rate": 0.0001999937224901622, - "loss": 46.0, - "step": 22158 - }, - { - "epoch": 3.568541406658883, - "grad_norm": 0.0014109316980466247, - "learning_rate": 0.0001999937219232861, - "loss": 46.0, - "step": 22159 - }, - { - "epoch": 3.5687024437376706, - "grad_norm": 0.001319185714237392, - "learning_rate": 0.0001999937213563844, - "loss": 46.0, - "step": 22160 - }, - { - "epoch": 3.568863480816458, - "grad_norm": 0.005401642993092537, - "learning_rate": 0.00019999372078945715, - "loss": 46.0, - "step": 22161 - }, - { - "epoch": 3.5690245178952456, - "grad_norm": 0.0022573701571673155, - "learning_rate": 0.00019999372022250426, - "loss": 46.0, - "step": 22162 - }, - { - "epoch": 3.569185554974033, - "grad_norm": 0.00423166248947382, - "learning_rate": 0.0001999937196555258, - "loss": 46.0, - "step": 22163 - }, - { - "epoch": 3.56934659205282, - "grad_norm": 0.004280682187527418, - "learning_rate": 0.00019999371908852173, - "loss": 46.0, - "step": 22164 - }, - { - "epoch": 3.5695076291316075, - "grad_norm": 0.007625343278050423, - "learning_rate": 0.0001999937185214921, - "loss": 46.0, - "step": 22165 - }, - { - "epoch": 3.569668666210395, - "grad_norm": 0.002587782684713602, - "learning_rate": 0.00019999371795443685, - "loss": 46.0, - "step": 22166 - }, - { - "epoch": 3.5698297032891824, - "grad_norm": 0.0010397207224741578, - "learning_rate": 0.000199993717387356, - "loss": 46.0, - "step": 22167 - }, - { - "epoch": 3.56999074036797, - "grad_norm": 0.001793302595615387, - "learning_rate": 0.00019999371682024957, - "loss": 46.0, - "step": 22168 - }, - { - "epoch": 3.5701517774467573, - "grad_norm": 0.010664881207048893, - "learning_rate": 0.00019999371625311753, - "loss": 46.0, - "step": 22169 - }, - { - "epoch": 3.5703128145255443, - "grad_norm": 0.004823134280741215, - "learning_rate": 0.00019999371568595994, - "loss": 46.0, - "step": 22170 - }, - { - "epoch": 3.5704738516043317, - "grad_norm": 0.008988243527710438, - "learning_rate": 0.0001999937151187767, - "loss": 46.0, - "step": 22171 - }, - { - "epoch": 3.570634888683119, - "grad_norm": 0.004488545004278421, - "learning_rate": 0.0001999937145515679, - "loss": 46.0, - "step": 22172 - }, - { - "epoch": 3.5707959257619066, - "grad_norm": 0.00324258953332901, - "learning_rate": 0.0001999937139843335, - "loss": 46.0, - "step": 22173 - }, - { - "epoch": 3.570956962840694, - "grad_norm": 0.0019249595934525132, - "learning_rate": 0.00019999371341707353, - "loss": 46.0, - "step": 22174 - }, - { - "epoch": 3.5711179999194815, - "grad_norm": 0.0022335245739668608, - "learning_rate": 0.00019999371284978791, - "loss": 46.0, - "step": 22175 - }, - { - "epoch": 3.571279036998269, - "grad_norm": 0.0029795197769999504, - "learning_rate": 0.00019999371228247674, - "loss": 46.0, - "step": 22176 - }, - { - "epoch": 3.5714400740770564, - "grad_norm": 0.0024626704398542643, - "learning_rate": 0.00019999371171513996, - "loss": 46.0, - "step": 22177 - }, - { - "epoch": 3.571601111155844, - "grad_norm": 0.0039766328409314156, - "learning_rate": 0.0001999937111477776, - "loss": 46.0, - "step": 22178 - }, - { - "epoch": 3.571762148234631, - "grad_norm": 0.0012694451725110412, - "learning_rate": 0.00019999371058038965, - "loss": 46.0, - "step": 22179 - }, - { - "epoch": 3.5719231853134183, - "grad_norm": 0.0032486470881849527, - "learning_rate": 0.0001999937100129761, - "loss": 46.0, - "step": 22180 - }, - { - "epoch": 3.572084222392206, - "grad_norm": 0.0004951511509716511, - "learning_rate": 0.00019999370944553694, - "loss": 46.0, - "step": 22181 - }, - { - "epoch": 3.5722452594709933, - "grad_norm": 0.0026333224959671497, - "learning_rate": 0.00019999370887807222, - "loss": 46.0, - "step": 22182 - }, - { - "epoch": 3.5724062965497807, - "grad_norm": 0.006623640190809965, - "learning_rate": 0.00019999370831058188, - "loss": 46.0, - "step": 22183 - }, - { - "epoch": 3.5725673336285677, - "grad_norm": 0.008266623131930828, - "learning_rate": 0.00019999370774306593, - "loss": 46.0, - "step": 22184 - }, - { - "epoch": 3.572728370707355, - "grad_norm": 0.005043573211878538, - "learning_rate": 0.00019999370717552442, - "loss": 46.0, - "step": 22185 - }, - { - "epoch": 3.5728894077861426, - "grad_norm": 0.003764103865250945, - "learning_rate": 0.00019999370660795732, - "loss": 46.0, - "step": 22186 - }, - { - "epoch": 3.57305044486493, - "grad_norm": 0.006669378373771906, - "learning_rate": 0.0001999937060403646, - "loss": 46.0, - "step": 22187 - }, - { - "epoch": 3.5732114819437175, - "grad_norm": 0.0010816592257469893, - "learning_rate": 0.0001999937054727463, - "loss": 46.0, - "step": 22188 - }, - { - "epoch": 3.573372519022505, - "grad_norm": 0.0041970666497945786, - "learning_rate": 0.00019999370490510242, - "loss": 46.0, - "step": 22189 - }, - { - "epoch": 3.5735335561012924, - "grad_norm": 0.012897015549242496, - "learning_rate": 0.00019999370433743292, - "loss": 46.0, - "step": 22190 - }, - { - "epoch": 3.57369459318008, - "grad_norm": 0.004952274262905121, - "learning_rate": 0.00019999370376973786, - "loss": 46.0, - "step": 22191 - }, - { - "epoch": 3.5738556302588673, - "grad_norm": 0.003932930063456297, - "learning_rate": 0.00019999370320201719, - "loss": 46.0, - "step": 22192 - }, - { - "epoch": 3.5740166673376543, - "grad_norm": 0.0011036846553906798, - "learning_rate": 0.0001999937026342709, - "loss": 46.0, - "step": 22193 - }, - { - "epoch": 3.574177704416442, - "grad_norm": 0.0029764981009066105, - "learning_rate": 0.00019999370206649905, - "loss": 46.0, - "step": 22194 - }, - { - "epoch": 3.5743387414952292, - "grad_norm": 0.002830793149769306, - "learning_rate": 0.0001999937014987016, - "loss": 46.0, - "step": 22195 - }, - { - "epoch": 3.5744997785740167, - "grad_norm": 0.01494094729423523, - "learning_rate": 0.00019999370093087856, - "loss": 46.0, - "step": 22196 - }, - { - "epoch": 3.574660815652804, - "grad_norm": 0.0011509484611451626, - "learning_rate": 0.00019999370036302992, - "loss": 46.0, - "step": 22197 - }, - { - "epoch": 3.5748218527315916, - "grad_norm": 0.002567818621173501, - "learning_rate": 0.0001999936997951557, - "loss": 46.0, - "step": 22198 - }, - { - "epoch": 3.5749828898103786, - "grad_norm": 0.0015267434064298868, - "learning_rate": 0.00019999369922725588, - "loss": 46.0, - "step": 22199 - }, - { - "epoch": 3.575143926889166, - "grad_norm": 0.0023083004634827375, - "learning_rate": 0.00019999369865933043, - "loss": 46.0, - "step": 22200 - }, - { - "epoch": 3.5753049639679535, - "grad_norm": 0.008898667059838772, - "learning_rate": 0.00019999369809137942, - "loss": 46.0, - "step": 22201 - }, - { - "epoch": 3.575466001046741, - "grad_norm": 0.003886572550982237, - "learning_rate": 0.00019999369752340281, - "loss": 46.0, - "step": 22202 - }, - { - "epoch": 3.5756270381255284, - "grad_norm": 0.008097635582089424, - "learning_rate": 0.00019999369695540065, - "loss": 46.0, - "step": 22203 - }, - { - "epoch": 3.575788075204316, - "grad_norm": 0.0014989827759563923, - "learning_rate": 0.00019999369638737285, - "loss": 46.0, - "step": 22204 - }, - { - "epoch": 3.5759491122831033, - "grad_norm": 0.0005136894178576767, - "learning_rate": 0.00019999369581931946, - "loss": 46.0, - "step": 22205 - }, - { - "epoch": 3.5761101493618908, - "grad_norm": 0.0012887001503258944, - "learning_rate": 0.0001999936952512405, - "loss": 46.0, - "step": 22206 - }, - { - "epoch": 3.576271186440678, - "grad_norm": 0.008748008869588375, - "learning_rate": 0.00019999369468313592, - "loss": 46.0, - "step": 22207 - }, - { - "epoch": 3.576432223519465, - "grad_norm": 0.006734238937497139, - "learning_rate": 0.00019999369411500574, - "loss": 46.0, - "step": 22208 - }, - { - "epoch": 3.5765932605982527, - "grad_norm": 0.004319359548389912, - "learning_rate": 0.00019999369354685, - "loss": 46.0, - "step": 22209 - }, - { - "epoch": 3.57675429767704, - "grad_norm": 0.0028410619124770164, - "learning_rate": 0.00019999369297866862, - "loss": 46.0, - "step": 22210 - }, - { - "epoch": 3.5769153347558276, - "grad_norm": 0.0024277695920318365, - "learning_rate": 0.00019999369241046168, - "loss": 46.0, - "step": 22211 - }, - { - "epoch": 3.577076371834615, - "grad_norm": 0.004947687964886427, - "learning_rate": 0.00019999369184222916, - "loss": 46.0, - "step": 22212 - }, - { - "epoch": 3.5772374089134025, - "grad_norm": 0.010552571155130863, - "learning_rate": 0.00019999369127397102, - "loss": 46.0, - "step": 22213 - }, - { - "epoch": 3.5773984459921895, - "grad_norm": 0.007573977112770081, - "learning_rate": 0.00019999369070568731, - "loss": 46.0, - "step": 22214 - }, - { - "epoch": 3.577559483070977, - "grad_norm": 0.002108183689415455, - "learning_rate": 0.000199993690137378, - "loss": 46.0, - "step": 22215 - }, - { - "epoch": 3.5777205201497644, - "grad_norm": 0.0011062516132369637, - "learning_rate": 0.00019999368956904307, - "loss": 46.0, - "step": 22216 - }, - { - "epoch": 3.577881557228552, - "grad_norm": 0.0013869494432583451, - "learning_rate": 0.00019999368900068255, - "loss": 46.0, - "step": 22217 - }, - { - "epoch": 3.5780425943073393, - "grad_norm": 0.005112015642225742, - "learning_rate": 0.00019999368843229647, - "loss": 46.0, - "step": 22218 - }, - { - "epoch": 3.5782036313861267, - "grad_norm": 0.0071973856538534164, - "learning_rate": 0.00019999368786388478, - "loss": 46.0, - "step": 22219 - }, - { - "epoch": 3.578364668464914, - "grad_norm": 0.007174696307629347, - "learning_rate": 0.0001999936872954475, - "loss": 46.0, - "step": 22220 - }, - { - "epoch": 3.5785257055437016, - "grad_norm": 0.0036894932854920626, - "learning_rate": 0.0001999936867269846, - "loss": 46.0, - "step": 22221 - }, - { - "epoch": 3.578686742622489, - "grad_norm": 0.004031945485621691, - "learning_rate": 0.00019999368615849616, - "loss": 46.0, - "step": 22222 - }, - { - "epoch": 3.578847779701276, - "grad_norm": 0.00776843074709177, - "learning_rate": 0.0001999936855899821, - "loss": 46.0, - "step": 22223 - }, - { - "epoch": 3.5790088167800636, - "grad_norm": 0.01725834608078003, - "learning_rate": 0.00019999368502144244, - "loss": 46.0, - "step": 22224 - }, - { - "epoch": 3.579169853858851, - "grad_norm": 0.0049018352292478085, - "learning_rate": 0.0001999936844528772, - "loss": 46.0, - "step": 22225 - }, - { - "epoch": 3.5793308909376385, - "grad_norm": 0.0024316362105309963, - "learning_rate": 0.00019999368388428637, - "loss": 46.0, - "step": 22226 - }, - { - "epoch": 3.579491928016426, - "grad_norm": 0.0011868409346789122, - "learning_rate": 0.00019999368331566992, - "loss": 46.0, - "step": 22227 - }, - { - "epoch": 3.579652965095213, - "grad_norm": 0.004764891229569912, - "learning_rate": 0.0001999936827470279, - "loss": 46.0, - "step": 22228 - }, - { - "epoch": 3.5798140021740004, - "grad_norm": 0.005578783340752125, - "learning_rate": 0.00019999368217836025, - "loss": 46.0, - "step": 22229 - }, - { - "epoch": 3.579975039252788, - "grad_norm": 0.0029380801133811474, - "learning_rate": 0.00019999368160966704, - "loss": 46.0, - "step": 22230 - }, - { - "epoch": 3.5801360763315753, - "grad_norm": 0.005727612879127264, - "learning_rate": 0.00019999368104094823, - "loss": 46.0, - "step": 22231 - }, - { - "epoch": 3.5802971134103627, - "grad_norm": 0.01643526181578636, - "learning_rate": 0.00019999368047220385, - "loss": 46.0, - "step": 22232 - }, - { - "epoch": 3.58045815048915, - "grad_norm": 0.00436242762953043, - "learning_rate": 0.00019999367990343383, - "loss": 46.0, - "step": 22233 - }, - { - "epoch": 3.5806191875679376, - "grad_norm": 0.004011080134660006, - "learning_rate": 0.00019999367933463825, - "loss": 46.0, - "step": 22234 - }, - { - "epoch": 3.580780224646725, - "grad_norm": 0.0024378944654017687, - "learning_rate": 0.00019999367876581708, - "loss": 46.0, - "step": 22235 - }, - { - "epoch": 3.5809412617255125, - "grad_norm": 0.0018386357696726918, - "learning_rate": 0.00019999367819697027, - "loss": 46.0, - "step": 22236 - }, - { - "epoch": 3.5811022988042995, - "grad_norm": 0.003888638922944665, - "learning_rate": 0.00019999367762809793, - "loss": 46.0, - "step": 22237 - }, - { - "epoch": 3.581263335883087, - "grad_norm": 0.0023935684002935886, - "learning_rate": 0.00019999367705919995, - "loss": 46.0, - "step": 22238 - }, - { - "epoch": 3.5814243729618744, - "grad_norm": 0.005769809242337942, - "learning_rate": 0.0001999936764902764, - "loss": 46.0, - "step": 22239 - }, - { - "epoch": 3.581585410040662, - "grad_norm": 0.0028870890382677317, - "learning_rate": 0.00019999367592132727, - "loss": 46.0, - "step": 22240 - }, - { - "epoch": 3.5817464471194493, - "grad_norm": 0.003583308309316635, - "learning_rate": 0.00019999367535235253, - "loss": 46.0, - "step": 22241 - }, - { - "epoch": 3.581907484198237, - "grad_norm": 0.006188003346323967, - "learning_rate": 0.00019999367478335217, - "loss": 46.0, - "step": 22242 - }, - { - "epoch": 3.582068521277024, - "grad_norm": 0.004220211878418922, - "learning_rate": 0.00019999367421432625, - "loss": 46.0, - "step": 22243 - }, - { - "epoch": 3.5822295583558112, - "grad_norm": 0.0036809046287089586, - "learning_rate": 0.00019999367364527472, - "loss": 46.0, - "step": 22244 - }, - { - "epoch": 3.5823905954345987, - "grad_norm": 0.01166660524904728, - "learning_rate": 0.00019999367307619762, - "loss": 46.0, - "step": 22245 - }, - { - "epoch": 3.582551632513386, - "grad_norm": 0.008661210536956787, - "learning_rate": 0.00019999367250709491, - "loss": 46.0, - "step": 22246 - }, - { - "epoch": 3.5827126695921736, - "grad_norm": 0.00344940391369164, - "learning_rate": 0.00019999367193796662, - "loss": 46.0, - "step": 22247 - }, - { - "epoch": 3.582873706670961, - "grad_norm": 0.005918078124523163, - "learning_rate": 0.00019999367136881274, - "loss": 46.0, - "step": 22248 - }, - { - "epoch": 3.5830347437497485, - "grad_norm": 0.008551975712180138, - "learning_rate": 0.00019999367079963321, - "loss": 46.0, - "step": 22249 - }, - { - "epoch": 3.583195780828536, - "grad_norm": 0.0004640891856979579, - "learning_rate": 0.00019999367023042813, - "loss": 46.0, - "step": 22250 - }, - { - "epoch": 3.5833568179073234, - "grad_norm": 0.0029130319599062204, - "learning_rate": 0.0001999936696611975, - "loss": 46.0, - "step": 22251 - }, - { - "epoch": 3.5835178549861104, - "grad_norm": 0.003528533736243844, - "learning_rate": 0.0001999936690919412, - "loss": 46.0, - "step": 22252 - }, - { - "epoch": 3.583678892064898, - "grad_norm": 0.005134756676852703, - "learning_rate": 0.00019999366852265936, - "loss": 46.0, - "step": 22253 - }, - { - "epoch": 3.5838399291436853, - "grad_norm": 0.006688056513667107, - "learning_rate": 0.00019999366795335187, - "loss": 46.0, - "step": 22254 - }, - { - "epoch": 3.5840009662224728, - "grad_norm": 0.002679308643564582, - "learning_rate": 0.00019999366738401885, - "loss": 46.0, - "step": 22255 - }, - { - "epoch": 3.5841620033012602, - "grad_norm": 0.0009020857396535575, - "learning_rate": 0.00019999366681466022, - "loss": 46.0, - "step": 22256 - }, - { - "epoch": 3.5843230403800472, - "grad_norm": 0.003891663858667016, - "learning_rate": 0.00019999366624527597, - "loss": 46.0, - "step": 22257 - }, - { - "epoch": 3.5844840774588347, - "grad_norm": 0.006270218174904585, - "learning_rate": 0.00019999366567586614, - "loss": 46.0, - "step": 22258 - }, - { - "epoch": 3.584645114537622, - "grad_norm": 0.012125732377171516, - "learning_rate": 0.00019999366510643074, - "loss": 46.0, - "step": 22259 - }, - { - "epoch": 3.5848061516164096, - "grad_norm": 0.0018512671813368797, - "learning_rate": 0.00019999366453696973, - "loss": 46.0, - "step": 22260 - }, - { - "epoch": 3.584967188695197, - "grad_norm": 0.00933990627527237, - "learning_rate": 0.0001999936639674831, - "loss": 46.0, - "step": 22261 - }, - { - "epoch": 3.5851282257739845, - "grad_norm": 0.002727724378928542, - "learning_rate": 0.00019999366339797092, - "loss": 46.0, - "step": 22262 - }, - { - "epoch": 3.585289262852772, - "grad_norm": 0.001745673012919724, - "learning_rate": 0.00019999366282843313, - "loss": 46.0, - "step": 22263 - }, - { - "epoch": 3.5854502999315594, - "grad_norm": 0.0011512573109939694, - "learning_rate": 0.00019999366225886974, - "loss": 46.0, - "step": 22264 - }, - { - "epoch": 3.585611337010347, - "grad_norm": 0.0033175284042954445, - "learning_rate": 0.00019999366168928074, - "loss": 46.0, - "step": 22265 - }, - { - "epoch": 3.5857723740891343, - "grad_norm": 0.0012629320845007896, - "learning_rate": 0.00019999366111966618, - "loss": 46.0, - "step": 22266 - }, - { - "epoch": 3.5859334111679213, - "grad_norm": 0.007327020168304443, - "learning_rate": 0.000199993660550026, - "loss": 46.0, - "step": 22267 - }, - { - "epoch": 3.5860944482467088, - "grad_norm": 0.0014256129506975412, - "learning_rate": 0.00019999365998036025, - "loss": 46.0, - "step": 22268 - }, - { - "epoch": 3.586255485325496, - "grad_norm": 0.0027817138470709324, - "learning_rate": 0.00019999365941066893, - "loss": 46.0, - "step": 22269 - }, - { - "epoch": 3.5864165224042837, - "grad_norm": 0.001265592174604535, - "learning_rate": 0.00019999365884095196, - "loss": 46.0, - "step": 22270 - }, - { - "epoch": 3.586577559483071, - "grad_norm": 0.008917558006942272, - "learning_rate": 0.00019999365827120941, - "loss": 46.0, - "step": 22271 - }, - { - "epoch": 3.586738596561858, - "grad_norm": 0.0007122106617316604, - "learning_rate": 0.00019999365770144128, - "loss": 46.0, - "step": 22272 - }, - { - "epoch": 3.5868996336406456, - "grad_norm": 0.004836119245737791, - "learning_rate": 0.00019999365713164755, - "loss": 46.0, - "step": 22273 - }, - { - "epoch": 3.587060670719433, - "grad_norm": 0.0038063570391386747, - "learning_rate": 0.00019999365656182824, - "loss": 46.0, - "step": 22274 - }, - { - "epoch": 3.5872217077982205, - "grad_norm": 0.0018937435233965516, - "learning_rate": 0.00019999365599198332, - "loss": 46.0, - "step": 22275 - }, - { - "epoch": 3.587382744877008, - "grad_norm": 0.006482832133769989, - "learning_rate": 0.0001999936554221128, - "loss": 46.0, - "step": 22276 - }, - { - "epoch": 3.5875437819557954, - "grad_norm": 0.00454520620405674, - "learning_rate": 0.00019999365485221673, - "loss": 46.0, - "step": 22277 - }, - { - "epoch": 3.587704819034583, - "grad_norm": 0.0012868338963016868, - "learning_rate": 0.00019999365428229504, - "loss": 46.0, - "step": 22278 - }, - { - "epoch": 3.5878658561133703, - "grad_norm": 0.021897710859775543, - "learning_rate": 0.00019999365371234774, - "loss": 46.0, - "step": 22279 - }, - { - "epoch": 3.5880268931921577, - "grad_norm": 0.0013524509267881513, - "learning_rate": 0.00019999365314237485, - "loss": 46.0, - "step": 22280 - }, - { - "epoch": 3.5881879302709447, - "grad_norm": 0.0031641307286918163, - "learning_rate": 0.0001999936525723764, - "loss": 46.0, - "step": 22281 - }, - { - "epoch": 3.588348967349732, - "grad_norm": 0.001093899947591126, - "learning_rate": 0.00019999365200235234, - "loss": 46.0, - "step": 22282 - }, - { - "epoch": 3.5885100044285196, - "grad_norm": 0.0018966369098052382, - "learning_rate": 0.0001999936514323027, - "loss": 46.0, - "step": 22283 - }, - { - "epoch": 3.588671041507307, - "grad_norm": 0.0013478579930961132, - "learning_rate": 0.00019999365086222743, - "loss": 46.0, - "step": 22284 - }, - { - "epoch": 3.5888320785860945, - "grad_norm": 0.008609525859355927, - "learning_rate": 0.0001999936502921266, - "loss": 46.0, - "step": 22285 - }, - { - "epoch": 3.588993115664882, - "grad_norm": 0.006135663483291864, - "learning_rate": 0.00019999364972200017, - "loss": 46.0, - "step": 22286 - }, - { - "epoch": 3.589154152743669, - "grad_norm": 0.0014241492608562112, - "learning_rate": 0.00019999364915184811, - "loss": 46.0, - "step": 22287 - }, - { - "epoch": 3.5893151898224565, - "grad_norm": 0.006008440162986517, - "learning_rate": 0.0001999936485816705, - "loss": 46.0, - "step": 22288 - }, - { - "epoch": 3.589476226901244, - "grad_norm": 0.002906738081946969, - "learning_rate": 0.0001999936480114673, - "loss": 46.0, - "step": 22289 - }, - { - "epoch": 3.5896372639800314, - "grad_norm": 0.0026723395567387342, - "learning_rate": 0.00019999364744123846, - "loss": 46.0, - "step": 22290 - }, - { - "epoch": 3.589798301058819, - "grad_norm": 0.0034699132665991783, - "learning_rate": 0.00019999364687098409, - "loss": 46.0, - "step": 22291 - }, - { - "epoch": 3.5899593381376063, - "grad_norm": 0.001396158360876143, - "learning_rate": 0.00019999364630070407, - "loss": 46.0, - "step": 22292 - }, - { - "epoch": 3.5901203752163937, - "grad_norm": 0.0033217184245586395, - "learning_rate": 0.0001999936457303985, - "loss": 46.0, - "step": 22293 - }, - { - "epoch": 3.590281412295181, - "grad_norm": 0.014631196856498718, - "learning_rate": 0.00019999364516006728, - "loss": 46.0, - "step": 22294 - }, - { - "epoch": 3.5904424493739686, - "grad_norm": 0.001453693606890738, - "learning_rate": 0.0001999936445897105, - "loss": 46.0, - "step": 22295 - }, - { - "epoch": 3.5906034864527556, - "grad_norm": 0.0015400357078760862, - "learning_rate": 0.00019999364401932814, - "loss": 46.0, - "step": 22296 - }, - { - "epoch": 3.590764523531543, - "grad_norm": 0.005252873990684748, - "learning_rate": 0.0001999936434489202, - "loss": 46.0, - "step": 22297 - }, - { - "epoch": 3.5909255606103305, - "grad_norm": 0.0033839144743978977, - "learning_rate": 0.00019999364287848662, - "loss": 46.0, - "step": 22298 - }, - { - "epoch": 3.591086597689118, - "grad_norm": 0.005861698184162378, - "learning_rate": 0.0001999936423080275, - "loss": 46.0, - "step": 22299 - }, - { - "epoch": 3.5912476347679054, - "grad_norm": 0.001217004144564271, - "learning_rate": 0.00019999364173754273, - "loss": 46.0, - "step": 22300 - }, - { - "epoch": 3.5914086718466924, - "grad_norm": 0.0015326649881899357, - "learning_rate": 0.0001999936411670324, - "loss": 46.0, - "step": 22301 - }, - { - "epoch": 3.59156970892548, - "grad_norm": 0.0013569958973675966, - "learning_rate": 0.00019999364059649648, - "loss": 46.0, - "step": 22302 - }, - { - "epoch": 3.5917307460042673, - "grad_norm": 0.007940462790429592, - "learning_rate": 0.00019999364002593495, - "loss": 46.0, - "step": 22303 - }, - { - "epoch": 3.591891783083055, - "grad_norm": 0.0034608205314725637, - "learning_rate": 0.00019999363945534784, - "loss": 46.0, - "step": 22304 - }, - { - "epoch": 3.5920528201618422, - "grad_norm": 0.004165707156062126, - "learning_rate": 0.00019999363888473514, - "loss": 46.0, - "step": 22305 - }, - { - "epoch": 3.5922138572406297, - "grad_norm": 0.0016852917615324259, - "learning_rate": 0.00019999363831409685, - "loss": 46.0, - "step": 22306 - }, - { - "epoch": 3.592374894319417, - "grad_norm": 0.0013217369560152292, - "learning_rate": 0.0001999936377434329, - "loss": 46.0, - "step": 22307 - }, - { - "epoch": 3.5925359313982046, - "grad_norm": 0.001958205597475171, - "learning_rate": 0.00019999363717274342, - "loss": 46.0, - "step": 22308 - }, - { - "epoch": 3.592696968476992, - "grad_norm": 0.007180316373705864, - "learning_rate": 0.00019999363660202834, - "loss": 46.0, - "step": 22309 - }, - { - "epoch": 3.592858005555779, - "grad_norm": 0.0033955092076212168, - "learning_rate": 0.00019999363603128768, - "loss": 46.0, - "step": 22310 - }, - { - "epoch": 3.5930190426345665, - "grad_norm": 0.004092859569936991, - "learning_rate": 0.00019999363546052142, - "loss": 46.0, - "step": 22311 - }, - { - "epoch": 3.593180079713354, - "grad_norm": 0.0018009928753599524, - "learning_rate": 0.00019999363488972956, - "loss": 46.0, - "step": 22312 - }, - { - "epoch": 3.5933411167921414, - "grad_norm": 0.009082680568099022, - "learning_rate": 0.0001999936343189121, - "loss": 46.0, - "step": 22313 - }, - { - "epoch": 3.593502153870929, - "grad_norm": 0.001087721437215805, - "learning_rate": 0.00019999363374806906, - "loss": 46.0, - "step": 22314 - }, - { - "epoch": 3.5936631909497163, - "grad_norm": 0.003085762495175004, - "learning_rate": 0.0001999936331772004, - "loss": 46.0, - "step": 22315 - }, - { - "epoch": 3.5938242280285033, - "grad_norm": 0.007600707001984119, - "learning_rate": 0.00019999363260630616, - "loss": 46.0, - "step": 22316 - }, - { - "epoch": 3.5939852651072908, - "grad_norm": 0.0040770480409264565, - "learning_rate": 0.00019999363203538636, - "loss": 46.0, - "step": 22317 - }, - { - "epoch": 3.5941463021860782, - "grad_norm": 0.00740771135315299, - "learning_rate": 0.0001999936314644409, - "loss": 46.0, - "step": 22318 - }, - { - "epoch": 3.5943073392648657, - "grad_norm": 0.00436649052426219, - "learning_rate": 0.0001999936308934699, - "loss": 46.0, - "step": 22319 - }, - { - "epoch": 3.594468376343653, - "grad_norm": 0.004158633295446634, - "learning_rate": 0.0001999936303224733, - "loss": 46.0, - "step": 22320 - }, - { - "epoch": 3.5946294134224406, - "grad_norm": 0.005858105607330799, - "learning_rate": 0.00019999362975145108, - "loss": 46.0, - "step": 22321 - }, - { - "epoch": 3.594790450501228, - "grad_norm": 0.004049082286655903, - "learning_rate": 0.0001999936291804033, - "loss": 46.0, - "step": 22322 - }, - { - "epoch": 3.5949514875800155, - "grad_norm": 0.003086875192821026, - "learning_rate": 0.00019999362860932993, - "loss": 46.0, - "step": 22323 - }, - { - "epoch": 3.595112524658803, - "grad_norm": 0.007564725819975138, - "learning_rate": 0.00019999362803823094, - "loss": 46.0, - "step": 22324 - }, - { - "epoch": 3.59527356173759, - "grad_norm": 0.0008835851913318038, - "learning_rate": 0.00019999362746710636, - "loss": 46.0, - "step": 22325 - }, - { - "epoch": 3.5954345988163774, - "grad_norm": 0.0029259880539029837, - "learning_rate": 0.0001999936268959562, - "loss": 46.0, - "step": 22326 - }, - { - "epoch": 3.595595635895165, - "grad_norm": 0.002532773884013295, - "learning_rate": 0.00019999362632478046, - "loss": 46.0, - "step": 22327 - }, - { - "epoch": 3.5957566729739523, - "grad_norm": 0.006726366933435202, - "learning_rate": 0.0001999936257535791, - "loss": 46.0, - "step": 22328 - }, - { - "epoch": 3.5959177100527397, - "grad_norm": 0.002553165890276432, - "learning_rate": 0.00019999362518235213, - "loss": 46.0, - "step": 22329 - }, - { - "epoch": 3.596078747131527, - "grad_norm": 0.0013454457512125373, - "learning_rate": 0.0001999936246110996, - "loss": 46.0, - "step": 22330 - }, - { - "epoch": 3.596239784210314, - "grad_norm": 0.004314367659389973, - "learning_rate": 0.00019999362403982148, - "loss": 46.0, - "step": 22331 - }, - { - "epoch": 3.5964008212891017, - "grad_norm": 0.0022494480945169926, - "learning_rate": 0.00019999362346851773, - "loss": 46.0, - "step": 22332 - }, - { - "epoch": 3.596561858367889, - "grad_norm": 0.0018538335571065545, - "learning_rate": 0.00019999362289718843, - "loss": 46.0, - "step": 22333 - }, - { - "epoch": 3.5967228954466766, - "grad_norm": 0.011925830505788326, - "learning_rate": 0.0001999936223258335, - "loss": 46.0, - "step": 22334 - }, - { - "epoch": 3.596883932525464, - "grad_norm": 0.008757457137107849, - "learning_rate": 0.000199993621754453, - "loss": 46.0, - "step": 22335 - }, - { - "epoch": 3.5970449696042515, - "grad_norm": 0.015112852677702904, - "learning_rate": 0.0001999936211830469, - "loss": 46.0, - "step": 22336 - }, - { - "epoch": 3.597206006683039, - "grad_norm": 0.006661464925855398, - "learning_rate": 0.00019999362061161522, - "loss": 46.0, - "step": 22337 - }, - { - "epoch": 3.5973670437618264, - "grad_norm": 0.0035234594251960516, - "learning_rate": 0.00019999362004015792, - "loss": 46.0, - "step": 22338 - }, - { - "epoch": 3.597528080840614, - "grad_norm": 0.003458704799413681, - "learning_rate": 0.00019999361946867504, - "loss": 46.0, - "step": 22339 - }, - { - "epoch": 3.597689117919401, - "grad_norm": 0.0035416092723608017, - "learning_rate": 0.00019999361889716657, - "loss": 46.0, - "step": 22340 - }, - { - "epoch": 3.5978501549981883, - "grad_norm": 0.0010801405878737569, - "learning_rate": 0.0001999936183256325, - "loss": 46.0, - "step": 22341 - }, - { - "epoch": 3.5980111920769757, - "grad_norm": 0.0035934043116867542, - "learning_rate": 0.00019999361775407284, - "loss": 46.0, - "step": 22342 - }, - { - "epoch": 3.598172229155763, - "grad_norm": 0.0009040441946126521, - "learning_rate": 0.00019999361718248758, - "loss": 46.0, - "step": 22343 - }, - { - "epoch": 3.5983332662345506, - "grad_norm": 0.00912285316735506, - "learning_rate": 0.00019999361661087673, - "loss": 46.0, - "step": 22344 - }, - { - "epoch": 3.5984943033133376, - "grad_norm": 0.006449673790484667, - "learning_rate": 0.0001999936160392403, - "loss": 46.0, - "step": 22345 - }, - { - "epoch": 3.598655340392125, - "grad_norm": 0.009721963666379452, - "learning_rate": 0.00019999361546757828, - "loss": 46.0, - "step": 22346 - }, - { - "epoch": 3.5988163774709125, - "grad_norm": 0.0009141361224465072, - "learning_rate": 0.00019999361489589067, - "loss": 46.0, - "step": 22347 - }, - { - "epoch": 3.5989774145497, - "grad_norm": 0.0021284744143486023, - "learning_rate": 0.00019999361432417742, - "loss": 46.0, - "step": 22348 - }, - { - "epoch": 3.5991384516284874, - "grad_norm": 0.001282767509110272, - "learning_rate": 0.0001999936137524386, - "loss": 46.0, - "step": 22349 - }, - { - "epoch": 3.599299488707275, - "grad_norm": 0.002047148533165455, - "learning_rate": 0.0001999936131806742, - "loss": 46.0, - "step": 22350 - }, - { - "epoch": 3.5994605257860623, - "grad_norm": 0.001878916984423995, - "learning_rate": 0.0001999936126088842, - "loss": 46.0, - "step": 22351 - }, - { - "epoch": 3.59962156286485, - "grad_norm": 0.0034665060229599476, - "learning_rate": 0.00019999361203706863, - "loss": 46.0, - "step": 22352 - }, - { - "epoch": 3.5997825999436373, - "grad_norm": 0.0017352188006043434, - "learning_rate": 0.00019999361146522744, - "loss": 46.0, - "step": 22353 - }, - { - "epoch": 3.5999436370224243, - "grad_norm": 0.002765955636277795, - "learning_rate": 0.00019999361089336067, - "loss": 46.0, - "step": 22354 - }, - { - "epoch": 3.6001046741012117, - "grad_norm": 0.003913640510290861, - "learning_rate": 0.0001999936103214683, - "loss": 46.0, - "step": 22355 - }, - { - "epoch": 3.600265711179999, - "grad_norm": 0.00532541424036026, - "learning_rate": 0.00019999360974955034, - "loss": 46.0, - "step": 22356 - }, - { - "epoch": 3.6004267482587866, - "grad_norm": 0.00533573841676116, - "learning_rate": 0.00019999360917760678, - "loss": 46.0, - "step": 22357 - }, - { - "epoch": 3.600587785337574, - "grad_norm": 0.001956868451088667, - "learning_rate": 0.00019999360860563763, - "loss": 46.0, - "step": 22358 - }, - { - "epoch": 3.6007488224163615, - "grad_norm": 0.0009051914094015956, - "learning_rate": 0.00019999360803364287, - "loss": 46.0, - "step": 22359 - }, - { - "epoch": 3.6009098594951485, - "grad_norm": 0.003430198645219207, - "learning_rate": 0.00019999360746162252, - "loss": 46.0, - "step": 22360 - }, - { - "epoch": 3.601070896573936, - "grad_norm": 0.0006446716142818332, - "learning_rate": 0.0001999936068895766, - "loss": 46.0, - "step": 22361 - }, - { - "epoch": 3.6012319336527234, - "grad_norm": 0.006032352335751057, - "learning_rate": 0.0001999936063175051, - "loss": 46.0, - "step": 22362 - }, - { - "epoch": 3.601392970731511, - "grad_norm": 0.00455701956525445, - "learning_rate": 0.00019999360574540797, - "loss": 46.0, - "step": 22363 - }, - { - "epoch": 3.6015540078102983, - "grad_norm": 0.002215759363025427, - "learning_rate": 0.00019999360517328527, - "loss": 46.0, - "step": 22364 - }, - { - "epoch": 3.601715044889086, - "grad_norm": 0.002742103999480605, - "learning_rate": 0.00019999360460113693, - "loss": 46.0, - "step": 22365 - }, - { - "epoch": 3.6018760819678732, - "grad_norm": 0.002330132992938161, - "learning_rate": 0.00019999360402896306, - "loss": 46.0, - "step": 22366 - }, - { - "epoch": 3.6020371190466607, - "grad_norm": 0.011597679927945137, - "learning_rate": 0.00019999360345676357, - "loss": 46.0, - "step": 22367 - }, - { - "epoch": 3.602198156125448, - "grad_norm": 0.002671410096809268, - "learning_rate": 0.00019999360288453847, - "loss": 46.0, - "step": 22368 - }, - { - "epoch": 3.602359193204235, - "grad_norm": 0.0009399244445376098, - "learning_rate": 0.0001999936023122878, - "loss": 46.0, - "step": 22369 - }, - { - "epoch": 3.6025202302830226, - "grad_norm": 0.002231578342616558, - "learning_rate": 0.00019999360174001153, - "loss": 46.0, - "step": 22370 - }, - { - "epoch": 3.60268126736181, - "grad_norm": 0.00409920047968626, - "learning_rate": 0.00019999360116770967, - "loss": 46.0, - "step": 22371 - }, - { - "epoch": 3.6028423044405975, - "grad_norm": 0.001007885904982686, - "learning_rate": 0.00019999360059538225, - "loss": 46.0, - "step": 22372 - }, - { - "epoch": 3.603003341519385, - "grad_norm": 0.00783870555460453, - "learning_rate": 0.00019999360002302918, - "loss": 46.0, - "step": 22373 - }, - { - "epoch": 3.603164378598172, - "grad_norm": 0.005030629690736532, - "learning_rate": 0.00019999359945065053, - "loss": 46.0, - "step": 22374 - }, - { - "epoch": 3.6033254156769594, - "grad_norm": 0.005600613541901112, - "learning_rate": 0.0001999935988782463, - "loss": 46.0, - "step": 22375 - }, - { - "epoch": 3.603486452755747, - "grad_norm": 0.0009224683162756264, - "learning_rate": 0.00019999359830581647, - "loss": 46.0, - "step": 22376 - }, - { - "epoch": 3.6036474898345343, - "grad_norm": 0.002280866727232933, - "learning_rate": 0.00019999359773336102, - "loss": 46.0, - "step": 22377 - }, - { - "epoch": 3.6038085269133218, - "grad_norm": 0.0023356250021606684, - "learning_rate": 0.00019999359716088005, - "loss": 46.0, - "step": 22378 - }, - { - "epoch": 3.603969563992109, - "grad_norm": 0.001748585607856512, - "learning_rate": 0.0001999935965883734, - "loss": 46.0, - "step": 22379 - }, - { - "epoch": 3.6041306010708967, - "grad_norm": 0.003502840409055352, - "learning_rate": 0.00019999359601584124, - "loss": 46.0, - "step": 22380 - }, - { - "epoch": 3.604291638149684, - "grad_norm": 0.002135103801265359, - "learning_rate": 0.00019999359544328342, - "loss": 46.0, - "step": 22381 - }, - { - "epoch": 3.6044526752284716, - "grad_norm": 0.002440153155475855, - "learning_rate": 0.00019999359487070004, - "loss": 46.0, - "step": 22382 - }, - { - "epoch": 3.604613712307259, - "grad_norm": 0.0034764546435326338, - "learning_rate": 0.00019999359429809105, - "loss": 46.0, - "step": 22383 - }, - { - "epoch": 3.604774749386046, - "grad_norm": 0.0025229365564882755, - "learning_rate": 0.00019999359372545648, - "loss": 46.0, - "step": 22384 - }, - { - "epoch": 3.6049357864648335, - "grad_norm": 0.0026008551940321922, - "learning_rate": 0.0001999935931527963, - "loss": 46.0, - "step": 22385 - }, - { - "epoch": 3.605096823543621, - "grad_norm": 0.0062901414930820465, - "learning_rate": 0.00019999359258011053, - "loss": 46.0, - "step": 22386 - }, - { - "epoch": 3.6052578606224084, - "grad_norm": 0.004310450050979853, - "learning_rate": 0.0001999935920073992, - "loss": 46.0, - "step": 22387 - }, - { - "epoch": 3.605418897701196, - "grad_norm": 0.007083808537572622, - "learning_rate": 0.00019999359143466227, - "loss": 46.0, - "step": 22388 - }, - { - "epoch": 3.605579934779983, - "grad_norm": 0.0016448672395199537, - "learning_rate": 0.0001999935908618997, - "loss": 46.0, - "step": 22389 - }, - { - "epoch": 3.6057409718587703, - "grad_norm": 0.0026761542540043592, - "learning_rate": 0.00019999359028911157, - "loss": 46.0, - "step": 22390 - }, - { - "epoch": 3.6059020089375577, - "grad_norm": 0.0023413097951561213, - "learning_rate": 0.00019999358971629786, - "loss": 46.0, - "step": 22391 - }, - { - "epoch": 3.606063046016345, - "grad_norm": 0.00468074344098568, - "learning_rate": 0.00019999358914345853, - "loss": 46.0, - "step": 22392 - }, - { - "epoch": 3.6062240830951326, - "grad_norm": 0.004899135325103998, - "learning_rate": 0.0001999935885705936, - "loss": 46.0, - "step": 22393 - }, - { - "epoch": 3.60638512017392, - "grad_norm": 0.001748733571730554, - "learning_rate": 0.0001999935879977031, - "loss": 46.0, - "step": 22394 - }, - { - "epoch": 3.6065461572527076, - "grad_norm": 0.0028976218309253454, - "learning_rate": 0.000199993587424787, - "loss": 46.0, - "step": 22395 - }, - { - "epoch": 3.606707194331495, - "grad_norm": 0.001199536956846714, - "learning_rate": 0.0001999935868518453, - "loss": 46.0, - "step": 22396 - }, - { - "epoch": 3.6068682314102825, - "grad_norm": 0.003271740395575762, - "learning_rate": 0.00019999358627887802, - "loss": 46.0, - "step": 22397 - }, - { - "epoch": 3.6070292684890695, - "grad_norm": 0.002882123226299882, - "learning_rate": 0.00019999358570588514, - "loss": 46.0, - "step": 22398 - }, - { - "epoch": 3.607190305567857, - "grad_norm": 0.0012152903946116567, - "learning_rate": 0.00019999358513286665, - "loss": 46.0, - "step": 22399 - }, - { - "epoch": 3.6073513426466444, - "grad_norm": 0.0051603373140096664, - "learning_rate": 0.0001999935845598226, - "loss": 46.0, - "step": 22400 - }, - { - "epoch": 3.607512379725432, - "grad_norm": 0.0010274507803842425, - "learning_rate": 0.00019999358398675295, - "loss": 46.0, - "step": 22401 - }, - { - "epoch": 3.6076734168042193, - "grad_norm": 0.0016115158796310425, - "learning_rate": 0.0001999935834136577, - "loss": 46.0, - "step": 22402 - }, - { - "epoch": 3.6078344538830067, - "grad_norm": 0.0038267478812485933, - "learning_rate": 0.00019999358284053685, - "loss": 46.0, - "step": 22403 - }, - { - "epoch": 3.6079954909617937, - "grad_norm": 0.0030010014306753874, - "learning_rate": 0.0001999935822673904, - "loss": 46.0, - "step": 22404 - }, - { - "epoch": 3.608156528040581, - "grad_norm": 0.0015449613565579057, - "learning_rate": 0.00019999358169421838, - "loss": 46.0, - "step": 22405 - }, - { - "epoch": 3.6083175651193686, - "grad_norm": 0.0016683997819200158, - "learning_rate": 0.00019999358112102072, - "loss": 46.0, - "step": 22406 - }, - { - "epoch": 3.608478602198156, - "grad_norm": 0.0031114269513636827, - "learning_rate": 0.00019999358054779753, - "loss": 46.0, - "step": 22407 - }, - { - "epoch": 3.6086396392769435, - "grad_norm": 0.0033562202006578445, - "learning_rate": 0.0001999935799745487, - "loss": 46.0, - "step": 22408 - }, - { - "epoch": 3.608800676355731, - "grad_norm": 0.0034233522601425648, - "learning_rate": 0.0001999935794012743, - "loss": 46.0, - "step": 22409 - }, - { - "epoch": 3.6089617134345184, - "grad_norm": 0.007132889237254858, - "learning_rate": 0.0001999935788279743, - "loss": 46.0, - "step": 22410 - }, - { - "epoch": 3.609122750513306, - "grad_norm": 0.0042678555473685265, - "learning_rate": 0.00019999357825464873, - "loss": 46.0, - "step": 22411 - }, - { - "epoch": 3.6092837875920933, - "grad_norm": 0.00155303452629596, - "learning_rate": 0.00019999357768129753, - "loss": 46.0, - "step": 22412 - }, - { - "epoch": 3.6094448246708803, - "grad_norm": 0.0023707281798124313, - "learning_rate": 0.00019999357710792076, - "loss": 46.0, - "step": 22413 - }, - { - "epoch": 3.609605861749668, - "grad_norm": 0.0056295557878911495, - "learning_rate": 0.0001999935765345184, - "loss": 46.0, - "step": 22414 - }, - { - "epoch": 3.6097668988284553, - "grad_norm": 0.013550247065722942, - "learning_rate": 0.00019999357596109043, - "loss": 46.0, - "step": 22415 - }, - { - "epoch": 3.6099279359072427, - "grad_norm": 0.007372173015028238, - "learning_rate": 0.00019999357538763688, - "loss": 46.0, - "step": 22416 - }, - { - "epoch": 3.61008897298603, - "grad_norm": 0.001707296003587544, - "learning_rate": 0.0001999935748141577, - "loss": 46.0, - "step": 22417 - }, - { - "epoch": 3.610250010064817, - "grad_norm": 0.004992510657757521, - "learning_rate": 0.00019999357424065297, - "loss": 46.0, - "step": 22418 - }, - { - "epoch": 3.6104110471436046, - "grad_norm": 0.009980971924960613, - "learning_rate": 0.00019999357366712263, - "loss": 46.0, - "step": 22419 - }, - { - "epoch": 3.610572084222392, - "grad_norm": 0.00764093641191721, - "learning_rate": 0.0001999935730935667, - "loss": 46.0, - "step": 22420 - }, - { - "epoch": 3.6107331213011795, - "grad_norm": 0.002733397763222456, - "learning_rate": 0.00019999357251998518, - "loss": 46.0, - "step": 22421 - }, - { - "epoch": 3.610894158379967, - "grad_norm": 0.0007677925750613213, - "learning_rate": 0.00019999357194637807, - "loss": 46.0, - "step": 22422 - }, - { - "epoch": 3.6110551954587544, - "grad_norm": 0.005811657290905714, - "learning_rate": 0.00019999357137274535, - "loss": 46.0, - "step": 22423 - }, - { - "epoch": 3.611216232537542, - "grad_norm": 0.0019189569866284728, - "learning_rate": 0.00019999357079908704, - "loss": 46.0, - "step": 22424 - }, - { - "epoch": 3.6113772696163293, - "grad_norm": 0.007289526518434286, - "learning_rate": 0.00019999357022540314, - "loss": 46.0, - "step": 22425 - }, - { - "epoch": 3.6115383066951168, - "grad_norm": 0.0019161478849127889, - "learning_rate": 0.00019999356965169364, - "loss": 46.0, - "step": 22426 - }, - { - "epoch": 3.611699343773904, - "grad_norm": 0.0037309550680220127, - "learning_rate": 0.0001999935690779586, - "loss": 46.0, - "step": 22427 - }, - { - "epoch": 3.6118603808526912, - "grad_norm": 0.001906686113215983, - "learning_rate": 0.0001999935685041979, - "loss": 46.0, - "step": 22428 - }, - { - "epoch": 3.6120214179314787, - "grad_norm": 0.0019001959590241313, - "learning_rate": 0.00019999356793041164, - "loss": 46.0, - "step": 22429 - }, - { - "epoch": 3.612182455010266, - "grad_norm": 0.003228002693504095, - "learning_rate": 0.00019999356735659975, - "loss": 46.0, - "step": 22430 - }, - { - "epoch": 3.6123434920890536, - "grad_norm": 0.01450062170624733, - "learning_rate": 0.0001999935667827623, - "loss": 46.0, - "step": 22431 - }, - { - "epoch": 3.612504529167841, - "grad_norm": 0.005800783168524504, - "learning_rate": 0.00019999356620889927, - "loss": 46.0, - "step": 22432 - }, - { - "epoch": 3.612665566246628, - "grad_norm": 0.005044400691986084, - "learning_rate": 0.00019999356563501063, - "loss": 46.0, - "step": 22433 - }, - { - "epoch": 3.6128266033254155, - "grad_norm": 0.00861057173460722, - "learning_rate": 0.0001999935650610964, - "loss": 46.0, - "step": 22434 - }, - { - "epoch": 3.612987640404203, - "grad_norm": 0.008550865575671196, - "learning_rate": 0.00019999356448715654, - "loss": 46.0, - "step": 22435 - }, - { - "epoch": 3.6131486774829904, - "grad_norm": 0.010306457057595253, - "learning_rate": 0.00019999356391319113, - "loss": 46.0, - "step": 22436 - }, - { - "epoch": 3.613309714561778, - "grad_norm": 0.007211598102003336, - "learning_rate": 0.0001999935633392001, - "loss": 46.0, - "step": 22437 - }, - { - "epoch": 3.6134707516405653, - "grad_norm": 0.007781315129250288, - "learning_rate": 0.0001999935627651835, - "loss": 46.0, - "step": 22438 - }, - { - "epoch": 3.6136317887193528, - "grad_norm": 0.0017539658583700657, - "learning_rate": 0.0001999935621911413, - "loss": 46.0, - "step": 22439 - }, - { - "epoch": 3.61379282579814, - "grad_norm": 0.003925439901649952, - "learning_rate": 0.00019999356161707352, - "loss": 46.0, - "step": 22440 - }, - { - "epoch": 3.6139538628769277, - "grad_norm": 0.0048811486922204494, - "learning_rate": 0.00019999356104298012, - "loss": 46.0, - "step": 22441 - }, - { - "epoch": 3.6141148999557147, - "grad_norm": 0.0011200091103091836, - "learning_rate": 0.00019999356046886116, - "loss": 46.0, - "step": 22442 - }, - { - "epoch": 3.614275937034502, - "grad_norm": 0.0029285536147654057, - "learning_rate": 0.00019999355989471656, - "loss": 46.0, - "step": 22443 - }, - { - "epoch": 3.6144369741132896, - "grad_norm": 0.0019486559322103858, - "learning_rate": 0.0001999935593205464, - "loss": 46.0, - "step": 22444 - }, - { - "epoch": 3.614598011192077, - "grad_norm": 0.0021144987549632788, - "learning_rate": 0.00019999355874635063, - "loss": 46.0, - "step": 22445 - }, - { - "epoch": 3.6147590482708645, - "grad_norm": 0.0012204087106510997, - "learning_rate": 0.0001999935581721293, - "loss": 46.0, - "step": 22446 - }, - { - "epoch": 3.614920085349652, - "grad_norm": 0.008844881318509579, - "learning_rate": 0.00019999355759788232, - "loss": 46.0, - "step": 22447 - }, - { - "epoch": 3.615081122428439, - "grad_norm": 0.0020680285524576902, - "learning_rate": 0.00019999355702360978, - "loss": 46.0, - "step": 22448 - }, - { - "epoch": 3.6152421595072264, - "grad_norm": 0.004936013836413622, - "learning_rate": 0.00019999355644931166, - "loss": 46.0, - "step": 22449 - }, - { - "epoch": 3.615403196586014, - "grad_norm": 0.0047101108357310295, - "learning_rate": 0.00019999355587498792, - "loss": 46.0, - "step": 22450 - }, - { - "epoch": 3.6155642336648013, - "grad_norm": 0.005580500699579716, - "learning_rate": 0.00019999355530063862, - "loss": 46.0, - "step": 22451 - }, - { - "epoch": 3.6157252707435887, - "grad_norm": 0.007232708390802145, - "learning_rate": 0.00019999355472626368, - "loss": 46.0, - "step": 22452 - }, - { - "epoch": 3.615886307822376, - "grad_norm": 0.005742973182350397, - "learning_rate": 0.0001999935541518632, - "loss": 46.0, - "step": 22453 - }, - { - "epoch": 3.6160473449011636, - "grad_norm": 0.0022650575265288353, - "learning_rate": 0.0001999935535774371, - "loss": 46.0, - "step": 22454 - }, - { - "epoch": 3.616208381979951, - "grad_norm": 0.00304702902212739, - "learning_rate": 0.0001999935530029854, - "loss": 46.0, - "step": 22455 - }, - { - "epoch": 3.6163694190587385, - "grad_norm": 0.0013421442126855254, - "learning_rate": 0.0001999935524285081, - "loss": 46.0, - "step": 22456 - }, - { - "epoch": 3.6165304561375256, - "grad_norm": 0.003503709565848112, - "learning_rate": 0.0001999935518540052, - "loss": 46.0, - "step": 22457 - }, - { - "epoch": 3.616691493216313, - "grad_norm": 0.0025620809756219387, - "learning_rate": 0.00019999355127947675, - "loss": 46.0, - "step": 22458 - }, - { - "epoch": 3.6168525302951005, - "grad_norm": 0.0027202838100492954, - "learning_rate": 0.0001999935507049227, - "loss": 46.0, - "step": 22459 - }, - { - "epoch": 3.617013567373888, - "grad_norm": 0.008707068860530853, - "learning_rate": 0.00019999355013034303, - "loss": 46.0, - "step": 22460 - }, - { - "epoch": 3.6171746044526754, - "grad_norm": 0.003709145588800311, - "learning_rate": 0.00019999354955573776, - "loss": 46.0, - "step": 22461 - }, - { - "epoch": 3.6173356415314624, - "grad_norm": 0.0035873798187822104, - "learning_rate": 0.00019999354898110695, - "loss": 46.0, - "step": 22462 - }, - { - "epoch": 3.61749667861025, - "grad_norm": 0.0026390121784061193, - "learning_rate": 0.0001999935484064505, - "loss": 46.0, - "step": 22463 - }, - { - "epoch": 3.6176577156890373, - "grad_norm": 0.0010557264322414994, - "learning_rate": 0.00019999354783176846, - "loss": 46.0, - "step": 22464 - }, - { - "epoch": 3.6178187527678247, - "grad_norm": 0.0024030692875385284, - "learning_rate": 0.00019999354725706083, - "loss": 46.0, - "step": 22465 - }, - { - "epoch": 3.617979789846612, - "grad_norm": 0.002708001993596554, - "learning_rate": 0.00019999354668232762, - "loss": 46.0, - "step": 22466 - }, - { - "epoch": 3.6181408269253996, - "grad_norm": 0.007245389278978109, - "learning_rate": 0.0001999935461075688, - "loss": 46.0, - "step": 22467 - }, - { - "epoch": 3.618301864004187, - "grad_norm": 0.001983353868126869, - "learning_rate": 0.0001999935455327844, - "loss": 46.0, - "step": 22468 - }, - { - "epoch": 3.6184629010829745, - "grad_norm": 0.013206932693719864, - "learning_rate": 0.0001999935449579744, - "loss": 46.0, - "step": 22469 - }, - { - "epoch": 3.618623938161762, - "grad_norm": 0.004026846028864384, - "learning_rate": 0.0001999935443831388, - "loss": 46.0, - "step": 22470 - }, - { - "epoch": 3.618784975240549, - "grad_norm": 0.0019163753604516387, - "learning_rate": 0.0001999935438082776, - "loss": 46.0, - "step": 22471 - }, - { - "epoch": 3.6189460123193364, - "grad_norm": 0.0031291507184505463, - "learning_rate": 0.00019999354323339087, - "loss": 46.0, - "step": 22472 - }, - { - "epoch": 3.619107049398124, - "grad_norm": 0.0070044719614088535, - "learning_rate": 0.00019999354265847846, - "loss": 46.0, - "step": 22473 - }, - { - "epoch": 3.6192680864769113, - "grad_norm": 0.0052513424307107925, - "learning_rate": 0.0001999935420835405, - "loss": 46.0, - "step": 22474 - }, - { - "epoch": 3.619429123555699, - "grad_norm": 0.0043580965138971806, - "learning_rate": 0.00019999354150857695, - "loss": 46.0, - "step": 22475 - }, - { - "epoch": 3.6195901606344862, - "grad_norm": 0.006770983338356018, - "learning_rate": 0.0001999935409335878, - "loss": 46.0, - "step": 22476 - }, - { - "epoch": 3.6197511977132732, - "grad_norm": 0.005758809391409159, - "learning_rate": 0.00019999354035857305, - "loss": 46.0, - "step": 22477 - }, - { - "epoch": 3.6199122347920607, - "grad_norm": 0.013581417500972748, - "learning_rate": 0.0001999935397835327, - "loss": 46.0, - "step": 22478 - }, - { - "epoch": 3.620073271870848, - "grad_norm": 0.012798821553587914, - "learning_rate": 0.00019999353920846678, - "loss": 46.0, - "step": 22479 - }, - { - "epoch": 3.6202343089496356, - "grad_norm": 0.005479360464960337, - "learning_rate": 0.00019999353863337524, - "loss": 46.0, - "step": 22480 - }, - { - "epoch": 3.620395346028423, - "grad_norm": 0.01105831004679203, - "learning_rate": 0.00019999353805825814, - "loss": 46.0, - "step": 22481 - }, - { - "epoch": 3.6205563831072105, - "grad_norm": 0.004589867778122425, - "learning_rate": 0.00019999353748311545, - "loss": 46.0, - "step": 22482 - }, - { - "epoch": 3.620717420185998, - "grad_norm": 0.006644019857048988, - "learning_rate": 0.00019999353690794712, - "loss": 46.0, - "step": 22483 - }, - { - "epoch": 3.6208784572647854, - "grad_norm": 0.0012000190326943994, - "learning_rate": 0.00019999353633275323, - "loss": 46.0, - "step": 22484 - }, - { - "epoch": 3.621039494343573, - "grad_norm": 0.0040437402203679085, - "learning_rate": 0.00019999353575753375, - "loss": 46.0, - "step": 22485 - }, - { - "epoch": 3.62120053142236, - "grad_norm": 0.001889971666969359, - "learning_rate": 0.00019999353518228868, - "loss": 46.0, - "step": 22486 - }, - { - "epoch": 3.6213615685011473, - "grad_norm": 0.005469277035444975, - "learning_rate": 0.000199993534607018, - "loss": 46.0, - "step": 22487 - }, - { - "epoch": 3.6215226055799348, - "grad_norm": 0.003779173828661442, - "learning_rate": 0.00019999353403172168, - "loss": 46.0, - "step": 22488 - }, - { - "epoch": 3.6216836426587222, - "grad_norm": 0.004029697738587856, - "learning_rate": 0.00019999353345639986, - "loss": 46.0, - "step": 22489 - }, - { - "epoch": 3.6218446797375097, - "grad_norm": 0.009057889692485332, - "learning_rate": 0.00019999353288105242, - "loss": 46.0, - "step": 22490 - }, - { - "epoch": 3.6220057168162967, - "grad_norm": 0.0011810354189947248, - "learning_rate": 0.00019999353230567933, - "loss": 46.0, - "step": 22491 - }, - { - "epoch": 3.622166753895084, - "grad_norm": 0.010252796113491058, - "learning_rate": 0.0001999935317302807, - "loss": 46.0, - "step": 22492 - }, - { - "epoch": 3.6223277909738716, - "grad_norm": 0.002155910013243556, - "learning_rate": 0.00019999353115485646, - "loss": 46.0, - "step": 22493 - }, - { - "epoch": 3.622488828052659, - "grad_norm": 0.003056129440665245, - "learning_rate": 0.00019999353057940664, - "loss": 46.0, - "step": 22494 - }, - { - "epoch": 3.6226498651314465, - "grad_norm": 0.0030514122918248177, - "learning_rate": 0.0001999935300039312, - "loss": 46.0, - "step": 22495 - }, - { - "epoch": 3.622810902210234, - "grad_norm": 0.00620141951367259, - "learning_rate": 0.00019999352942843022, - "loss": 46.0, - "step": 22496 - }, - { - "epoch": 3.6229719392890214, - "grad_norm": 0.00171517429407686, - "learning_rate": 0.0001999935288529036, - "loss": 46.0, - "step": 22497 - }, - { - "epoch": 3.623132976367809, - "grad_norm": 0.004122231155633926, - "learning_rate": 0.0001999935282773514, - "loss": 46.0, - "step": 22498 - }, - { - "epoch": 3.6232940134465963, - "grad_norm": 0.005840396508574486, - "learning_rate": 0.0001999935277017736, - "loss": 46.0, - "step": 22499 - }, - { - "epoch": 3.6234550505253833, - "grad_norm": 0.01794731430709362, - "learning_rate": 0.00019999352712617022, - "loss": 46.0, - "step": 22500 - }, - { - "epoch": 3.6236160876041708, - "grad_norm": 0.007381750736385584, - "learning_rate": 0.00019999352655054121, - "loss": 46.0, - "step": 22501 - }, - { - "epoch": 3.623777124682958, - "grad_norm": 0.00266893464140594, - "learning_rate": 0.00019999352597488665, - "loss": 46.0, - "step": 22502 - }, - { - "epoch": 3.6239381617617457, - "grad_norm": 0.002578998450189829, - "learning_rate": 0.0001999935253992065, - "loss": 46.0, - "step": 22503 - }, - { - "epoch": 3.624099198840533, - "grad_norm": 0.004135339520871639, - "learning_rate": 0.00019999352482350072, - "loss": 46.0, - "step": 22504 - }, - { - "epoch": 3.6242602359193206, - "grad_norm": 0.005464186891913414, - "learning_rate": 0.0001999935242477694, - "loss": 46.0, - "step": 22505 - }, - { - "epoch": 3.6244212729981076, - "grad_norm": 0.0039016236551105976, - "learning_rate": 0.00019999352367201242, - "loss": 46.0, - "step": 22506 - }, - { - "epoch": 3.624582310076895, - "grad_norm": 0.0026380294002592564, - "learning_rate": 0.0001999935230962299, - "loss": 46.0, - "step": 22507 - }, - { - "epoch": 3.6247433471556825, - "grad_norm": 0.0013833181001245975, - "learning_rate": 0.00019999352252042174, - "loss": 46.0, - "step": 22508 - }, - { - "epoch": 3.62490438423447, - "grad_norm": 0.005701172631233931, - "learning_rate": 0.000199993521944588, - "loss": 46.0, - "step": 22509 - }, - { - "epoch": 3.6250654213132574, - "grad_norm": 0.0013801305321976542, - "learning_rate": 0.0001999935213687287, - "loss": 46.0, - "step": 22510 - }, - { - "epoch": 3.625226458392045, - "grad_norm": 0.0017217963468283415, - "learning_rate": 0.00019999352079284378, - "loss": 46.0, - "step": 22511 - }, - { - "epoch": 3.6253874954708323, - "grad_norm": 0.0034533457364887, - "learning_rate": 0.00019999352021693326, - "loss": 46.0, - "step": 22512 - }, - { - "epoch": 3.6255485325496197, - "grad_norm": 0.003828912042081356, - "learning_rate": 0.00019999351964099718, - "loss": 46.0, - "step": 22513 - }, - { - "epoch": 3.625709569628407, - "grad_norm": 0.0025549596175551414, - "learning_rate": 0.00019999351906503548, - "loss": 46.0, - "step": 22514 - }, - { - "epoch": 3.625870606707194, - "grad_norm": 0.013700297102332115, - "learning_rate": 0.0001999935184890482, - "loss": 46.0, - "step": 22515 - }, - { - "epoch": 3.6260316437859816, - "grad_norm": 0.002976646414026618, - "learning_rate": 0.0001999935179130353, - "loss": 46.0, - "step": 22516 - }, - { - "epoch": 3.626192680864769, - "grad_norm": 0.004166321363300085, - "learning_rate": 0.00019999351733699684, - "loss": 46.0, - "step": 22517 - }, - { - "epoch": 3.6263537179435565, - "grad_norm": 0.004388280212879181, - "learning_rate": 0.00019999351676093277, - "loss": 46.0, - "step": 22518 - }, - { - "epoch": 3.626514755022344, - "grad_norm": 0.01295491773635149, - "learning_rate": 0.00019999351618484311, - "loss": 46.0, - "step": 22519 - }, - { - "epoch": 3.6266757921011314, - "grad_norm": 0.0008753619622439146, - "learning_rate": 0.00019999351560872787, - "loss": 46.0, - "step": 22520 - }, - { - "epoch": 3.6268368291799185, - "grad_norm": 0.006063347682356834, - "learning_rate": 0.00019999351503258703, - "loss": 46.0, - "step": 22521 - }, - { - "epoch": 3.626997866258706, - "grad_norm": 0.002712126588448882, - "learning_rate": 0.0001999935144564206, - "loss": 46.0, - "step": 22522 - }, - { - "epoch": 3.6271589033374934, - "grad_norm": 0.0017489992314949632, - "learning_rate": 0.00019999351388022855, - "loss": 46.0, - "step": 22523 - }, - { - "epoch": 3.627319940416281, - "grad_norm": 0.020075609907507896, - "learning_rate": 0.00019999351330401093, - "loss": 46.0, - "step": 22524 - }, - { - "epoch": 3.6274809774950683, - "grad_norm": 0.008219759911298752, - "learning_rate": 0.00019999351272776772, - "loss": 46.0, - "step": 22525 - }, - { - "epoch": 3.6276420145738557, - "grad_norm": 0.0033160836901515722, - "learning_rate": 0.0001999935121514989, - "loss": 46.0, - "step": 22526 - }, - { - "epoch": 3.627803051652643, - "grad_norm": 0.002313353819772601, - "learning_rate": 0.00019999351157520446, - "loss": 46.0, - "step": 22527 - }, - { - "epoch": 3.6279640887314306, - "grad_norm": 0.0032709252554923296, - "learning_rate": 0.0001999935109988845, - "loss": 46.0, - "step": 22528 - }, - { - "epoch": 3.628125125810218, - "grad_norm": 0.0021192855201661587, - "learning_rate": 0.0001999935104225389, - "loss": 46.0, - "step": 22529 - }, - { - "epoch": 3.628286162889005, - "grad_norm": 0.0017459650989621878, - "learning_rate": 0.0001999935098461677, - "loss": 46.0, - "step": 22530 - }, - { - "epoch": 3.6284471999677925, - "grad_norm": 0.0024031877983361483, - "learning_rate": 0.00019999350926977095, - "loss": 46.0, - "step": 22531 - }, - { - "epoch": 3.62860823704658, - "grad_norm": 0.005624727811664343, - "learning_rate": 0.00019999350869334857, - "loss": 46.0, - "step": 22532 - }, - { - "epoch": 3.6287692741253674, - "grad_norm": 0.0025609242729842663, - "learning_rate": 0.0001999935081169006, - "loss": 46.0, - "step": 22533 - }, - { - "epoch": 3.628930311204155, - "grad_norm": 0.0017392337322235107, - "learning_rate": 0.00019999350754042704, - "loss": 46.0, - "step": 22534 - }, - { - "epoch": 3.629091348282942, - "grad_norm": 0.00645176088437438, - "learning_rate": 0.00019999350696392788, - "loss": 46.0, - "step": 22535 - }, - { - "epoch": 3.6292523853617293, - "grad_norm": 0.0018682080553844571, - "learning_rate": 0.00019999350638740315, - "loss": 46.0, - "step": 22536 - }, - { - "epoch": 3.629413422440517, - "grad_norm": 0.004116647411137819, - "learning_rate": 0.00019999350581085282, - "loss": 46.0, - "step": 22537 - }, - { - "epoch": 3.6295744595193042, - "grad_norm": 0.001600995077751577, - "learning_rate": 0.00019999350523427687, - "loss": 46.0, - "step": 22538 - }, - { - "epoch": 3.6297354965980917, - "grad_norm": 0.0016844253987073898, - "learning_rate": 0.00019999350465767536, - "loss": 46.0, - "step": 22539 - }, - { - "epoch": 3.629896533676879, - "grad_norm": 0.0014293370768427849, - "learning_rate": 0.00019999350408104823, - "loss": 46.0, - "step": 22540 - }, - { - "epoch": 3.6300575707556666, - "grad_norm": 0.00366139505058527, - "learning_rate": 0.00019999350350439552, - "loss": 46.0, - "step": 22541 - }, - { - "epoch": 3.630218607834454, - "grad_norm": 0.004743609577417374, - "learning_rate": 0.00019999350292771725, - "loss": 46.0, - "step": 22542 - }, - { - "epoch": 3.6303796449132415, - "grad_norm": 0.0012500014854595065, - "learning_rate": 0.00019999350235101333, - "loss": 46.0, - "step": 22543 - }, - { - "epoch": 3.6305406819920285, - "grad_norm": 0.002645831787958741, - "learning_rate": 0.00019999350177428383, - "loss": 46.0, - "step": 22544 - }, - { - "epoch": 3.630701719070816, - "grad_norm": 0.0033212376292794943, - "learning_rate": 0.00019999350119752877, - "loss": 46.0, - "step": 22545 - }, - { - "epoch": 3.6308627561496034, - "grad_norm": 0.0028429566882550716, - "learning_rate": 0.0001999935006207481, - "loss": 46.0, - "step": 22546 - }, - { - "epoch": 3.631023793228391, - "grad_norm": 0.0028267125599086285, - "learning_rate": 0.0001999935000439418, - "loss": 46.0, - "step": 22547 - }, - { - "epoch": 3.6311848303071783, - "grad_norm": 0.0005956628010608256, - "learning_rate": 0.00019999349946710996, - "loss": 46.0, - "step": 22548 - }, - { - "epoch": 3.6313458673859658, - "grad_norm": 0.005481266416609287, - "learning_rate": 0.0001999934988902525, - "loss": 46.0, - "step": 22549 - }, - { - "epoch": 3.6315069044647528, - "grad_norm": 0.000716834794729948, - "learning_rate": 0.00019999349831336947, - "loss": 46.0, - "step": 22550 - }, - { - "epoch": 3.6316679415435402, - "grad_norm": 0.0015818835236132145, - "learning_rate": 0.0001999934977364608, - "loss": 46.0, - "step": 22551 - }, - { - "epoch": 3.6318289786223277, - "grad_norm": 0.0025429250672459602, - "learning_rate": 0.00019999349715952658, - "loss": 46.0, - "step": 22552 - }, - { - "epoch": 3.631990015701115, - "grad_norm": 0.003357258625328541, - "learning_rate": 0.00019999349658256674, - "loss": 46.0, - "step": 22553 - }, - { - "epoch": 3.6321510527799026, - "grad_norm": 0.0018024045275524259, - "learning_rate": 0.00019999349600558131, - "loss": 46.0, - "step": 22554 - }, - { - "epoch": 3.63231208985869, - "grad_norm": 0.0032567300368100405, - "learning_rate": 0.0001999934954285703, - "loss": 46.0, - "step": 22555 - }, - { - "epoch": 3.6324731269374775, - "grad_norm": 0.005689000245183706, - "learning_rate": 0.00019999349485153373, - "loss": 46.0, - "step": 22556 - }, - { - "epoch": 3.632634164016265, - "grad_norm": 0.0036533710081130266, - "learning_rate": 0.00019999349427447148, - "loss": 46.0, - "step": 22557 - }, - { - "epoch": 3.6327952010950524, - "grad_norm": 0.000923704297747463, - "learning_rate": 0.00019999349369738368, - "loss": 46.0, - "step": 22558 - }, - { - "epoch": 3.6329562381738394, - "grad_norm": 0.0011604356113821268, - "learning_rate": 0.00019999349312027032, - "loss": 46.0, - "step": 22559 - }, - { - "epoch": 3.633117275252627, - "grad_norm": 0.008964471518993378, - "learning_rate": 0.00019999349254313134, - "loss": 46.0, - "step": 22560 - }, - { - "epoch": 3.6332783123314143, - "grad_norm": 0.000986400875262916, - "learning_rate": 0.00019999349196596678, - "loss": 46.0, - "step": 22561 - }, - { - "epoch": 3.6334393494102017, - "grad_norm": 0.0027297590859234333, - "learning_rate": 0.0001999934913887766, - "loss": 46.0, - "step": 22562 - }, - { - "epoch": 3.633600386488989, - "grad_norm": 0.001780897960998118, - "learning_rate": 0.00019999349081156084, - "loss": 46.0, - "step": 22563 - }, - { - "epoch": 3.633761423567776, - "grad_norm": 0.0015038541750982404, - "learning_rate": 0.00019999349023431948, - "loss": 46.0, - "step": 22564 - }, - { - "epoch": 3.6339224606465637, - "grad_norm": 0.00709498580545187, - "learning_rate": 0.00019999348965705254, - "loss": 46.0, - "step": 22565 - }, - { - "epoch": 3.634083497725351, - "grad_norm": 0.0028029889799654484, - "learning_rate": 0.00019999348907976, - "loss": 46.0, - "step": 22566 - }, - { - "epoch": 3.6342445348041386, - "grad_norm": 0.001673622871749103, - "learning_rate": 0.00019999348850244187, - "loss": 46.0, - "step": 22567 - }, - { - "epoch": 3.634405571882926, - "grad_norm": 0.0035601984709501266, - "learning_rate": 0.00019999348792509815, - "loss": 46.0, - "step": 22568 - }, - { - "epoch": 3.6345666089617135, - "grad_norm": 0.0008578160195611417, - "learning_rate": 0.0001999934873477288, - "loss": 46.0, - "step": 22569 - }, - { - "epoch": 3.634727646040501, - "grad_norm": 0.0053849415853619576, - "learning_rate": 0.00019999348677033393, - "loss": 46.0, - "step": 22570 - }, - { - "epoch": 3.6348886831192884, - "grad_norm": 0.003762367647141218, - "learning_rate": 0.0001999934861929134, - "loss": 46.0, - "step": 22571 - }, - { - "epoch": 3.635049720198076, - "grad_norm": 0.002221018308773637, - "learning_rate": 0.0001999934856154673, - "loss": 46.0, - "step": 22572 - }, - { - "epoch": 3.6352107572768633, - "grad_norm": 0.0036930839996784925, - "learning_rate": 0.0001999934850379956, - "loss": 46.0, - "step": 22573 - }, - { - "epoch": 3.6353717943556503, - "grad_norm": 0.003176774363964796, - "learning_rate": 0.00019999348446049834, - "loss": 46.0, - "step": 22574 - }, - { - "epoch": 3.6355328314344377, - "grad_norm": 0.004482978954911232, - "learning_rate": 0.00019999348388297542, - "loss": 46.0, - "step": 22575 - }, - { - "epoch": 3.635693868513225, - "grad_norm": 0.003068018238991499, - "learning_rate": 0.00019999348330542696, - "loss": 46.0, - "step": 22576 - }, - { - "epoch": 3.6358549055920126, - "grad_norm": 0.0014689676463603973, - "learning_rate": 0.0001999934827278529, - "loss": 46.0, - "step": 22577 - }, - { - "epoch": 3.6360159426708, - "grad_norm": 0.0028130931314080954, - "learning_rate": 0.00019999348215025324, - "loss": 46.0, - "step": 22578 - }, - { - "epoch": 3.636176979749587, - "grad_norm": 0.0009816274978220463, - "learning_rate": 0.000199993481572628, - "loss": 46.0, - "step": 22579 - }, - { - "epoch": 3.6363380168283745, - "grad_norm": 0.0030423817224800587, - "learning_rate": 0.00019999348099497715, - "loss": 46.0, - "step": 22580 - }, - { - "epoch": 3.636499053907162, - "grad_norm": 0.01929938606917858, - "learning_rate": 0.0001999934804173007, - "loss": 46.0, - "step": 22581 - }, - { - "epoch": 3.6366600909859494, - "grad_norm": 0.0015261085936799645, - "learning_rate": 0.00019999347983959867, - "loss": 46.0, - "step": 22582 - }, - { - "epoch": 3.636821128064737, - "grad_norm": 0.0012616056483238935, - "learning_rate": 0.00019999347926187106, - "loss": 46.0, - "step": 22583 - }, - { - "epoch": 3.6369821651435243, - "grad_norm": 0.006738286931067705, - "learning_rate": 0.00019999347868411785, - "loss": 46.0, - "step": 22584 - }, - { - "epoch": 3.637143202222312, - "grad_norm": 0.004843592178076506, - "learning_rate": 0.000199993478106339, - "loss": 46.0, - "step": 22585 - }, - { - "epoch": 3.6373042393010993, - "grad_norm": 0.005053396802395582, - "learning_rate": 0.00019999347752853463, - "loss": 46.0, - "step": 22586 - }, - { - "epoch": 3.6374652763798867, - "grad_norm": 0.0014434074983000755, - "learning_rate": 0.00019999347695070463, - "loss": 46.0, - "step": 22587 - }, - { - "epoch": 3.6376263134586737, - "grad_norm": 0.0031371042132377625, - "learning_rate": 0.00019999347637284905, - "loss": 46.0, - "step": 22588 - }, - { - "epoch": 3.637787350537461, - "grad_norm": 0.004133532755076885, - "learning_rate": 0.00019999347579496786, - "loss": 46.0, - "step": 22589 - }, - { - "epoch": 3.6379483876162486, - "grad_norm": 0.008824850432574749, - "learning_rate": 0.00019999347521706108, - "loss": 46.0, - "step": 22590 - }, - { - "epoch": 3.638109424695036, - "grad_norm": 0.00482999486848712, - "learning_rate": 0.0001999934746391287, - "loss": 46.0, - "step": 22591 - }, - { - "epoch": 3.6382704617738235, - "grad_norm": 0.004027730785310268, - "learning_rate": 0.00019999347406117072, - "loss": 46.0, - "step": 22592 - }, - { - "epoch": 3.638431498852611, - "grad_norm": 0.005034802481532097, - "learning_rate": 0.0001999934734831872, - "loss": 46.0, - "step": 22593 - }, - { - "epoch": 3.638592535931398, - "grad_norm": 0.0011818541679531336, - "learning_rate": 0.00019999347290517805, - "loss": 46.0, - "step": 22594 - }, - { - "epoch": 3.6387535730101854, - "grad_norm": 0.001119572902098298, - "learning_rate": 0.0001999934723271433, - "loss": 46.0, - "step": 22595 - }, - { - "epoch": 3.638914610088973, - "grad_norm": 0.0011435940396040678, - "learning_rate": 0.00019999347174908295, - "loss": 46.0, - "step": 22596 - }, - { - "epoch": 3.6390756471677603, - "grad_norm": 0.026443036273121834, - "learning_rate": 0.00019999347117099703, - "loss": 46.0, - "step": 22597 - }, - { - "epoch": 3.639236684246548, - "grad_norm": 0.007674022577702999, - "learning_rate": 0.00019999347059288552, - "loss": 46.0, - "step": 22598 - }, - { - "epoch": 3.6393977213253352, - "grad_norm": 0.0014888565056025982, - "learning_rate": 0.0001999934700147484, - "loss": 46.0, - "step": 22599 - }, - { - "epoch": 3.6395587584041227, - "grad_norm": 0.00165718758944422, - "learning_rate": 0.00019999346943658567, - "loss": 46.0, - "step": 22600 - }, - { - "epoch": 3.63971979548291, - "grad_norm": 0.003740053391084075, - "learning_rate": 0.00019999346885839737, - "loss": 46.0, - "step": 22601 - }, - { - "epoch": 3.6398808325616976, - "grad_norm": 0.01177155040204525, - "learning_rate": 0.0001999934682801835, - "loss": 46.0, - "step": 22602 - }, - { - "epoch": 3.6400418696404846, - "grad_norm": 0.0014776635216549039, - "learning_rate": 0.000199993467701944, - "loss": 46.0, - "step": 22603 - }, - { - "epoch": 3.640202906719272, - "grad_norm": 0.021222947165369987, - "learning_rate": 0.0001999934671236789, - "loss": 46.0, - "step": 22604 - }, - { - "epoch": 3.6403639437980595, - "grad_norm": 0.006313573103398085, - "learning_rate": 0.00019999346654538824, - "loss": 46.0, - "step": 22605 - }, - { - "epoch": 3.640524980876847, - "grad_norm": 0.014756487682461739, - "learning_rate": 0.00019999346596707196, - "loss": 46.0, - "step": 22606 - }, - { - "epoch": 3.6406860179556344, - "grad_norm": 0.0023969586472958326, - "learning_rate": 0.0001999934653887301, - "loss": 46.0, - "step": 22607 - }, - { - "epoch": 3.6408470550344214, - "grad_norm": 0.004246568772941828, - "learning_rate": 0.00019999346481036265, - "loss": 46.0, - "step": 22608 - }, - { - "epoch": 3.641008092113209, - "grad_norm": 0.002121901838108897, - "learning_rate": 0.0001999934642319696, - "loss": 46.0, - "step": 22609 - }, - { - "epoch": 3.6411691291919963, - "grad_norm": 0.0021776400972157717, - "learning_rate": 0.00019999346365355097, - "loss": 46.0, - "step": 22610 - }, - { - "epoch": 3.6413301662707838, - "grad_norm": 0.00777210108935833, - "learning_rate": 0.00019999346307510672, - "loss": 46.0, - "step": 22611 - }, - { - "epoch": 3.641491203349571, - "grad_norm": 0.0012758438242599368, - "learning_rate": 0.00019999346249663692, - "loss": 46.0, - "step": 22612 - }, - { - "epoch": 3.6416522404283587, - "grad_norm": 0.018482966348528862, - "learning_rate": 0.00019999346191814147, - "loss": 46.0, - "step": 22613 - }, - { - "epoch": 3.641813277507146, - "grad_norm": 0.0029913706239312887, - "learning_rate": 0.00019999346133962046, - "loss": 46.0, - "step": 22614 - }, - { - "epoch": 3.6419743145859336, - "grad_norm": 0.008060762658715248, - "learning_rate": 0.0001999934607610739, - "loss": 46.0, - "step": 22615 - }, - { - "epoch": 3.642135351664721, - "grad_norm": 0.003985735587775707, - "learning_rate": 0.00019999346018250168, - "loss": 46.0, - "step": 22616 - }, - { - "epoch": 3.642296388743508, - "grad_norm": 0.012810556218028069, - "learning_rate": 0.00019999345960390388, - "loss": 46.0, - "step": 22617 - }, - { - "epoch": 3.6424574258222955, - "grad_norm": 0.005633770488202572, - "learning_rate": 0.0001999934590252805, - "loss": 46.0, - "step": 22618 - }, - { - "epoch": 3.642618462901083, - "grad_norm": 0.0047984495759010315, - "learning_rate": 0.00019999345844663152, - "loss": 46.0, - "step": 22619 - }, - { - "epoch": 3.6427794999798704, - "grad_norm": 0.0024316892959177494, - "learning_rate": 0.00019999345786795694, - "loss": 46.0, - "step": 22620 - }, - { - "epoch": 3.642940537058658, - "grad_norm": 0.003199871862307191, - "learning_rate": 0.0001999934572892568, - "loss": 46.0, - "step": 22621 - }, - { - "epoch": 3.6431015741374453, - "grad_norm": 0.002251243218779564, - "learning_rate": 0.000199993456710531, - "loss": 46.0, - "step": 22622 - }, - { - "epoch": 3.6432626112162323, - "grad_norm": 0.005125878844410181, - "learning_rate": 0.00019999345613177965, - "loss": 46.0, - "step": 22623 - }, - { - "epoch": 3.6434236482950197, - "grad_norm": 0.003056485904380679, - "learning_rate": 0.00019999345555300272, - "loss": 46.0, - "step": 22624 - }, - { - "epoch": 3.643584685373807, - "grad_norm": 0.005509105511009693, - "learning_rate": 0.0001999934549742002, - "loss": 46.0, - "step": 22625 - }, - { - "epoch": 3.6437457224525946, - "grad_norm": 0.004110974259674549, - "learning_rate": 0.00019999345439537206, - "loss": 46.0, - "step": 22626 - }, - { - "epoch": 3.643906759531382, - "grad_norm": 0.0008788192644715309, - "learning_rate": 0.00019999345381651833, - "loss": 46.0, - "step": 22627 - }, - { - "epoch": 3.6440677966101696, - "grad_norm": 0.00850562285631895, - "learning_rate": 0.000199993453237639, - "loss": 46.0, - "step": 22628 - }, - { - "epoch": 3.644228833688957, - "grad_norm": 0.0026348510291427374, - "learning_rate": 0.0001999934526587341, - "loss": 46.0, - "step": 22629 - }, - { - "epoch": 3.6443898707677445, - "grad_norm": 0.008174367249011993, - "learning_rate": 0.0001999934520798036, - "loss": 46.0, - "step": 22630 - }, - { - "epoch": 3.644550907846532, - "grad_norm": 0.0071050627157092094, - "learning_rate": 0.00019999345150084749, - "loss": 46.0, - "step": 22631 - }, - { - "epoch": 3.644711944925319, - "grad_norm": 0.005144836381077766, - "learning_rate": 0.0001999934509218658, - "loss": 46.0, - "step": 22632 - }, - { - "epoch": 3.6448729820041064, - "grad_norm": 0.0004223309806548059, - "learning_rate": 0.00019999345034285853, - "loss": 46.0, - "step": 22633 - }, - { - "epoch": 3.645034019082894, - "grad_norm": 0.006718894932419062, - "learning_rate": 0.00019999344976382564, - "loss": 46.0, - "step": 22634 - }, - { - "epoch": 3.6451950561616813, - "grad_norm": 0.01032285112887621, - "learning_rate": 0.00019999344918476716, - "loss": 46.0, - "step": 22635 - }, - { - "epoch": 3.6453560932404687, - "grad_norm": 0.005285908468067646, - "learning_rate": 0.0001999934486056831, - "loss": 46.0, - "step": 22636 - }, - { - "epoch": 3.645517130319256, - "grad_norm": 0.0031795850954949856, - "learning_rate": 0.00019999344802657345, - "loss": 46.0, - "step": 22637 - }, - { - "epoch": 3.645678167398043, - "grad_norm": 0.0035892643500119448, - "learning_rate": 0.00019999344744743819, - "loss": 46.0, - "step": 22638 - }, - { - "epoch": 3.6458392044768306, - "grad_norm": 0.004392752889543772, - "learning_rate": 0.00019999344686827733, - "loss": 46.0, - "step": 22639 - }, - { - "epoch": 3.646000241555618, - "grad_norm": 0.005292119458317757, - "learning_rate": 0.00019999344628909092, - "loss": 46.0, - "step": 22640 - }, - { - "epoch": 3.6461612786344055, - "grad_norm": 0.0012819821713492274, - "learning_rate": 0.00019999344570987887, - "loss": 46.0, - "step": 22641 - }, - { - "epoch": 3.646322315713193, - "grad_norm": 0.0009550033137202263, - "learning_rate": 0.00019999344513064123, - "loss": 46.0, - "step": 22642 - }, - { - "epoch": 3.6464833527919804, - "grad_norm": 0.009785851463675499, - "learning_rate": 0.00019999344455137803, - "loss": 46.0, - "step": 22643 - }, - { - "epoch": 3.646644389870768, - "grad_norm": 0.0019457914168015122, - "learning_rate": 0.00019999344397208924, - "loss": 46.0, - "step": 22644 - }, - { - "epoch": 3.6468054269495553, - "grad_norm": 0.0035382984206080437, - "learning_rate": 0.00019999344339277481, - "loss": 46.0, - "step": 22645 - }, - { - "epoch": 3.646966464028343, - "grad_norm": 0.003357143606990576, - "learning_rate": 0.00019999344281343483, - "loss": 46.0, - "step": 22646 - }, - { - "epoch": 3.64712750110713, - "grad_norm": 0.002282208763062954, - "learning_rate": 0.00019999344223406925, - "loss": 46.0, - "step": 22647 - }, - { - "epoch": 3.6472885381859173, - "grad_norm": 0.003200829029083252, - "learning_rate": 0.00019999344165467806, - "loss": 46.0, - "step": 22648 - }, - { - "epoch": 3.6474495752647047, - "grad_norm": 0.00044119858648627996, - "learning_rate": 0.0001999934410752613, - "loss": 46.0, - "step": 22649 - }, - { - "epoch": 3.647610612343492, - "grad_norm": 0.0016092200530692935, - "learning_rate": 0.00019999344049581892, - "loss": 46.0, - "step": 22650 - }, - { - "epoch": 3.6477716494222796, - "grad_norm": 0.0016649457393214107, - "learning_rate": 0.00019999343991635097, - "loss": 46.0, - "step": 22651 - }, - { - "epoch": 3.6479326865010666, - "grad_norm": 0.008032422512769699, - "learning_rate": 0.0001999934393368574, - "loss": 46.0, - "step": 22652 - }, - { - "epoch": 3.648093723579854, - "grad_norm": 0.001791222020983696, - "learning_rate": 0.00019999343875733825, - "loss": 46.0, - "step": 22653 - }, - { - "epoch": 3.6482547606586415, - "grad_norm": 0.001990992808714509, - "learning_rate": 0.00019999343817779354, - "loss": 46.0, - "step": 22654 - }, - { - "epoch": 3.648415797737429, - "grad_norm": 0.0013464881340041757, - "learning_rate": 0.00019999343759822318, - "loss": 46.0, - "step": 22655 - }, - { - "epoch": 3.6485768348162164, - "grad_norm": 0.0010086926631629467, - "learning_rate": 0.00019999343701862724, - "loss": 46.0, - "step": 22656 - }, - { - "epoch": 3.648737871895004, - "grad_norm": 0.005623297765851021, - "learning_rate": 0.00019999343643900574, - "loss": 46.0, - "step": 22657 - }, - { - "epoch": 3.6488989089737913, - "grad_norm": 0.0007925299578346312, - "learning_rate": 0.00019999343585935862, - "loss": 46.0, - "step": 22658 - }, - { - "epoch": 3.6490599460525788, - "grad_norm": 0.0023344990331679583, - "learning_rate": 0.0001999934352796859, - "loss": 46.0, - "step": 22659 - }, - { - "epoch": 3.6492209831313662, - "grad_norm": 0.0036977152340114117, - "learning_rate": 0.0001999934346999876, - "loss": 46.0, - "step": 22660 - }, - { - "epoch": 3.6493820202101532, - "grad_norm": 0.004310498014092445, - "learning_rate": 0.00019999343412026372, - "loss": 46.0, - "step": 22661 - }, - { - "epoch": 3.6495430572889407, - "grad_norm": 0.0026043320540338755, - "learning_rate": 0.00019999343354051423, - "loss": 46.0, - "step": 22662 - }, - { - "epoch": 3.649704094367728, - "grad_norm": 0.0022007126826792955, - "learning_rate": 0.00019999343296073913, - "loss": 46.0, - "step": 22663 - }, - { - "epoch": 3.6498651314465156, - "grad_norm": 0.004146067891269922, - "learning_rate": 0.0001999934323809385, - "loss": 46.0, - "step": 22664 - }, - { - "epoch": 3.650026168525303, - "grad_norm": 0.0017406063852831721, - "learning_rate": 0.0001999934318011122, - "loss": 46.0, - "step": 22665 - }, - { - "epoch": 3.6501872056040905, - "grad_norm": 0.006764392368495464, - "learning_rate": 0.00019999343122126034, - "loss": 46.0, - "step": 22666 - }, - { - "epoch": 3.6503482426828775, - "grad_norm": 0.004552258178591728, - "learning_rate": 0.00019999343064138289, - "loss": 46.0, - "step": 22667 - }, - { - "epoch": 3.650509279761665, - "grad_norm": 0.004710499197244644, - "learning_rate": 0.00019999343006147987, - "loss": 46.0, - "step": 22668 - }, - { - "epoch": 3.6506703168404524, - "grad_norm": 0.0016715852543711662, - "learning_rate": 0.00019999342948155122, - "loss": 46.0, - "step": 22669 - }, - { - "epoch": 3.65083135391924, - "grad_norm": 0.0037318391259759665, - "learning_rate": 0.00019999342890159697, - "loss": 46.0, - "step": 22670 - }, - { - "epoch": 3.6509923909980273, - "grad_norm": 0.0016722220461815596, - "learning_rate": 0.00019999342832161717, - "loss": 46.0, - "step": 22671 - }, - { - "epoch": 3.6511534280768148, - "grad_norm": 0.0030264039523899555, - "learning_rate": 0.00019999342774161175, - "loss": 46.0, - "step": 22672 - }, - { - "epoch": 3.651314465155602, - "grad_norm": 0.007123306393623352, - "learning_rate": 0.00019999342716158075, - "loss": 46.0, - "step": 22673 - }, - { - "epoch": 3.6514755022343897, - "grad_norm": 0.006374321412295103, - "learning_rate": 0.0001999934265815241, - "loss": 46.0, - "step": 22674 - }, - { - "epoch": 3.651636539313177, - "grad_norm": 0.002023697830736637, - "learning_rate": 0.00019999342600144192, - "loss": 46.0, - "step": 22675 - }, - { - "epoch": 3.651797576391964, - "grad_norm": 0.003963215742260218, - "learning_rate": 0.00019999342542133413, - "loss": 46.0, - "step": 22676 - }, - { - "epoch": 3.6519586134707516, - "grad_norm": 0.001480724080465734, - "learning_rate": 0.00019999342484120077, - "loss": 46.0, - "step": 22677 - }, - { - "epoch": 3.652119650549539, - "grad_norm": 0.01023608073592186, - "learning_rate": 0.00019999342426104178, - "loss": 46.0, - "step": 22678 - }, - { - "epoch": 3.6522806876283265, - "grad_norm": 0.00320429471321404, - "learning_rate": 0.0001999934236808572, - "loss": 46.0, - "step": 22679 - }, - { - "epoch": 3.652441724707114, - "grad_norm": 0.004085510969161987, - "learning_rate": 0.00019999342310064703, - "loss": 46.0, - "step": 22680 - }, - { - "epoch": 3.652602761785901, - "grad_norm": 0.016598118469119072, - "learning_rate": 0.0001999934225204113, - "loss": 46.0, - "step": 22681 - }, - { - "epoch": 3.6527637988646884, - "grad_norm": 0.0022013175766915083, - "learning_rate": 0.00019999342194014993, - "loss": 46.0, - "step": 22682 - }, - { - "epoch": 3.652924835943476, - "grad_norm": 0.009513985365629196, - "learning_rate": 0.000199993421359863, - "loss": 46.0, - "step": 22683 - }, - { - "epoch": 3.6530858730222633, - "grad_norm": 0.015665218234062195, - "learning_rate": 0.00019999342077955045, - "loss": 46.0, - "step": 22684 - }, - { - "epoch": 3.6532469101010507, - "grad_norm": 0.00104470644146204, - "learning_rate": 0.00019999342019921232, - "loss": 46.0, - "step": 22685 - }, - { - "epoch": 3.653407947179838, - "grad_norm": 0.0038735801354050636, - "learning_rate": 0.0001999934196188486, - "loss": 46.0, - "step": 22686 - }, - { - "epoch": 3.6535689842586256, - "grad_norm": 0.001528553431853652, - "learning_rate": 0.00019999341903845927, - "loss": 46.0, - "step": 22687 - }, - { - "epoch": 3.653730021337413, - "grad_norm": 0.023378996178507805, - "learning_rate": 0.00019999341845804435, - "loss": 46.0, - "step": 22688 - }, - { - "epoch": 3.6538910584162005, - "grad_norm": 0.008008182048797607, - "learning_rate": 0.00019999341787760387, - "loss": 46.0, - "step": 22689 - }, - { - "epoch": 3.654052095494988, - "grad_norm": 0.007209602277725935, - "learning_rate": 0.00019999341729713777, - "loss": 46.0, - "step": 22690 - }, - { - "epoch": 3.654213132573775, - "grad_norm": 0.002953160787001252, - "learning_rate": 0.0001999934167166461, - "loss": 46.0, - "step": 22691 - }, - { - "epoch": 3.6543741696525625, - "grad_norm": 0.0018243159865960479, - "learning_rate": 0.00019999341613612882, - "loss": 46.0, - "step": 22692 - }, - { - "epoch": 3.65453520673135, - "grad_norm": 0.002337329089641571, - "learning_rate": 0.0001999934155555859, - "loss": 46.0, - "step": 22693 - }, - { - "epoch": 3.6546962438101374, - "grad_norm": 0.0028950306586921215, - "learning_rate": 0.00019999341497501746, - "loss": 46.0, - "step": 22694 - }, - { - "epoch": 3.654857280888925, - "grad_norm": 0.002868007170036435, - "learning_rate": 0.0001999934143944234, - "loss": 46.0, - "step": 22695 - }, - { - "epoch": 3.655018317967712, - "grad_norm": 0.0024472856894135475, - "learning_rate": 0.00019999341381380373, - "loss": 46.0, - "step": 22696 - }, - { - "epoch": 3.6551793550464993, - "grad_norm": 0.0033766641281545162, - "learning_rate": 0.0001999934132331585, - "loss": 46.0, - "step": 22697 - }, - { - "epoch": 3.6553403921252867, - "grad_norm": 0.00121620437130332, - "learning_rate": 0.00019999341265248765, - "loss": 46.0, - "step": 22698 - }, - { - "epoch": 3.655501429204074, - "grad_norm": 0.006206164136528969, - "learning_rate": 0.00019999341207179122, - "loss": 46.0, - "step": 22699 - }, - { - "epoch": 3.6556624662828616, - "grad_norm": 0.00413085101172328, - "learning_rate": 0.00019999341149106917, - "loss": 46.0, - "step": 22700 - }, - { - "epoch": 3.655823503361649, - "grad_norm": 0.004665317479521036, - "learning_rate": 0.00019999341091032156, - "loss": 46.0, - "step": 22701 - }, - { - "epoch": 3.6559845404404365, - "grad_norm": 0.002895243000239134, - "learning_rate": 0.00019999341032954834, - "loss": 46.0, - "step": 22702 - }, - { - "epoch": 3.656145577519224, - "grad_norm": 0.0028475916478782892, - "learning_rate": 0.00019999340974874953, - "loss": 46.0, - "step": 22703 - }, - { - "epoch": 3.6563066145980114, - "grad_norm": 0.00355276046320796, - "learning_rate": 0.00019999340916792516, - "loss": 46.0, - "step": 22704 - }, - { - "epoch": 3.6564676516767984, - "grad_norm": 0.0028274033684283495, - "learning_rate": 0.00019999340858707515, - "loss": 46.0, - "step": 22705 - }, - { - "epoch": 3.656628688755586, - "grad_norm": 0.0059944987297058105, - "learning_rate": 0.00019999340800619955, - "loss": 46.0, - "step": 22706 - }, - { - "epoch": 3.6567897258343733, - "grad_norm": 0.009118501096963882, - "learning_rate": 0.0001999934074252984, - "loss": 46.0, - "step": 22707 - }, - { - "epoch": 3.656950762913161, - "grad_norm": 0.0010084727546200156, - "learning_rate": 0.00019999340684437162, - "loss": 46.0, - "step": 22708 - }, - { - "epoch": 3.6571117999919482, - "grad_norm": 0.0014942653942853212, - "learning_rate": 0.00019999340626341924, - "loss": 46.0, - "step": 22709 - }, - { - "epoch": 3.6572728370707357, - "grad_norm": 0.001462449668906629, - "learning_rate": 0.0001999934056824413, - "loss": 46.0, - "step": 22710 - }, - { - "epoch": 3.6574338741495227, - "grad_norm": 0.004902323242276907, - "learning_rate": 0.00019999340510143773, - "loss": 46.0, - "step": 22711 - }, - { - "epoch": 3.65759491122831, - "grad_norm": 0.0022143602836877108, - "learning_rate": 0.00019999340452040858, - "loss": 46.0, - "step": 22712 - }, - { - "epoch": 3.6577559483070976, - "grad_norm": 0.0005657552974298596, - "learning_rate": 0.00019999340393935384, - "loss": 46.0, - "step": 22713 - }, - { - "epoch": 3.657916985385885, - "grad_norm": 0.004769987426698208, - "learning_rate": 0.0001999934033582735, - "loss": 46.0, - "step": 22714 - }, - { - "epoch": 3.6580780224646725, - "grad_norm": 0.002967857290059328, - "learning_rate": 0.00019999340277716758, - "loss": 46.0, - "step": 22715 - }, - { - "epoch": 3.65823905954346, - "grad_norm": 0.010575953871011734, - "learning_rate": 0.00019999340219603609, - "loss": 46.0, - "step": 22716 - }, - { - "epoch": 3.6584000966222474, - "grad_norm": 0.0020869679283350706, - "learning_rate": 0.00019999340161487895, - "loss": 46.0, - "step": 22717 - }, - { - "epoch": 3.658561133701035, - "grad_norm": 0.00603108387440443, - "learning_rate": 0.00019999340103369625, - "loss": 46.0, - "step": 22718 - }, - { - "epoch": 3.6587221707798223, - "grad_norm": 0.0023619262501597404, - "learning_rate": 0.00019999340045248796, - "loss": 46.0, - "step": 22719 - }, - { - "epoch": 3.6588832078586093, - "grad_norm": 0.0019366255728527904, - "learning_rate": 0.00019999339987125406, - "loss": 46.0, - "step": 22720 - }, - { - "epoch": 3.6590442449373968, - "grad_norm": 0.002384476363658905, - "learning_rate": 0.00019999339928999458, - "loss": 46.0, - "step": 22721 - }, - { - "epoch": 3.6592052820161842, - "grad_norm": 0.003032886190339923, - "learning_rate": 0.0001999933987087095, - "loss": 46.0, - "step": 22722 - }, - { - "epoch": 3.6593663190949717, - "grad_norm": 0.007723737508058548, - "learning_rate": 0.00019999339812739884, - "loss": 46.0, - "step": 22723 - }, - { - "epoch": 3.659527356173759, - "grad_norm": 0.0014552040956914425, - "learning_rate": 0.00019999339754606256, - "loss": 46.0, - "step": 22724 - }, - { - "epoch": 3.659688393252546, - "grad_norm": 0.0017237073043361306, - "learning_rate": 0.00019999339696470073, - "loss": 46.0, - "step": 22725 - }, - { - "epoch": 3.6598494303313336, - "grad_norm": 0.0014080456458032131, - "learning_rate": 0.00019999339638331325, - "loss": 46.0, - "step": 22726 - }, - { - "epoch": 3.660010467410121, - "grad_norm": 0.002336216624826193, - "learning_rate": 0.0001999933958019002, - "loss": 46.0, - "step": 22727 - }, - { - "epoch": 3.6601715044889085, - "grad_norm": 0.006157963536679745, - "learning_rate": 0.00019999339522046156, - "loss": 46.0, - "step": 22728 - }, - { - "epoch": 3.660332541567696, - "grad_norm": 0.0021693038288503885, - "learning_rate": 0.00019999339463899735, - "loss": 46.0, - "step": 22729 - }, - { - "epoch": 3.6604935786464834, - "grad_norm": 0.01331277471035719, - "learning_rate": 0.00019999339405750753, - "loss": 46.0, - "step": 22730 - }, - { - "epoch": 3.660654615725271, - "grad_norm": 0.007339002098888159, - "learning_rate": 0.0001999933934759921, - "loss": 46.0, - "step": 22731 - }, - { - "epoch": 3.6608156528040583, - "grad_norm": 0.0050214375369250774, - "learning_rate": 0.00019999339289445108, - "loss": 46.0, - "step": 22732 - }, - { - "epoch": 3.6609766898828457, - "grad_norm": 0.011197277344763279, - "learning_rate": 0.0001999933923128845, - "loss": 46.0, - "step": 22733 - }, - { - "epoch": 3.6611377269616328, - "grad_norm": 0.006236036773771048, - "learning_rate": 0.0001999933917312923, - "loss": 46.0, - "step": 22734 - }, - { - "epoch": 3.66129876404042, - "grad_norm": 0.0030410068575292826, - "learning_rate": 0.0001999933911496745, - "loss": 46.0, - "step": 22735 - }, - { - "epoch": 3.6614598011192077, - "grad_norm": 0.00892493687570095, - "learning_rate": 0.00019999339056803113, - "loss": 46.0, - "step": 22736 - }, - { - "epoch": 3.661620838197995, - "grad_norm": 0.002359558828175068, - "learning_rate": 0.00019999338998636214, - "loss": 46.0, - "step": 22737 - }, - { - "epoch": 3.6617818752767826, - "grad_norm": 0.0013390594394877553, - "learning_rate": 0.0001999933894046676, - "loss": 46.0, - "step": 22738 - }, - { - "epoch": 3.66194291235557, - "grad_norm": 0.0014586279867216945, - "learning_rate": 0.00019999338882294743, - "loss": 46.0, - "step": 22739 - }, - { - "epoch": 3.662103949434357, - "grad_norm": 0.003718315390869975, - "learning_rate": 0.00019999338824120167, - "loss": 46.0, - "step": 22740 - }, - { - "epoch": 3.6622649865131445, - "grad_norm": 0.0015525443013757467, - "learning_rate": 0.0001999933876594303, - "loss": 46.0, - "step": 22741 - }, - { - "epoch": 3.662426023591932, - "grad_norm": 0.0017321790801361203, - "learning_rate": 0.00019999338707763338, - "loss": 46.0, - "step": 22742 - }, - { - "epoch": 3.6625870606707194, - "grad_norm": 0.004759287461638451, - "learning_rate": 0.00019999338649581084, - "loss": 46.0, - "step": 22743 - }, - { - "epoch": 3.662748097749507, - "grad_norm": 0.0031508072279393673, - "learning_rate": 0.0001999933859139627, - "loss": 46.0, - "step": 22744 - }, - { - "epoch": 3.6629091348282943, - "grad_norm": 0.001341440831311047, - "learning_rate": 0.000199993385332089, - "loss": 46.0, - "step": 22745 - }, - { - "epoch": 3.6630701719070817, - "grad_norm": 0.011570530012249947, - "learning_rate": 0.00019999338475018967, - "loss": 46.0, - "step": 22746 - }, - { - "epoch": 3.663231208985869, - "grad_norm": 0.0018969964003190398, - "learning_rate": 0.00019999338416826475, - "loss": 46.0, - "step": 22747 - }, - { - "epoch": 3.6633922460646566, - "grad_norm": 0.0043100775219500065, - "learning_rate": 0.00019999338358631428, - "loss": 46.0, - "step": 22748 - }, - { - "epoch": 3.6635532831434436, - "grad_norm": 0.0054091825149953365, - "learning_rate": 0.00019999338300433816, - "loss": 46.0, - "step": 22749 - }, - { - "epoch": 3.663714320222231, - "grad_norm": 0.0025551102589815855, - "learning_rate": 0.00019999338242233648, - "loss": 46.0, - "step": 22750 - }, - { - "epoch": 3.6638753573010185, - "grad_norm": 0.009712565690279007, - "learning_rate": 0.0001999933818403092, - "loss": 46.0, - "step": 22751 - }, - { - "epoch": 3.664036394379806, - "grad_norm": 0.0016537596238777041, - "learning_rate": 0.00019999338125825636, - "loss": 46.0, - "step": 22752 - }, - { - "epoch": 3.6641974314585934, - "grad_norm": 0.003614256391301751, - "learning_rate": 0.00019999338067617787, - "loss": 46.0, - "step": 22753 - }, - { - "epoch": 3.664358468537381, - "grad_norm": 0.007853053510189056, - "learning_rate": 0.00019999338009407382, - "loss": 46.0, - "step": 22754 - }, - { - "epoch": 3.664519505616168, - "grad_norm": 0.002018106635659933, - "learning_rate": 0.00019999337951194415, - "loss": 46.0, - "step": 22755 - }, - { - "epoch": 3.6646805426949554, - "grad_norm": 0.00292217917740345, - "learning_rate": 0.00019999337892978892, - "loss": 46.0, - "step": 22756 - }, - { - "epoch": 3.664841579773743, - "grad_norm": 0.005501434672623873, - "learning_rate": 0.00019999337834760808, - "loss": 46.0, - "step": 22757 - }, - { - "epoch": 3.6650026168525303, - "grad_norm": 0.009175337851047516, - "learning_rate": 0.00019999337776540162, - "loss": 46.0, - "step": 22758 - }, - { - "epoch": 3.6651636539313177, - "grad_norm": 0.0013669782783836126, - "learning_rate": 0.0001999933771831696, - "loss": 46.0, - "step": 22759 - }, - { - "epoch": 3.665324691010105, - "grad_norm": 0.0018125309143215418, - "learning_rate": 0.000199993376600912, - "loss": 46.0, - "step": 22760 - }, - { - "epoch": 3.6654857280888926, - "grad_norm": 0.002656195079907775, - "learning_rate": 0.00019999337601862878, - "loss": 46.0, - "step": 22761 - }, - { - "epoch": 3.66564676516768, - "grad_norm": 0.004852490499615669, - "learning_rate": 0.00019999337543632, - "loss": 46.0, - "step": 22762 - }, - { - "epoch": 3.6658078022464675, - "grad_norm": 0.004935932345688343, - "learning_rate": 0.0001999933748539856, - "loss": 46.0, - "step": 22763 - }, - { - "epoch": 3.6659688393252545, - "grad_norm": 0.006723203230649233, - "learning_rate": 0.00019999337427162558, - "loss": 46.0, - "step": 22764 - }, - { - "epoch": 3.666129876404042, - "grad_norm": 0.00183191429823637, - "learning_rate": 0.00019999337368924001, - "loss": 46.0, - "step": 22765 - }, - { - "epoch": 3.6662909134828294, - "grad_norm": 0.0024352543987333775, - "learning_rate": 0.0001999933731068288, - "loss": 46.0, - "step": 22766 - }, - { - "epoch": 3.666451950561617, - "grad_norm": 0.010108031332492828, - "learning_rate": 0.00019999337252439207, - "loss": 46.0, - "step": 22767 - }, - { - "epoch": 3.6666129876404043, - "grad_norm": 0.015142385847866535, - "learning_rate": 0.0001999933719419297, - "loss": 46.0, - "step": 22768 - }, - { - "epoch": 3.6667740247191913, - "grad_norm": 0.0029771251138299704, - "learning_rate": 0.00019999337135944174, - "loss": 46.0, - "step": 22769 - }, - { - "epoch": 3.666935061797979, - "grad_norm": 0.00233553652651608, - "learning_rate": 0.00019999337077692818, - "loss": 46.0, - "step": 22770 - }, - { - "epoch": 3.6670960988767662, - "grad_norm": 0.0017812150763347745, - "learning_rate": 0.00019999337019438907, - "loss": 46.0, - "step": 22771 - }, - { - "epoch": 3.6672571359555537, - "grad_norm": 0.002252899343147874, - "learning_rate": 0.0001999933696118243, - "loss": 46.0, - "step": 22772 - }, - { - "epoch": 3.667418173034341, - "grad_norm": 0.0011877642245963216, - "learning_rate": 0.000199993369029234, - "loss": 46.0, - "step": 22773 - }, - { - "epoch": 3.6675792101131286, - "grad_norm": 0.0007877996540628374, - "learning_rate": 0.00019999336844661806, - "loss": 46.0, - "step": 22774 - }, - { - "epoch": 3.667740247191916, - "grad_norm": 0.001983505440875888, - "learning_rate": 0.00019999336786397654, - "loss": 46.0, - "step": 22775 - }, - { - "epoch": 3.6679012842707035, - "grad_norm": 0.005540449172258377, - "learning_rate": 0.00019999336728130943, - "loss": 46.0, - "step": 22776 - }, - { - "epoch": 3.668062321349491, - "grad_norm": 0.010100196115672588, - "learning_rate": 0.00019999336669861674, - "loss": 46.0, - "step": 22777 - }, - { - "epoch": 3.668223358428278, - "grad_norm": 0.005990343634039164, - "learning_rate": 0.00019999336611589843, - "loss": 46.0, - "step": 22778 - }, - { - "epoch": 3.6683843955070654, - "grad_norm": 0.0009399388218298554, - "learning_rate": 0.00019999336553315456, - "loss": 46.0, - "step": 22779 - }, - { - "epoch": 3.668545432585853, - "grad_norm": 0.0028247591108083725, - "learning_rate": 0.00019999336495038508, - "loss": 46.0, - "step": 22780 - }, - { - "epoch": 3.6687064696646403, - "grad_norm": 0.0014354345621541142, - "learning_rate": 0.00019999336436758998, - "loss": 46.0, - "step": 22781 - }, - { - "epoch": 3.6688675067434278, - "grad_norm": 0.002890727249905467, - "learning_rate": 0.00019999336378476932, - "loss": 46.0, - "step": 22782 - }, - { - "epoch": 3.669028543822215, - "grad_norm": 0.023550447076559067, - "learning_rate": 0.00019999336320192305, - "loss": 46.0, - "step": 22783 - }, - { - "epoch": 3.6691895809010022, - "grad_norm": 0.002335806144401431, - "learning_rate": 0.0001999933626190512, - "loss": 46.0, - "step": 22784 - }, - { - "epoch": 3.6693506179797897, - "grad_norm": 0.0026196411345154047, - "learning_rate": 0.00019999336203615375, - "loss": 46.0, - "step": 22785 - }, - { - "epoch": 3.669511655058577, - "grad_norm": 0.0011785373790189624, - "learning_rate": 0.00019999336145323072, - "loss": 46.0, - "step": 22786 - }, - { - "epoch": 3.6696726921373646, - "grad_norm": 0.00959487073123455, - "learning_rate": 0.00019999336087028207, - "loss": 46.0, - "step": 22787 - }, - { - "epoch": 3.669833729216152, - "grad_norm": 0.016064202412962914, - "learning_rate": 0.00019999336028730786, - "loss": 46.0, - "step": 22788 - }, - { - "epoch": 3.6699947662949395, - "grad_norm": 0.0059156883507966995, - "learning_rate": 0.00019999335970430804, - "loss": 46.0, - "step": 22789 - }, - { - "epoch": 3.670155803373727, - "grad_norm": 0.0074160280637443066, - "learning_rate": 0.00019999335912128263, - "loss": 46.0, - "step": 22790 - }, - { - "epoch": 3.6703168404525144, - "grad_norm": 0.0012285802513360977, - "learning_rate": 0.0001999933585382316, - "loss": 46.0, - "step": 22791 - }, - { - "epoch": 3.670477877531302, - "grad_norm": 0.0019702634308487177, - "learning_rate": 0.00019999335795515503, - "loss": 46.0, - "step": 22792 - }, - { - "epoch": 3.670638914610089, - "grad_norm": 0.002632237272337079, - "learning_rate": 0.0001999933573720528, - "loss": 46.0, - "step": 22793 - }, - { - "epoch": 3.6707999516888763, - "grad_norm": 0.01589401438832283, - "learning_rate": 0.00019999335678892505, - "loss": 46.0, - "step": 22794 - }, - { - "epoch": 3.6709609887676637, - "grad_norm": 0.0021344409324228764, - "learning_rate": 0.00019999335620577165, - "loss": 46.0, - "step": 22795 - }, - { - "epoch": 3.671122025846451, - "grad_norm": 0.004890965297818184, - "learning_rate": 0.00019999335562259266, - "loss": 46.0, - "step": 22796 - }, - { - "epoch": 3.6712830629252386, - "grad_norm": 0.015462430194020271, - "learning_rate": 0.00019999335503938811, - "loss": 46.0, - "step": 22797 - }, - { - "epoch": 3.6714441000040257, - "grad_norm": 0.00885778572410345, - "learning_rate": 0.00019999335445615795, - "loss": 46.0, - "step": 22798 - }, - { - "epoch": 3.671605137082813, - "grad_norm": 0.002308700466528535, - "learning_rate": 0.00019999335387290218, - "loss": 46.0, - "step": 22799 - }, - { - "epoch": 3.6717661741616006, - "grad_norm": 0.0008633616380393505, - "learning_rate": 0.00019999335328962085, - "loss": 46.0, - "step": 22800 - }, - { - "epoch": 3.671927211240388, - "grad_norm": 0.0028252352494746447, - "learning_rate": 0.00019999335270631392, - "loss": 46.0, - "step": 22801 - }, - { - "epoch": 3.6720882483191755, - "grad_norm": 0.0023980673868209124, - "learning_rate": 0.00019999335212298142, - "loss": 46.0, - "step": 22802 - }, - { - "epoch": 3.672249285397963, - "grad_norm": 0.008981111459434032, - "learning_rate": 0.00019999335153962327, - "loss": 46.0, - "step": 22803 - }, - { - "epoch": 3.6724103224767504, - "grad_norm": 0.016783330589532852, - "learning_rate": 0.00019999335095623956, - "loss": 46.0, - "step": 22804 - }, - { - "epoch": 3.672571359555538, - "grad_norm": 0.004916629754006863, - "learning_rate": 0.00019999335037283023, - "loss": 46.0, - "step": 22805 - }, - { - "epoch": 3.6727323966343253, - "grad_norm": 0.002610010327771306, - "learning_rate": 0.00019999334978939532, - "loss": 46.0, - "step": 22806 - }, - { - "epoch": 3.6728934337131127, - "grad_norm": 0.0028579039499163628, - "learning_rate": 0.00019999334920593485, - "loss": 46.0, - "step": 22807 - }, - { - "epoch": 3.6730544707918997, - "grad_norm": 0.0032562275882810354, - "learning_rate": 0.00019999334862244873, - "loss": 46.0, - "step": 22808 - }, - { - "epoch": 3.673215507870687, - "grad_norm": 0.003705337643623352, - "learning_rate": 0.00019999334803893706, - "loss": 46.0, - "step": 22809 - }, - { - "epoch": 3.6733765449494746, - "grad_norm": 0.012126673012971878, - "learning_rate": 0.0001999933474553998, - "loss": 46.0, - "step": 22810 - }, - { - "epoch": 3.673537582028262, - "grad_norm": 0.006201410666108131, - "learning_rate": 0.0001999933468718369, - "loss": 46.0, - "step": 22811 - }, - { - "epoch": 3.6736986191070495, - "grad_norm": 0.003494767239317298, - "learning_rate": 0.00019999334628824844, - "loss": 46.0, - "step": 22812 - }, - { - "epoch": 3.6738596561858365, - "grad_norm": 0.0015345205320045352, - "learning_rate": 0.0001999933457046344, - "loss": 46.0, - "step": 22813 - }, - { - "epoch": 3.674020693264624, - "grad_norm": 0.0024018334224820137, - "learning_rate": 0.00019999334512099475, - "loss": 46.0, - "step": 22814 - }, - { - "epoch": 3.6741817303434114, - "grad_norm": 0.004173180088400841, - "learning_rate": 0.0001999933445373295, - "loss": 46.0, - "step": 22815 - }, - { - "epoch": 3.674342767422199, - "grad_norm": 0.010577252134680748, - "learning_rate": 0.00019999334395363866, - "loss": 46.0, - "step": 22816 - }, - { - "epoch": 3.6745038045009863, - "grad_norm": 0.0017120972042903304, - "learning_rate": 0.00019999334336992224, - "loss": 46.0, - "step": 22817 - }, - { - "epoch": 3.674664841579774, - "grad_norm": 0.004271545447409153, - "learning_rate": 0.0001999933427861802, - "loss": 46.0, - "step": 22818 - }, - { - "epoch": 3.6748258786585613, - "grad_norm": 0.001638551359064877, - "learning_rate": 0.0001999933422024126, - "loss": 46.0, - "step": 22819 - }, - { - "epoch": 3.6749869157373487, - "grad_norm": 0.0024707496631890535, - "learning_rate": 0.00019999334161861941, - "loss": 46.0, - "step": 22820 - }, - { - "epoch": 3.675147952816136, - "grad_norm": 0.003369895974174142, - "learning_rate": 0.00019999334103480059, - "loss": 46.0, - "step": 22821 - }, - { - "epoch": 3.675308989894923, - "grad_norm": 0.002848181175068021, - "learning_rate": 0.0001999933404509562, - "loss": 46.0, - "step": 22822 - }, - { - "epoch": 3.6754700269737106, - "grad_norm": 0.0007846674998290837, - "learning_rate": 0.00019999333986708622, - "loss": 46.0, - "step": 22823 - }, - { - "epoch": 3.675631064052498, - "grad_norm": 0.009992103092372417, - "learning_rate": 0.0001999933392831906, - "loss": 46.0, - "step": 22824 - }, - { - "epoch": 3.6757921011312855, - "grad_norm": 0.004940797109156847, - "learning_rate": 0.00019999333869926943, - "loss": 46.0, - "step": 22825 - }, - { - "epoch": 3.675953138210073, - "grad_norm": 0.0035269337240606546, - "learning_rate": 0.0001999933381153227, - "loss": 46.0, - "step": 22826 - }, - { - "epoch": 3.6761141752888604, - "grad_norm": 0.0008664656197652221, - "learning_rate": 0.00019999333753135032, - "loss": 46.0, - "step": 22827 - }, - { - "epoch": 3.6762752123676474, - "grad_norm": 0.0016531017608940601, - "learning_rate": 0.00019999333694735235, - "loss": 46.0, - "step": 22828 - }, - { - "epoch": 3.676436249446435, - "grad_norm": 0.002864454174414277, - "learning_rate": 0.00019999333636332883, - "loss": 46.0, - "step": 22829 - }, - { - "epoch": 3.6765972865252223, - "grad_norm": 0.004423864651471376, - "learning_rate": 0.0001999933357792797, - "loss": 46.0, - "step": 22830 - }, - { - "epoch": 3.67675832360401, - "grad_norm": 0.006899977568536997, - "learning_rate": 0.00019999333519520493, - "loss": 46.0, - "step": 22831 - }, - { - "epoch": 3.6769193606827972, - "grad_norm": 0.00943421944975853, - "learning_rate": 0.00019999333461110462, - "loss": 46.0, - "step": 22832 - }, - { - "epoch": 3.6770803977615847, - "grad_norm": 0.0018641840433701873, - "learning_rate": 0.00019999333402697872, - "loss": 46.0, - "step": 22833 - }, - { - "epoch": 3.677241434840372, - "grad_norm": 0.0019748711492866278, - "learning_rate": 0.00019999333344282718, - "loss": 46.0, - "step": 22834 - }, - { - "epoch": 3.6774024719191596, - "grad_norm": 0.002223348943516612, - "learning_rate": 0.00019999333285865007, - "loss": 46.0, - "step": 22835 - }, - { - "epoch": 3.677563508997947, - "grad_norm": 0.0017048557056114078, - "learning_rate": 0.00019999333227444738, - "loss": 46.0, - "step": 22836 - }, - { - "epoch": 3.677724546076734, - "grad_norm": 0.001784069580025971, - "learning_rate": 0.00019999333169021908, - "loss": 46.0, - "step": 22837 - }, - { - "epoch": 3.6778855831555215, - "grad_norm": 0.00346533115953207, - "learning_rate": 0.0001999933311059652, - "loss": 46.0, - "step": 22838 - }, - { - "epoch": 3.678046620234309, - "grad_norm": 0.001329404185526073, - "learning_rate": 0.0001999933305216857, - "loss": 46.0, - "step": 22839 - }, - { - "epoch": 3.6782076573130964, - "grad_norm": 0.0019629541784524918, - "learning_rate": 0.00019999332993738065, - "loss": 46.0, - "step": 22840 - }, - { - "epoch": 3.678368694391884, - "grad_norm": 0.00393206812441349, - "learning_rate": 0.00019999332935305, - "loss": 46.0, - "step": 22841 - }, - { - "epoch": 3.678529731470671, - "grad_norm": 0.0031270713079720736, - "learning_rate": 0.00019999332876869373, - "loss": 46.0, - "step": 22842 - }, - { - "epoch": 3.6786907685494583, - "grad_norm": 0.002972578862681985, - "learning_rate": 0.00019999332818431187, - "loss": 46.0, - "step": 22843 - }, - { - "epoch": 3.6788518056282458, - "grad_norm": 0.0027528656646609306, - "learning_rate": 0.00019999332759990443, - "loss": 46.0, - "step": 22844 - }, - { - "epoch": 3.679012842707033, - "grad_norm": 0.003687758930027485, - "learning_rate": 0.00019999332701547138, - "loss": 46.0, - "step": 22845 - }, - { - "epoch": 3.6791738797858207, - "grad_norm": 0.0015499844448640943, - "learning_rate": 0.00019999332643101276, - "loss": 46.0, - "step": 22846 - }, - { - "epoch": 3.679334916864608, - "grad_norm": 0.006773034110665321, - "learning_rate": 0.00019999332584652853, - "loss": 46.0, - "step": 22847 - }, - { - "epoch": 3.6794959539433956, - "grad_norm": 0.00490977056324482, - "learning_rate": 0.00019999332526201872, - "loss": 46.0, - "step": 22848 - }, - { - "epoch": 3.679656991022183, - "grad_norm": 0.002157560782507062, - "learning_rate": 0.0001999933246774833, - "loss": 46.0, - "step": 22849 - }, - { - "epoch": 3.6798180281009705, - "grad_norm": 0.0025769590865820646, - "learning_rate": 0.00019999332409292232, - "loss": 46.0, - "step": 22850 - }, - { - "epoch": 3.6799790651797575, - "grad_norm": 0.0049954731948673725, - "learning_rate": 0.00019999332350833571, - "loss": 46.0, - "step": 22851 - }, - { - "epoch": 3.680140102258545, - "grad_norm": 0.00099918432533741, - "learning_rate": 0.00019999332292372352, - "loss": 46.0, - "step": 22852 - }, - { - "epoch": 3.6803011393373324, - "grad_norm": 0.002734280424192548, - "learning_rate": 0.00019999332233908574, - "loss": 46.0, - "step": 22853 - }, - { - "epoch": 3.68046217641612, - "grad_norm": 0.01219154428690672, - "learning_rate": 0.00019999332175442235, - "loss": 46.0, - "step": 22854 - }, - { - "epoch": 3.6806232134949073, - "grad_norm": 0.0005331409047357738, - "learning_rate": 0.0001999933211697334, - "loss": 46.0, - "step": 22855 - }, - { - "epoch": 3.6807842505736947, - "grad_norm": 0.006523890886455774, - "learning_rate": 0.00019999332058501883, - "loss": 46.0, - "step": 22856 - }, - { - "epoch": 3.6809452876524817, - "grad_norm": 0.0036100205034017563, - "learning_rate": 0.00019999332000027867, - "loss": 46.0, - "step": 22857 - }, - { - "epoch": 3.681106324731269, - "grad_norm": 0.0022948416881263256, - "learning_rate": 0.00019999331941551293, - "loss": 46.0, - "step": 22858 - }, - { - "epoch": 3.6812673618100566, - "grad_norm": 0.01894768513739109, - "learning_rate": 0.0001999933188307216, - "loss": 46.0, - "step": 22859 - }, - { - "epoch": 3.681428398888844, - "grad_norm": 0.00970908347517252, - "learning_rate": 0.00019999331824590465, - "loss": 46.0, - "step": 22860 - }, - { - "epoch": 3.6815894359676316, - "grad_norm": 0.0023871108423918486, - "learning_rate": 0.00019999331766106212, - "loss": 46.0, - "step": 22861 - }, - { - "epoch": 3.681750473046419, - "grad_norm": 0.0014528199099004269, - "learning_rate": 0.000199993317076194, - "loss": 46.0, - "step": 22862 - }, - { - "epoch": 3.6819115101252065, - "grad_norm": 0.005543417762964964, - "learning_rate": 0.0001999933164913003, - "loss": 46.0, - "step": 22863 - }, - { - "epoch": 3.682072547203994, - "grad_norm": 0.0044127884320914745, - "learning_rate": 0.00019999331590638098, - "loss": 46.0, - "step": 22864 - }, - { - "epoch": 3.6822335842827814, - "grad_norm": 0.0009569072281010449, - "learning_rate": 0.00019999331532143607, - "loss": 46.0, - "step": 22865 - }, - { - "epoch": 3.6823946213615684, - "grad_norm": 0.004044860135763884, - "learning_rate": 0.00019999331473646558, - "loss": 46.0, - "step": 22866 - }, - { - "epoch": 3.682555658440356, - "grad_norm": 0.006370786111801863, - "learning_rate": 0.0001999933141514695, - "loss": 46.0, - "step": 22867 - }, - { - "epoch": 3.6827166955191433, - "grad_norm": 0.0022870569955557585, - "learning_rate": 0.00019999331356644783, - "loss": 46.0, - "step": 22868 - }, - { - "epoch": 3.6828777325979307, - "grad_norm": 0.003003267338499427, - "learning_rate": 0.00019999331298140054, - "loss": 46.0, - "step": 22869 - }, - { - "epoch": 3.683038769676718, - "grad_norm": 0.0029237500857561827, - "learning_rate": 0.00019999331239632765, - "loss": 46.0, - "step": 22870 - }, - { - "epoch": 3.683199806755505, - "grad_norm": 0.0016908635152503848, - "learning_rate": 0.0001999933118112292, - "loss": 46.0, - "step": 22871 - }, - { - "epoch": 3.6833608438342926, - "grad_norm": 0.0012524742633104324, - "learning_rate": 0.00019999331122610517, - "loss": 46.0, - "step": 22872 - }, - { - "epoch": 3.68352188091308, - "grad_norm": 0.0029560548719018698, - "learning_rate": 0.0001999933106409555, - "loss": 46.0, - "step": 22873 - }, - { - "epoch": 3.6836829179918675, - "grad_norm": 0.00247136689722538, - "learning_rate": 0.00019999331005578027, - "loss": 46.0, - "step": 22874 - }, - { - "epoch": 3.683843955070655, - "grad_norm": 0.018603768199682236, - "learning_rate": 0.00019999330947057943, - "loss": 46.0, - "step": 22875 - }, - { - "epoch": 3.6840049921494424, - "grad_norm": 0.002960095414891839, - "learning_rate": 0.000199993308885353, - "loss": 46.0, - "step": 22876 - }, - { - "epoch": 3.68416602922823, - "grad_norm": 0.005040630232542753, - "learning_rate": 0.000199993308300101, - "loss": 46.0, - "step": 22877 - }, - { - "epoch": 3.6843270663070173, - "grad_norm": 0.0059850625693798065, - "learning_rate": 0.00019999330771482338, - "loss": 46.0, - "step": 22878 - }, - { - "epoch": 3.684488103385805, - "grad_norm": 0.0017186513869091868, - "learning_rate": 0.00019999330712952017, - "loss": 46.0, - "step": 22879 - }, - { - "epoch": 3.6846491404645922, - "grad_norm": 0.013015073724091053, - "learning_rate": 0.00019999330654419138, - "loss": 46.0, - "step": 22880 - }, - { - "epoch": 3.6848101775433793, - "grad_norm": 0.004506245721131563, - "learning_rate": 0.000199993305958837, - "loss": 46.0, - "step": 22881 - }, - { - "epoch": 3.6849712146221667, - "grad_norm": 0.0019291311036795378, - "learning_rate": 0.00019999330537345702, - "loss": 46.0, - "step": 22882 - }, - { - "epoch": 3.685132251700954, - "grad_norm": 0.007094101049005985, - "learning_rate": 0.00019999330478805144, - "loss": 46.0, - "step": 22883 - }, - { - "epoch": 3.6852932887797416, - "grad_norm": 0.0028363880701363087, - "learning_rate": 0.00019999330420262027, - "loss": 46.0, - "step": 22884 - }, - { - "epoch": 3.685454325858529, - "grad_norm": 0.005502133630216122, - "learning_rate": 0.0001999933036171635, - "loss": 46.0, - "step": 22885 - }, - { - "epoch": 3.685615362937316, - "grad_norm": 0.005104415584355593, - "learning_rate": 0.00019999330303168114, - "loss": 46.0, - "step": 22886 - }, - { - "epoch": 3.6857764000161035, - "grad_norm": 0.005357289686799049, - "learning_rate": 0.00019999330244617318, - "loss": 46.0, - "step": 22887 - }, - { - "epoch": 3.685937437094891, - "grad_norm": 0.0006300599779933691, - "learning_rate": 0.00019999330186063966, - "loss": 46.0, - "step": 22888 - }, - { - "epoch": 3.6860984741736784, - "grad_norm": 0.013662843964993954, - "learning_rate": 0.0001999933012750805, - "loss": 46.0, - "step": 22889 - }, - { - "epoch": 3.686259511252466, - "grad_norm": 0.0019073965959250927, - "learning_rate": 0.00019999330068949577, - "loss": 46.0, - "step": 22890 - }, - { - "epoch": 3.6864205483312533, - "grad_norm": 0.0030715062748640776, - "learning_rate": 0.00019999330010388543, - "loss": 46.0, - "step": 22891 - }, - { - "epoch": 3.6865815854100408, - "grad_norm": 0.0018682855879887938, - "learning_rate": 0.00019999329951824954, - "loss": 46.0, - "step": 22892 - }, - { - "epoch": 3.6867426224888282, - "grad_norm": 0.0003367076569702476, - "learning_rate": 0.00019999329893258803, - "loss": 46.0, - "step": 22893 - }, - { - "epoch": 3.6869036595676157, - "grad_norm": 0.005779862403869629, - "learning_rate": 0.00019999329834690093, - "loss": 46.0, - "step": 22894 - }, - { - "epoch": 3.6870646966464027, - "grad_norm": 0.0035860328935086727, - "learning_rate": 0.00019999329776118822, - "loss": 46.0, - "step": 22895 - }, - { - "epoch": 3.68722573372519, - "grad_norm": 0.0022047758102416992, - "learning_rate": 0.00019999329717544995, - "loss": 46.0, - "step": 22896 - }, - { - "epoch": 3.6873867708039776, - "grad_norm": 0.0029248481150716543, - "learning_rate": 0.00019999329658968606, - "loss": 46.0, - "step": 22897 - }, - { - "epoch": 3.687547807882765, - "grad_norm": 0.015097597613930702, - "learning_rate": 0.00019999329600389659, - "loss": 46.0, - "step": 22898 - }, - { - "epoch": 3.6877088449615525, - "grad_norm": 0.0012811818160116673, - "learning_rate": 0.00019999329541808153, - "loss": 46.0, - "step": 22899 - }, - { - "epoch": 3.68786988204034, - "grad_norm": 0.00263396929949522, - "learning_rate": 0.00019999329483224085, - "loss": 46.0, - "step": 22900 - }, - { - "epoch": 3.688030919119127, - "grad_norm": 0.004833109211176634, - "learning_rate": 0.00019999329424637462, - "loss": 46.0, - "step": 22901 - }, - { - "epoch": 3.6881919561979144, - "grad_norm": 0.0014522494748234749, - "learning_rate": 0.00019999329366048274, - "loss": 46.0, - "step": 22902 - }, - { - "epoch": 3.688352993276702, - "grad_norm": 0.0032425944227725267, - "learning_rate": 0.0001999932930745653, - "loss": 46.0, - "step": 22903 - }, - { - "epoch": 3.6885140303554893, - "grad_norm": 0.0021811225451529026, - "learning_rate": 0.00019999329248862225, - "loss": 46.0, - "step": 22904 - }, - { - "epoch": 3.6886750674342768, - "grad_norm": 0.0019636524375528097, - "learning_rate": 0.00019999329190265367, - "loss": 46.0, - "step": 22905 - }, - { - "epoch": 3.688836104513064, - "grad_norm": 0.003221196820959449, - "learning_rate": 0.00019999329131665942, - "loss": 46.0, - "step": 22906 - }, - { - "epoch": 3.6889971415918517, - "grad_norm": 0.004156569018959999, - "learning_rate": 0.0001999932907306396, - "loss": 46.0, - "step": 22907 - }, - { - "epoch": 3.689158178670639, - "grad_norm": 0.0012089312076568604, - "learning_rate": 0.00019999329014459423, - "loss": 46.0, - "step": 22908 - }, - { - "epoch": 3.6893192157494266, - "grad_norm": 0.0009855962125584483, - "learning_rate": 0.00019999328955852322, - "loss": 46.0, - "step": 22909 - }, - { - "epoch": 3.6894802528282136, - "grad_norm": 0.004043465480208397, - "learning_rate": 0.00019999328897242662, - "loss": 46.0, - "step": 22910 - }, - { - "epoch": 3.689641289907001, - "grad_norm": 0.002063443884253502, - "learning_rate": 0.00019999328838630445, - "loss": 46.0, - "step": 22911 - }, - { - "epoch": 3.6898023269857885, - "grad_norm": 0.0078023886308074, - "learning_rate": 0.00019999328780015665, - "loss": 46.0, - "step": 22912 - }, - { - "epoch": 3.689963364064576, - "grad_norm": 0.003180890344083309, - "learning_rate": 0.00019999328721398326, - "loss": 46.0, - "step": 22913 - }, - { - "epoch": 3.6901244011433634, - "grad_norm": 0.0017783313523977995, - "learning_rate": 0.0001999932866277843, - "loss": 46.0, - "step": 22914 - }, - { - "epoch": 3.6902854382221504, - "grad_norm": 0.005659760441631079, - "learning_rate": 0.00019999328604155975, - "loss": 46.0, - "step": 22915 - }, - { - "epoch": 3.690446475300938, - "grad_norm": 0.0030311914160847664, - "learning_rate": 0.00019999328545530963, - "loss": 46.0, - "step": 22916 - }, - { - "epoch": 3.6906075123797253, - "grad_norm": 0.002965526655316353, - "learning_rate": 0.00019999328486903386, - "loss": 46.0, - "step": 22917 - }, - { - "epoch": 3.6907685494585127, - "grad_norm": 0.002880080370232463, - "learning_rate": 0.0001999932842827325, - "loss": 46.0, - "step": 22918 - }, - { - "epoch": 3.6909295865373, - "grad_norm": 0.005357214715331793, - "learning_rate": 0.00019999328369640557, - "loss": 46.0, - "step": 22919 - }, - { - "epoch": 3.6910906236160876, - "grad_norm": 0.0023911993484944105, - "learning_rate": 0.00019999328311005304, - "loss": 46.0, - "step": 22920 - }, - { - "epoch": 3.691251660694875, - "grad_norm": 0.01646076887845993, - "learning_rate": 0.00019999328252367495, - "loss": 46.0, - "step": 22921 - }, - { - "epoch": 3.6914126977736625, - "grad_norm": 0.02226894348859787, - "learning_rate": 0.00019999328193727123, - "loss": 46.0, - "step": 22922 - }, - { - "epoch": 3.69157373485245, - "grad_norm": 0.0019311883952468634, - "learning_rate": 0.0001999932813508419, - "loss": 46.0, - "step": 22923 - }, - { - "epoch": 3.691734771931237, - "grad_norm": 0.005404660943895578, - "learning_rate": 0.00019999328076438703, - "loss": 46.0, - "step": 22924 - }, - { - "epoch": 3.6918958090100245, - "grad_norm": 0.0089335385710001, - "learning_rate": 0.00019999328017790655, - "loss": 46.0, - "step": 22925 - }, - { - "epoch": 3.692056846088812, - "grad_norm": 0.0027518875431269407, - "learning_rate": 0.00019999327959140047, - "loss": 46.0, - "step": 22926 - }, - { - "epoch": 3.6922178831675994, - "grad_norm": 0.008946986868977547, - "learning_rate": 0.00019999327900486878, - "loss": 46.0, - "step": 22927 - }, - { - "epoch": 3.692378920246387, - "grad_norm": 0.003374751191586256, - "learning_rate": 0.00019999327841831153, - "loss": 46.0, - "step": 22928 - }, - { - "epoch": 3.6925399573251743, - "grad_norm": 0.0034910866525024176, - "learning_rate": 0.00019999327783172866, - "loss": 46.0, - "step": 22929 - }, - { - "epoch": 3.6927009944039613, - "grad_norm": 0.005131212528795004, - "learning_rate": 0.0001999932772451202, - "loss": 46.0, - "step": 22930 - }, - { - "epoch": 3.6928620314827487, - "grad_norm": 0.0021010444033890963, - "learning_rate": 0.00019999327665848614, - "loss": 46.0, - "step": 22931 - }, - { - "epoch": 3.693023068561536, - "grad_norm": 0.0015383217250928283, - "learning_rate": 0.0001999932760718265, - "loss": 46.0, - "step": 22932 - }, - { - "epoch": 3.6931841056403236, - "grad_norm": 0.006102294661104679, - "learning_rate": 0.00019999327548514127, - "loss": 46.0, - "step": 22933 - }, - { - "epoch": 3.693345142719111, - "grad_norm": 0.0040518539026379585, - "learning_rate": 0.00019999327489843042, - "loss": 46.0, - "step": 22934 - }, - { - "epoch": 3.6935061797978985, - "grad_norm": 0.008036128245294094, - "learning_rate": 0.00019999327431169403, - "loss": 46.0, - "step": 22935 - }, - { - "epoch": 3.693667216876686, - "grad_norm": 0.003131825476884842, - "learning_rate": 0.00019999327372493202, - "loss": 46.0, - "step": 22936 - }, - { - "epoch": 3.6938282539554734, - "grad_norm": 0.0014921118272468448, - "learning_rate": 0.0001999932731381444, - "loss": 46.0, - "step": 22937 - }, - { - "epoch": 3.693989291034261, - "grad_norm": 0.001481468672864139, - "learning_rate": 0.0001999932725513312, - "loss": 46.0, - "step": 22938 - }, - { - "epoch": 3.694150328113048, - "grad_norm": 0.005379470065236092, - "learning_rate": 0.0001999932719644924, - "loss": 46.0, - "step": 22939 - }, - { - "epoch": 3.6943113651918353, - "grad_norm": 0.00589486351236701, - "learning_rate": 0.00019999327137762803, - "loss": 46.0, - "step": 22940 - }, - { - "epoch": 3.694472402270623, - "grad_norm": 0.005143916234374046, - "learning_rate": 0.00019999327079073804, - "loss": 46.0, - "step": 22941 - }, - { - "epoch": 3.6946334393494102, - "grad_norm": 0.00259154150262475, - "learning_rate": 0.00019999327020382246, - "loss": 46.0, - "step": 22942 - }, - { - "epoch": 3.6947944764281977, - "grad_norm": 0.003596675116568804, - "learning_rate": 0.0001999932696168813, - "loss": 46.0, - "step": 22943 - }, - { - "epoch": 3.694955513506985, - "grad_norm": 0.0015946300700306892, - "learning_rate": 0.00019999326902991454, - "loss": 46.0, - "step": 22944 - }, - { - "epoch": 3.695116550585772, - "grad_norm": 0.0016247582389041781, - "learning_rate": 0.0001999932684429222, - "loss": 46.0, - "step": 22945 - }, - { - "epoch": 3.6952775876645596, - "grad_norm": 0.0006699913647025824, - "learning_rate": 0.0001999932678559042, - "loss": 46.0, - "step": 22946 - }, - { - "epoch": 3.695438624743347, - "grad_norm": 0.001453114440664649, - "learning_rate": 0.0001999932672688607, - "loss": 46.0, - "step": 22947 - }, - { - "epoch": 3.6955996618221345, - "grad_norm": 0.005377592984586954, - "learning_rate": 0.00019999326668179157, - "loss": 46.0, - "step": 22948 - }, - { - "epoch": 3.695760698900922, - "grad_norm": 0.004424140322953463, - "learning_rate": 0.00019999326609469685, - "loss": 46.0, - "step": 22949 - }, - { - "epoch": 3.6959217359797094, - "grad_norm": 0.010377160273492336, - "learning_rate": 0.00019999326550757652, - "loss": 46.0, - "step": 22950 - }, - { - "epoch": 3.696082773058497, - "grad_norm": 0.0022835538256913424, - "learning_rate": 0.00019999326492043063, - "loss": 46.0, - "step": 22951 - }, - { - "epoch": 3.6962438101372843, - "grad_norm": 0.003912990912795067, - "learning_rate": 0.00019999326433325912, - "loss": 46.0, - "step": 22952 - }, - { - "epoch": 3.6964048472160718, - "grad_norm": 0.011481009423732758, - "learning_rate": 0.00019999326374606203, - "loss": 46.0, - "step": 22953 - }, - { - "epoch": 3.6965658842948588, - "grad_norm": 0.007846255786716938, - "learning_rate": 0.00019999326315883932, - "loss": 46.0, - "step": 22954 - }, - { - "epoch": 3.6967269213736462, - "grad_norm": 0.0004045862006023526, - "learning_rate": 0.00019999326257159102, - "loss": 46.0, - "step": 22955 - }, - { - "epoch": 3.6968879584524337, - "grad_norm": 0.002795801032334566, - "learning_rate": 0.00019999326198431714, - "loss": 46.0, - "step": 22956 - }, - { - "epoch": 3.697048995531221, - "grad_norm": 0.004743854980915785, - "learning_rate": 0.00019999326139701767, - "loss": 46.0, - "step": 22957 - }, - { - "epoch": 3.6972100326100086, - "grad_norm": 0.004762520547956228, - "learning_rate": 0.00019999326080969262, - "loss": 46.0, - "step": 22958 - }, - { - "epoch": 3.6973710696887956, - "grad_norm": 0.013277491554617882, - "learning_rate": 0.00019999326022234195, - "loss": 46.0, - "step": 22959 - }, - { - "epoch": 3.697532106767583, - "grad_norm": 0.004574695136398077, - "learning_rate": 0.00019999325963496572, - "loss": 46.0, - "step": 22960 - }, - { - "epoch": 3.6976931438463705, - "grad_norm": 0.008152835071086884, - "learning_rate": 0.00019999325904756384, - "loss": 46.0, - "step": 22961 - }, - { - "epoch": 3.697854180925158, - "grad_norm": 0.0036802445538342, - "learning_rate": 0.00019999325846013644, - "loss": 46.0, - "step": 22962 - }, - { - "epoch": 3.6980152180039454, - "grad_norm": 0.003269928740337491, - "learning_rate": 0.0001999932578726834, - "loss": 46.0, - "step": 22963 - }, - { - "epoch": 3.698176255082733, - "grad_norm": 0.0023690026719123125, - "learning_rate": 0.00019999325728520476, - "loss": 46.0, - "step": 22964 - }, - { - "epoch": 3.6983372921615203, - "grad_norm": 0.002575025660917163, - "learning_rate": 0.00019999325669770057, - "loss": 46.0, - "step": 22965 - }, - { - "epoch": 3.6984983292403077, - "grad_norm": 0.0053553506731987, - "learning_rate": 0.00019999325611017076, - "loss": 46.0, - "step": 22966 - }, - { - "epoch": 3.698659366319095, - "grad_norm": 0.0030848453752696514, - "learning_rate": 0.00019999325552261534, - "loss": 46.0, - "step": 22967 - }, - { - "epoch": 3.698820403397882, - "grad_norm": 0.004077146295458078, - "learning_rate": 0.00019999325493503435, - "loss": 46.0, - "step": 22968 - }, - { - "epoch": 3.6989814404766697, - "grad_norm": 0.011258543469011784, - "learning_rate": 0.00019999325434742776, - "loss": 46.0, - "step": 22969 - }, - { - "epoch": 3.699142477555457, - "grad_norm": 0.001314330380409956, - "learning_rate": 0.00019999325375979557, - "loss": 46.0, - "step": 22970 - }, - { - "epoch": 3.6993035146342446, - "grad_norm": 0.0031963076908141375, - "learning_rate": 0.0001999932531721378, - "loss": 46.0, - "step": 22971 - }, - { - "epoch": 3.699464551713032, - "grad_norm": 0.0034081123303622007, - "learning_rate": 0.00019999325258445442, - "loss": 46.0, - "step": 22972 - }, - { - "epoch": 3.6996255887918195, - "grad_norm": 0.005806222092360258, - "learning_rate": 0.00019999325199674547, - "loss": 46.0, - "step": 22973 - }, - { - "epoch": 3.6997866258706065, - "grad_norm": 0.0014548038598150015, - "learning_rate": 0.00019999325140901094, - "loss": 46.0, - "step": 22974 - }, - { - "epoch": 3.699947662949394, - "grad_norm": 0.003776298835873604, - "learning_rate": 0.00019999325082125077, - "loss": 46.0, - "step": 22975 - }, - { - "epoch": 3.7001087000281814, - "grad_norm": 0.0030455931555479765, - "learning_rate": 0.00019999325023346503, - "loss": 46.0, - "step": 22976 - }, - { - "epoch": 3.700269737106969, - "grad_norm": 0.008189480751752853, - "learning_rate": 0.0001999932496456537, - "loss": 46.0, - "step": 22977 - }, - { - "epoch": 3.7004307741857563, - "grad_norm": 0.004415380302816629, - "learning_rate": 0.00019999324905781678, - "loss": 46.0, - "step": 22978 - }, - { - "epoch": 3.7005918112645437, - "grad_norm": 0.0030416182707995176, - "learning_rate": 0.00019999324846995425, - "loss": 46.0, - "step": 22979 - }, - { - "epoch": 3.700752848343331, - "grad_norm": 0.015108397230505943, - "learning_rate": 0.00019999324788206615, - "loss": 46.0, - "step": 22980 - }, - { - "epoch": 3.7009138854221186, - "grad_norm": 0.011023464612662792, - "learning_rate": 0.00019999324729415245, - "loss": 46.0, - "step": 22981 - }, - { - "epoch": 3.701074922500906, - "grad_norm": 0.005132417194545269, - "learning_rate": 0.00019999324670621314, - "loss": 46.0, - "step": 22982 - }, - { - "epoch": 3.701235959579693, - "grad_norm": 0.0005180487642064691, - "learning_rate": 0.00019999324611824824, - "loss": 46.0, - "step": 22983 - }, - { - "epoch": 3.7013969966584805, - "grad_norm": 0.002225810894742608, - "learning_rate": 0.00019999324553025775, - "loss": 46.0, - "step": 22984 - }, - { - "epoch": 3.701558033737268, - "grad_norm": 0.005416846368461847, - "learning_rate": 0.0001999932449422417, - "loss": 46.0, - "step": 22985 - }, - { - "epoch": 3.7017190708160554, - "grad_norm": 0.00812110211700201, - "learning_rate": 0.0001999932443542, - "loss": 46.0, - "step": 22986 - }, - { - "epoch": 3.701880107894843, - "grad_norm": 0.0024413838982582092, - "learning_rate": 0.00019999324376613275, - "loss": 46.0, - "step": 22987 - }, - { - "epoch": 3.70204114497363, - "grad_norm": 0.0050889188423752785, - "learning_rate": 0.0001999932431780399, - "loss": 46.0, - "step": 22988 - }, - { - "epoch": 3.7022021820524174, - "grad_norm": 0.002696953946724534, - "learning_rate": 0.00019999324258992144, - "loss": 46.0, - "step": 22989 - }, - { - "epoch": 3.702363219131205, - "grad_norm": 0.002093171002343297, - "learning_rate": 0.00019999324200177738, - "loss": 46.0, - "step": 22990 - }, - { - "epoch": 3.7025242562099923, - "grad_norm": 0.0025020111352205276, - "learning_rate": 0.00019999324141360775, - "loss": 46.0, - "step": 22991 - }, - { - "epoch": 3.7026852932887797, - "grad_norm": 0.0035636061802506447, - "learning_rate": 0.00019999324082541252, - "loss": 46.0, - "step": 22992 - }, - { - "epoch": 3.702846330367567, - "grad_norm": 0.003597965929657221, - "learning_rate": 0.0001999932402371917, - "loss": 46.0, - "step": 22993 - }, - { - "epoch": 3.7030073674463546, - "grad_norm": 0.002469766652211547, - "learning_rate": 0.00019999323964894528, - "loss": 46.0, - "step": 22994 - }, - { - "epoch": 3.703168404525142, - "grad_norm": 0.0026130781043320894, - "learning_rate": 0.00019999323906067326, - "loss": 46.0, - "step": 22995 - }, - { - "epoch": 3.7033294416039295, - "grad_norm": 0.008627819828689098, - "learning_rate": 0.00019999323847237567, - "loss": 46.0, - "step": 22996 - }, - { - "epoch": 3.703490478682717, - "grad_norm": 0.006470984313637018, - "learning_rate": 0.00019999323788405247, - "loss": 46.0, - "step": 22997 - }, - { - "epoch": 3.703651515761504, - "grad_norm": 0.013201608322560787, - "learning_rate": 0.00019999323729570368, - "loss": 46.0, - "step": 22998 - }, - { - "epoch": 3.7038125528402914, - "grad_norm": 0.0015339754754677415, - "learning_rate": 0.0001999932367073293, - "loss": 46.0, - "step": 22999 - }, - { - "epoch": 3.703973589919079, - "grad_norm": 0.008167789317667484, - "learning_rate": 0.0001999932361189293, - "loss": 46.0, - "step": 23000 - }, - { - "epoch": 3.7041346269978663, - "grad_norm": 0.001999037340283394, - "learning_rate": 0.00019999323553050375, - "loss": 46.0, - "step": 23001 - }, - { - "epoch": 3.704295664076654, - "grad_norm": 0.0035351796541363, - "learning_rate": 0.00019999323494205258, - "loss": 46.0, - "step": 23002 - }, - { - "epoch": 3.704456701155441, - "grad_norm": 0.008765707723796368, - "learning_rate": 0.00019999323435357583, - "loss": 46.0, - "step": 23003 - }, - { - "epoch": 3.7046177382342282, - "grad_norm": 0.005180766806006432, - "learning_rate": 0.0001999932337650735, - "loss": 46.0, - "step": 23004 - }, - { - "epoch": 3.7047787753130157, - "grad_norm": 0.0035774833522737026, - "learning_rate": 0.00019999323317654554, - "loss": 46.0, - "step": 23005 - }, - { - "epoch": 3.704939812391803, - "grad_norm": 0.0008684717467986047, - "learning_rate": 0.000199993232587992, - "loss": 46.0, - "step": 23006 - }, - { - "epoch": 3.7051008494705906, - "grad_norm": 0.0015333372866734862, - "learning_rate": 0.00019999323199941288, - "loss": 46.0, - "step": 23007 - }, - { - "epoch": 3.705261886549378, - "grad_norm": 0.0018458024132996798, - "learning_rate": 0.00019999323141080817, - "loss": 46.0, - "step": 23008 - }, - { - "epoch": 3.7054229236281655, - "grad_norm": 0.0025055750738829374, - "learning_rate": 0.00019999323082217784, - "loss": 46.0, - "step": 23009 - }, - { - "epoch": 3.705583960706953, - "grad_norm": 0.010124053806066513, - "learning_rate": 0.00019999323023352195, - "loss": 46.0, - "step": 23010 - }, - { - "epoch": 3.7057449977857404, - "grad_norm": 0.0055197617039084435, - "learning_rate": 0.00019999322964484045, - "loss": 46.0, - "step": 23011 - }, - { - "epoch": 3.7059060348645274, - "grad_norm": 0.006251504644751549, - "learning_rate": 0.00019999322905613336, - "loss": 46.0, - "step": 23012 - }, - { - "epoch": 3.706067071943315, - "grad_norm": 0.0018859145930036902, - "learning_rate": 0.00019999322846740066, - "loss": 46.0, - "step": 23013 - }, - { - "epoch": 3.7062281090221023, - "grad_norm": 0.005661732517182827, - "learning_rate": 0.00019999322787864237, - "loss": 46.0, - "step": 23014 - }, - { - "epoch": 3.7063891461008898, - "grad_norm": 0.005678261164575815, - "learning_rate": 0.0001999932272898585, - "loss": 46.0, - "step": 23015 - }, - { - "epoch": 3.706550183179677, - "grad_norm": 0.004850441589951515, - "learning_rate": 0.00019999322670104905, - "loss": 46.0, - "step": 23016 - }, - { - "epoch": 3.7067112202584647, - "grad_norm": 0.0026961183175444603, - "learning_rate": 0.00019999322611221398, - "loss": 46.0, - "step": 23017 - }, - { - "epoch": 3.7068722573372517, - "grad_norm": 0.000925944943446666, - "learning_rate": 0.00019999322552335334, - "loss": 46.0, - "step": 23018 - }, - { - "epoch": 3.707033294416039, - "grad_norm": 0.0021295666228979826, - "learning_rate": 0.00019999322493446708, - "loss": 46.0, - "step": 23019 - }, - { - "epoch": 3.7071943314948266, - "grad_norm": 0.0023116902448236942, - "learning_rate": 0.00019999322434555524, - "loss": 46.0, - "step": 23020 - }, - { - "epoch": 3.707355368573614, - "grad_norm": 0.0020218666177242994, - "learning_rate": 0.00019999322375661782, - "loss": 46.0, - "step": 23021 - }, - { - "epoch": 3.7075164056524015, - "grad_norm": 0.0015538507141172886, - "learning_rate": 0.00019999322316765477, - "loss": 46.0, - "step": 23022 - }, - { - "epoch": 3.707677442731189, - "grad_norm": 0.0073905824683606625, - "learning_rate": 0.00019999322257866617, - "loss": 46.0, - "step": 23023 - }, - { - "epoch": 3.7078384798099764, - "grad_norm": 0.0058021689765155315, - "learning_rate": 0.00019999322198965196, - "loss": 46.0, - "step": 23024 - }, - { - "epoch": 3.707999516888764, - "grad_norm": 0.0016744486056268215, - "learning_rate": 0.00019999322140061215, - "loss": 46.0, - "step": 23025 - }, - { - "epoch": 3.7081605539675513, - "grad_norm": 0.003271284978836775, - "learning_rate": 0.00019999322081154674, - "loss": 46.0, - "step": 23026 - }, - { - "epoch": 3.7083215910463383, - "grad_norm": 0.002253343816846609, - "learning_rate": 0.00019999322022245576, - "loss": 46.0, - "step": 23027 - }, - { - "epoch": 3.7084826281251257, - "grad_norm": 0.0013409563107416034, - "learning_rate": 0.00019999321963333917, - "loss": 46.0, - "step": 23028 - }, - { - "epoch": 3.708643665203913, - "grad_norm": 0.0016886061057448387, - "learning_rate": 0.00019999321904419701, - "loss": 46.0, - "step": 23029 - }, - { - "epoch": 3.7088047022827006, - "grad_norm": 0.006010679993778467, - "learning_rate": 0.00019999321845502922, - "loss": 46.0, - "step": 23030 - }, - { - "epoch": 3.708965739361488, - "grad_norm": 0.005514186341315508, - "learning_rate": 0.00019999321786583587, - "loss": 46.0, - "step": 23031 - }, - { - "epoch": 3.709126776440275, - "grad_norm": 0.004206299316138029, - "learning_rate": 0.00019999321727661693, - "loss": 46.0, - "step": 23032 - }, - { - "epoch": 3.7092878135190626, - "grad_norm": 0.0025531467981636524, - "learning_rate": 0.00019999321668737237, - "loss": 46.0, - "step": 23033 - }, - { - "epoch": 3.70944885059785, - "grad_norm": 0.0012883504386991262, - "learning_rate": 0.00019999321609810223, - "loss": 46.0, - "step": 23034 - }, - { - "epoch": 3.7096098876766375, - "grad_norm": 0.002383655169978738, - "learning_rate": 0.0001999932155088065, - "loss": 46.0, - "step": 23035 - }, - { - "epoch": 3.709770924755425, - "grad_norm": 0.0033795132767409086, - "learning_rate": 0.00019999321491948516, - "loss": 46.0, - "step": 23036 - }, - { - "epoch": 3.7099319618342124, - "grad_norm": 0.001663564471527934, - "learning_rate": 0.00019999321433013823, - "loss": 46.0, - "step": 23037 - }, - { - "epoch": 3.710092998913, - "grad_norm": 0.008418679237365723, - "learning_rate": 0.0001999932137407657, - "loss": 46.0, - "step": 23038 - }, - { - "epoch": 3.7102540359917873, - "grad_norm": 0.0033010654151439667, - "learning_rate": 0.00019999321315136763, - "loss": 46.0, - "step": 23039 - }, - { - "epoch": 3.7104150730705747, - "grad_norm": 0.005354561842978001, - "learning_rate": 0.0001999932125619439, - "loss": 46.0, - "step": 23040 - }, - { - "epoch": 3.7105761101493617, - "grad_norm": 0.008223689161241055, - "learning_rate": 0.00019999321197249463, - "loss": 46.0, - "step": 23041 - }, - { - "epoch": 3.710737147228149, - "grad_norm": 0.004386438522487879, - "learning_rate": 0.0001999932113830197, - "loss": 46.0, - "step": 23042 - }, - { - "epoch": 3.7108981843069366, - "grad_norm": 0.0010133887408301234, - "learning_rate": 0.00019999321079351926, - "loss": 46.0, - "step": 23043 - }, - { - "epoch": 3.711059221385724, - "grad_norm": 0.006740883458405733, - "learning_rate": 0.00019999321020399316, - "loss": 46.0, - "step": 23044 - }, - { - "epoch": 3.7112202584645115, - "grad_norm": 0.0010446736123412848, - "learning_rate": 0.0001999932096144415, - "loss": 46.0, - "step": 23045 - }, - { - "epoch": 3.711381295543299, - "grad_norm": 0.00254206545650959, - "learning_rate": 0.00019999320902486424, - "loss": 46.0, - "step": 23046 - }, - { - "epoch": 3.711542332622086, - "grad_norm": 0.009789474308490753, - "learning_rate": 0.0001999932084352614, - "loss": 46.0, - "step": 23047 - }, - { - "epoch": 3.7117033697008734, - "grad_norm": 0.001101772882975638, - "learning_rate": 0.00019999320784563297, - "loss": 46.0, - "step": 23048 - }, - { - "epoch": 3.711864406779661, - "grad_norm": 0.0047613405622541904, - "learning_rate": 0.0001999932072559789, - "loss": 46.0, - "step": 23049 - }, - { - "epoch": 3.7120254438584483, - "grad_norm": 0.001397734391503036, - "learning_rate": 0.0001999932066662993, - "loss": 46.0, - "step": 23050 - }, - { - "epoch": 3.712186480937236, - "grad_norm": 0.00730739114806056, - "learning_rate": 0.00019999320607659406, - "loss": 46.0, - "step": 23051 - }, - { - "epoch": 3.7123475180160233, - "grad_norm": 0.009815474972128868, - "learning_rate": 0.0001999932054868632, - "loss": 46.0, - "step": 23052 - }, - { - "epoch": 3.7125085550948107, - "grad_norm": 0.0046633221209049225, - "learning_rate": 0.00019999320489710683, - "loss": 46.0, - "step": 23053 - }, - { - "epoch": 3.712669592173598, - "grad_norm": 0.004195145331323147, - "learning_rate": 0.0001999932043073248, - "loss": 46.0, - "step": 23054 - }, - { - "epoch": 3.7128306292523856, - "grad_norm": 0.015399031341075897, - "learning_rate": 0.00019999320371751723, - "loss": 46.0, - "step": 23055 - }, - { - "epoch": 3.7129916663311726, - "grad_norm": 0.0033258432522416115, - "learning_rate": 0.000199993203127684, - "loss": 46.0, - "step": 23056 - }, - { - "epoch": 3.71315270340996, - "grad_norm": 0.0014301176415756345, - "learning_rate": 0.00019999320253782522, - "loss": 46.0, - "step": 23057 - }, - { - "epoch": 3.7133137404887475, - "grad_norm": 0.0015852913493290544, - "learning_rate": 0.00019999320194794085, - "loss": 46.0, - "step": 23058 - }, - { - "epoch": 3.713474777567535, - "grad_norm": 0.009821073152124882, - "learning_rate": 0.00019999320135803087, - "loss": 46.0, - "step": 23059 - }, - { - "epoch": 3.7136358146463224, - "grad_norm": 0.008045095019042492, - "learning_rate": 0.00019999320076809533, - "loss": 46.0, - "step": 23060 - }, - { - "epoch": 3.71379685172511, - "grad_norm": 0.002824934897944331, - "learning_rate": 0.00019999320017813417, - "loss": 46.0, - "step": 23061 - }, - { - "epoch": 3.713957888803897, - "grad_norm": 0.002330234507098794, - "learning_rate": 0.00019999319958814745, - "loss": 46.0, - "step": 23062 - }, - { - "epoch": 3.7141189258826843, - "grad_norm": 0.0033003368880599737, - "learning_rate": 0.0001999931989981351, - "loss": 46.0, - "step": 23063 - }, - { - "epoch": 3.714279962961472, - "grad_norm": 0.0009408897021785378, - "learning_rate": 0.00019999319840809717, - "loss": 46.0, - "step": 23064 - }, - { - "epoch": 3.7144410000402592, - "grad_norm": 0.002168254228308797, - "learning_rate": 0.0001999931978180336, - "loss": 46.0, - "step": 23065 - }, - { - "epoch": 3.7146020371190467, - "grad_norm": 0.004309351556003094, - "learning_rate": 0.0001999931972279445, - "loss": 46.0, - "step": 23066 - }, - { - "epoch": 3.714763074197834, - "grad_norm": 0.003641866846010089, - "learning_rate": 0.00019999319663782975, - "loss": 46.0, - "step": 23067 - }, - { - "epoch": 3.7149241112766216, - "grad_norm": 0.015055439434945583, - "learning_rate": 0.00019999319604768946, - "loss": 46.0, - "step": 23068 - }, - { - "epoch": 3.715085148355409, - "grad_norm": 0.0008330951677635312, - "learning_rate": 0.00019999319545752355, - "loss": 46.0, - "step": 23069 - }, - { - "epoch": 3.7152461854341965, - "grad_norm": 0.0017588816117495298, - "learning_rate": 0.00019999319486733205, - "loss": 46.0, - "step": 23070 - }, - { - "epoch": 3.7154072225129835, - "grad_norm": 0.0015629057306796312, - "learning_rate": 0.000199993194277115, - "loss": 46.0, - "step": 23071 - }, - { - "epoch": 3.715568259591771, - "grad_norm": 0.003930437844246626, - "learning_rate": 0.0001999931936868723, - "loss": 46.0, - "step": 23072 - }, - { - "epoch": 3.7157292966705584, - "grad_norm": 0.016792690381407738, - "learning_rate": 0.00019999319309660404, - "loss": 46.0, - "step": 23073 - }, - { - "epoch": 3.715890333749346, - "grad_norm": 0.004370777402073145, - "learning_rate": 0.00019999319250631014, - "loss": 46.0, - "step": 23074 - }, - { - "epoch": 3.7160513708281333, - "grad_norm": 0.0033224904909729958, - "learning_rate": 0.0001999931919159907, - "loss": 46.0, - "step": 23075 - }, - { - "epoch": 3.7162124079069203, - "grad_norm": 0.0037025297060608864, - "learning_rate": 0.00019999319132564566, - "loss": 46.0, - "step": 23076 - }, - { - "epoch": 3.7163734449857078, - "grad_norm": 0.0005642303149215877, - "learning_rate": 0.000199993190735275, - "loss": 46.0, - "step": 23077 - }, - { - "epoch": 3.716534482064495, - "grad_norm": 0.004197478760033846, - "learning_rate": 0.00019999319014487877, - "loss": 46.0, - "step": 23078 - }, - { - "epoch": 3.7166955191432827, - "grad_norm": 0.0027420350816100836, - "learning_rate": 0.00019999318955445694, - "loss": 46.0, - "step": 23079 - }, - { - "epoch": 3.71685655622207, - "grad_norm": 0.0030675502493977547, - "learning_rate": 0.0001999931889640095, - "loss": 46.0, - "step": 23080 - }, - { - "epoch": 3.7170175933008576, - "grad_norm": 0.013590652495622635, - "learning_rate": 0.00019999318837353648, - "loss": 46.0, - "step": 23081 - }, - { - "epoch": 3.717178630379645, - "grad_norm": 0.003240320598706603, - "learning_rate": 0.00019999318778303785, - "loss": 46.0, - "step": 23082 - }, - { - "epoch": 3.7173396674584325, - "grad_norm": 0.002534376922994852, - "learning_rate": 0.00019999318719251364, - "loss": 46.0, - "step": 23083 - }, - { - "epoch": 3.71750070453722, - "grad_norm": 0.003964030183851719, - "learning_rate": 0.00019999318660196384, - "loss": 46.0, - "step": 23084 - }, - { - "epoch": 3.717661741616007, - "grad_norm": 0.00806252472102642, - "learning_rate": 0.00019999318601138848, - "loss": 46.0, - "step": 23085 - }, - { - "epoch": 3.7178227786947944, - "grad_norm": 0.004998079035431147, - "learning_rate": 0.00019999318542078748, - "loss": 46.0, - "step": 23086 - }, - { - "epoch": 3.717983815773582, - "grad_norm": 0.0014475666685029864, - "learning_rate": 0.00019999318483016092, - "loss": 46.0, - "step": 23087 - }, - { - "epoch": 3.7181448528523693, - "grad_norm": 0.006805706303566694, - "learning_rate": 0.00019999318423950872, - "loss": 46.0, - "step": 23088 - }, - { - "epoch": 3.7183058899311567, - "grad_norm": 0.0072632646188139915, - "learning_rate": 0.00019999318364883096, - "loss": 46.0, - "step": 23089 - }, - { - "epoch": 3.718466927009944, - "grad_norm": 0.009562850929796696, - "learning_rate": 0.0001999931830581276, - "loss": 46.0, - "step": 23090 - }, - { - "epoch": 3.718627964088731, - "grad_norm": 0.0031430628150701523, - "learning_rate": 0.00019999318246739867, - "loss": 46.0, - "step": 23091 - }, - { - "epoch": 3.7187890011675186, - "grad_norm": 0.005490915384143591, - "learning_rate": 0.0001999931818766441, - "loss": 46.0, - "step": 23092 - }, - { - "epoch": 3.718950038246306, - "grad_norm": 0.0036144601181149483, - "learning_rate": 0.00019999318128586396, - "loss": 46.0, - "step": 23093 - }, - { - "epoch": 3.7191110753250936, - "grad_norm": 0.002688084961846471, - "learning_rate": 0.00019999318069505826, - "loss": 46.0, - "step": 23094 - }, - { - "epoch": 3.719272112403881, - "grad_norm": 0.007703827228397131, - "learning_rate": 0.00019999318010422692, - "loss": 46.0, - "step": 23095 - }, - { - "epoch": 3.7194331494826685, - "grad_norm": 0.0013276677345857024, - "learning_rate": 0.00019999317951337, - "loss": 46.0, - "step": 23096 - }, - { - "epoch": 3.719594186561456, - "grad_norm": 0.0012462981976568699, - "learning_rate": 0.0001999931789224875, - "loss": 46.0, - "step": 23097 - }, - { - "epoch": 3.7197552236402434, - "grad_norm": 0.005808768328279257, - "learning_rate": 0.0001999931783315794, - "loss": 46.0, - "step": 23098 - }, - { - "epoch": 3.719916260719031, - "grad_norm": 0.005260785110294819, - "learning_rate": 0.0001999931777406457, - "loss": 46.0, - "step": 23099 - }, - { - "epoch": 3.720077297797818, - "grad_norm": 0.0008696753066033125, - "learning_rate": 0.00019999317714968642, - "loss": 46.0, - "step": 23100 - }, - { - "epoch": 3.7202383348766053, - "grad_norm": 0.002557438099756837, - "learning_rate": 0.00019999317655870153, - "loss": 46.0, - "step": 23101 - }, - { - "epoch": 3.7203993719553927, - "grad_norm": 0.005627449601888657, - "learning_rate": 0.00019999317596769106, - "loss": 46.0, - "step": 23102 - }, - { - "epoch": 3.72056040903418, - "grad_norm": 0.004101383034139872, - "learning_rate": 0.000199993175376655, - "loss": 46.0, - "step": 23103 - }, - { - "epoch": 3.7207214461129676, - "grad_norm": 0.0019744932651519775, - "learning_rate": 0.00019999317478559334, - "loss": 46.0, - "step": 23104 - }, - { - "epoch": 3.7208824831917546, - "grad_norm": 0.002369930734857917, - "learning_rate": 0.00019999317419450608, - "loss": 46.0, - "step": 23105 - }, - { - "epoch": 3.721043520270542, - "grad_norm": 0.004191146697849035, - "learning_rate": 0.00019999317360339322, - "loss": 46.0, - "step": 23106 - }, - { - "epoch": 3.7212045573493295, - "grad_norm": 0.0037796811666339636, - "learning_rate": 0.00019999317301225479, - "loss": 46.0, - "step": 23107 - }, - { - "epoch": 3.721365594428117, - "grad_norm": 0.0052701495587825775, - "learning_rate": 0.00019999317242109076, - "loss": 46.0, - "step": 23108 - }, - { - "epoch": 3.7215266315069044, - "grad_norm": 0.0016494878800585866, - "learning_rate": 0.00019999317182990112, - "loss": 46.0, - "step": 23109 - }, - { - "epoch": 3.721687668585692, - "grad_norm": 0.0018284859834238887, - "learning_rate": 0.00019999317123868592, - "loss": 46.0, - "step": 23110 - }, - { - "epoch": 3.7218487056644793, - "grad_norm": 0.0016238036332651973, - "learning_rate": 0.00019999317064744508, - "loss": 46.0, - "step": 23111 - }, - { - "epoch": 3.722009742743267, - "grad_norm": 0.00222844653762877, - "learning_rate": 0.00019999317005617867, - "loss": 46.0, - "step": 23112 - }, - { - "epoch": 3.7221707798220542, - "grad_norm": 0.017566602677106857, - "learning_rate": 0.00019999316946488666, - "loss": 46.0, - "step": 23113 - }, - { - "epoch": 3.7223318169008417, - "grad_norm": 0.0033925585448741913, - "learning_rate": 0.00019999316887356908, - "loss": 46.0, - "step": 23114 - }, - { - "epoch": 3.7224928539796287, - "grad_norm": 0.0023799650371074677, - "learning_rate": 0.0001999931682822259, - "loss": 46.0, - "step": 23115 - }, - { - "epoch": 3.722653891058416, - "grad_norm": 0.0009959266753867269, - "learning_rate": 0.0001999931676908571, - "loss": 46.0, - "step": 23116 - }, - { - "epoch": 3.7228149281372036, - "grad_norm": 0.004000527318567038, - "learning_rate": 0.00019999316709946277, - "loss": 46.0, - "step": 23117 - }, - { - "epoch": 3.722975965215991, - "grad_norm": 0.0026175882667303085, - "learning_rate": 0.0001999931665080428, - "loss": 46.0, - "step": 23118 - }, - { - "epoch": 3.7231370022947785, - "grad_norm": 0.002381636295467615, - "learning_rate": 0.00019999316591659723, - "loss": 46.0, - "step": 23119 - }, - { - "epoch": 3.7232980393735655, - "grad_norm": 0.011801318265497684, - "learning_rate": 0.00019999316532512607, - "loss": 46.0, - "step": 23120 - }, - { - "epoch": 3.723459076452353, - "grad_norm": 0.003851519664749503, - "learning_rate": 0.00019999316473362933, - "loss": 46.0, - "step": 23121 - }, - { - "epoch": 3.7236201135311404, - "grad_norm": 0.0029754312708973885, - "learning_rate": 0.00019999316414210698, - "loss": 46.0, - "step": 23122 - }, - { - "epoch": 3.723781150609928, - "grad_norm": 0.005113998427987099, - "learning_rate": 0.00019999316355055906, - "loss": 46.0, - "step": 23123 - }, - { - "epoch": 3.7239421876887153, - "grad_norm": 0.0030976508278399706, - "learning_rate": 0.00019999316295898553, - "loss": 46.0, - "step": 23124 - }, - { - "epoch": 3.7241032247675028, - "grad_norm": 0.0025091266725212336, - "learning_rate": 0.00019999316236738644, - "loss": 46.0, - "step": 23125 - }, - { - "epoch": 3.7242642618462902, - "grad_norm": 0.011517970822751522, - "learning_rate": 0.00019999316177576174, - "loss": 46.0, - "step": 23126 - }, - { - "epoch": 3.7244252989250777, - "grad_norm": 0.0062842643819749355, - "learning_rate": 0.0001999931611841114, - "loss": 46.0, - "step": 23127 - }, - { - "epoch": 3.724586336003865, - "grad_norm": 0.00368786184117198, - "learning_rate": 0.00019999316059243552, - "loss": 46.0, - "step": 23128 - }, - { - "epoch": 3.724747373082652, - "grad_norm": 0.008995292708277702, - "learning_rate": 0.00019999316000073402, - "loss": 46.0, - "step": 23129 - }, - { - "epoch": 3.7249084101614396, - "grad_norm": 0.0016693223733454943, - "learning_rate": 0.00019999315940900694, - "loss": 46.0, - "step": 23130 - }, - { - "epoch": 3.725069447240227, - "grad_norm": 0.0010223229182884097, - "learning_rate": 0.00019999315881725428, - "loss": 46.0, - "step": 23131 - }, - { - "epoch": 3.7252304843190145, - "grad_norm": 0.00498659722507, - "learning_rate": 0.000199993158225476, - "loss": 46.0, - "step": 23132 - }, - { - "epoch": 3.725391521397802, - "grad_norm": 0.005208555608987808, - "learning_rate": 0.00019999315763367213, - "loss": 46.0, - "step": 23133 - }, - { - "epoch": 3.7255525584765894, - "grad_norm": 0.0015590887051075697, - "learning_rate": 0.0001999931570418427, - "loss": 46.0, - "step": 23134 - }, - { - "epoch": 3.7257135955553764, - "grad_norm": 0.008314471691846848, - "learning_rate": 0.00019999315644998763, - "loss": 46.0, - "step": 23135 - }, - { - "epoch": 3.725874632634164, - "grad_norm": 0.015822215005755424, - "learning_rate": 0.000199993155858107, - "loss": 46.0, - "step": 23136 - }, - { - "epoch": 3.7260356697129513, - "grad_norm": 0.022752514109015465, - "learning_rate": 0.00019999315526620076, - "loss": 46.0, - "step": 23137 - }, - { - "epoch": 3.7261967067917388, - "grad_norm": 0.0013210473116487265, - "learning_rate": 0.00019999315467426892, - "loss": 46.0, - "step": 23138 - }, - { - "epoch": 3.726357743870526, - "grad_norm": 0.008984082378447056, - "learning_rate": 0.0001999931540823115, - "loss": 46.0, - "step": 23139 - }, - { - "epoch": 3.7265187809493137, - "grad_norm": 0.001040121540427208, - "learning_rate": 0.0001999931534903285, - "loss": 46.0, - "step": 23140 - }, - { - "epoch": 3.726679818028101, - "grad_norm": 0.0020976795349270105, - "learning_rate": 0.00019999315289831988, - "loss": 46.0, - "step": 23141 - }, - { - "epoch": 3.7268408551068886, - "grad_norm": 0.009401296265423298, - "learning_rate": 0.00019999315230628567, - "loss": 46.0, - "step": 23142 - }, - { - "epoch": 3.727001892185676, - "grad_norm": 0.00599231431260705, - "learning_rate": 0.0001999931517142259, - "loss": 46.0, - "step": 23143 - }, - { - "epoch": 3.727162929264463, - "grad_norm": 0.003892629174515605, - "learning_rate": 0.0001999931511221405, - "loss": 46.0, - "step": 23144 - }, - { - "epoch": 3.7273239663432505, - "grad_norm": 0.0030356482602655888, - "learning_rate": 0.0001999931505300295, - "loss": 46.0, - "step": 23145 - }, - { - "epoch": 3.727485003422038, - "grad_norm": 0.0031859027221798897, - "learning_rate": 0.00019999314993789295, - "loss": 46.0, - "step": 23146 - }, - { - "epoch": 3.7276460405008254, - "grad_norm": 0.015063718892633915, - "learning_rate": 0.00019999314934573078, - "loss": 46.0, - "step": 23147 - }, - { - "epoch": 3.727807077579613, - "grad_norm": 0.002772016217932105, - "learning_rate": 0.00019999314875354305, - "loss": 46.0, - "step": 23148 - }, - { - "epoch": 3.7279681146584, - "grad_norm": 0.0008904255810193717, - "learning_rate": 0.00019999314816132968, - "loss": 46.0, - "step": 23149 - }, - { - "epoch": 3.7281291517371873, - "grad_norm": 0.0014147789916023612, - "learning_rate": 0.00019999314756909074, - "loss": 46.0, - "step": 23150 - }, - { - "epoch": 3.7282901888159747, - "grad_norm": 0.0038090685848146677, - "learning_rate": 0.00019999314697682617, - "loss": 46.0, - "step": 23151 - }, - { - "epoch": 3.728451225894762, - "grad_norm": 0.0018200924387201667, - "learning_rate": 0.00019999314638453607, - "loss": 46.0, - "step": 23152 - }, - { - "epoch": 3.7286122629735496, - "grad_norm": 0.003739455947652459, - "learning_rate": 0.00019999314579222035, - "loss": 46.0, - "step": 23153 - }, - { - "epoch": 3.728773300052337, - "grad_norm": 0.013962958008050919, - "learning_rate": 0.000199993145199879, - "loss": 46.0, - "step": 23154 - }, - { - "epoch": 3.7289343371311245, - "grad_norm": 0.0044592684134840965, - "learning_rate": 0.00019999314460751212, - "loss": 46.0, - "step": 23155 - }, - { - "epoch": 3.729095374209912, - "grad_norm": 0.00839564111083746, - "learning_rate": 0.0001999931440151196, - "loss": 46.0, - "step": 23156 - }, - { - "epoch": 3.7292564112886994, - "grad_norm": 0.011357411742210388, - "learning_rate": 0.0001999931434227015, - "loss": 46.0, - "step": 23157 - }, - { - "epoch": 3.7294174483674865, - "grad_norm": 0.0008929366013035178, - "learning_rate": 0.00019999314283025783, - "loss": 46.0, - "step": 23158 - }, - { - "epoch": 3.729578485446274, - "grad_norm": 0.0022368384525179863, - "learning_rate": 0.00019999314223778853, - "loss": 46.0, - "step": 23159 - }, - { - "epoch": 3.7297395225250614, - "grad_norm": 0.00394734600558877, - "learning_rate": 0.00019999314164529367, - "loss": 46.0, - "step": 23160 - }, - { - "epoch": 3.729900559603849, - "grad_norm": 0.002173580462113023, - "learning_rate": 0.0001999931410527732, - "loss": 46.0, - "step": 23161 - }, - { - "epoch": 3.7300615966826363, - "grad_norm": 0.0026979295071214437, - "learning_rate": 0.00019999314046022714, - "loss": 46.0, - "step": 23162 - }, - { - "epoch": 3.7302226337614237, - "grad_norm": 0.0017331313574686646, - "learning_rate": 0.00019999313986765547, - "loss": 46.0, - "step": 23163 - }, - { - "epoch": 3.7303836708402107, - "grad_norm": 0.0057273744605481625, - "learning_rate": 0.00019999313927505824, - "loss": 46.0, - "step": 23164 - }, - { - "epoch": 3.730544707918998, - "grad_norm": 0.012292359955608845, - "learning_rate": 0.0001999931386824354, - "loss": 46.0, - "step": 23165 - }, - { - "epoch": 3.7307057449977856, - "grad_norm": 0.006643029861152172, - "learning_rate": 0.00019999313808978695, - "loss": 46.0, - "step": 23166 - }, - { - "epoch": 3.730866782076573, - "grad_norm": 0.0033334516920149326, - "learning_rate": 0.00019999313749711293, - "loss": 46.0, - "step": 23167 - }, - { - "epoch": 3.7310278191553605, - "grad_norm": 0.0030398694798350334, - "learning_rate": 0.0001999931369044133, - "loss": 46.0, - "step": 23168 - }, - { - "epoch": 3.731188856234148, - "grad_norm": 0.006750424858182669, - "learning_rate": 0.00019999313631168807, - "loss": 46.0, - "step": 23169 - }, - { - "epoch": 3.7313498933129354, - "grad_norm": 0.012335010804235935, - "learning_rate": 0.0001999931357189373, - "loss": 46.0, - "step": 23170 - }, - { - "epoch": 3.731510930391723, - "grad_norm": 0.010827120393514633, - "learning_rate": 0.0001999931351261609, - "loss": 46.0, - "step": 23171 - }, - { - "epoch": 3.7316719674705103, - "grad_norm": 0.003413174068555236, - "learning_rate": 0.0001999931345333589, - "loss": 46.0, - "step": 23172 - }, - { - "epoch": 3.7318330045492973, - "grad_norm": 0.005273792427033186, - "learning_rate": 0.0001999931339405313, - "loss": 46.0, - "step": 23173 - }, - { - "epoch": 3.731994041628085, - "grad_norm": 0.0027752777095884085, - "learning_rate": 0.00019999313334767812, - "loss": 46.0, - "step": 23174 - }, - { - "epoch": 3.7321550787068722, - "grad_norm": 0.00590770086273551, - "learning_rate": 0.00019999313275479935, - "loss": 46.0, - "step": 23175 - }, - { - "epoch": 3.7323161157856597, - "grad_norm": 0.007024338003247976, - "learning_rate": 0.000199993132161895, - "loss": 46.0, - "step": 23176 - }, - { - "epoch": 3.732477152864447, - "grad_norm": 0.0032840659841895103, - "learning_rate": 0.000199993131568965, - "loss": 46.0, - "step": 23177 - }, - { - "epoch": 3.732638189943234, - "grad_norm": 0.004606082569807768, - "learning_rate": 0.00019999313097600948, - "loss": 46.0, - "step": 23178 - }, - { - "epoch": 3.7327992270220216, - "grad_norm": 0.0018258269410580397, - "learning_rate": 0.00019999313038302833, - "loss": 46.0, - "step": 23179 - }, - { - "epoch": 3.732960264100809, - "grad_norm": 0.001961832633242011, - "learning_rate": 0.0001999931297900216, - "loss": 46.0, - "step": 23180 - }, - { - "epoch": 3.7331213011795965, - "grad_norm": 0.003131021047011018, - "learning_rate": 0.00019999312919698924, - "loss": 46.0, - "step": 23181 - }, - { - "epoch": 3.733282338258384, - "grad_norm": 0.003571550128981471, - "learning_rate": 0.00019999312860393133, - "loss": 46.0, - "step": 23182 - }, - { - "epoch": 3.7334433753371714, - "grad_norm": 0.002121776808053255, - "learning_rate": 0.0001999931280108478, - "loss": 46.0, - "step": 23183 - }, - { - "epoch": 3.733604412415959, - "grad_norm": 0.0038131901528686285, - "learning_rate": 0.00019999312741773872, - "loss": 46.0, - "step": 23184 - }, - { - "epoch": 3.7337654494947463, - "grad_norm": 0.00986417755484581, - "learning_rate": 0.000199993126824604, - "loss": 46.0, - "step": 23185 - }, - { - "epoch": 3.7339264865735338, - "grad_norm": 0.002036524470895529, - "learning_rate": 0.0001999931262314437, - "loss": 46.0, - "step": 23186 - }, - { - "epoch": 3.734087523652321, - "grad_norm": 0.010031732730567455, - "learning_rate": 0.0001999931256382578, - "loss": 46.0, - "step": 23187 - }, - { - "epoch": 3.7342485607311082, - "grad_norm": 0.003502281615510583, - "learning_rate": 0.00019999312504504632, - "loss": 46.0, - "step": 23188 - }, - { - "epoch": 3.7344095978098957, - "grad_norm": 0.006705968640744686, - "learning_rate": 0.00019999312445180927, - "loss": 46.0, - "step": 23189 - }, - { - "epoch": 3.734570634888683, - "grad_norm": 0.0026294023264199495, - "learning_rate": 0.00019999312385854658, - "loss": 46.0, - "step": 23190 - }, - { - "epoch": 3.7347316719674706, - "grad_norm": 0.004401372279971838, - "learning_rate": 0.00019999312326525833, - "loss": 46.0, - "step": 23191 - }, - { - "epoch": 3.734892709046258, - "grad_norm": 0.007621718104928732, - "learning_rate": 0.00019999312267194447, - "loss": 46.0, - "step": 23192 - }, - { - "epoch": 3.735053746125045, - "grad_norm": 0.0008155046962201595, - "learning_rate": 0.00019999312207860502, - "loss": 46.0, - "step": 23193 - }, - { - "epoch": 3.7352147832038325, - "grad_norm": 0.001990322256460786, - "learning_rate": 0.00019999312148523995, - "loss": 46.0, - "step": 23194 - }, - { - "epoch": 3.73537582028262, - "grad_norm": 0.0029427832923829556, - "learning_rate": 0.00019999312089184933, - "loss": 46.0, - "step": 23195 - }, - { - "epoch": 3.7355368573614074, - "grad_norm": 0.01669461838901043, - "learning_rate": 0.00019999312029843312, - "loss": 46.0, - "step": 23196 - }, - { - "epoch": 3.735697894440195, - "grad_norm": 0.0012514758855104446, - "learning_rate": 0.00019999311970499132, - "loss": 46.0, - "step": 23197 - }, - { - "epoch": 3.7358589315189823, - "grad_norm": 0.0012641752837225795, - "learning_rate": 0.0001999931191115239, - "loss": 46.0, - "step": 23198 - }, - { - "epoch": 3.7360199685977697, - "grad_norm": 0.0016014878638088703, - "learning_rate": 0.00019999311851803087, - "loss": 46.0, - "step": 23199 - }, - { - "epoch": 3.736181005676557, - "grad_norm": 0.005642854608595371, - "learning_rate": 0.00019999311792451229, - "loss": 46.0, - "step": 23200 - }, - { - "epoch": 3.7363420427553447, - "grad_norm": 0.0017803406808525324, - "learning_rate": 0.00019999311733096808, - "loss": 46.0, - "step": 23201 - }, - { - "epoch": 3.7365030798341317, - "grad_norm": 0.003669108496978879, - "learning_rate": 0.0001999931167373983, - "loss": 46.0, - "step": 23202 - }, - { - "epoch": 3.736664116912919, - "grad_norm": 0.004593828693032265, - "learning_rate": 0.00019999311614380292, - "loss": 46.0, - "step": 23203 - }, - { - "epoch": 3.7368251539917066, - "grad_norm": 0.0029747160151600838, - "learning_rate": 0.00019999311555018195, - "loss": 46.0, - "step": 23204 - }, - { - "epoch": 3.736986191070494, - "grad_norm": 0.003492704126983881, - "learning_rate": 0.00019999311495653537, - "loss": 46.0, - "step": 23205 - }, - { - "epoch": 3.7371472281492815, - "grad_norm": 0.002444078680127859, - "learning_rate": 0.00019999311436286324, - "loss": 46.0, - "step": 23206 - }, - { - "epoch": 3.737308265228069, - "grad_norm": 0.0004748831270262599, - "learning_rate": 0.00019999311376916548, - "loss": 46.0, - "step": 23207 - }, - { - "epoch": 3.737469302306856, - "grad_norm": 0.0024681733921170235, - "learning_rate": 0.00019999311317544212, - "loss": 46.0, - "step": 23208 - }, - { - "epoch": 3.7376303393856434, - "grad_norm": 0.007616622839123011, - "learning_rate": 0.0001999931125816932, - "loss": 46.0, - "step": 23209 - }, - { - "epoch": 3.737791376464431, - "grad_norm": 0.006673749536275864, - "learning_rate": 0.00019999311198791865, - "loss": 46.0, - "step": 23210 - }, - { - "epoch": 3.7379524135432183, - "grad_norm": 0.0015464113093912601, - "learning_rate": 0.00019999311139411852, - "loss": 46.0, - "step": 23211 - }, - { - "epoch": 3.7381134506220057, - "grad_norm": 0.003553914837539196, - "learning_rate": 0.0001999931108002928, - "loss": 46.0, - "step": 23212 - }, - { - "epoch": 3.738274487700793, - "grad_norm": 0.004218549467623234, - "learning_rate": 0.0001999931102064415, - "loss": 46.0, - "step": 23213 - }, - { - "epoch": 3.7384355247795806, - "grad_norm": 0.000705431797541678, - "learning_rate": 0.00019999310961256458, - "loss": 46.0, - "step": 23214 - }, - { - "epoch": 3.738596561858368, - "grad_norm": 0.004536414984613657, - "learning_rate": 0.0001999931090186621, - "loss": 46.0, - "step": 23215 - }, - { - "epoch": 3.7387575989371555, - "grad_norm": 0.0033399872481822968, - "learning_rate": 0.000199993108424734, - "loss": 46.0, - "step": 23216 - }, - { - "epoch": 3.7389186360159425, - "grad_norm": 0.002238695975393057, - "learning_rate": 0.0001999931078307803, - "loss": 46.0, - "step": 23217 - }, - { - "epoch": 3.73907967309473, - "grad_norm": 0.008804631419479847, - "learning_rate": 0.00019999310723680104, - "loss": 46.0, - "step": 23218 - }, - { - "epoch": 3.7392407101735174, - "grad_norm": 0.0005120678688399494, - "learning_rate": 0.0001999931066427962, - "loss": 46.0, - "step": 23219 - }, - { - "epoch": 3.739401747252305, - "grad_norm": 0.006160099990665913, - "learning_rate": 0.0001999931060487657, - "loss": 46.0, - "step": 23220 - }, - { - "epoch": 3.7395627843310923, - "grad_norm": 0.0013807618524879217, - "learning_rate": 0.00019999310545470967, - "loss": 46.0, - "step": 23221 - }, - { - "epoch": 3.7397238214098794, - "grad_norm": 0.0012846424942836165, - "learning_rate": 0.000199993104860628, - "loss": 46.0, - "step": 23222 - }, - { - "epoch": 3.739884858488667, - "grad_norm": 0.0010511906584724784, - "learning_rate": 0.00019999310426652077, - "loss": 46.0, - "step": 23223 - }, - { - "epoch": 3.7400458955674543, - "grad_norm": 0.0033722578082233667, - "learning_rate": 0.00019999310367238793, - "loss": 46.0, - "step": 23224 - }, - { - "epoch": 3.7402069326462417, - "grad_norm": 0.0027915111277252436, - "learning_rate": 0.00019999310307822953, - "loss": 46.0, - "step": 23225 - }, - { - "epoch": 3.740367969725029, - "grad_norm": 0.0012366075534373522, - "learning_rate": 0.00019999310248404548, - "loss": 46.0, - "step": 23226 - }, - { - "epoch": 3.7405290068038166, - "grad_norm": 0.008099728263914585, - "learning_rate": 0.00019999310188983588, - "loss": 46.0, - "step": 23227 - }, - { - "epoch": 3.740690043882604, - "grad_norm": 0.013237777166068554, - "learning_rate": 0.00019999310129560066, - "loss": 46.0, - "step": 23228 - }, - { - "epoch": 3.7408510809613915, - "grad_norm": 0.0014385567046701908, - "learning_rate": 0.00019999310070133985, - "loss": 46.0, - "step": 23229 - }, - { - "epoch": 3.741012118040179, - "grad_norm": 0.005096824374049902, - "learning_rate": 0.00019999310010705346, - "loss": 46.0, - "step": 23230 - }, - { - "epoch": 3.741173155118966, - "grad_norm": 0.014582911506295204, - "learning_rate": 0.00019999309951274148, - "loss": 46.0, - "step": 23231 - }, - { - "epoch": 3.7413341921977534, - "grad_norm": 0.007373742293566465, - "learning_rate": 0.00019999309891840388, - "loss": 46.0, - "step": 23232 - }, - { - "epoch": 3.741495229276541, - "grad_norm": 0.0041610137559473515, - "learning_rate": 0.00019999309832404073, - "loss": 46.0, - "step": 23233 - }, - { - "epoch": 3.7416562663553283, - "grad_norm": 0.0027232591528445482, - "learning_rate": 0.00019999309772965196, - "loss": 46.0, - "step": 23234 - }, - { - "epoch": 3.741817303434116, - "grad_norm": 0.010877788066864014, - "learning_rate": 0.0001999930971352376, - "loss": 46.0, - "step": 23235 - }, - { - "epoch": 3.7419783405129032, - "grad_norm": 0.005337840877473354, - "learning_rate": 0.00019999309654079766, - "loss": 46.0, - "step": 23236 - }, - { - "epoch": 3.7421393775916902, - "grad_norm": 0.005943040829151869, - "learning_rate": 0.00019999309594633208, - "loss": 46.0, - "step": 23237 - }, - { - "epoch": 3.7423004146704777, - "grad_norm": 0.0037458178121596575, - "learning_rate": 0.00019999309535184096, - "loss": 46.0, - "step": 23238 - }, - { - "epoch": 3.742461451749265, - "grad_norm": 0.0009623331134207547, - "learning_rate": 0.00019999309475732423, - "loss": 46.0, - "step": 23239 - }, - { - "epoch": 3.7426224888280526, - "grad_norm": 0.0021316311322152615, - "learning_rate": 0.0001999930941627819, - "loss": 46.0, - "step": 23240 - }, - { - "epoch": 3.74278352590684, - "grad_norm": 0.01692894659936428, - "learning_rate": 0.00019999309356821398, - "loss": 46.0, - "step": 23241 - }, - { - "epoch": 3.7429445629856275, - "grad_norm": 0.0046026455238461494, - "learning_rate": 0.00019999309297362048, - "loss": 46.0, - "step": 23242 - }, - { - "epoch": 3.743105600064415, - "grad_norm": 0.000602768617682159, - "learning_rate": 0.0001999930923790014, - "loss": 46.0, - "step": 23243 - }, - { - "epoch": 3.7432666371432024, - "grad_norm": 0.012639382854104042, - "learning_rate": 0.00019999309178435668, - "loss": 46.0, - "step": 23244 - }, - { - "epoch": 3.74342767422199, - "grad_norm": 0.0024291942827403545, - "learning_rate": 0.0001999930911896864, - "loss": 46.0, - "step": 23245 - }, - { - "epoch": 3.743588711300777, - "grad_norm": 0.0007512205629609525, - "learning_rate": 0.0001999930905949905, - "loss": 46.0, - "step": 23246 - }, - { - "epoch": 3.7437497483795643, - "grad_norm": 0.0024907661136239767, - "learning_rate": 0.00019999309000026902, - "loss": 46.0, - "step": 23247 - }, - { - "epoch": 3.7439107854583518, - "grad_norm": 0.0008014555787667632, - "learning_rate": 0.00019999308940552197, - "loss": 46.0, - "step": 23248 - }, - { - "epoch": 3.744071822537139, - "grad_norm": 0.0010756963165476918, - "learning_rate": 0.0001999930888107493, - "loss": 46.0, - "step": 23249 - }, - { - "epoch": 3.7442328596159267, - "grad_norm": 0.005392158403992653, - "learning_rate": 0.00019999308821595104, - "loss": 46.0, - "step": 23250 - }, - { - "epoch": 3.744393896694714, - "grad_norm": 0.004219592548906803, - "learning_rate": 0.0001999930876211272, - "loss": 46.0, - "step": 23251 - }, - { - "epoch": 3.744554933773501, - "grad_norm": 0.0041700867004692554, - "learning_rate": 0.00019999308702627774, - "loss": 46.0, - "step": 23252 - }, - { - "epoch": 3.7447159708522886, - "grad_norm": 0.0034497117158025503, - "learning_rate": 0.0001999930864314027, - "loss": 46.0, - "step": 23253 - }, - { - "epoch": 3.744877007931076, - "grad_norm": 0.0019165397388860583, - "learning_rate": 0.00019999308583650208, - "loss": 46.0, - "step": 23254 - }, - { - "epoch": 3.7450380450098635, - "grad_norm": 0.0017872173339128494, - "learning_rate": 0.00019999308524157585, - "loss": 46.0, - "step": 23255 - }, - { - "epoch": 3.745199082088651, - "grad_norm": 0.0020129105541855097, - "learning_rate": 0.00019999308464662405, - "loss": 46.0, - "step": 23256 - }, - { - "epoch": 3.7453601191674384, - "grad_norm": 0.0017803329974412918, - "learning_rate": 0.00019999308405164662, - "loss": 46.0, - "step": 23257 - }, - { - "epoch": 3.745521156246226, - "grad_norm": 0.0025569559074938297, - "learning_rate": 0.00019999308345664365, - "loss": 46.0, - "step": 23258 - }, - { - "epoch": 3.7456821933250133, - "grad_norm": 0.002819600747898221, - "learning_rate": 0.00019999308286161504, - "loss": 46.0, - "step": 23259 - }, - { - "epoch": 3.7458432304038007, - "grad_norm": 0.0029949620366096497, - "learning_rate": 0.00019999308226656087, - "loss": 46.0, - "step": 23260 - }, - { - "epoch": 3.7460042674825877, - "grad_norm": 0.0017388558480888605, - "learning_rate": 0.0001999930816714811, - "loss": 46.0, - "step": 23261 - }, - { - "epoch": 3.746165304561375, - "grad_norm": 0.0017265565693378448, - "learning_rate": 0.0001999930810763757, - "loss": 46.0, - "step": 23262 - }, - { - "epoch": 3.7463263416401626, - "grad_norm": 0.005802724976092577, - "learning_rate": 0.00019999308048124475, - "loss": 46.0, - "step": 23263 - }, - { - "epoch": 3.74648737871895, - "grad_norm": 0.0030983497854322195, - "learning_rate": 0.0001999930798860882, - "loss": 46.0, - "step": 23264 - }, - { - "epoch": 3.7466484157977376, - "grad_norm": 0.006051764823496342, - "learning_rate": 0.00019999307929090602, - "loss": 46.0, - "step": 23265 - }, - { - "epoch": 3.7468094528765246, - "grad_norm": 0.008463297039270401, - "learning_rate": 0.00019999307869569827, - "loss": 46.0, - "step": 23266 - }, - { - "epoch": 3.746970489955312, - "grad_norm": 0.0011268469970673323, - "learning_rate": 0.00019999307810046496, - "loss": 46.0, - "step": 23267 - }, - { - "epoch": 3.7471315270340995, - "grad_norm": 0.004161206539720297, - "learning_rate": 0.000199993077505206, - "loss": 46.0, - "step": 23268 - }, - { - "epoch": 3.747292564112887, - "grad_norm": 0.0012801686534658074, - "learning_rate": 0.0001999930769099215, - "loss": 46.0, - "step": 23269 - }, - { - "epoch": 3.7474536011916744, - "grad_norm": 0.009900976903736591, - "learning_rate": 0.0001999930763146114, - "loss": 46.0, - "step": 23270 - }, - { - "epoch": 3.747614638270462, - "grad_norm": 0.0022036691661924124, - "learning_rate": 0.00019999307571927567, - "loss": 46.0, - "step": 23271 - }, - { - "epoch": 3.7477756753492493, - "grad_norm": 0.0027693714946508408, - "learning_rate": 0.00019999307512391437, - "loss": 46.0, - "step": 23272 - }, - { - "epoch": 3.7479367124280367, - "grad_norm": 0.0030390287283807993, - "learning_rate": 0.00019999307452852748, - "loss": 46.0, - "step": 23273 - }, - { - "epoch": 3.748097749506824, - "grad_norm": 0.0028306397143751383, - "learning_rate": 0.00019999307393311498, - "loss": 46.0, - "step": 23274 - }, - { - "epoch": 3.748258786585611, - "grad_norm": 0.010642561130225658, - "learning_rate": 0.0001999930733376769, - "loss": 46.0, - "step": 23275 - }, - { - "epoch": 3.7484198236643986, - "grad_norm": 0.0030794998165220022, - "learning_rate": 0.00019999307274221322, - "loss": 46.0, - "step": 23276 - }, - { - "epoch": 3.748580860743186, - "grad_norm": 0.002850885270163417, - "learning_rate": 0.00019999307214672393, - "loss": 46.0, - "step": 23277 - }, - { - "epoch": 3.7487418978219735, - "grad_norm": 0.0037070740945637226, - "learning_rate": 0.0001999930715512091, - "loss": 46.0, - "step": 23278 - }, - { - "epoch": 3.748902934900761, - "grad_norm": 0.006732048466801643, - "learning_rate": 0.00019999307095566865, - "loss": 46.0, - "step": 23279 - }, - { - "epoch": 3.7490639719795484, - "grad_norm": 0.002443492878228426, - "learning_rate": 0.0001999930703601026, - "loss": 46.0, - "step": 23280 - }, - { - "epoch": 3.7492250090583354, - "grad_norm": 0.003433868521824479, - "learning_rate": 0.00019999306976451093, - "loss": 46.0, - "step": 23281 - }, - { - "epoch": 3.749386046137123, - "grad_norm": 0.0033519493881613016, - "learning_rate": 0.0001999930691688937, - "loss": 46.0, - "step": 23282 - }, - { - "epoch": 3.7495470832159103, - "grad_norm": 0.005566973704844713, - "learning_rate": 0.00019999306857325087, - "loss": 46.0, - "step": 23283 - }, - { - "epoch": 3.749708120294698, - "grad_norm": 0.002177475718781352, - "learning_rate": 0.00019999306797758245, - "loss": 46.0, - "step": 23284 - }, - { - "epoch": 3.7498691573734853, - "grad_norm": 0.00495817419141531, - "learning_rate": 0.00019999306738188843, - "loss": 46.0, - "step": 23285 - }, - { - "epoch": 3.7500301944522727, - "grad_norm": 0.003249040339142084, - "learning_rate": 0.00019999306678616883, - "loss": 46.0, - "step": 23286 - }, - { - "epoch": 3.75019123153106, - "grad_norm": 0.002706686733290553, - "learning_rate": 0.00019999306619042365, - "loss": 46.0, - "step": 23287 - }, - { - "epoch": 3.7503522686098476, - "grad_norm": 0.003501946572214365, - "learning_rate": 0.00019999306559465282, - "loss": 46.0, - "step": 23288 - }, - { - "epoch": 3.750513305688635, - "grad_norm": 0.001713958103209734, - "learning_rate": 0.00019999306499885643, - "loss": 46.0, - "step": 23289 - }, - { - "epoch": 3.750674342767422, - "grad_norm": 0.0043103257194161415, - "learning_rate": 0.00019999306440303448, - "loss": 46.0, - "step": 23290 - }, - { - "epoch": 3.7508353798462095, - "grad_norm": 0.0015746662393212318, - "learning_rate": 0.0001999930638071869, - "loss": 46.0, - "step": 23291 - }, - { - "epoch": 3.750996416924997, - "grad_norm": 0.006887136492878199, - "learning_rate": 0.00019999306321131374, - "loss": 46.0, - "step": 23292 - }, - { - "epoch": 3.7511574540037844, - "grad_norm": 0.010822443291544914, - "learning_rate": 0.00019999306261541498, - "loss": 46.0, - "step": 23293 - }, - { - "epoch": 3.751318491082572, - "grad_norm": 0.0028583186212927103, - "learning_rate": 0.00019999306201949063, - "loss": 46.0, - "step": 23294 - }, - { - "epoch": 3.751479528161359, - "grad_norm": 0.009037405252456665, - "learning_rate": 0.00019999306142354069, - "loss": 46.0, - "step": 23295 - }, - { - "epoch": 3.7516405652401463, - "grad_norm": 0.006317655090242624, - "learning_rate": 0.00019999306082756513, - "loss": 46.0, - "step": 23296 - }, - { - "epoch": 3.751801602318934, - "grad_norm": 0.0026851806323975325, - "learning_rate": 0.00019999306023156402, - "loss": 46.0, - "step": 23297 - }, - { - "epoch": 3.7519626393977212, - "grad_norm": 0.0034984173253178596, - "learning_rate": 0.00019999305963553727, - "loss": 46.0, - "step": 23298 - }, - { - "epoch": 3.7521236764765087, - "grad_norm": 0.003804260166361928, - "learning_rate": 0.00019999305903948495, - "loss": 46.0, - "step": 23299 - }, - { - "epoch": 3.752284713555296, - "grad_norm": 0.003835939336568117, - "learning_rate": 0.00019999305844340705, - "loss": 46.0, - "step": 23300 - }, - { - "epoch": 3.7524457506340836, - "grad_norm": 0.007036760449409485, - "learning_rate": 0.00019999305784730356, - "loss": 46.0, - "step": 23301 - }, - { - "epoch": 3.752606787712871, - "grad_norm": 0.011795183643698692, - "learning_rate": 0.00019999305725117446, - "loss": 46.0, - "step": 23302 - }, - { - "epoch": 3.7527678247916585, - "grad_norm": 0.031054265797138214, - "learning_rate": 0.00019999305665501977, - "loss": 46.0, - "step": 23303 - }, - { - "epoch": 3.752928861870446, - "grad_norm": 0.0031320760026574135, - "learning_rate": 0.0001999930560588395, - "loss": 46.0, - "step": 23304 - }, - { - "epoch": 3.753089898949233, - "grad_norm": 0.004960140213370323, - "learning_rate": 0.0001999930554626336, - "loss": 46.0, - "step": 23305 - }, - { - "epoch": 3.7532509360280204, - "grad_norm": 0.012674725614488125, - "learning_rate": 0.00019999305486640215, - "loss": 46.0, - "step": 23306 - }, - { - "epoch": 3.753411973106808, - "grad_norm": 0.003215216798707843, - "learning_rate": 0.00019999305427014508, - "loss": 46.0, - "step": 23307 - }, - { - "epoch": 3.7535730101855953, - "grad_norm": 0.004510294180363417, - "learning_rate": 0.00019999305367386243, - "loss": 46.0, - "step": 23308 - }, - { - "epoch": 3.7537340472643828, - "grad_norm": 0.000803169037681073, - "learning_rate": 0.0001999930530775542, - "loss": 46.0, - "step": 23309 - }, - { - "epoch": 3.7538950843431698, - "grad_norm": 0.0030744210816919804, - "learning_rate": 0.00019999305248122033, - "loss": 46.0, - "step": 23310 - }, - { - "epoch": 3.754056121421957, - "grad_norm": 0.001205191481858492, - "learning_rate": 0.00019999305188486092, - "loss": 46.0, - "step": 23311 - }, - { - "epoch": 3.7542171585007447, - "grad_norm": 0.00221211276948452, - "learning_rate": 0.00019999305128847586, - "loss": 46.0, - "step": 23312 - }, - { - "epoch": 3.754378195579532, - "grad_norm": 0.00478509534150362, - "learning_rate": 0.00019999305069206527, - "loss": 46.0, - "step": 23313 - }, - { - "epoch": 3.7545392326583196, - "grad_norm": 0.001582092372700572, - "learning_rate": 0.00019999305009562904, - "loss": 46.0, - "step": 23314 - }, - { - "epoch": 3.754700269737107, - "grad_norm": 0.003600649069994688, - "learning_rate": 0.00019999304949916722, - "loss": 46.0, - "step": 23315 - }, - { - "epoch": 3.7548613068158945, - "grad_norm": 0.0018423496512696147, - "learning_rate": 0.00019999304890267982, - "loss": 46.0, - "step": 23316 - }, - { - "epoch": 3.755022343894682, - "grad_norm": 0.0012430831557139754, - "learning_rate": 0.00019999304830616685, - "loss": 46.0, - "step": 23317 - }, - { - "epoch": 3.7551833809734694, - "grad_norm": 0.0021214019507169724, - "learning_rate": 0.00019999304770962825, - "loss": 46.0, - "step": 23318 - }, - { - "epoch": 3.7553444180522564, - "grad_norm": 0.0031343905720859766, - "learning_rate": 0.00019999304711306408, - "loss": 46.0, - "step": 23319 - }, - { - "epoch": 3.755505455131044, - "grad_norm": 0.006793077103793621, - "learning_rate": 0.0001999930465164743, - "loss": 46.0, - "step": 23320 - }, - { - "epoch": 3.7556664922098313, - "grad_norm": 0.008207363076508045, - "learning_rate": 0.00019999304591985893, - "loss": 46.0, - "step": 23321 - }, - { - "epoch": 3.7558275292886187, - "grad_norm": 0.006455515045672655, - "learning_rate": 0.00019999304532321798, - "loss": 46.0, - "step": 23322 - }, - { - "epoch": 3.755988566367406, - "grad_norm": 0.005089758895337582, - "learning_rate": 0.00019999304472655144, - "loss": 46.0, - "step": 23323 - }, - { - "epoch": 3.7561496034461936, - "grad_norm": 0.01651889644563198, - "learning_rate": 0.00019999304412985928, - "loss": 46.0, - "step": 23324 - }, - { - "epoch": 3.7563106405249806, - "grad_norm": 0.014437532983720303, - "learning_rate": 0.00019999304353314154, - "loss": 46.0, - "step": 23325 - }, - { - "epoch": 3.756471677603768, - "grad_norm": 0.0029440789949148893, - "learning_rate": 0.00019999304293639823, - "loss": 46.0, - "step": 23326 - }, - { - "epoch": 3.7566327146825556, - "grad_norm": 0.0014560960698872805, - "learning_rate": 0.0001999930423396293, - "loss": 46.0, - "step": 23327 - }, - { - "epoch": 3.756793751761343, - "grad_norm": 0.006653280928730965, - "learning_rate": 0.00019999304174283478, - "loss": 46.0, - "step": 23328 - }, - { - "epoch": 3.7569547888401305, - "grad_norm": 0.01361078955233097, - "learning_rate": 0.00019999304114601464, - "loss": 46.0, - "step": 23329 - }, - { - "epoch": 3.757115825918918, - "grad_norm": 0.004327659495174885, - "learning_rate": 0.00019999304054916896, - "loss": 46.0, - "step": 23330 - }, - { - "epoch": 3.7572768629977054, - "grad_norm": 0.0010913473088294268, - "learning_rate": 0.00019999303995229766, - "loss": 46.0, - "step": 23331 - }, - { - "epoch": 3.757437900076493, - "grad_norm": 0.003111462341621518, - "learning_rate": 0.00019999303935540078, - "loss": 46.0, - "step": 23332 - }, - { - "epoch": 3.7575989371552803, - "grad_norm": 0.005549356807023287, - "learning_rate": 0.00019999303875847831, - "loss": 46.0, - "step": 23333 - }, - { - "epoch": 3.7577599742340673, - "grad_norm": 0.0056270030327141285, - "learning_rate": 0.0001999930381615302, - "loss": 46.0, - "step": 23334 - }, - { - "epoch": 3.7579210113128547, - "grad_norm": 0.021341169252991676, - "learning_rate": 0.00019999303756455654, - "loss": 46.0, - "step": 23335 - }, - { - "epoch": 3.758082048391642, - "grad_norm": 0.005347100086510181, - "learning_rate": 0.00019999303696755728, - "loss": 46.0, - "step": 23336 - }, - { - "epoch": 3.7582430854704296, - "grad_norm": 0.0026312253903597593, - "learning_rate": 0.0001999930363705324, - "loss": 46.0, - "step": 23337 - }, - { - "epoch": 3.758404122549217, - "grad_norm": 0.012708326801657677, - "learning_rate": 0.00019999303577348198, - "loss": 46.0, - "step": 23338 - }, - { - "epoch": 3.758565159628004, - "grad_norm": 0.0008886596187949181, - "learning_rate": 0.00019999303517640593, - "loss": 46.0, - "step": 23339 - }, - { - "epoch": 3.7587261967067915, - "grad_norm": 0.013958428986370564, - "learning_rate": 0.00019999303457930433, - "loss": 46.0, - "step": 23340 - }, - { - "epoch": 3.758887233785579, - "grad_norm": 0.0020769955590367317, - "learning_rate": 0.00019999303398217708, - "loss": 46.0, - "step": 23341 - }, - { - "epoch": 3.7590482708643664, - "grad_norm": 0.012553153559565544, - "learning_rate": 0.00019999303338502427, - "loss": 46.0, - "step": 23342 - }, - { - "epoch": 3.759209307943154, - "grad_norm": 0.003156843362376094, - "learning_rate": 0.00019999303278784585, - "loss": 46.0, - "step": 23343 - }, - { - "epoch": 3.7593703450219413, - "grad_norm": 0.0006373069481924176, - "learning_rate": 0.00019999303219064184, - "loss": 46.0, - "step": 23344 - }, - { - "epoch": 3.759531382100729, - "grad_norm": 0.002016419544816017, - "learning_rate": 0.00019999303159341225, - "loss": 46.0, - "step": 23345 - }, - { - "epoch": 3.7596924191795162, - "grad_norm": 0.001978748245164752, - "learning_rate": 0.00019999303099615704, - "loss": 46.0, - "step": 23346 - }, - { - "epoch": 3.7598534562583037, - "grad_norm": 0.0043363929726183414, - "learning_rate": 0.00019999303039887627, - "loss": 46.0, - "step": 23347 - }, - { - "epoch": 3.7600144933370907, - "grad_norm": 0.0014998463448137045, - "learning_rate": 0.00019999302980156988, - "loss": 46.0, - "step": 23348 - }, - { - "epoch": 3.760175530415878, - "grad_norm": 0.004350574221462011, - "learning_rate": 0.0001999930292042379, - "loss": 46.0, - "step": 23349 - }, - { - "epoch": 3.7603365674946656, - "grad_norm": 0.0032098707742989063, - "learning_rate": 0.00019999302860688033, - "loss": 46.0, - "step": 23350 - }, - { - "epoch": 3.760497604573453, - "grad_norm": 0.005226675420999527, - "learning_rate": 0.00019999302800949718, - "loss": 46.0, - "step": 23351 - }, - { - "epoch": 3.7606586416522405, - "grad_norm": 0.0010043485090136528, - "learning_rate": 0.00019999302741208842, - "loss": 46.0, - "step": 23352 - }, - { - "epoch": 3.760819678731028, - "grad_norm": 0.006235708016902208, - "learning_rate": 0.0001999930268146541, - "loss": 46.0, - "step": 23353 - }, - { - "epoch": 3.760980715809815, - "grad_norm": 0.005920794326812029, - "learning_rate": 0.00019999302621719417, - "loss": 46.0, - "step": 23354 - }, - { - "epoch": 3.7611417528886024, - "grad_norm": 0.0019254216458648443, - "learning_rate": 0.00019999302561970862, - "loss": 46.0, - "step": 23355 - }, - { - "epoch": 3.76130278996739, - "grad_norm": 0.0037464576307684183, - "learning_rate": 0.0001999930250221975, - "loss": 46.0, - "step": 23356 - }, - { - "epoch": 3.7614638270461773, - "grad_norm": 0.004395162221044302, - "learning_rate": 0.0001999930244246608, - "loss": 46.0, - "step": 23357 - }, - { - "epoch": 3.7616248641249648, - "grad_norm": 0.00424527982249856, - "learning_rate": 0.00019999302382709848, - "loss": 46.0, - "step": 23358 - }, - { - "epoch": 3.7617859012037522, - "grad_norm": 0.0008832887979224324, - "learning_rate": 0.00019999302322951058, - "loss": 46.0, - "step": 23359 - }, - { - "epoch": 3.7619469382825397, - "grad_norm": 0.0057721748016774654, - "learning_rate": 0.00019999302263189707, - "loss": 46.0, - "step": 23360 - }, - { - "epoch": 3.762107975361327, - "grad_norm": 0.004645981825888157, - "learning_rate": 0.00019999302203425797, - "loss": 46.0, - "step": 23361 - }, - { - "epoch": 3.7622690124401146, - "grad_norm": 0.007081453688442707, - "learning_rate": 0.0001999930214365933, - "loss": 46.0, - "step": 23362 - }, - { - "epoch": 3.7624300495189016, - "grad_norm": 0.0038249497301876545, - "learning_rate": 0.00019999302083890304, - "loss": 46.0, - "step": 23363 - }, - { - "epoch": 3.762591086597689, - "grad_norm": 0.0052764806896448135, - "learning_rate": 0.00019999302024118718, - "loss": 46.0, - "step": 23364 - }, - { - "epoch": 3.7627521236764765, - "grad_norm": 0.0020675412379205227, - "learning_rate": 0.0001999930196434457, - "loss": 46.0, - "step": 23365 - }, - { - "epoch": 3.762913160755264, - "grad_norm": 0.0055201854556798935, - "learning_rate": 0.00019999301904567864, - "loss": 46.0, - "step": 23366 - }, - { - "epoch": 3.7630741978340514, - "grad_norm": 0.0030638568568974733, - "learning_rate": 0.000199993018447886, - "loss": 46.0, - "step": 23367 - }, - { - "epoch": 3.763235234912839, - "grad_norm": 0.003037769813090563, - "learning_rate": 0.00019999301785006776, - "loss": 46.0, - "step": 23368 - }, - { - "epoch": 3.763396271991626, - "grad_norm": 0.0026919208467006683, - "learning_rate": 0.00019999301725222394, - "loss": 46.0, - "step": 23369 - }, - { - "epoch": 3.7635573090704133, - "grad_norm": 0.0021743851248174906, - "learning_rate": 0.0001999930166543545, - "loss": 46.0, - "step": 23370 - }, - { - "epoch": 3.7637183461492008, - "grad_norm": 0.005291048903018236, - "learning_rate": 0.00019999301605645947, - "loss": 46.0, - "step": 23371 - }, - { - "epoch": 3.763879383227988, - "grad_norm": 0.003063508542254567, - "learning_rate": 0.00019999301545853886, - "loss": 46.0, - "step": 23372 - }, - { - "epoch": 3.7640404203067757, - "grad_norm": 0.004322974011301994, - "learning_rate": 0.00019999301486059266, - "loss": 46.0, - "step": 23373 - }, - { - "epoch": 3.764201457385563, - "grad_norm": 0.004668680485337973, - "learning_rate": 0.00019999301426262088, - "loss": 46.0, - "step": 23374 - }, - { - "epoch": 3.7643624944643506, - "grad_norm": 0.002153209876269102, - "learning_rate": 0.00019999301366462348, - "loss": 46.0, - "step": 23375 - }, - { - "epoch": 3.764523531543138, - "grad_norm": 0.0044435737654566765, - "learning_rate": 0.0001999930130666005, - "loss": 46.0, - "step": 23376 - }, - { - "epoch": 3.7646845686219255, - "grad_norm": 0.004240675829350948, - "learning_rate": 0.00019999301246855191, - "loss": 46.0, - "step": 23377 - }, - { - "epoch": 3.7648456057007125, - "grad_norm": 0.010793029330670834, - "learning_rate": 0.00019999301187047775, - "loss": 46.0, - "step": 23378 - }, - { - "epoch": 3.7650066427795, - "grad_norm": 0.001335603534244001, - "learning_rate": 0.00019999301127237798, - "loss": 46.0, - "step": 23379 - }, - { - "epoch": 3.7651676798582874, - "grad_norm": 0.0051144990138709545, - "learning_rate": 0.00019999301067425261, - "loss": 46.0, - "step": 23380 - }, - { - "epoch": 3.765328716937075, - "grad_norm": 0.011687594465911388, - "learning_rate": 0.00019999301007610166, - "loss": 46.0, - "step": 23381 - }, - { - "epoch": 3.7654897540158623, - "grad_norm": 0.014922913163900375, - "learning_rate": 0.00019999300947792513, - "loss": 46.0, - "step": 23382 - }, - { - "epoch": 3.7656507910946493, - "grad_norm": 0.003803300904110074, - "learning_rate": 0.00019999300887972297, - "loss": 46.0, - "step": 23383 - }, - { - "epoch": 3.7658118281734367, - "grad_norm": 0.001957479165866971, - "learning_rate": 0.00019999300828149526, - "loss": 46.0, - "step": 23384 - }, - { - "epoch": 3.765972865252224, - "grad_norm": 0.00957445427775383, - "learning_rate": 0.0001999930076832419, - "loss": 46.0, - "step": 23385 - }, - { - "epoch": 3.7661339023310116, - "grad_norm": 0.0011465654242783785, - "learning_rate": 0.00019999300708496302, - "loss": 46.0, - "step": 23386 - }, - { - "epoch": 3.766294939409799, - "grad_norm": 0.007577102165669203, - "learning_rate": 0.0001999930064866585, - "loss": 46.0, - "step": 23387 - }, - { - "epoch": 3.7664559764885865, - "grad_norm": 0.0029909831937402487, - "learning_rate": 0.0001999930058883284, - "loss": 46.0, - "step": 23388 - }, - { - "epoch": 3.766617013567374, - "grad_norm": 0.005610207095742226, - "learning_rate": 0.0001999930052899727, - "loss": 46.0, - "step": 23389 - }, - { - "epoch": 3.7667780506461614, - "grad_norm": 0.010227243416011333, - "learning_rate": 0.00019999300469159144, - "loss": 46.0, - "step": 23390 - }, - { - "epoch": 3.766939087724949, - "grad_norm": 0.006348138675093651, - "learning_rate": 0.00019999300409318457, - "loss": 46.0, - "step": 23391 - }, - { - "epoch": 3.767100124803736, - "grad_norm": 0.002757695270702243, - "learning_rate": 0.00019999300349475205, - "loss": 46.0, - "step": 23392 - }, - { - "epoch": 3.7672611618825234, - "grad_norm": 0.0056867399252951145, - "learning_rate": 0.00019999300289629403, - "loss": 46.0, - "step": 23393 - }, - { - "epoch": 3.767422198961311, - "grad_norm": 0.0012817836832255125, - "learning_rate": 0.00019999300229781036, - "loss": 46.0, - "step": 23394 - }, - { - "epoch": 3.7675832360400983, - "grad_norm": 0.01238818746060133, - "learning_rate": 0.00019999300169930108, - "loss": 46.0, - "step": 23395 - }, - { - "epoch": 3.7677442731188857, - "grad_norm": 0.003599542658776045, - "learning_rate": 0.00019999300110076624, - "loss": 46.0, - "step": 23396 - }, - { - "epoch": 3.767905310197673, - "grad_norm": 0.0031063018832355738, - "learning_rate": 0.0001999930005022058, - "loss": 46.0, - "step": 23397 - }, - { - "epoch": 3.76806634727646, - "grad_norm": 0.0021299864165484905, - "learning_rate": 0.00019999299990361977, - "loss": 46.0, - "step": 23398 - }, - { - "epoch": 3.7682273843552476, - "grad_norm": 0.025733893737196922, - "learning_rate": 0.00019999299930500815, - "loss": 46.0, - "step": 23399 - }, - { - "epoch": 3.768388421434035, - "grad_norm": 0.005361327435821295, - "learning_rate": 0.00019999299870637093, - "loss": 46.0, - "step": 23400 - }, - { - "epoch": 3.7685494585128225, - "grad_norm": 0.01232514251023531, - "learning_rate": 0.0001999929981077081, - "loss": 46.0, - "step": 23401 - }, - { - "epoch": 3.76871049559161, - "grad_norm": 0.003512471215799451, - "learning_rate": 0.00019999299750901969, - "loss": 46.0, - "step": 23402 - }, - { - "epoch": 3.7688715326703974, - "grad_norm": 0.0026258009020239115, - "learning_rate": 0.0001999929969103057, - "loss": 46.0, - "step": 23403 - }, - { - "epoch": 3.769032569749185, - "grad_norm": 0.00991733092814684, - "learning_rate": 0.00019999299631156612, - "loss": 46.0, - "step": 23404 - }, - { - "epoch": 3.7691936068279723, - "grad_norm": 0.0013436574954539537, - "learning_rate": 0.0001999929957128009, - "loss": 46.0, - "step": 23405 - }, - { - "epoch": 3.76935464390676, - "grad_norm": 0.004663341678678989, - "learning_rate": 0.00019999299511401015, - "loss": 46.0, - "step": 23406 - }, - { - "epoch": 3.769515680985547, - "grad_norm": 0.004056538455188274, - "learning_rate": 0.0001999929945151938, - "loss": 46.0, - "step": 23407 - }, - { - "epoch": 3.7696767180643342, - "grad_norm": 0.0011676936410367489, - "learning_rate": 0.00019999299391635183, - "loss": 46.0, - "step": 23408 - }, - { - "epoch": 3.7698377551431217, - "grad_norm": 0.0011463186237961054, - "learning_rate": 0.00019999299331748427, - "loss": 46.0, - "step": 23409 - }, - { - "epoch": 3.769998792221909, - "grad_norm": 0.005641614552587271, - "learning_rate": 0.0001999929927185911, - "loss": 46.0, - "step": 23410 - }, - { - "epoch": 3.7701598293006966, - "grad_norm": 0.005202275235205889, - "learning_rate": 0.00019999299211967238, - "loss": 46.0, - "step": 23411 - }, - { - "epoch": 3.7703208663794836, - "grad_norm": 0.0037886020727455616, - "learning_rate": 0.00019999299152072804, - "loss": 46.0, - "step": 23412 - }, - { - "epoch": 3.770481903458271, - "grad_norm": 0.0007066461257636547, - "learning_rate": 0.0001999929909217581, - "loss": 46.0, - "step": 23413 - }, - { - "epoch": 3.7706429405370585, - "grad_norm": 0.005618244409561157, - "learning_rate": 0.0001999929903227626, - "loss": 46.0, - "step": 23414 - }, - { - "epoch": 3.770803977615846, - "grad_norm": 0.00250281416811049, - "learning_rate": 0.00019999298972374146, - "loss": 46.0, - "step": 23415 - }, - { - "epoch": 3.7709650146946334, - "grad_norm": 0.0019470519619062543, - "learning_rate": 0.00019999298912469477, - "loss": 46.0, - "step": 23416 - }, - { - "epoch": 3.771126051773421, - "grad_norm": 0.0028976253233850002, - "learning_rate": 0.00019999298852562246, - "loss": 46.0, - "step": 23417 - }, - { - "epoch": 3.7712870888522083, - "grad_norm": 0.007943235337734222, - "learning_rate": 0.00019999298792652457, - "loss": 46.0, - "step": 23418 - }, - { - "epoch": 3.7714481259309958, - "grad_norm": 0.0029335098806768656, - "learning_rate": 0.0001999929873274011, - "loss": 46.0, - "step": 23419 - }, - { - "epoch": 3.771609163009783, - "grad_norm": 0.009886838495731354, - "learning_rate": 0.000199992986728252, - "loss": 46.0, - "step": 23420 - }, - { - "epoch": 3.7717702000885707, - "grad_norm": 0.0015666900435462594, - "learning_rate": 0.00019999298612907734, - "loss": 46.0, - "step": 23421 - }, - { - "epoch": 3.7719312371673577, - "grad_norm": 0.007241830229759216, - "learning_rate": 0.00019999298552987705, - "loss": 46.0, - "step": 23422 - }, - { - "epoch": 3.772092274246145, - "grad_norm": 0.007948859594762325, - "learning_rate": 0.0001999929849306512, - "loss": 46.0, - "step": 23423 - }, - { - "epoch": 3.7722533113249326, - "grad_norm": 0.0016738002886995673, - "learning_rate": 0.00019999298433139975, - "loss": 46.0, - "step": 23424 - }, - { - "epoch": 3.77241434840372, - "grad_norm": 0.001979612046852708, - "learning_rate": 0.0001999929837321227, - "loss": 46.0, - "step": 23425 - }, - { - "epoch": 3.7725753854825075, - "grad_norm": 0.0015118021983653307, - "learning_rate": 0.00019999298313282005, - "loss": 46.0, - "step": 23426 - }, - { - "epoch": 3.7727364225612945, - "grad_norm": 0.0010859890608116984, - "learning_rate": 0.00019999298253349181, - "loss": 46.0, - "step": 23427 - }, - { - "epoch": 3.772897459640082, - "grad_norm": 0.002263714326545596, - "learning_rate": 0.000199992981934138, - "loss": 46.0, - "step": 23428 - }, - { - "epoch": 3.7730584967188694, - "grad_norm": 0.0005644666380248964, - "learning_rate": 0.0001999929813347586, - "loss": 46.0, - "step": 23429 - }, - { - "epoch": 3.773219533797657, - "grad_norm": 0.0010720707941800356, - "learning_rate": 0.00019999298073535357, - "loss": 46.0, - "step": 23430 - }, - { - "epoch": 3.7733805708764443, - "grad_norm": 0.002276270417496562, - "learning_rate": 0.00019999298013592296, - "loss": 46.0, - "step": 23431 - }, - { - "epoch": 3.7735416079552317, - "grad_norm": 0.0007864724029786885, - "learning_rate": 0.0001999929795364668, - "loss": 46.0, - "step": 23432 - }, - { - "epoch": 3.773702645034019, - "grad_norm": 0.0009934685658663511, - "learning_rate": 0.000199992978936985, - "loss": 46.0, - "step": 23433 - }, - { - "epoch": 3.7738636821128067, - "grad_norm": 0.005271119065582752, - "learning_rate": 0.00019999297833747762, - "loss": 46.0, - "step": 23434 - }, - { - "epoch": 3.774024719191594, - "grad_norm": 0.01132645457983017, - "learning_rate": 0.0001999929777379446, - "loss": 46.0, - "step": 23435 - }, - { - "epoch": 3.774185756270381, - "grad_norm": 0.0023303860798478127, - "learning_rate": 0.00019999297713838604, - "loss": 46.0, - "step": 23436 - }, - { - "epoch": 3.7743467933491686, - "grad_norm": 0.00591937405988574, - "learning_rate": 0.00019999297653880188, - "loss": 46.0, - "step": 23437 - }, - { - "epoch": 3.774507830427956, - "grad_norm": 0.005823095329105854, - "learning_rate": 0.00019999297593919214, - "loss": 46.0, - "step": 23438 - }, - { - "epoch": 3.7746688675067435, - "grad_norm": 0.0056540220975875854, - "learning_rate": 0.00019999297533955678, - "loss": 46.0, - "step": 23439 - }, - { - "epoch": 3.774829904585531, - "grad_norm": 0.00987009983509779, - "learning_rate": 0.00019999297473989583, - "loss": 46.0, - "step": 23440 - }, - { - "epoch": 3.7749909416643184, - "grad_norm": 0.0023566654417663813, - "learning_rate": 0.0001999929741402093, - "loss": 46.0, - "step": 23441 - }, - { - "epoch": 3.7751519787431054, - "grad_norm": 0.0013051513815298676, - "learning_rate": 0.00019999297354049718, - "loss": 46.0, - "step": 23442 - }, - { - "epoch": 3.775313015821893, - "grad_norm": 0.0034219883382320404, - "learning_rate": 0.00019999297294075947, - "loss": 46.0, - "step": 23443 - }, - { - "epoch": 3.7754740529006803, - "grad_norm": 0.0007844906067475677, - "learning_rate": 0.00019999297234099615, - "loss": 46.0, - "step": 23444 - }, - { - "epoch": 3.7756350899794677, - "grad_norm": 0.004213909152895212, - "learning_rate": 0.00019999297174120724, - "loss": 46.0, - "step": 23445 - }, - { - "epoch": 3.775796127058255, - "grad_norm": 0.002701682737097144, - "learning_rate": 0.00019999297114139272, - "loss": 46.0, - "step": 23446 - }, - { - "epoch": 3.7759571641370426, - "grad_norm": 0.002492605010047555, - "learning_rate": 0.00019999297054155263, - "loss": 46.0, - "step": 23447 - }, - { - "epoch": 3.77611820121583, - "grad_norm": 0.0019003769848495722, - "learning_rate": 0.00019999296994168696, - "loss": 46.0, - "step": 23448 - }, - { - "epoch": 3.7762792382946175, - "grad_norm": 0.003208177164196968, - "learning_rate": 0.00019999296934179568, - "loss": 46.0, - "step": 23449 - }, - { - "epoch": 3.776440275373405, - "grad_norm": 0.0016427161172032356, - "learning_rate": 0.0001999929687418788, - "loss": 46.0, - "step": 23450 - }, - { - "epoch": 3.776601312452192, - "grad_norm": 0.005170580465346575, - "learning_rate": 0.00019999296814193635, - "loss": 46.0, - "step": 23451 - }, - { - "epoch": 3.7767623495309794, - "grad_norm": 0.004875520244240761, - "learning_rate": 0.00019999296754196828, - "loss": 46.0, - "step": 23452 - }, - { - "epoch": 3.776923386609767, - "grad_norm": 0.0016431326512247324, - "learning_rate": 0.00019999296694197462, - "loss": 46.0, - "step": 23453 - }, - { - "epoch": 3.7770844236885543, - "grad_norm": 0.007538162637501955, - "learning_rate": 0.0001999929663419554, - "loss": 46.0, - "step": 23454 - }, - { - "epoch": 3.777245460767342, - "grad_norm": 0.0017951886402443051, - "learning_rate": 0.00019999296574191053, - "loss": 46.0, - "step": 23455 - }, - { - "epoch": 3.777406497846129, - "grad_norm": 0.0034587865229696035, - "learning_rate": 0.0001999929651418401, - "loss": 46.0, - "step": 23456 - }, - { - "epoch": 3.7775675349249163, - "grad_norm": 0.0014397423947229981, - "learning_rate": 0.00019999296454174407, - "loss": 46.0, - "step": 23457 - }, - { - "epoch": 3.7777285720037037, - "grad_norm": 0.007553314324468374, - "learning_rate": 0.00019999296394162248, - "loss": 46.0, - "step": 23458 - }, - { - "epoch": 3.777889609082491, - "grad_norm": 0.0010361107997596264, - "learning_rate": 0.00019999296334147527, - "loss": 46.0, - "step": 23459 - }, - { - "epoch": 3.7780506461612786, - "grad_norm": 0.008487565442919731, - "learning_rate": 0.00019999296274130247, - "loss": 46.0, - "step": 23460 - }, - { - "epoch": 3.778211683240066, - "grad_norm": 0.0008957805694080889, - "learning_rate": 0.00019999296214110409, - "loss": 46.0, - "step": 23461 - }, - { - "epoch": 3.7783727203188535, - "grad_norm": 0.002249722369015217, - "learning_rate": 0.00019999296154088009, - "loss": 46.0, - "step": 23462 - }, - { - "epoch": 3.778533757397641, - "grad_norm": 0.0034861310850828886, - "learning_rate": 0.0001999929609406305, - "loss": 46.0, - "step": 23463 - }, - { - "epoch": 3.7786947944764284, - "grad_norm": 0.008774700574576855, - "learning_rate": 0.00019999296034035533, - "loss": 46.0, - "step": 23464 - }, - { - "epoch": 3.7788558315552154, - "grad_norm": 0.001866961014457047, - "learning_rate": 0.00019999295974005454, - "loss": 46.0, - "step": 23465 - }, - { - "epoch": 3.779016868634003, - "grad_norm": 0.004746279213577509, - "learning_rate": 0.00019999295913972816, - "loss": 46.0, - "step": 23466 - }, - { - "epoch": 3.7791779057127903, - "grad_norm": 0.0006533092819154263, - "learning_rate": 0.00019999295853937623, - "loss": 46.0, - "step": 23467 - }, - { - "epoch": 3.779338942791578, - "grad_norm": 0.001827763393521309, - "learning_rate": 0.00019999295793899868, - "loss": 46.0, - "step": 23468 - }, - { - "epoch": 3.7794999798703652, - "grad_norm": 0.0011072917841374874, - "learning_rate": 0.00019999295733859554, - "loss": 46.0, - "step": 23469 - }, - { - "epoch": 3.7796610169491527, - "grad_norm": 0.020383261144161224, - "learning_rate": 0.0001999929567381668, - "loss": 46.0, - "step": 23470 - }, - { - "epoch": 3.7798220540279397, - "grad_norm": 0.005471076816320419, - "learning_rate": 0.00019999295613771248, - "loss": 46.0, - "step": 23471 - }, - { - "epoch": 3.779983091106727, - "grad_norm": 0.005412416532635689, - "learning_rate": 0.00019999295553723256, - "loss": 46.0, - "step": 23472 - }, - { - "epoch": 3.7801441281855146, - "grad_norm": 0.009214709512889385, - "learning_rate": 0.00019999295493672704, - "loss": 46.0, - "step": 23473 - }, - { - "epoch": 3.780305165264302, - "grad_norm": 0.009223414584994316, - "learning_rate": 0.00019999295433619594, - "loss": 46.0, - "step": 23474 - }, - { - "epoch": 3.7804662023430895, - "grad_norm": 0.00212464714422822, - "learning_rate": 0.00019999295373563923, - "loss": 46.0, - "step": 23475 - }, - { - "epoch": 3.780627239421877, - "grad_norm": 0.00892906729131937, - "learning_rate": 0.00019999295313505696, - "loss": 46.0, - "step": 23476 - }, - { - "epoch": 3.7807882765006644, - "grad_norm": 0.008025085553526878, - "learning_rate": 0.00019999295253444904, - "loss": 46.0, - "step": 23477 - }, - { - "epoch": 3.780949313579452, - "grad_norm": 0.0014605334727093577, - "learning_rate": 0.0001999929519338156, - "loss": 46.0, - "step": 23478 - }, - { - "epoch": 3.7811103506582393, - "grad_norm": 0.003567602252587676, - "learning_rate": 0.00019999295133315653, - "loss": 46.0, - "step": 23479 - }, - { - "epoch": 3.7812713877370263, - "grad_norm": 0.0009228388662450016, - "learning_rate": 0.00019999295073247185, - "loss": 46.0, - "step": 23480 - }, - { - "epoch": 3.7814324248158138, - "grad_norm": 0.009420409798622131, - "learning_rate": 0.0001999929501317616, - "loss": 46.0, - "step": 23481 - }, - { - "epoch": 3.781593461894601, - "grad_norm": 0.0041953325271606445, - "learning_rate": 0.00019999294953102574, - "loss": 46.0, - "step": 23482 - }, - { - "epoch": 3.7817544989733887, - "grad_norm": 0.0046717398799955845, - "learning_rate": 0.0001999929489302643, - "loss": 46.0, - "step": 23483 - }, - { - "epoch": 3.781915536052176, - "grad_norm": 0.002981435274705291, - "learning_rate": 0.00019999294832947725, - "loss": 46.0, - "step": 23484 - }, - { - "epoch": 3.7820765731309636, - "grad_norm": 0.008540144190192223, - "learning_rate": 0.0001999929477286646, - "loss": 46.0, - "step": 23485 - }, - { - "epoch": 3.7822376102097506, - "grad_norm": 0.0069182561710476875, - "learning_rate": 0.00019999294712782638, - "loss": 46.0, - "step": 23486 - }, - { - "epoch": 3.782398647288538, - "grad_norm": 0.001253429683856666, - "learning_rate": 0.0001999929465269626, - "loss": 46.0, - "step": 23487 - }, - { - "epoch": 3.7825596843673255, - "grad_norm": 0.005894755478948355, - "learning_rate": 0.0001999929459260732, - "loss": 46.0, - "step": 23488 - }, - { - "epoch": 3.782720721446113, - "grad_norm": 0.0023197641130536795, - "learning_rate": 0.00019999294532515818, - "loss": 46.0, - "step": 23489 - }, - { - "epoch": 3.7828817585249004, - "grad_norm": 0.01157285738736391, - "learning_rate": 0.0001999929447242176, - "loss": 46.0, - "step": 23490 - }, - { - "epoch": 3.783042795603688, - "grad_norm": 0.0069605023600161076, - "learning_rate": 0.0001999929441232514, - "loss": 46.0, - "step": 23491 - }, - { - "epoch": 3.7832038326824753, - "grad_norm": 0.011911396868526936, - "learning_rate": 0.0001999929435222596, - "loss": 46.0, - "step": 23492 - }, - { - "epoch": 3.7833648697612627, - "grad_norm": 0.0012840928975492716, - "learning_rate": 0.00019999294292124224, - "loss": 46.0, - "step": 23493 - }, - { - "epoch": 3.78352590684005, - "grad_norm": 0.0012501165037974715, - "learning_rate": 0.00019999294232019927, - "loss": 46.0, - "step": 23494 - }, - { - "epoch": 3.783686943918837, - "grad_norm": 0.0013155360938981175, - "learning_rate": 0.00019999294171913073, - "loss": 46.0, - "step": 23495 - }, - { - "epoch": 3.7838479809976246, - "grad_norm": 0.0072088055312633514, - "learning_rate": 0.00019999294111803658, - "loss": 46.0, - "step": 23496 - }, - { - "epoch": 3.784009018076412, - "grad_norm": 0.007935171946883202, - "learning_rate": 0.0001999929405169168, - "loss": 46.0, - "step": 23497 - }, - { - "epoch": 3.7841700551551996, - "grad_norm": 0.0071401833556592464, - "learning_rate": 0.00019999293991577148, - "loss": 46.0, - "step": 23498 - }, - { - "epoch": 3.784331092233987, - "grad_norm": 0.0016121533699333668, - "learning_rate": 0.00019999293931460054, - "loss": 46.0, - "step": 23499 - }, - { - "epoch": 3.784492129312774, - "grad_norm": 0.01656302809715271, - "learning_rate": 0.000199992938713404, - "loss": 46.0, - "step": 23500 - }, - { - "epoch": 3.7846531663915615, - "grad_norm": 0.0021986172068864107, - "learning_rate": 0.0001999929381121819, - "loss": 46.0, - "step": 23501 - }, - { - "epoch": 3.784814203470349, - "grad_norm": 0.001959993503987789, - "learning_rate": 0.00019999293751093417, - "loss": 46.0, - "step": 23502 - }, - { - "epoch": 3.7849752405491364, - "grad_norm": 0.0034746001474559307, - "learning_rate": 0.00019999293690966088, - "loss": 46.0, - "step": 23503 - }, - { - "epoch": 3.785136277627924, - "grad_norm": 0.001077381893992424, - "learning_rate": 0.00019999293630836197, - "loss": 46.0, - "step": 23504 - }, - { - "epoch": 3.7852973147067113, - "grad_norm": 0.010049964301288128, - "learning_rate": 0.0001999929357070375, - "loss": 46.0, - "step": 23505 - }, - { - "epoch": 3.7854583517854987, - "grad_norm": 0.004013793542981148, - "learning_rate": 0.00019999293510568743, - "loss": 46.0, - "step": 23506 - }, - { - "epoch": 3.785619388864286, - "grad_norm": 0.004395932424813509, - "learning_rate": 0.0001999929345043117, - "loss": 46.0, - "step": 23507 - }, - { - "epoch": 3.7857804259430736, - "grad_norm": 0.0012676736805588007, - "learning_rate": 0.00019999293390291045, - "loss": 46.0, - "step": 23508 - }, - { - "epoch": 3.7859414630218606, - "grad_norm": 0.0021775392815470695, - "learning_rate": 0.00019999293330148361, - "loss": 46.0, - "step": 23509 - }, - { - "epoch": 3.786102500100648, - "grad_norm": 0.0024661454372107983, - "learning_rate": 0.00019999293270003113, - "loss": 46.0, - "step": 23510 - }, - { - "epoch": 3.7862635371794355, - "grad_norm": 0.009182114154100418, - "learning_rate": 0.0001999929320985531, - "loss": 46.0, - "step": 23511 - }, - { - "epoch": 3.786424574258223, - "grad_norm": 0.0041510877199471, - "learning_rate": 0.00019999293149704946, - "loss": 46.0, - "step": 23512 - }, - { - "epoch": 3.7865856113370104, - "grad_norm": 0.010162005200982094, - "learning_rate": 0.0001999929308955202, - "loss": 46.0, - "step": 23513 - }, - { - "epoch": 3.786746648415798, - "grad_norm": 0.0013867107918486, - "learning_rate": 0.00019999293029396536, - "loss": 46.0, - "step": 23514 - }, - { - "epoch": 3.786907685494585, - "grad_norm": 0.001820576493628323, - "learning_rate": 0.00019999292969238494, - "loss": 46.0, - "step": 23515 - }, - { - "epoch": 3.7870687225733723, - "grad_norm": 0.01022383477538824, - "learning_rate": 0.00019999292909077894, - "loss": 46.0, - "step": 23516 - }, - { - "epoch": 3.78722975965216, - "grad_norm": 0.008014820516109467, - "learning_rate": 0.00019999292848914735, - "loss": 46.0, - "step": 23517 - }, - { - "epoch": 3.7873907967309473, - "grad_norm": 0.001742486609145999, - "learning_rate": 0.00019999292788749014, - "loss": 46.0, - "step": 23518 - }, - { - "epoch": 3.7875518338097347, - "grad_norm": 0.0037293757777661085, - "learning_rate": 0.00019999292728580735, - "loss": 46.0, - "step": 23519 - }, - { - "epoch": 3.787712870888522, - "grad_norm": 0.005462685599923134, - "learning_rate": 0.00019999292668409897, - "loss": 46.0, - "step": 23520 - }, - { - "epoch": 3.7878739079673096, - "grad_norm": 0.019822850823402405, - "learning_rate": 0.00019999292608236497, - "loss": 46.0, - "step": 23521 - }, - { - "epoch": 3.788034945046097, - "grad_norm": 0.0037660361267626286, - "learning_rate": 0.0001999929254806054, - "loss": 46.0, - "step": 23522 - }, - { - "epoch": 3.7881959821248845, - "grad_norm": 0.008371307514607906, - "learning_rate": 0.00019999292487882025, - "loss": 46.0, - "step": 23523 - }, - { - "epoch": 3.7883570192036715, - "grad_norm": 0.0005686399526894093, - "learning_rate": 0.00019999292427700947, - "loss": 46.0, - "step": 23524 - }, - { - "epoch": 3.788518056282459, - "grad_norm": 0.005702432710677385, - "learning_rate": 0.00019999292367517312, - "loss": 46.0, - "step": 23525 - }, - { - "epoch": 3.7886790933612464, - "grad_norm": 0.004375653341412544, - "learning_rate": 0.00019999292307331117, - "loss": 46.0, - "step": 23526 - }, - { - "epoch": 3.788840130440034, - "grad_norm": 0.0016482442151755095, - "learning_rate": 0.00019999292247142365, - "loss": 46.0, - "step": 23527 - }, - { - "epoch": 3.7890011675188213, - "grad_norm": 0.007545525208115578, - "learning_rate": 0.00019999292186951052, - "loss": 46.0, - "step": 23528 - }, - { - "epoch": 3.7891622045976083, - "grad_norm": 0.00484321266412735, - "learning_rate": 0.0001999929212675718, - "loss": 46.0, - "step": 23529 - }, - { - "epoch": 3.789323241676396, - "grad_norm": 0.006865546572953463, - "learning_rate": 0.00019999292066560746, - "loss": 46.0, - "step": 23530 - }, - { - "epoch": 3.7894842787551832, - "grad_norm": 0.0033078778069466352, - "learning_rate": 0.00019999292006361757, - "loss": 46.0, - "step": 23531 - }, - { - "epoch": 3.7896453158339707, - "grad_norm": 0.0017028943402692676, - "learning_rate": 0.00019999291946160206, - "loss": 46.0, - "step": 23532 - }, - { - "epoch": 3.789806352912758, - "grad_norm": 0.0033133940305560827, - "learning_rate": 0.00019999291885956094, - "loss": 46.0, - "step": 23533 - }, - { - "epoch": 3.7899673899915456, - "grad_norm": 0.009623431600630283, - "learning_rate": 0.00019999291825749426, - "loss": 46.0, - "step": 23534 - }, - { - "epoch": 3.790128427070333, - "grad_norm": 0.0015019259881228209, - "learning_rate": 0.000199992917655402, - "loss": 46.0, - "step": 23535 - }, - { - "epoch": 3.7902894641491205, - "grad_norm": 0.0028213916812092066, - "learning_rate": 0.0001999929170532841, - "loss": 46.0, - "step": 23536 - }, - { - "epoch": 3.790450501227908, - "grad_norm": 0.013229393400251865, - "learning_rate": 0.00019999291645114064, - "loss": 46.0, - "step": 23537 - }, - { - "epoch": 3.7906115383066954, - "grad_norm": 0.005452876444905996, - "learning_rate": 0.00019999291584897158, - "loss": 46.0, - "step": 23538 - }, - { - "epoch": 3.7907725753854824, - "grad_norm": 0.006903988774865866, - "learning_rate": 0.0001999929152467769, - "loss": 46.0, - "step": 23539 - }, - { - "epoch": 3.79093361246427, - "grad_norm": 0.0015960269374772906, - "learning_rate": 0.00019999291464455667, - "loss": 46.0, - "step": 23540 - }, - { - "epoch": 3.7910946495430573, - "grad_norm": 0.0014241775497794151, - "learning_rate": 0.00019999291404231083, - "loss": 46.0, - "step": 23541 - }, - { - "epoch": 3.7912556866218448, - "grad_norm": 0.0013796341372653842, - "learning_rate": 0.0001999929134400394, - "loss": 46.0, - "step": 23542 - }, - { - "epoch": 3.791416723700632, - "grad_norm": 0.006222702097147703, - "learning_rate": 0.00019999291283774237, - "loss": 46.0, - "step": 23543 - }, - { - "epoch": 3.791577760779419, - "grad_norm": 0.0026718589942902327, - "learning_rate": 0.00019999291223541974, - "loss": 46.0, - "step": 23544 - }, - { - "epoch": 3.7917387978582067, - "grad_norm": 0.005570186302065849, - "learning_rate": 0.00019999291163307151, - "loss": 46.0, - "step": 23545 - }, - { - "epoch": 3.791899834936994, - "grad_norm": 0.0015155143337324262, - "learning_rate": 0.00019999291103069773, - "loss": 46.0, - "step": 23546 - }, - { - "epoch": 3.7920608720157816, - "grad_norm": 0.004810272250324488, - "learning_rate": 0.00019999291042829834, - "loss": 46.0, - "step": 23547 - }, - { - "epoch": 3.792221909094569, - "grad_norm": 0.0016728319460526109, - "learning_rate": 0.00019999290982587332, - "loss": 46.0, - "step": 23548 - }, - { - "epoch": 3.7923829461733565, - "grad_norm": 0.002594210673123598, - "learning_rate": 0.00019999290922342273, - "loss": 46.0, - "step": 23549 - }, - { - "epoch": 3.792543983252144, - "grad_norm": 0.00584800262004137, - "learning_rate": 0.00019999290862094657, - "loss": 46.0, - "step": 23550 - }, - { - "epoch": 3.7927050203309314, - "grad_norm": 0.0037820106372237206, - "learning_rate": 0.0001999929080184448, - "loss": 46.0, - "step": 23551 - }, - { - "epoch": 3.792866057409719, - "grad_norm": 0.0065300436690449715, - "learning_rate": 0.00019999290741591744, - "loss": 46.0, - "step": 23552 - }, - { - "epoch": 3.793027094488506, - "grad_norm": 0.002413803245872259, - "learning_rate": 0.0001999929068133645, - "loss": 46.0, - "step": 23553 - }, - { - "epoch": 3.7931881315672933, - "grad_norm": 0.0037758895196020603, - "learning_rate": 0.00019999290621078593, - "loss": 46.0, - "step": 23554 - }, - { - "epoch": 3.7933491686460807, - "grad_norm": 0.0014778823824599385, - "learning_rate": 0.00019999290560818178, - "loss": 46.0, - "step": 23555 - }, - { - "epoch": 3.793510205724868, - "grad_norm": 0.0028374451212584972, - "learning_rate": 0.00019999290500555204, - "loss": 46.0, - "step": 23556 - }, - { - "epoch": 3.7936712428036556, - "grad_norm": 0.003306510392576456, - "learning_rate": 0.0001999929044028967, - "loss": 46.0, - "step": 23557 - }, - { - "epoch": 3.793832279882443, - "grad_norm": 0.0024255476891994476, - "learning_rate": 0.00019999290380021579, - "loss": 46.0, - "step": 23558 - }, - { - "epoch": 3.79399331696123, - "grad_norm": 0.005692268721759319, - "learning_rate": 0.00019999290319750926, - "loss": 46.0, - "step": 23559 - }, - { - "epoch": 3.7941543540400176, - "grad_norm": 0.003182213054969907, - "learning_rate": 0.00019999290259477718, - "loss": 46.0, - "step": 23560 - }, - { - "epoch": 3.794315391118805, - "grad_norm": 0.004343269858509302, - "learning_rate": 0.00019999290199201948, - "loss": 46.0, - "step": 23561 - }, - { - "epoch": 3.7944764281975925, - "grad_norm": 0.001727725611999631, - "learning_rate": 0.00019999290138923617, - "loss": 46.0, - "step": 23562 - }, - { - "epoch": 3.79463746527638, - "grad_norm": 0.0019260860281065106, - "learning_rate": 0.00019999290078642727, - "loss": 46.0, - "step": 23563 - }, - { - "epoch": 3.7947985023551674, - "grad_norm": 0.008498795330524445, - "learning_rate": 0.00019999290018359278, - "loss": 46.0, - "step": 23564 - }, - { - "epoch": 3.794959539433955, - "grad_norm": 0.004467620514333248, - "learning_rate": 0.0001999928995807327, - "loss": 46.0, - "step": 23565 - }, - { - "epoch": 3.7951205765127423, - "grad_norm": 0.008226759731769562, - "learning_rate": 0.00019999289897784704, - "loss": 46.0, - "step": 23566 - }, - { - "epoch": 3.7952816135915297, - "grad_norm": 0.004658955615013838, - "learning_rate": 0.00019999289837493577, - "loss": 46.0, - "step": 23567 - }, - { - "epoch": 3.7954426506703167, - "grad_norm": 0.0036381580866873264, - "learning_rate": 0.00019999289777199893, - "loss": 46.0, - "step": 23568 - }, - { - "epoch": 3.795603687749104, - "grad_norm": 0.001321410178206861, - "learning_rate": 0.00019999289716903648, - "loss": 46.0, - "step": 23569 - }, - { - "epoch": 3.7957647248278916, - "grad_norm": 0.0018899224232882261, - "learning_rate": 0.00019999289656604845, - "loss": 46.0, - "step": 23570 - }, - { - "epoch": 3.795925761906679, - "grad_norm": 0.008362204767763615, - "learning_rate": 0.00019999289596303482, - "loss": 46.0, - "step": 23571 - }, - { - "epoch": 3.7960867989854665, - "grad_norm": 0.001967393094673753, - "learning_rate": 0.00019999289535999558, - "loss": 46.0, - "step": 23572 - }, - { - "epoch": 3.7962478360642535, - "grad_norm": 0.020419172942638397, - "learning_rate": 0.00019999289475693078, - "loss": 46.0, - "step": 23573 - }, - { - "epoch": 3.796408873143041, - "grad_norm": 0.007248731795698404, - "learning_rate": 0.00019999289415384034, - "loss": 46.0, - "step": 23574 - }, - { - "epoch": 3.7965699102218284, - "grad_norm": 0.0038336936850100756, - "learning_rate": 0.00019999289355072434, - "loss": 46.0, - "step": 23575 - }, - { - "epoch": 3.796730947300616, - "grad_norm": 0.0012371959164738655, - "learning_rate": 0.00019999289294758276, - "loss": 46.0, - "step": 23576 - }, - { - "epoch": 3.7968919843794033, - "grad_norm": 0.005969126243144274, - "learning_rate": 0.00019999289234441556, - "loss": 46.0, - "step": 23577 - }, - { - "epoch": 3.797053021458191, - "grad_norm": 0.001394742401316762, - "learning_rate": 0.00019999289174122277, - "loss": 46.0, - "step": 23578 - }, - { - "epoch": 3.7972140585369782, - "grad_norm": 0.006398888770490885, - "learning_rate": 0.00019999289113800442, - "loss": 46.0, - "step": 23579 - }, - { - "epoch": 3.7973750956157657, - "grad_norm": 0.002249662997201085, - "learning_rate": 0.00019999289053476043, - "loss": 46.0, - "step": 23580 - }, - { - "epoch": 3.797536132694553, - "grad_norm": 0.011105634272098541, - "learning_rate": 0.00019999288993149085, - "loss": 46.0, - "step": 23581 - }, - { - "epoch": 3.79769716977334, - "grad_norm": 0.004338633269071579, - "learning_rate": 0.0001999928893281957, - "loss": 46.0, - "step": 23582 - }, - { - "epoch": 3.7978582068521276, - "grad_norm": 0.002980279503390193, - "learning_rate": 0.00019999288872487496, - "loss": 46.0, - "step": 23583 - }, - { - "epoch": 3.798019243930915, - "grad_norm": 0.002213576342910528, - "learning_rate": 0.00019999288812152862, - "loss": 46.0, - "step": 23584 - }, - { - "epoch": 3.7981802810097025, - "grad_norm": 0.0026220022700726986, - "learning_rate": 0.0001999928875181567, - "loss": 46.0, - "step": 23585 - }, - { - "epoch": 3.79834131808849, - "grad_norm": 0.003433849662542343, - "learning_rate": 0.00019999288691475916, - "loss": 46.0, - "step": 23586 - }, - { - "epoch": 3.7985023551672774, - "grad_norm": 0.006517481058835983, - "learning_rate": 0.00019999288631133603, - "loss": 46.0, - "step": 23587 - }, - { - "epoch": 3.7986633922460644, - "grad_norm": 0.0023642985615879297, - "learning_rate": 0.00019999288570788731, - "loss": 46.0, - "step": 23588 - }, - { - "epoch": 3.798824429324852, - "grad_norm": 0.003381990361958742, - "learning_rate": 0.000199992885104413, - "loss": 46.0, - "step": 23589 - }, - { - "epoch": 3.7989854664036393, - "grad_norm": 0.0054267458617687225, - "learning_rate": 0.00019999288450091312, - "loss": 46.0, - "step": 23590 - }, - { - "epoch": 3.7991465034824268, - "grad_norm": 0.0018450272036716342, - "learning_rate": 0.00019999288389738762, - "loss": 46.0, - "step": 23591 - }, - { - "epoch": 3.7993075405612142, - "grad_norm": 0.0011486992007121444, - "learning_rate": 0.00019999288329383653, - "loss": 46.0, - "step": 23592 - }, - { - "epoch": 3.7994685776400017, - "grad_norm": 0.0042379009537398815, - "learning_rate": 0.00019999288269025985, - "loss": 46.0, - "step": 23593 - }, - { - "epoch": 3.799629614718789, - "grad_norm": 0.004558446817100048, - "learning_rate": 0.0001999928820866576, - "loss": 46.0, - "step": 23594 - }, - { - "epoch": 3.7997906517975766, - "grad_norm": 0.0015566786751151085, - "learning_rate": 0.00019999288148302974, - "loss": 46.0, - "step": 23595 - }, - { - "epoch": 3.799951688876364, - "grad_norm": 0.0025924034416675568, - "learning_rate": 0.00019999288087937624, - "loss": 46.0, - "step": 23596 - }, - { - "epoch": 3.800112725955151, - "grad_norm": 0.008190043270587921, - "learning_rate": 0.00019999288027569722, - "loss": 46.0, - "step": 23597 - }, - { - "epoch": 3.8002737630339385, - "grad_norm": 0.00351817742921412, - "learning_rate": 0.00019999287967199255, - "loss": 46.0, - "step": 23598 - }, - { - "epoch": 3.800434800112726, - "grad_norm": 0.001109899254515767, - "learning_rate": 0.00019999287906826232, - "loss": 46.0, - "step": 23599 - }, - { - "epoch": 3.8005958371915134, - "grad_norm": 0.0076414719223976135, - "learning_rate": 0.00019999287846450648, - "loss": 46.0, - "step": 23600 - }, - { - "epoch": 3.800756874270301, - "grad_norm": 0.0014270510291680694, - "learning_rate": 0.00019999287786072505, - "loss": 46.0, - "step": 23601 - }, - { - "epoch": 3.800917911349088, - "grad_norm": 0.003094103652983904, - "learning_rate": 0.00019999287725691803, - "loss": 46.0, - "step": 23602 - }, - { - "epoch": 3.8010789484278753, - "grad_norm": 0.008699733763933182, - "learning_rate": 0.00019999287665308543, - "loss": 46.0, - "step": 23603 - }, - { - "epoch": 3.8012399855066628, - "grad_norm": 0.004201273433864117, - "learning_rate": 0.00019999287604922724, - "loss": 46.0, - "step": 23604 - }, - { - "epoch": 3.80140102258545, - "grad_norm": 0.00544446986168623, - "learning_rate": 0.0001999928754453434, - "loss": 46.0, - "step": 23605 - }, - { - "epoch": 3.8015620596642377, - "grad_norm": 0.0029939047526568174, - "learning_rate": 0.00019999287484143404, - "loss": 46.0, - "step": 23606 - }, - { - "epoch": 3.801723096743025, - "grad_norm": 0.004269227851182222, - "learning_rate": 0.00019999287423749906, - "loss": 46.0, - "step": 23607 - }, - { - "epoch": 3.8018841338218126, - "grad_norm": 0.008536243811249733, - "learning_rate": 0.0001999928736335385, - "loss": 46.0, - "step": 23608 - }, - { - "epoch": 3.8020451709006, - "grad_norm": 0.0010491583961993456, - "learning_rate": 0.00019999287302955232, - "loss": 46.0, - "step": 23609 - }, - { - "epoch": 3.8022062079793875, - "grad_norm": 0.0008426276035606861, - "learning_rate": 0.00019999287242554057, - "loss": 46.0, - "step": 23610 - }, - { - "epoch": 3.802367245058175, - "grad_norm": 0.0027947211638092995, - "learning_rate": 0.00019999287182150322, - "loss": 46.0, - "step": 23611 - }, - { - "epoch": 3.802528282136962, - "grad_norm": 0.0009417349356226623, - "learning_rate": 0.00019999287121744025, - "loss": 46.0, - "step": 23612 - }, - { - "epoch": 3.8026893192157494, - "grad_norm": 0.003538094460964203, - "learning_rate": 0.00019999287061335172, - "loss": 46.0, - "step": 23613 - }, - { - "epoch": 3.802850356294537, - "grad_norm": 0.0024768486618995667, - "learning_rate": 0.00019999287000923758, - "loss": 46.0, - "step": 23614 - }, - { - "epoch": 3.8030113933733243, - "grad_norm": 0.004929191432893276, - "learning_rate": 0.00019999286940509785, - "loss": 46.0, - "step": 23615 - }, - { - "epoch": 3.8031724304521117, - "grad_norm": 0.0013620893005281687, - "learning_rate": 0.00019999286880093253, - "loss": 46.0, - "step": 23616 - }, - { - "epoch": 3.8033334675308987, - "grad_norm": 0.0025519547052681446, - "learning_rate": 0.00019999286819674162, - "loss": 46.0, - "step": 23617 - }, - { - "epoch": 3.803494504609686, - "grad_norm": 0.002054918324574828, - "learning_rate": 0.0001999928675925251, - "loss": 46.0, - "step": 23618 - }, - { - "epoch": 3.8036555416884736, - "grad_norm": 0.007278750650584698, - "learning_rate": 0.00019999286698828302, - "loss": 46.0, - "step": 23619 - }, - { - "epoch": 3.803816578767261, - "grad_norm": 0.01253379974514246, - "learning_rate": 0.00019999286638401533, - "loss": 46.0, - "step": 23620 - }, - { - "epoch": 3.8039776158460485, - "grad_norm": 0.0026233820244669914, - "learning_rate": 0.00019999286577972205, - "loss": 46.0, - "step": 23621 - }, - { - "epoch": 3.804138652924836, - "grad_norm": 0.00379826407879591, - "learning_rate": 0.00019999286517540315, - "loss": 46.0, - "step": 23622 - }, - { - "epoch": 3.8042996900036234, - "grad_norm": 0.0019124912796542048, - "learning_rate": 0.0001999928645710587, - "loss": 46.0, - "step": 23623 - }, - { - "epoch": 3.804460727082411, - "grad_norm": 0.009216277860105038, - "learning_rate": 0.00019999286396668863, - "loss": 46.0, - "step": 23624 - }, - { - "epoch": 3.8046217641611983, - "grad_norm": 0.003284048056229949, - "learning_rate": 0.00019999286336229297, - "loss": 46.0, - "step": 23625 - }, - { - "epoch": 3.8047828012399854, - "grad_norm": 0.004107843153178692, - "learning_rate": 0.00019999286275787173, - "loss": 46.0, - "step": 23626 - }, - { - "epoch": 3.804943838318773, - "grad_norm": 0.005023350939154625, - "learning_rate": 0.00019999286215342487, - "loss": 46.0, - "step": 23627 - }, - { - "epoch": 3.8051048753975603, - "grad_norm": 0.020202776417136192, - "learning_rate": 0.00019999286154895245, - "loss": 46.0, - "step": 23628 - }, - { - "epoch": 3.8052659124763477, - "grad_norm": 0.004103577695786953, - "learning_rate": 0.0001999928609444544, - "loss": 46.0, - "step": 23629 - }, - { - "epoch": 3.805426949555135, - "grad_norm": 0.0029470762237906456, - "learning_rate": 0.0001999928603399308, - "loss": 46.0, - "step": 23630 - }, - { - "epoch": 3.8055879866339226, - "grad_norm": 0.0005229293019510806, - "learning_rate": 0.00019999285973538157, - "loss": 46.0, - "step": 23631 - }, - { - "epoch": 3.8057490237127096, - "grad_norm": 0.002074221847578883, - "learning_rate": 0.00019999285913080677, - "loss": 46.0, - "step": 23632 - }, - { - "epoch": 3.805910060791497, - "grad_norm": 0.0034410192165523767, - "learning_rate": 0.00019999285852620636, - "loss": 46.0, - "step": 23633 - }, - { - "epoch": 3.8060710978702845, - "grad_norm": 0.007800699677318335, - "learning_rate": 0.00019999285792158037, - "loss": 46.0, - "step": 23634 - }, - { - "epoch": 3.806232134949072, - "grad_norm": 0.0005563221056945622, - "learning_rate": 0.00019999285731692878, - "loss": 46.0, - "step": 23635 - }, - { - "epoch": 3.8063931720278594, - "grad_norm": 0.008285616524517536, - "learning_rate": 0.0001999928567122516, - "loss": 46.0, - "step": 23636 - }, - { - "epoch": 3.806554209106647, - "grad_norm": 0.0011474619386717677, - "learning_rate": 0.0001999928561075488, - "loss": 46.0, - "step": 23637 - }, - { - "epoch": 3.8067152461854343, - "grad_norm": 0.008062407374382019, - "learning_rate": 0.00019999285550282046, - "loss": 46.0, - "step": 23638 - }, - { - "epoch": 3.806876283264222, - "grad_norm": 0.002201765775680542, - "learning_rate": 0.0001999928548980665, - "loss": 46.0, - "step": 23639 - }, - { - "epoch": 3.8070373203430092, - "grad_norm": 0.001507507637143135, - "learning_rate": 0.00019999285429328693, - "loss": 46.0, - "step": 23640 - }, - { - "epoch": 3.8071983574217962, - "grad_norm": 0.0014076469233259559, - "learning_rate": 0.0001999928536884818, - "loss": 46.0, - "step": 23641 - }, - { - "epoch": 3.8073593945005837, - "grad_norm": 0.0054513937793672085, - "learning_rate": 0.00019999285308365105, - "loss": 46.0, - "step": 23642 - }, - { - "epoch": 3.807520431579371, - "grad_norm": 0.005643813870847225, - "learning_rate": 0.00019999285247879471, - "loss": 46.0, - "step": 23643 - }, - { - "epoch": 3.8076814686581586, - "grad_norm": 0.007829139940440655, - "learning_rate": 0.0001999928518739128, - "loss": 46.0, - "step": 23644 - }, - { - "epoch": 3.807842505736946, - "grad_norm": 0.0011677708243951201, - "learning_rate": 0.00019999285126900528, - "loss": 46.0, - "step": 23645 - }, - { - "epoch": 3.808003542815733, - "grad_norm": 0.0022964433301240206, - "learning_rate": 0.00019999285066407216, - "loss": 46.0, - "step": 23646 - }, - { - "epoch": 3.8081645798945205, - "grad_norm": 0.001790171256288886, - "learning_rate": 0.00019999285005911345, - "loss": 46.0, - "step": 23647 - }, - { - "epoch": 3.808325616973308, - "grad_norm": 0.0019000850152224302, - "learning_rate": 0.00019999284945412918, - "loss": 46.0, - "step": 23648 - }, - { - "epoch": 3.8084866540520954, - "grad_norm": 0.0055053322575986385, - "learning_rate": 0.00019999284884911927, - "loss": 46.0, - "step": 23649 - }, - { - "epoch": 3.808647691130883, - "grad_norm": 0.002368929563090205, - "learning_rate": 0.0001999928482440838, - "loss": 46.0, - "step": 23650 - }, - { - "epoch": 3.8088087282096703, - "grad_norm": 0.004623609595000744, - "learning_rate": 0.0001999928476390227, - "loss": 46.0, - "step": 23651 - }, - { - "epoch": 3.8089697652884578, - "grad_norm": 0.006825034040957689, - "learning_rate": 0.00019999284703393607, - "loss": 46.0, - "step": 23652 - }, - { - "epoch": 3.809130802367245, - "grad_norm": 0.0015381784178316593, - "learning_rate": 0.00019999284642882378, - "loss": 46.0, - "step": 23653 - }, - { - "epoch": 3.8092918394460327, - "grad_norm": 0.007325910031795502, - "learning_rate": 0.00019999284582368593, - "loss": 46.0, - "step": 23654 - }, - { - "epoch": 3.8094528765248197, - "grad_norm": 0.014854847453534603, - "learning_rate": 0.0001999928452185225, - "loss": 46.0, - "step": 23655 - }, - { - "epoch": 3.809613913603607, - "grad_norm": 0.0021445502061396837, - "learning_rate": 0.00019999284461333345, - "loss": 46.0, - "step": 23656 - }, - { - "epoch": 3.8097749506823946, - "grad_norm": 0.005499368533492088, - "learning_rate": 0.0001999928440081188, - "loss": 46.0, - "step": 23657 - }, - { - "epoch": 3.809935987761182, - "grad_norm": 0.0010063834488391876, - "learning_rate": 0.00019999284340287856, - "loss": 46.0, - "step": 23658 - }, - { - "epoch": 3.8100970248399695, - "grad_norm": 0.0063238199800252914, - "learning_rate": 0.00019999284279761278, - "loss": 46.0, - "step": 23659 - }, - { - "epoch": 3.810258061918757, - "grad_norm": 0.0030097351409494877, - "learning_rate": 0.00019999284219232135, - "loss": 46.0, - "step": 23660 - }, - { - "epoch": 3.810419098997544, - "grad_norm": 0.003349976846948266, - "learning_rate": 0.00019999284158700434, - "loss": 46.0, - "step": 23661 - }, - { - "epoch": 3.8105801360763314, - "grad_norm": 0.0038509576115757227, - "learning_rate": 0.00019999284098166174, - "loss": 46.0, - "step": 23662 - }, - { - "epoch": 3.810741173155119, - "grad_norm": 0.006076053716242313, - "learning_rate": 0.00019999284037629353, - "loss": 46.0, - "step": 23663 - }, - { - "epoch": 3.8109022102339063, - "grad_norm": 0.00255609885789454, - "learning_rate": 0.00019999283977089976, - "loss": 46.0, - "step": 23664 - }, - { - "epoch": 3.8110632473126937, - "grad_norm": 0.005714502651244402, - "learning_rate": 0.00019999283916548037, - "loss": 46.0, - "step": 23665 - }, - { - "epoch": 3.811224284391481, - "grad_norm": 0.0011660543968901038, - "learning_rate": 0.00019999283856003542, - "loss": 46.0, - "step": 23666 - }, - { - "epoch": 3.8113853214702687, - "grad_norm": 0.005012121517211199, - "learning_rate": 0.00019999283795456486, - "loss": 46.0, - "step": 23667 - }, - { - "epoch": 3.811546358549056, - "grad_norm": 0.0010740929283201694, - "learning_rate": 0.00019999283734906868, - "loss": 46.0, - "step": 23668 - }, - { - "epoch": 3.8117073956278436, - "grad_norm": 0.0013253248762339354, - "learning_rate": 0.00019999283674354695, - "loss": 46.0, - "step": 23669 - }, - { - "epoch": 3.8118684327066306, - "grad_norm": 0.0023376839235424995, - "learning_rate": 0.0001999928361379996, - "loss": 46.0, - "step": 23670 - }, - { - "epoch": 3.812029469785418, - "grad_norm": 0.007657058071345091, - "learning_rate": 0.00019999283553242666, - "loss": 46.0, - "step": 23671 - }, - { - "epoch": 3.8121905068642055, - "grad_norm": 0.004085401073098183, - "learning_rate": 0.00019999283492682813, - "loss": 46.0, - "step": 23672 - }, - { - "epoch": 3.812351543942993, - "grad_norm": 0.0011746814707294106, - "learning_rate": 0.000199992834321204, - "loss": 46.0, - "step": 23673 - }, - { - "epoch": 3.8125125810217804, - "grad_norm": 0.004453752189874649, - "learning_rate": 0.0001999928337155543, - "loss": 46.0, - "step": 23674 - }, - { - "epoch": 3.812673618100568, - "grad_norm": 0.01846867799758911, - "learning_rate": 0.00019999283310987898, - "loss": 46.0, - "step": 23675 - }, - { - "epoch": 3.812834655179355, - "grad_norm": 0.0049817850813269615, - "learning_rate": 0.00019999283250417805, - "loss": 46.0, - "step": 23676 - }, - { - "epoch": 3.8129956922581423, - "grad_norm": 0.002020810265094042, - "learning_rate": 0.00019999283189845157, - "loss": 46.0, - "step": 23677 - }, - { - "epoch": 3.8131567293369297, - "grad_norm": 0.0018876423127949238, - "learning_rate": 0.0001999928312926995, - "loss": 46.0, - "step": 23678 - }, - { - "epoch": 3.813317766415717, - "grad_norm": 0.005340597592294216, - "learning_rate": 0.0001999928306869218, - "loss": 46.0, - "step": 23679 - }, - { - "epoch": 3.8134788034945046, - "grad_norm": 0.0054100798442959785, - "learning_rate": 0.00019999283008111853, - "loss": 46.0, - "step": 23680 - }, - { - "epoch": 3.813639840573292, - "grad_norm": 0.0010715537937358022, - "learning_rate": 0.00019999282947528966, - "loss": 46.0, - "step": 23681 - }, - { - "epoch": 3.8138008776520795, - "grad_norm": 0.005323282442986965, - "learning_rate": 0.0001999928288694352, - "loss": 46.0, - "step": 23682 - }, - { - "epoch": 3.813961914730867, - "grad_norm": 0.016577554866671562, - "learning_rate": 0.00019999282826355515, - "loss": 46.0, - "step": 23683 - }, - { - "epoch": 3.8141229518096544, - "grad_norm": 0.005348172504454851, - "learning_rate": 0.0001999928276576495, - "loss": 46.0, - "step": 23684 - }, - { - "epoch": 3.8142839888884414, - "grad_norm": 0.004378584213554859, - "learning_rate": 0.00019999282705171828, - "loss": 46.0, - "step": 23685 - }, - { - "epoch": 3.814445025967229, - "grad_norm": 0.002704095793887973, - "learning_rate": 0.00019999282644576143, - "loss": 46.0, - "step": 23686 - }, - { - "epoch": 3.8146060630460163, - "grad_norm": 0.003326993901282549, - "learning_rate": 0.00019999282583977901, - "loss": 46.0, - "step": 23687 - }, - { - "epoch": 3.814767100124804, - "grad_norm": 0.006455174647271633, - "learning_rate": 0.00019999282523377099, - "loss": 46.0, - "step": 23688 - }, - { - "epoch": 3.8149281372035913, - "grad_norm": 0.010551992803812027, - "learning_rate": 0.0001999928246277374, - "loss": 46.0, - "step": 23689 - }, - { - "epoch": 3.8150891742823783, - "grad_norm": 0.0036160375457257032, - "learning_rate": 0.00019999282402167817, - "loss": 46.0, - "step": 23690 - }, - { - "epoch": 3.8152502113611657, - "grad_norm": 0.0015594776486977935, - "learning_rate": 0.00019999282341559335, - "loss": 46.0, - "step": 23691 - }, - { - "epoch": 3.815411248439953, - "grad_norm": 0.0038541185203939676, - "learning_rate": 0.00019999282280948297, - "loss": 46.0, - "step": 23692 - }, - { - "epoch": 3.8155722855187406, - "grad_norm": 0.0035496940836310387, - "learning_rate": 0.000199992822203347, - "loss": 46.0, - "step": 23693 - }, - { - "epoch": 3.815733322597528, - "grad_norm": 0.0014210037188604474, - "learning_rate": 0.00019999282159718543, - "loss": 46.0, - "step": 23694 - }, - { - "epoch": 3.8158943596763155, - "grad_norm": 0.00115022377576679, - "learning_rate": 0.00019999282099099824, - "loss": 46.0, - "step": 23695 - }, - { - "epoch": 3.816055396755103, - "grad_norm": 0.0019606153946369886, - "learning_rate": 0.0001999928203847855, - "loss": 46.0, - "step": 23696 - }, - { - "epoch": 3.8162164338338904, - "grad_norm": 0.001526747248135507, - "learning_rate": 0.00019999281977854715, - "loss": 46.0, - "step": 23697 - }, - { - "epoch": 3.816377470912678, - "grad_norm": 0.003063860582187772, - "learning_rate": 0.00019999281917228317, - "loss": 46.0, - "step": 23698 - }, - { - "epoch": 3.816538507991465, - "grad_norm": 0.001384721021167934, - "learning_rate": 0.00019999281856599363, - "loss": 46.0, - "step": 23699 - }, - { - "epoch": 3.8166995450702523, - "grad_norm": 0.008011037483811378, - "learning_rate": 0.0001999928179596785, - "loss": 46.0, - "step": 23700 - }, - { - "epoch": 3.81686058214904, - "grad_norm": 0.0017826680559664965, - "learning_rate": 0.00019999281735333778, - "loss": 46.0, - "step": 23701 - }, - { - "epoch": 3.8170216192278272, - "grad_norm": 0.0010142020182684064, - "learning_rate": 0.00019999281674697142, - "loss": 46.0, - "step": 23702 - }, - { - "epoch": 3.8171826563066147, - "grad_norm": 0.0013554822653532028, - "learning_rate": 0.0001999928161405795, - "loss": 46.0, - "step": 23703 - }, - { - "epoch": 3.817343693385402, - "grad_norm": 0.001434633624739945, - "learning_rate": 0.00019999281553416203, - "loss": 46.0, - "step": 23704 - }, - { - "epoch": 3.817504730464189, - "grad_norm": 0.005213681608438492, - "learning_rate": 0.00019999281492771894, - "loss": 46.0, - "step": 23705 - }, - { - "epoch": 3.8176657675429766, - "grad_norm": 0.0018418292747810483, - "learning_rate": 0.00019999281432125023, - "loss": 46.0, - "step": 23706 - }, - { - "epoch": 3.817826804621764, - "grad_norm": 0.006393212825059891, - "learning_rate": 0.00019999281371475594, - "loss": 46.0, - "step": 23707 - }, - { - "epoch": 3.8179878417005515, - "grad_norm": 0.0016864193603396416, - "learning_rate": 0.0001999928131082361, - "loss": 46.0, - "step": 23708 - }, - { - "epoch": 3.818148878779339, - "grad_norm": 0.005730402190238237, - "learning_rate": 0.0001999928125016906, - "loss": 46.0, - "step": 23709 - }, - { - "epoch": 3.8183099158581264, - "grad_norm": 0.0013925766106694937, - "learning_rate": 0.00019999281189511954, - "loss": 46.0, - "step": 23710 - }, - { - "epoch": 3.818470952936914, - "grad_norm": 0.005298233591020107, - "learning_rate": 0.00019999281128852287, - "loss": 46.0, - "step": 23711 - }, - { - "epoch": 3.8186319900157013, - "grad_norm": 0.01740388572216034, - "learning_rate": 0.00019999281068190064, - "loss": 46.0, - "step": 23712 - }, - { - "epoch": 3.8187930270944888, - "grad_norm": 0.014286418445408344, - "learning_rate": 0.0001999928100752528, - "loss": 46.0, - "step": 23713 - }, - { - "epoch": 3.8189540641732758, - "grad_norm": 0.002689335960894823, - "learning_rate": 0.00019999280946857932, - "loss": 46.0, - "step": 23714 - }, - { - "epoch": 3.819115101252063, - "grad_norm": 0.0017703790217638016, - "learning_rate": 0.00019999280886188033, - "loss": 46.0, - "step": 23715 - }, - { - "epoch": 3.8192761383308507, - "grad_norm": 0.0038326107896864414, - "learning_rate": 0.0001999928082551557, - "loss": 46.0, - "step": 23716 - }, - { - "epoch": 3.819437175409638, - "grad_norm": 0.00924014300107956, - "learning_rate": 0.00019999280764840548, - "loss": 46.0, - "step": 23717 - }, - { - "epoch": 3.8195982124884256, - "grad_norm": 0.0068095168098807335, - "learning_rate": 0.00019999280704162967, - "loss": 46.0, - "step": 23718 - }, - { - "epoch": 3.8197592495672126, - "grad_norm": 0.004377112258225679, - "learning_rate": 0.00019999280643482825, - "loss": 46.0, - "step": 23719 - }, - { - "epoch": 3.819920286646, - "grad_norm": 0.004944744054228067, - "learning_rate": 0.00019999280582800127, - "loss": 46.0, - "step": 23720 - }, - { - "epoch": 3.8200813237247875, - "grad_norm": 0.0025594928301870823, - "learning_rate": 0.00019999280522114868, - "loss": 46.0, - "step": 23721 - }, - { - "epoch": 3.820242360803575, - "grad_norm": 0.0010990941664204001, - "learning_rate": 0.0001999928046142705, - "loss": 46.0, - "step": 23722 - }, - { - "epoch": 3.8204033978823624, - "grad_norm": 0.0021668653935194016, - "learning_rate": 0.00019999280400736672, - "loss": 46.0, - "step": 23723 - }, - { - "epoch": 3.82056443496115, - "grad_norm": 0.005260012578219175, - "learning_rate": 0.00019999280340043734, - "loss": 46.0, - "step": 23724 - }, - { - "epoch": 3.8207254720399373, - "grad_norm": 0.004265293013304472, - "learning_rate": 0.00019999280279348237, - "loss": 46.0, - "step": 23725 - }, - { - "epoch": 3.8208865091187247, - "grad_norm": 0.0022187649738043547, - "learning_rate": 0.00019999280218650184, - "loss": 46.0, - "step": 23726 - }, - { - "epoch": 3.821047546197512, - "grad_norm": 0.006004838272929192, - "learning_rate": 0.00019999280157949567, - "loss": 46.0, - "step": 23727 - }, - { - "epoch": 3.8212085832762996, - "grad_norm": 0.004536593332886696, - "learning_rate": 0.00019999280097246397, - "loss": 46.0, - "step": 23728 - }, - { - "epoch": 3.8213696203550866, - "grad_norm": 0.0023089649621397257, - "learning_rate": 0.00019999280036540662, - "loss": 46.0, - "step": 23729 - }, - { - "epoch": 3.821530657433874, - "grad_norm": 0.001987896393984556, - "learning_rate": 0.0001999927997583237, - "loss": 46.0, - "step": 23730 - }, - { - "epoch": 3.8216916945126616, - "grad_norm": 0.006554876919835806, - "learning_rate": 0.00019999279915121517, - "loss": 46.0, - "step": 23731 - }, - { - "epoch": 3.821852731591449, - "grad_norm": 0.009937213733792305, - "learning_rate": 0.00019999279854408103, - "loss": 46.0, - "step": 23732 - }, - { - "epoch": 3.8220137686702365, - "grad_norm": 0.0013865112559869885, - "learning_rate": 0.00019999279793692134, - "loss": 46.0, - "step": 23733 - }, - { - "epoch": 3.8221748057490235, - "grad_norm": 0.0072349668480455875, - "learning_rate": 0.00019999279732973605, - "loss": 46.0, - "step": 23734 - }, - { - "epoch": 3.822335842827811, - "grad_norm": 0.006639469880610704, - "learning_rate": 0.00019999279672252516, - "loss": 46.0, - "step": 23735 - }, - { - "epoch": 3.8224968799065984, - "grad_norm": 0.005938095971941948, - "learning_rate": 0.00019999279611528867, - "loss": 46.0, - "step": 23736 - }, - { - "epoch": 3.822657916985386, - "grad_norm": 0.002632892457768321, - "learning_rate": 0.0001999927955080266, - "loss": 46.0, - "step": 23737 - }, - { - "epoch": 3.8228189540641733, - "grad_norm": 0.004561766516417265, - "learning_rate": 0.00019999279490073892, - "loss": 46.0, - "step": 23738 - }, - { - "epoch": 3.8229799911429607, - "grad_norm": 0.0014203216414898634, - "learning_rate": 0.00019999279429342565, - "loss": 46.0, - "step": 23739 - }, - { - "epoch": 3.823141028221748, - "grad_norm": 0.003651438746601343, - "learning_rate": 0.00019999279368608682, - "loss": 46.0, - "step": 23740 - }, - { - "epoch": 3.8233020653005356, - "grad_norm": 0.003425365313887596, - "learning_rate": 0.00019999279307872237, - "loss": 46.0, - "step": 23741 - }, - { - "epoch": 3.823463102379323, - "grad_norm": 0.011294897645711899, - "learning_rate": 0.0001999927924713323, - "loss": 46.0, - "step": 23742 - }, - { - "epoch": 3.82362413945811, - "grad_norm": 0.013482105918228626, - "learning_rate": 0.0001999927918639167, - "loss": 46.0, - "step": 23743 - }, - { - "epoch": 3.8237851765368975, - "grad_norm": 0.0008809698047116399, - "learning_rate": 0.00019999279125647545, - "loss": 46.0, - "step": 23744 - }, - { - "epoch": 3.823946213615685, - "grad_norm": 0.004353642929345369, - "learning_rate": 0.00019999279064900863, - "loss": 46.0, - "step": 23745 - }, - { - "epoch": 3.8241072506944724, - "grad_norm": 0.00181178271304816, - "learning_rate": 0.00019999279004151622, - "loss": 46.0, - "step": 23746 - }, - { - "epoch": 3.82426828777326, - "grad_norm": 0.0008638782892376184, - "learning_rate": 0.00019999278943399823, - "loss": 46.0, - "step": 23747 - }, - { - "epoch": 3.8244293248520473, - "grad_norm": 0.0035788519307971, - "learning_rate": 0.00019999278882645461, - "loss": 46.0, - "step": 23748 - }, - { - "epoch": 3.8245903619308343, - "grad_norm": 0.0019932491704821587, - "learning_rate": 0.00019999278821888542, - "loss": 46.0, - "step": 23749 - }, - { - "epoch": 3.824751399009622, - "grad_norm": 0.005432513076812029, - "learning_rate": 0.00019999278761129063, - "loss": 46.0, - "step": 23750 - }, - { - "epoch": 3.8249124360884093, - "grad_norm": 0.00308479112572968, - "learning_rate": 0.00019999278700367026, - "loss": 46.0, - "step": 23751 - }, - { - "epoch": 3.8250734731671967, - "grad_norm": 0.0024276836775243282, - "learning_rate": 0.00019999278639602427, - "loss": 46.0, - "step": 23752 - }, - { - "epoch": 3.825234510245984, - "grad_norm": 0.0011070839827880263, - "learning_rate": 0.0001999927857883527, - "loss": 46.0, - "step": 23753 - }, - { - "epoch": 3.8253955473247716, - "grad_norm": 0.004445291124284267, - "learning_rate": 0.00019999278518065557, - "loss": 46.0, - "step": 23754 - }, - { - "epoch": 3.825556584403559, - "grad_norm": 0.001863056910224259, - "learning_rate": 0.0001999927845729328, - "loss": 46.0, - "step": 23755 - }, - { - "epoch": 3.8257176214823465, - "grad_norm": 0.001070081372745335, - "learning_rate": 0.00019999278396518446, - "loss": 46.0, - "step": 23756 - }, - { - "epoch": 3.825878658561134, - "grad_norm": 0.002958189230412245, - "learning_rate": 0.0001999927833574105, - "loss": 46.0, - "step": 23757 - }, - { - "epoch": 3.826039695639921, - "grad_norm": 0.0024542456958442926, - "learning_rate": 0.000199992782749611, - "loss": 46.0, - "step": 23758 - }, - { - "epoch": 3.8262007327187084, - "grad_norm": 0.0032551358453929424, - "learning_rate": 0.00019999278214178587, - "loss": 46.0, - "step": 23759 - }, - { - "epoch": 3.826361769797496, - "grad_norm": 0.008647998794913292, - "learning_rate": 0.00019999278153393513, - "loss": 46.0, - "step": 23760 - }, - { - "epoch": 3.8265228068762833, - "grad_norm": 0.00384552706964314, - "learning_rate": 0.00019999278092605884, - "loss": 46.0, - "step": 23761 - }, - { - "epoch": 3.8266838439550708, - "grad_norm": 0.0018242730293422937, - "learning_rate": 0.00019999278031815692, - "loss": 46.0, - "step": 23762 - }, - { - "epoch": 3.826844881033858, - "grad_norm": 0.0024351656902581453, - "learning_rate": 0.00019999277971022945, - "loss": 46.0, - "step": 23763 - }, - { - "epoch": 3.8270059181126452, - "grad_norm": 0.00894754845649004, - "learning_rate": 0.00019999277910227634, - "loss": 46.0, - "step": 23764 - }, - { - "epoch": 3.8271669551914327, - "grad_norm": 0.019980011507868767, - "learning_rate": 0.00019999277849429766, - "loss": 46.0, - "step": 23765 - }, - { - "epoch": 3.82732799227022, - "grad_norm": 0.0037471551913768053, - "learning_rate": 0.00019999277788629338, - "loss": 46.0, - "step": 23766 - }, - { - "epoch": 3.8274890293490076, - "grad_norm": 0.013219069689512253, - "learning_rate": 0.00019999277727826353, - "loss": 46.0, - "step": 23767 - }, - { - "epoch": 3.827650066427795, - "grad_norm": 0.0070031085051596165, - "learning_rate": 0.00019999277667020807, - "loss": 46.0, - "step": 23768 - }, - { - "epoch": 3.8278111035065825, - "grad_norm": 0.005571064539253712, - "learning_rate": 0.00019999277606212702, - "loss": 46.0, - "step": 23769 - }, - { - "epoch": 3.82797214058537, - "grad_norm": 0.0038876717444509268, - "learning_rate": 0.00019999277545402038, - "loss": 46.0, - "step": 23770 - }, - { - "epoch": 3.8281331776641574, - "grad_norm": 0.004201279021799564, - "learning_rate": 0.00019999277484588813, - "loss": 46.0, - "step": 23771 - }, - { - "epoch": 3.8282942147429444, - "grad_norm": 0.0024333479814231396, - "learning_rate": 0.0001999927742377303, - "loss": 46.0, - "step": 23772 - }, - { - "epoch": 3.828455251821732, - "grad_norm": 0.004609811119735241, - "learning_rate": 0.00019999277362954686, - "loss": 46.0, - "step": 23773 - }, - { - "epoch": 3.8286162889005193, - "grad_norm": 0.005001090466976166, - "learning_rate": 0.00019999277302133782, - "loss": 46.0, - "step": 23774 - }, - { - "epoch": 3.8287773259793068, - "grad_norm": 0.002505801385268569, - "learning_rate": 0.00019999277241310322, - "loss": 46.0, - "step": 23775 - }, - { - "epoch": 3.828938363058094, - "grad_norm": 0.014222148805856705, - "learning_rate": 0.000199992771804843, - "loss": 46.0, - "step": 23776 - }, - { - "epoch": 3.8290994001368817, - "grad_norm": 0.0076241218484938145, - "learning_rate": 0.00019999277119655724, - "loss": 46.0, - "step": 23777 - }, - { - "epoch": 3.8292604372156687, - "grad_norm": 0.003420913591980934, - "learning_rate": 0.00019999277058824585, - "loss": 46.0, - "step": 23778 - }, - { - "epoch": 3.829421474294456, - "grad_norm": 0.005837075877934694, - "learning_rate": 0.00019999276997990884, - "loss": 46.0, - "step": 23779 - }, - { - "epoch": 3.8295825113732436, - "grad_norm": 0.005029190797358751, - "learning_rate": 0.0001999927693715463, - "loss": 46.0, - "step": 23780 - }, - { - "epoch": 3.829743548452031, - "grad_norm": 0.0019204431446269155, - "learning_rate": 0.0001999927687631581, - "loss": 46.0, - "step": 23781 - }, - { - "epoch": 3.8299045855308185, - "grad_norm": 0.013321777805685997, - "learning_rate": 0.00019999276815474434, - "loss": 46.0, - "step": 23782 - }, - { - "epoch": 3.830065622609606, - "grad_norm": 0.0017787304241210222, - "learning_rate": 0.00019999276754630502, - "loss": 46.0, - "step": 23783 - }, - { - "epoch": 3.8302266596883934, - "grad_norm": 0.00221823388710618, - "learning_rate": 0.00019999276693784005, - "loss": 46.0, - "step": 23784 - }, - { - "epoch": 3.830387696767181, - "grad_norm": 0.010900537483394146, - "learning_rate": 0.0001999927663293495, - "loss": 46.0, - "step": 23785 - }, - { - "epoch": 3.8305487338459683, - "grad_norm": 0.004711878951638937, - "learning_rate": 0.00019999276572083336, - "loss": 46.0, - "step": 23786 - }, - { - "epoch": 3.8307097709247553, - "grad_norm": 0.010935375466942787, - "learning_rate": 0.00019999276511229163, - "loss": 46.0, - "step": 23787 - }, - { - "epoch": 3.8308708080035427, - "grad_norm": 0.001460511703044176, - "learning_rate": 0.00019999276450372432, - "loss": 46.0, - "step": 23788 - }, - { - "epoch": 3.83103184508233, - "grad_norm": 0.008396551944315434, - "learning_rate": 0.0001999927638951314, - "loss": 46.0, - "step": 23789 - }, - { - "epoch": 3.8311928821611176, - "grad_norm": 0.0016302214935421944, - "learning_rate": 0.0001999927632865129, - "loss": 46.0, - "step": 23790 - }, - { - "epoch": 3.831353919239905, - "grad_norm": 0.002367028733715415, - "learning_rate": 0.0001999927626778688, - "loss": 46.0, - "step": 23791 - }, - { - "epoch": 3.8315149563186925, - "grad_norm": 0.0015716521302238107, - "learning_rate": 0.0001999927620691991, - "loss": 46.0, - "step": 23792 - }, - { - "epoch": 3.8316759933974796, - "grad_norm": 0.005456264596432447, - "learning_rate": 0.0001999927614605038, - "loss": 46.0, - "step": 23793 - }, - { - "epoch": 3.831837030476267, - "grad_norm": 0.0031718614045530558, - "learning_rate": 0.00019999276085178294, - "loss": 46.0, - "step": 23794 - }, - { - "epoch": 3.8319980675550545, - "grad_norm": 0.002720343181863427, - "learning_rate": 0.00019999276024303646, - "loss": 46.0, - "step": 23795 - }, - { - "epoch": 3.832159104633842, - "grad_norm": 0.002677729818969965, - "learning_rate": 0.0001999927596342644, - "loss": 46.0, - "step": 23796 - }, - { - "epoch": 3.8323201417126294, - "grad_norm": 0.00726128788664937, - "learning_rate": 0.00019999275902546674, - "loss": 46.0, - "step": 23797 - }, - { - "epoch": 3.832481178791417, - "grad_norm": 0.008322179317474365, - "learning_rate": 0.0001999927584166435, - "loss": 46.0, - "step": 23798 - }, - { - "epoch": 3.8326422158702043, - "grad_norm": 0.002563060726970434, - "learning_rate": 0.00019999275780779464, - "loss": 46.0, - "step": 23799 - }, - { - "epoch": 3.8328032529489917, - "grad_norm": 0.0039304327219724655, - "learning_rate": 0.0001999927571989202, - "loss": 46.0, - "step": 23800 - }, - { - "epoch": 3.832964290027779, - "grad_norm": 0.007026908919215202, - "learning_rate": 0.00019999275659002017, - "loss": 46.0, - "step": 23801 - }, - { - "epoch": 3.833125327106566, - "grad_norm": 0.000881332263816148, - "learning_rate": 0.00019999275598109456, - "loss": 46.0, - "step": 23802 - }, - { - "epoch": 3.8332863641853536, - "grad_norm": 0.008376401849091053, - "learning_rate": 0.00019999275537214333, - "loss": 46.0, - "step": 23803 - }, - { - "epoch": 3.833447401264141, - "grad_norm": 0.0030209124088287354, - "learning_rate": 0.00019999275476316654, - "loss": 46.0, - "step": 23804 - }, - { - "epoch": 3.8336084383429285, - "grad_norm": 0.0019772592931985855, - "learning_rate": 0.00019999275415416413, - "loss": 46.0, - "step": 23805 - }, - { - "epoch": 3.833769475421716, - "grad_norm": 0.006213182583451271, - "learning_rate": 0.0001999927535451361, - "loss": 46.0, - "step": 23806 - }, - { - "epoch": 3.833930512500503, - "grad_norm": 0.005444422364234924, - "learning_rate": 0.00019999275293608256, - "loss": 46.0, - "step": 23807 - }, - { - "epoch": 3.8340915495792904, - "grad_norm": 0.0014355507446452975, - "learning_rate": 0.00019999275232700337, - "loss": 46.0, - "step": 23808 - }, - { - "epoch": 3.834252586658078, - "grad_norm": 0.0015205810777842999, - "learning_rate": 0.0001999927517178986, - "loss": 46.0, - "step": 23809 - }, - { - "epoch": 3.8344136237368653, - "grad_norm": 0.003284480655565858, - "learning_rate": 0.00019999275110876822, - "loss": 46.0, - "step": 23810 - }, - { - "epoch": 3.834574660815653, - "grad_norm": 0.0027947314083576202, - "learning_rate": 0.00019999275049961224, - "loss": 46.0, - "step": 23811 - }, - { - "epoch": 3.8347356978944402, - "grad_norm": 0.008528749458491802, - "learning_rate": 0.0001999927498904307, - "loss": 46.0, - "step": 23812 - }, - { - "epoch": 3.8348967349732277, - "grad_norm": 0.0013023784849792719, - "learning_rate": 0.00019999274928122357, - "loss": 46.0, - "step": 23813 - }, - { - "epoch": 3.835057772052015, - "grad_norm": 0.005034647881984711, - "learning_rate": 0.0001999927486719908, - "loss": 46.0, - "step": 23814 - }, - { - "epoch": 3.8352188091308026, - "grad_norm": 0.008998457342386246, - "learning_rate": 0.00019999274806273246, - "loss": 46.0, - "step": 23815 - }, - { - "epoch": 3.8353798462095896, - "grad_norm": 0.005973034538328648, - "learning_rate": 0.00019999274745344855, - "loss": 46.0, - "step": 23816 - }, - { - "epoch": 3.835540883288377, - "grad_norm": 0.00256621022708714, - "learning_rate": 0.00019999274684413901, - "loss": 46.0, - "step": 23817 - }, - { - "epoch": 3.8357019203671645, - "grad_norm": 0.007949058897793293, - "learning_rate": 0.0001999927462348039, - "loss": 46.0, - "step": 23818 - }, - { - "epoch": 3.835862957445952, - "grad_norm": 0.0015344663988798857, - "learning_rate": 0.00019999274562544322, - "loss": 46.0, - "step": 23819 - }, - { - "epoch": 3.8360239945247394, - "grad_norm": 0.0013722279109060764, - "learning_rate": 0.00019999274501605695, - "loss": 46.0, - "step": 23820 - }, - { - "epoch": 3.836185031603527, - "grad_norm": 0.0012689040740951896, - "learning_rate": 0.00019999274440664504, - "loss": 46.0, - "step": 23821 - }, - { - "epoch": 3.836346068682314, - "grad_norm": 0.015141929499804974, - "learning_rate": 0.00019999274379720755, - "loss": 46.0, - "step": 23822 - }, - { - "epoch": 3.8365071057611013, - "grad_norm": 0.011476409621536732, - "learning_rate": 0.00019999274318774446, - "loss": 46.0, - "step": 23823 - }, - { - "epoch": 3.8366681428398888, - "grad_norm": 0.0041524856351315975, - "learning_rate": 0.0001999927425782558, - "loss": 46.0, - "step": 23824 - }, - { - "epoch": 3.8368291799186762, - "grad_norm": 0.004125271923840046, - "learning_rate": 0.00019999274196874154, - "loss": 46.0, - "step": 23825 - }, - { - "epoch": 3.8369902169974637, - "grad_norm": 0.01737317629158497, - "learning_rate": 0.00019999274135920167, - "loss": 46.0, - "step": 23826 - }, - { - "epoch": 3.837151254076251, - "grad_norm": 0.006977675016969442, - "learning_rate": 0.00019999274074963624, - "loss": 46.0, - "step": 23827 - }, - { - "epoch": 3.8373122911550386, - "grad_norm": 0.007337931077927351, - "learning_rate": 0.0001999927401400452, - "loss": 46.0, - "step": 23828 - }, - { - "epoch": 3.837473328233826, - "grad_norm": 0.007270507980138063, - "learning_rate": 0.00019999273953042853, - "loss": 46.0, - "step": 23829 - }, - { - "epoch": 3.8376343653126135, - "grad_norm": 0.005673321429640055, - "learning_rate": 0.00019999273892078634, - "loss": 46.0, - "step": 23830 - }, - { - "epoch": 3.8377954023914005, - "grad_norm": 0.0019528295379132032, - "learning_rate": 0.0001999927383111185, - "loss": 46.0, - "step": 23831 - }, - { - "epoch": 3.837956439470188, - "grad_norm": 0.002949316054582596, - "learning_rate": 0.0001999927377014251, - "loss": 46.0, - "step": 23832 - }, - { - "epoch": 3.8381174765489754, - "grad_norm": 0.010876092128455639, - "learning_rate": 0.00019999273709170608, - "loss": 46.0, - "step": 23833 - }, - { - "epoch": 3.838278513627763, - "grad_norm": 0.001497737132012844, - "learning_rate": 0.00019999273648196148, - "loss": 46.0, - "step": 23834 - }, - { - "epoch": 3.8384395507065503, - "grad_norm": 0.003304415615275502, - "learning_rate": 0.0001999927358721913, - "loss": 46.0, - "step": 23835 - }, - { - "epoch": 3.8386005877853373, - "grad_norm": 0.0009059087606146932, - "learning_rate": 0.0001999927352623955, - "loss": 46.0, - "step": 23836 - }, - { - "epoch": 3.8387616248641248, - "grad_norm": 0.0013369123917073011, - "learning_rate": 0.00019999273465257412, - "loss": 46.0, - "step": 23837 - }, - { - "epoch": 3.838922661942912, - "grad_norm": 0.0016327179037034512, - "learning_rate": 0.00019999273404272715, - "loss": 46.0, - "step": 23838 - }, - { - "epoch": 3.8390836990216997, - "grad_norm": 0.004308358766138554, - "learning_rate": 0.0001999927334328546, - "loss": 46.0, - "step": 23839 - }, - { - "epoch": 3.839244736100487, - "grad_norm": 0.0034348871558904648, - "learning_rate": 0.00019999273282295644, - "loss": 46.0, - "step": 23840 - }, - { - "epoch": 3.8394057731792746, - "grad_norm": 0.0012503493344411254, - "learning_rate": 0.00019999273221303268, - "loss": 46.0, - "step": 23841 - }, - { - "epoch": 3.839566810258062, - "grad_norm": 0.002518642693758011, - "learning_rate": 0.00019999273160308334, - "loss": 46.0, - "step": 23842 - }, - { - "epoch": 3.8397278473368495, - "grad_norm": 0.0028336290270090103, - "learning_rate": 0.0001999927309931084, - "loss": 46.0, - "step": 23843 - }, - { - "epoch": 3.839888884415637, - "grad_norm": 0.0015037036500871181, - "learning_rate": 0.00019999273038310785, - "loss": 46.0, - "step": 23844 - }, - { - "epoch": 3.8400499214944244, - "grad_norm": 0.001164411660283804, - "learning_rate": 0.00019999272977308175, - "loss": 46.0, - "step": 23845 - }, - { - "epoch": 3.8402109585732114, - "grad_norm": 0.006247709970921278, - "learning_rate": 0.00019999272916303005, - "loss": 46.0, - "step": 23846 - }, - { - "epoch": 3.840371995651999, - "grad_norm": 0.0024582005571573973, - "learning_rate": 0.00019999272855295271, - "loss": 46.0, - "step": 23847 - }, - { - "epoch": 3.8405330327307863, - "grad_norm": 0.009390462189912796, - "learning_rate": 0.00019999272794284982, - "loss": 46.0, - "step": 23848 - }, - { - "epoch": 3.8406940698095737, - "grad_norm": 0.001749495742842555, - "learning_rate": 0.00019999272733272133, - "loss": 46.0, - "step": 23849 - }, - { - "epoch": 3.840855106888361, - "grad_norm": 0.0022811098024249077, - "learning_rate": 0.00019999272672256723, - "loss": 46.0, - "step": 23850 - }, - { - "epoch": 3.841016143967148, - "grad_norm": 0.001285584643483162, - "learning_rate": 0.00019999272611238755, - "loss": 46.0, - "step": 23851 - }, - { - "epoch": 3.8411771810459356, - "grad_norm": 0.0026719607412815094, - "learning_rate": 0.00019999272550218228, - "loss": 46.0, - "step": 23852 - }, - { - "epoch": 3.841338218124723, - "grad_norm": 0.004525855649262667, - "learning_rate": 0.00019999272489195142, - "loss": 46.0, - "step": 23853 - }, - { - "epoch": 3.8414992552035105, - "grad_norm": 0.0026460273656994104, - "learning_rate": 0.00019999272428169494, - "loss": 46.0, - "step": 23854 - }, - { - "epoch": 3.841660292282298, - "grad_norm": 0.007868615910410881, - "learning_rate": 0.0001999927236714129, - "loss": 46.0, - "step": 23855 - }, - { - "epoch": 3.8418213293610854, - "grad_norm": 0.002740424359217286, - "learning_rate": 0.00019999272306110526, - "loss": 46.0, - "step": 23856 - }, - { - "epoch": 3.841982366439873, - "grad_norm": 0.005905696656554937, - "learning_rate": 0.000199992722450772, - "loss": 46.0, - "step": 23857 - }, - { - "epoch": 3.8421434035186603, - "grad_norm": 0.008847422897815704, - "learning_rate": 0.00019999272184041317, - "loss": 46.0, - "step": 23858 - }, - { - "epoch": 3.842304440597448, - "grad_norm": 0.007343965116888285, - "learning_rate": 0.00019999272123002876, - "loss": 46.0, - "step": 23859 - }, - { - "epoch": 3.842465477676235, - "grad_norm": 0.006909176241606474, - "learning_rate": 0.00019999272061961874, - "loss": 46.0, - "step": 23860 - }, - { - "epoch": 3.8426265147550223, - "grad_norm": 0.003095058025792241, - "learning_rate": 0.00019999272000918313, - "loss": 46.0, - "step": 23861 - }, - { - "epoch": 3.8427875518338097, - "grad_norm": 0.0021113466937094927, - "learning_rate": 0.00019999271939872193, - "loss": 46.0, - "step": 23862 - }, - { - "epoch": 3.842948588912597, - "grad_norm": 0.0008507101447321475, - "learning_rate": 0.00019999271878823514, - "loss": 46.0, - "step": 23863 - }, - { - "epoch": 3.8431096259913846, - "grad_norm": 0.0067399670369923115, - "learning_rate": 0.00019999271817772274, - "loss": 46.0, - "step": 23864 - }, - { - "epoch": 3.843270663070172, - "grad_norm": 0.002260900568217039, - "learning_rate": 0.00019999271756718475, - "loss": 46.0, - "step": 23865 - }, - { - "epoch": 3.843431700148959, - "grad_norm": 0.008987069129943848, - "learning_rate": 0.00019999271695662118, - "loss": 46.0, - "step": 23866 - }, - { - "epoch": 3.8435927372277465, - "grad_norm": 0.010576714761555195, - "learning_rate": 0.00019999271634603202, - "loss": 46.0, - "step": 23867 - }, - { - "epoch": 3.843753774306534, - "grad_norm": 0.00532457185909152, - "learning_rate": 0.00019999271573541724, - "loss": 46.0, - "step": 23868 - }, - { - "epoch": 3.8439148113853214, - "grad_norm": 0.003059093374758959, - "learning_rate": 0.0001999927151247769, - "loss": 46.0, - "step": 23869 - }, - { - "epoch": 3.844075848464109, - "grad_norm": 0.021839655935764313, - "learning_rate": 0.00019999271451411096, - "loss": 46.0, - "step": 23870 - }, - { - "epoch": 3.8442368855428963, - "grad_norm": 0.01567326858639717, - "learning_rate": 0.0001999927139034194, - "loss": 46.0, - "step": 23871 - }, - { - "epoch": 3.844397922621684, - "grad_norm": 0.000972215726505965, - "learning_rate": 0.0001999927132927023, - "loss": 46.0, - "step": 23872 - }, - { - "epoch": 3.8445589597004712, - "grad_norm": 0.008044194430112839, - "learning_rate": 0.00019999271268195953, - "loss": 46.0, - "step": 23873 - }, - { - "epoch": 3.8447199967792587, - "grad_norm": 0.0038501210510730743, - "learning_rate": 0.0001999927120711912, - "loss": 46.0, - "step": 23874 - }, - { - "epoch": 3.8448810338580457, - "grad_norm": 0.014655952341854572, - "learning_rate": 0.00019999271146039732, - "loss": 46.0, - "step": 23875 - }, - { - "epoch": 3.845042070936833, - "grad_norm": 0.009296517819166183, - "learning_rate": 0.00019999271084957781, - "loss": 46.0, - "step": 23876 - }, - { - "epoch": 3.8452031080156206, - "grad_norm": 0.006652393378317356, - "learning_rate": 0.00019999271023873273, - "loss": 46.0, - "step": 23877 - }, - { - "epoch": 3.845364145094408, - "grad_norm": 0.005066597834229469, - "learning_rate": 0.00019999270962786203, - "loss": 46.0, - "step": 23878 - }, - { - "epoch": 3.8455251821731955, - "grad_norm": 0.00882706232368946, - "learning_rate": 0.0001999927090169657, - "loss": 46.0, - "step": 23879 - }, - { - "epoch": 3.8456862192519825, - "grad_norm": 0.012673420831561089, - "learning_rate": 0.00019999270840604386, - "loss": 46.0, - "step": 23880 - }, - { - "epoch": 3.84584725633077, - "grad_norm": 0.0029137085657566786, - "learning_rate": 0.00019999270779509637, - "loss": 46.0, - "step": 23881 - }, - { - "epoch": 3.8460082934095574, - "grad_norm": 0.010180748999118805, - "learning_rate": 0.00019999270718412332, - "loss": 46.0, - "step": 23882 - }, - { - "epoch": 3.846169330488345, - "grad_norm": 0.002074816729873419, - "learning_rate": 0.00019999270657312465, - "loss": 46.0, - "step": 23883 - }, - { - "epoch": 3.8463303675671323, - "grad_norm": 0.008947465568780899, - "learning_rate": 0.0001999927059621004, - "loss": 46.0, - "step": 23884 - }, - { - "epoch": 3.8464914046459198, - "grad_norm": 0.0037822150625288486, - "learning_rate": 0.00019999270535105054, - "loss": 46.0, - "step": 23885 - }, - { - "epoch": 3.846652441724707, - "grad_norm": 0.0004745628102682531, - "learning_rate": 0.00019999270473997514, - "loss": 46.0, - "step": 23886 - }, - { - "epoch": 3.8468134788034947, - "grad_norm": 0.002333694836124778, - "learning_rate": 0.0001999927041288741, - "loss": 46.0, - "step": 23887 - }, - { - "epoch": 3.846974515882282, - "grad_norm": 0.008906783536076546, - "learning_rate": 0.0001999927035177475, - "loss": 46.0, - "step": 23888 - }, - { - "epoch": 3.847135552961069, - "grad_norm": 0.0016794158145785332, - "learning_rate": 0.00019999270290659525, - "loss": 46.0, - "step": 23889 - }, - { - "epoch": 3.8472965900398566, - "grad_norm": 0.00228791288100183, - "learning_rate": 0.00019999270229541748, - "loss": 46.0, - "step": 23890 - }, - { - "epoch": 3.847457627118644, - "grad_norm": 0.005565068684518337, - "learning_rate": 0.00019999270168421403, - "loss": 46.0, - "step": 23891 - }, - { - "epoch": 3.8476186641974315, - "grad_norm": 0.008348161354660988, - "learning_rate": 0.00019999270107298506, - "loss": 46.0, - "step": 23892 - }, - { - "epoch": 3.847779701276219, - "grad_norm": 0.006248812191188335, - "learning_rate": 0.00019999270046173047, - "loss": 46.0, - "step": 23893 - }, - { - "epoch": 3.8479407383550064, - "grad_norm": 0.004908010829240084, - "learning_rate": 0.00019999269985045032, - "loss": 46.0, - "step": 23894 - }, - { - "epoch": 3.8481017754337934, - "grad_norm": 0.004707091022282839, - "learning_rate": 0.00019999269923914452, - "loss": 46.0, - "step": 23895 - }, - { - "epoch": 3.848262812512581, - "grad_norm": 0.015538440085947514, - "learning_rate": 0.00019999269862781314, - "loss": 46.0, - "step": 23896 - }, - { - "epoch": 3.8484238495913683, - "grad_norm": 0.005997444968670607, - "learning_rate": 0.0001999926980164562, - "loss": 46.0, - "step": 23897 - }, - { - "epoch": 3.8485848866701557, - "grad_norm": 0.004419758915901184, - "learning_rate": 0.00019999269740507365, - "loss": 46.0, - "step": 23898 - }, - { - "epoch": 3.848745923748943, - "grad_norm": 0.009846421889960766, - "learning_rate": 0.00019999269679366548, - "loss": 46.0, - "step": 23899 - }, - { - "epoch": 3.8489069608277307, - "grad_norm": 0.003134183818474412, - "learning_rate": 0.00019999269618223176, - "loss": 46.0, - "step": 23900 - }, - { - "epoch": 3.849067997906518, - "grad_norm": 0.0008235535933636129, - "learning_rate": 0.0001999926955707724, - "loss": 46.0, - "step": 23901 - }, - { - "epoch": 3.8492290349853056, - "grad_norm": 0.009524377062916756, - "learning_rate": 0.00019999269495928748, - "loss": 46.0, - "step": 23902 - }, - { - "epoch": 3.849390072064093, - "grad_norm": 0.005838321056216955, - "learning_rate": 0.000199992694347777, - "loss": 46.0, - "step": 23903 - }, - { - "epoch": 3.84955110914288, - "grad_norm": 0.002070268616080284, - "learning_rate": 0.0001999926937362409, - "loss": 46.0, - "step": 23904 - }, - { - "epoch": 3.8497121462216675, - "grad_norm": 0.007831402122974396, - "learning_rate": 0.00019999269312467917, - "loss": 46.0, - "step": 23905 - }, - { - "epoch": 3.849873183300455, - "grad_norm": 0.015754563733935356, - "learning_rate": 0.00019999269251309187, - "loss": 46.0, - "step": 23906 - }, - { - "epoch": 3.8500342203792424, - "grad_norm": 0.010111791081726551, - "learning_rate": 0.00019999269190147897, - "loss": 46.0, - "step": 23907 - }, - { - "epoch": 3.85019525745803, - "grad_norm": 0.001547613414004445, - "learning_rate": 0.0001999926912898405, - "loss": 46.0, - "step": 23908 - }, - { - "epoch": 3.850356294536817, - "grad_norm": 0.0014617778360843658, - "learning_rate": 0.00019999269067817643, - "loss": 46.0, - "step": 23909 - }, - { - "epoch": 3.8505173316156043, - "grad_norm": 0.0030381157994270325, - "learning_rate": 0.00019999269006648675, - "loss": 46.0, - "step": 23910 - }, - { - "epoch": 3.8506783686943917, - "grad_norm": 0.0017007588176056743, - "learning_rate": 0.0001999926894547715, - "loss": 46.0, - "step": 23911 - }, - { - "epoch": 3.850839405773179, - "grad_norm": 0.009313547052443027, - "learning_rate": 0.00019999268884303065, - "loss": 46.0, - "step": 23912 - }, - { - "epoch": 3.8510004428519666, - "grad_norm": 0.0019377215066924691, - "learning_rate": 0.0001999926882312642, - "loss": 46.0, - "step": 23913 - }, - { - "epoch": 3.851161479930754, - "grad_norm": 0.0011417100904509425, - "learning_rate": 0.00019999268761947215, - "loss": 46.0, - "step": 23914 - }, - { - "epoch": 3.8513225170095415, - "grad_norm": 0.0040351650677621365, - "learning_rate": 0.00019999268700765453, - "loss": 46.0, - "step": 23915 - }, - { - "epoch": 3.851483554088329, - "grad_norm": 0.00386438868008554, - "learning_rate": 0.0001999926863958113, - "loss": 46.0, - "step": 23916 - }, - { - "epoch": 3.8516445911671164, - "grad_norm": 0.002421856392174959, - "learning_rate": 0.00019999268578394248, - "loss": 46.0, - "step": 23917 - }, - { - "epoch": 3.851805628245904, - "grad_norm": 0.005682044662535191, - "learning_rate": 0.00019999268517204808, - "loss": 46.0, - "step": 23918 - }, - { - "epoch": 3.851966665324691, - "grad_norm": 0.0033794965129345655, - "learning_rate": 0.00019999268456012806, - "loss": 46.0, - "step": 23919 - }, - { - "epoch": 3.8521277024034783, - "grad_norm": 0.002941686660051346, - "learning_rate": 0.00019999268394818245, - "loss": 46.0, - "step": 23920 - }, - { - "epoch": 3.852288739482266, - "grad_norm": 0.0006447505438700318, - "learning_rate": 0.00019999268333621126, - "loss": 46.0, - "step": 23921 - }, - { - "epoch": 3.8524497765610533, - "grad_norm": 0.009037910960614681, - "learning_rate": 0.0001999926827242145, - "loss": 46.0, - "step": 23922 - }, - { - "epoch": 3.8526108136398407, - "grad_norm": 0.0015992186963558197, - "learning_rate": 0.0001999926821121921, - "loss": 46.0, - "step": 23923 - }, - { - "epoch": 3.8527718507186277, - "grad_norm": 0.002157653449103236, - "learning_rate": 0.00019999268150014415, - "loss": 46.0, - "step": 23924 - }, - { - "epoch": 3.852932887797415, - "grad_norm": 0.009380200877785683, - "learning_rate": 0.00019999268088807058, - "loss": 46.0, - "step": 23925 - }, - { - "epoch": 3.8530939248762026, - "grad_norm": 0.005483757704496384, - "learning_rate": 0.00019999268027597142, - "loss": 46.0, - "step": 23926 - }, - { - "epoch": 3.85325496195499, - "grad_norm": 0.0015798325184732676, - "learning_rate": 0.00019999267966384668, - "loss": 46.0, - "step": 23927 - }, - { - "epoch": 3.8534159990337775, - "grad_norm": 0.0031880135647952557, - "learning_rate": 0.00019999267905169635, - "loss": 46.0, - "step": 23928 - }, - { - "epoch": 3.853577036112565, - "grad_norm": 0.006490484811365604, - "learning_rate": 0.00019999267843952038, - "loss": 46.0, - "step": 23929 - }, - { - "epoch": 3.8537380731913524, - "grad_norm": 0.0019183688564226031, - "learning_rate": 0.00019999267782731884, - "loss": 46.0, - "step": 23930 - }, - { - "epoch": 3.85389911027014, - "grad_norm": 0.0011442864779382944, - "learning_rate": 0.00019999267721509175, - "loss": 46.0, - "step": 23931 - }, - { - "epoch": 3.8540601473489273, - "grad_norm": 0.0018853372894227505, - "learning_rate": 0.00019999267660283901, - "loss": 46.0, - "step": 23932 - }, - { - "epoch": 3.8542211844277143, - "grad_norm": 0.0011674242559820414, - "learning_rate": 0.00019999267599056072, - "loss": 46.0, - "step": 23933 - }, - { - "epoch": 3.854382221506502, - "grad_norm": 0.0023582978174090385, - "learning_rate": 0.00019999267537825684, - "loss": 46.0, - "step": 23934 - }, - { - "epoch": 3.8545432585852892, - "grad_norm": 0.0017070864560082555, - "learning_rate": 0.00019999267476592732, - "loss": 46.0, - "step": 23935 - }, - { - "epoch": 3.8547042956640767, - "grad_norm": 0.0015629647532477975, - "learning_rate": 0.00019999267415357226, - "loss": 46.0, - "step": 23936 - }, - { - "epoch": 3.854865332742864, - "grad_norm": 0.004717658273875713, - "learning_rate": 0.0001999926735411916, - "loss": 46.0, - "step": 23937 - }, - { - "epoch": 3.8550263698216516, - "grad_norm": 0.0037513435818254948, - "learning_rate": 0.0001999926729287853, - "loss": 46.0, - "step": 23938 - }, - { - "epoch": 3.8551874069004386, - "grad_norm": 0.004000397399067879, - "learning_rate": 0.00019999267231635343, - "loss": 46.0, - "step": 23939 - }, - { - "epoch": 3.855348443979226, - "grad_norm": 0.002302933717146516, - "learning_rate": 0.00019999267170389597, - "loss": 46.0, - "step": 23940 - }, - { - "epoch": 3.8555094810580135, - "grad_norm": 0.007076386362314224, - "learning_rate": 0.00019999267109141293, - "loss": 46.0, - "step": 23941 - }, - { - "epoch": 3.855670518136801, - "grad_norm": 0.0018582346383482218, - "learning_rate": 0.00019999267047890427, - "loss": 46.0, - "step": 23942 - }, - { - "epoch": 3.8558315552155884, - "grad_norm": 0.0020525604486465454, - "learning_rate": 0.00019999266986637002, - "loss": 46.0, - "step": 23943 - }, - { - "epoch": 3.855992592294376, - "grad_norm": 0.0007765043410472572, - "learning_rate": 0.00019999266925381019, - "loss": 46.0, - "step": 23944 - }, - { - "epoch": 3.8561536293731633, - "grad_norm": 0.010290330275893211, - "learning_rate": 0.0001999926686412248, - "loss": 46.0, - "step": 23945 - }, - { - "epoch": 3.8563146664519508, - "grad_norm": 0.0020926615688949823, - "learning_rate": 0.00019999266802861375, - "loss": 46.0, - "step": 23946 - }, - { - "epoch": 3.856475703530738, - "grad_norm": 0.0040846941992640495, - "learning_rate": 0.00019999266741597716, - "loss": 46.0, - "step": 23947 - }, - { - "epoch": 3.856636740609525, - "grad_norm": 0.002246662974357605, - "learning_rate": 0.00019999266680331498, - "loss": 46.0, - "step": 23948 - }, - { - "epoch": 3.8567977776883127, - "grad_norm": 0.014789251610636711, - "learning_rate": 0.00019999266619062715, - "loss": 46.0, - "step": 23949 - }, - { - "epoch": 3.8569588147671, - "grad_norm": 0.0029551174957305193, - "learning_rate": 0.00019999266557791377, - "loss": 46.0, - "step": 23950 - }, - { - "epoch": 3.8571198518458876, - "grad_norm": 0.0022438555024564266, - "learning_rate": 0.0001999926649651748, - "loss": 46.0, - "step": 23951 - }, - { - "epoch": 3.857280888924675, - "grad_norm": 0.0008192301611416042, - "learning_rate": 0.0001999926643524102, - "loss": 46.0, - "step": 23952 - }, - { - "epoch": 3.857441926003462, - "grad_norm": 0.0035310029052197933, - "learning_rate": 0.00019999266373962003, - "loss": 46.0, - "step": 23953 - }, - { - "epoch": 3.8576029630822495, - "grad_norm": 0.012899009510874748, - "learning_rate": 0.0001999926631268043, - "loss": 46.0, - "step": 23954 - }, - { - "epoch": 3.857764000161037, - "grad_norm": 0.0018951051170006394, - "learning_rate": 0.00019999266251396293, - "loss": 46.0, - "step": 23955 - }, - { - "epoch": 3.8579250372398244, - "grad_norm": 0.001851825974881649, - "learning_rate": 0.000199992661901096, - "loss": 46.0, - "step": 23956 - }, - { - "epoch": 3.858086074318612, - "grad_norm": 0.002390582114458084, - "learning_rate": 0.00019999266128820344, - "loss": 46.0, - "step": 23957 - }, - { - "epoch": 3.8582471113973993, - "grad_norm": 0.0011908072046935558, - "learning_rate": 0.00019999266067528533, - "loss": 46.0, - "step": 23958 - }, - { - "epoch": 3.8584081484761867, - "grad_norm": 0.0025828955695033073, - "learning_rate": 0.00019999266006234158, - "loss": 46.0, - "step": 23959 - }, - { - "epoch": 3.858569185554974, - "grad_norm": 0.0038658420089632273, - "learning_rate": 0.00019999265944937227, - "loss": 46.0, - "step": 23960 - }, - { - "epoch": 3.8587302226337616, - "grad_norm": 0.003765959059819579, - "learning_rate": 0.00019999265883637737, - "loss": 46.0, - "step": 23961 - }, - { - "epoch": 3.8588912597125486, - "grad_norm": 0.0012922107707709074, - "learning_rate": 0.00019999265822335686, - "loss": 46.0, - "step": 23962 - }, - { - "epoch": 3.859052296791336, - "grad_norm": 0.0026274758856743574, - "learning_rate": 0.00019999265761031076, - "loss": 46.0, - "step": 23963 - }, - { - "epoch": 3.8592133338701236, - "grad_norm": 0.00278154737316072, - "learning_rate": 0.00019999265699723908, - "loss": 46.0, - "step": 23964 - }, - { - "epoch": 3.859374370948911, - "grad_norm": 0.0021455432288348675, - "learning_rate": 0.0001999926563841418, - "loss": 46.0, - "step": 23965 - }, - { - "epoch": 3.8595354080276985, - "grad_norm": 0.0019393404945731163, - "learning_rate": 0.0001999926557710189, - "loss": 46.0, - "step": 23966 - }, - { - "epoch": 3.859696445106486, - "grad_norm": 0.002570522017776966, - "learning_rate": 0.00019999265515787044, - "loss": 46.0, - "step": 23967 - }, - { - "epoch": 3.859857482185273, - "grad_norm": 0.021145427599549294, - "learning_rate": 0.0001999926545446964, - "loss": 46.0, - "step": 23968 - }, - { - "epoch": 3.8600185192640604, - "grad_norm": 0.0017528659664094448, - "learning_rate": 0.00019999265393149673, - "loss": 46.0, - "step": 23969 - }, - { - "epoch": 3.860179556342848, - "grad_norm": 0.003110961988568306, - "learning_rate": 0.0001999926533182715, - "loss": 46.0, - "step": 23970 - }, - { - "epoch": 3.8603405934216353, - "grad_norm": 0.0026708340737968683, - "learning_rate": 0.00019999265270502067, - "loss": 46.0, - "step": 23971 - }, - { - "epoch": 3.8605016305004227, - "grad_norm": 0.015935273841023445, - "learning_rate": 0.0001999926520917442, - "loss": 46.0, - "step": 23972 - }, - { - "epoch": 3.86066266757921, - "grad_norm": 0.0038627285975962877, - "learning_rate": 0.00019999265147844218, - "loss": 46.0, - "step": 23973 - }, - { - "epoch": 3.8608237046579976, - "grad_norm": 0.004917750600725412, - "learning_rate": 0.00019999265086511456, - "loss": 46.0, - "step": 23974 - }, - { - "epoch": 3.860984741736785, - "grad_norm": 0.001065968768671155, - "learning_rate": 0.00019999265025176134, - "loss": 46.0, - "step": 23975 - }, - { - "epoch": 3.8611457788155725, - "grad_norm": 0.0022499931510537863, - "learning_rate": 0.00019999264963838255, - "loss": 46.0, - "step": 23976 - }, - { - "epoch": 3.8613068158943595, - "grad_norm": 0.0013169817393645644, - "learning_rate": 0.00019999264902497815, - "loss": 46.0, - "step": 23977 - }, - { - "epoch": 3.861467852973147, - "grad_norm": 0.007300612982362509, - "learning_rate": 0.00019999264841154813, - "loss": 46.0, - "step": 23978 - }, - { - "epoch": 3.8616288900519344, - "grad_norm": 0.007505542133003473, - "learning_rate": 0.00019999264779809256, - "loss": 46.0, - "step": 23979 - }, - { - "epoch": 3.861789927130722, - "grad_norm": 0.0012584192445501685, - "learning_rate": 0.0001999926471846114, - "loss": 46.0, - "step": 23980 - }, - { - "epoch": 3.8619509642095093, - "grad_norm": 0.008367362432181835, - "learning_rate": 0.00019999264657110462, - "loss": 46.0, - "step": 23981 - }, - { - "epoch": 3.862112001288297, - "grad_norm": 0.008066481910645962, - "learning_rate": 0.00019999264595757225, - "loss": 46.0, - "step": 23982 - }, - { - "epoch": 3.862273038367084, - "grad_norm": 0.006368101108819246, - "learning_rate": 0.0001999926453440143, - "loss": 46.0, - "step": 23983 - }, - { - "epoch": 3.8624340754458713, - "grad_norm": 0.015335018746554852, - "learning_rate": 0.00019999264473043074, - "loss": 46.0, - "step": 23984 - }, - { - "epoch": 3.8625951125246587, - "grad_norm": 0.007345108315348625, - "learning_rate": 0.0001999926441168216, - "loss": 46.0, - "step": 23985 - }, - { - "epoch": 3.862756149603446, - "grad_norm": 0.002686293562874198, - "learning_rate": 0.00019999264350318687, - "loss": 46.0, - "step": 23986 - }, - { - "epoch": 3.8629171866822336, - "grad_norm": 0.002146095037460327, - "learning_rate": 0.00019999264288952655, - "loss": 46.0, - "step": 23987 - }, - { - "epoch": 3.863078223761021, - "grad_norm": 0.004183642100542784, - "learning_rate": 0.00019999264227584063, - "loss": 46.0, - "step": 23988 - }, - { - "epoch": 3.8632392608398085, - "grad_norm": 0.0020193124655634165, - "learning_rate": 0.00019999264166212913, - "loss": 46.0, - "step": 23989 - }, - { - "epoch": 3.863400297918596, - "grad_norm": 0.00040674267802387476, - "learning_rate": 0.00019999264104839201, - "loss": 46.0, - "step": 23990 - }, - { - "epoch": 3.8635613349973834, - "grad_norm": 0.0014019823865965009, - "learning_rate": 0.00019999264043462928, - "loss": 46.0, - "step": 23991 - }, - { - "epoch": 3.8637223720761704, - "grad_norm": 0.014791633933782578, - "learning_rate": 0.00019999263982084102, - "loss": 46.0, - "step": 23992 - }, - { - "epoch": 3.863883409154958, - "grad_norm": 0.0013207363663241267, - "learning_rate": 0.00019999263920702714, - "loss": 46.0, - "step": 23993 - }, - { - "epoch": 3.8640444462337453, - "grad_norm": 0.004629611968994141, - "learning_rate": 0.00019999263859318765, - "loss": 46.0, - "step": 23994 - }, - { - "epoch": 3.8642054833125328, - "grad_norm": 0.0037192164454609156, - "learning_rate": 0.00019999263797932257, - "loss": 46.0, - "step": 23995 - }, - { - "epoch": 3.8643665203913202, - "grad_norm": 0.005566635634750128, - "learning_rate": 0.0001999926373654319, - "loss": 46.0, - "step": 23996 - }, - { - "epoch": 3.8645275574701072, - "grad_norm": 0.003593568690121174, - "learning_rate": 0.00019999263675151566, - "loss": 46.0, - "step": 23997 - }, - { - "epoch": 3.8646885945488947, - "grad_norm": 0.003664148971438408, - "learning_rate": 0.0001999926361375738, - "loss": 46.0, - "step": 23998 - }, - { - "epoch": 3.864849631627682, - "grad_norm": 0.0015974607085809112, - "learning_rate": 0.00019999263552360633, - "loss": 46.0, - "step": 23999 - }, - { - "epoch": 3.8650106687064696, - "grad_norm": 0.0035389717668294907, - "learning_rate": 0.00019999263490961332, - "loss": 46.0, - "step": 24000 - }, - { - "epoch": 3.865171705785257, - "grad_norm": 0.006094140466302633, - "learning_rate": 0.00019999263429559466, - "loss": 46.0, - "step": 24001 - }, - { - "epoch": 3.8653327428640445, - "grad_norm": 0.009808441624045372, - "learning_rate": 0.00019999263368155045, - "loss": 46.0, - "step": 24002 - }, - { - "epoch": 3.865493779942832, - "grad_norm": 0.0015333130722865462, - "learning_rate": 0.00019999263306748065, - "loss": 46.0, - "step": 24003 - }, - { - "epoch": 3.8656548170216194, - "grad_norm": 0.004158667754381895, - "learning_rate": 0.00019999263245338523, - "loss": 46.0, - "step": 24004 - }, - { - "epoch": 3.865815854100407, - "grad_norm": 0.0019012149423360825, - "learning_rate": 0.00019999263183926425, - "loss": 46.0, - "step": 24005 - }, - { - "epoch": 3.865976891179194, - "grad_norm": 0.004446346778422594, - "learning_rate": 0.00019999263122511763, - "loss": 46.0, - "step": 24006 - }, - { - "epoch": 3.8661379282579813, - "grad_norm": 0.006250792648643255, - "learning_rate": 0.00019999263061094545, - "loss": 46.0, - "step": 24007 - }, - { - "epoch": 3.8662989653367688, - "grad_norm": 0.012037362903356552, - "learning_rate": 0.00019999262999674766, - "loss": 46.0, - "step": 24008 - }, - { - "epoch": 3.866460002415556, - "grad_norm": 0.0010690775234252214, - "learning_rate": 0.00019999262938252428, - "loss": 46.0, - "step": 24009 - }, - { - "epoch": 3.8666210394943437, - "grad_norm": 0.01186061929911375, - "learning_rate": 0.0001999926287682753, - "loss": 46.0, - "step": 24010 - }, - { - "epoch": 3.866782076573131, - "grad_norm": 0.011861158534884453, - "learning_rate": 0.00019999262815400078, - "loss": 46.0, - "step": 24011 - }, - { - "epoch": 3.866943113651918, - "grad_norm": 0.0011435867054387927, - "learning_rate": 0.00019999262753970062, - "loss": 46.0, - "step": 24012 - }, - { - "epoch": 3.8671041507307056, - "grad_norm": 0.00245189992710948, - "learning_rate": 0.00019999262692537486, - "loss": 46.0, - "step": 24013 - }, - { - "epoch": 3.867265187809493, - "grad_norm": 0.001089563244022429, - "learning_rate": 0.00019999262631102354, - "loss": 46.0, - "step": 24014 - }, - { - "epoch": 3.8674262248882805, - "grad_norm": 0.0014150417409837246, - "learning_rate": 0.00019999262569664659, - "loss": 46.0, - "step": 24015 - }, - { - "epoch": 3.867587261967068, - "grad_norm": 0.008513418957591057, - "learning_rate": 0.00019999262508224407, - "loss": 46.0, - "step": 24016 - }, - { - "epoch": 3.8677482990458554, - "grad_norm": 0.0006640064530074596, - "learning_rate": 0.00019999262446781594, - "loss": 46.0, - "step": 24017 - }, - { - "epoch": 3.867909336124643, - "grad_norm": 0.0021558396983891726, - "learning_rate": 0.00019999262385336224, - "loss": 46.0, - "step": 24018 - }, - { - "epoch": 3.8680703732034303, - "grad_norm": 0.006785494741052389, - "learning_rate": 0.00019999262323888294, - "loss": 46.0, - "step": 24019 - }, - { - "epoch": 3.8682314102822177, - "grad_norm": 0.001933910883963108, - "learning_rate": 0.00019999262262437805, - "loss": 46.0, - "step": 24020 - }, - { - "epoch": 3.8683924473610047, - "grad_norm": 0.005459047853946686, - "learning_rate": 0.00019999262200984754, - "loss": 46.0, - "step": 24021 - }, - { - "epoch": 3.868553484439792, - "grad_norm": 0.0012136929435655475, - "learning_rate": 0.00019999262139529144, - "loss": 46.0, - "step": 24022 - }, - { - "epoch": 3.8687145215185796, - "grad_norm": 0.002100676530972123, - "learning_rate": 0.00019999262078070976, - "loss": 46.0, - "step": 24023 - }, - { - "epoch": 3.868875558597367, - "grad_norm": 0.001325149554759264, - "learning_rate": 0.00019999262016610252, - "loss": 46.0, - "step": 24024 - }, - { - "epoch": 3.8690365956761545, - "grad_norm": 0.001594682689756155, - "learning_rate": 0.00019999261955146964, - "loss": 46.0, - "step": 24025 - }, - { - "epoch": 3.8691976327549416, - "grad_norm": 0.0010829733218997717, - "learning_rate": 0.0001999926189368112, - "loss": 46.0, - "step": 24026 - }, - { - "epoch": 3.869358669833729, - "grad_norm": 0.010376001708209515, - "learning_rate": 0.00019999261832212713, - "loss": 46.0, - "step": 24027 - }, - { - "epoch": 3.8695197069125165, - "grad_norm": 0.0011982464930042624, - "learning_rate": 0.00019999261770741752, - "loss": 46.0, - "step": 24028 - }, - { - "epoch": 3.869680743991304, - "grad_norm": 0.005371060688048601, - "learning_rate": 0.00019999261709268228, - "loss": 46.0, - "step": 24029 - }, - { - "epoch": 3.8698417810700914, - "grad_norm": 0.006046678870916367, - "learning_rate": 0.00019999261647792144, - "loss": 46.0, - "step": 24030 - }, - { - "epoch": 3.870002818148879, - "grad_norm": 0.0029259948059916496, - "learning_rate": 0.00019999261586313503, - "loss": 46.0, - "step": 24031 - }, - { - "epoch": 3.8701638552276663, - "grad_norm": 0.001116109313443303, - "learning_rate": 0.000199992615248323, - "loss": 46.0, - "step": 24032 - }, - { - "epoch": 3.8703248923064537, - "grad_norm": 0.0017653099494054914, - "learning_rate": 0.0001999926146334854, - "loss": 46.0, - "step": 24033 - }, - { - "epoch": 3.870485929385241, - "grad_norm": 0.004673819988965988, - "learning_rate": 0.0001999926140186222, - "loss": 46.0, - "step": 24034 - }, - { - "epoch": 3.8706469664640286, - "grad_norm": 0.003567418549209833, - "learning_rate": 0.0001999926134037334, - "loss": 46.0, - "step": 24035 - }, - { - "epoch": 3.8708080035428156, - "grad_norm": 0.0019304970046505332, - "learning_rate": 0.00019999261278881903, - "loss": 46.0, - "step": 24036 - }, - { - "epoch": 3.870969040621603, - "grad_norm": 0.0025090614799410105, - "learning_rate": 0.00019999261217387905, - "loss": 46.0, - "step": 24037 - }, - { - "epoch": 3.8711300777003905, - "grad_norm": 0.002176633570343256, - "learning_rate": 0.0001999926115589135, - "loss": 46.0, - "step": 24038 - }, - { - "epoch": 3.871291114779178, - "grad_norm": 0.002901740139350295, - "learning_rate": 0.00019999261094392232, - "loss": 46.0, - "step": 24039 - }, - { - "epoch": 3.8714521518579654, - "grad_norm": 0.009482808411121368, - "learning_rate": 0.00019999261032890557, - "loss": 46.0, - "step": 24040 - }, - { - "epoch": 3.8716131889367524, - "grad_norm": 0.0035449410788714886, - "learning_rate": 0.0001999926097138632, - "loss": 46.0, - "step": 24041 - }, - { - "epoch": 3.87177422601554, - "grad_norm": 0.004730087239295244, - "learning_rate": 0.00019999260909879526, - "loss": 46.0, - "step": 24042 - }, - { - "epoch": 3.8719352630943273, - "grad_norm": 0.0021472552325576544, - "learning_rate": 0.00019999260848370173, - "loss": 46.0, - "step": 24043 - }, - { - "epoch": 3.872096300173115, - "grad_norm": 0.010897508822381496, - "learning_rate": 0.00019999260786858264, - "loss": 46.0, - "step": 24044 - }, - { - "epoch": 3.8722573372519022, - "grad_norm": 0.01087038591504097, - "learning_rate": 0.0001999926072534379, - "loss": 46.0, - "step": 24045 - }, - { - "epoch": 3.8724183743306897, - "grad_norm": 0.00884197372943163, - "learning_rate": 0.00019999260663826758, - "loss": 46.0, - "step": 24046 - }, - { - "epoch": 3.872579411409477, - "grad_norm": 0.009836284443736076, - "learning_rate": 0.00019999260602307167, - "loss": 46.0, - "step": 24047 - }, - { - "epoch": 3.8727404484882646, - "grad_norm": 0.002207430312409997, - "learning_rate": 0.00019999260540785017, - "loss": 46.0, - "step": 24048 - }, - { - "epoch": 3.872901485567052, - "grad_norm": 0.004285207949578762, - "learning_rate": 0.00019999260479260306, - "loss": 46.0, - "step": 24049 - }, - { - "epoch": 3.873062522645839, - "grad_norm": 0.0009452085359953344, - "learning_rate": 0.0001999926041773304, - "loss": 46.0, - "step": 24050 - }, - { - "epoch": 3.8732235597246265, - "grad_norm": 0.003883578348904848, - "learning_rate": 0.0001999926035620321, - "loss": 46.0, - "step": 24051 - }, - { - "epoch": 3.873384596803414, - "grad_norm": 0.004387306049466133, - "learning_rate": 0.00019999260294670826, - "loss": 46.0, - "step": 24052 - }, - { - "epoch": 3.8735456338822014, - "grad_norm": 0.004749350715428591, - "learning_rate": 0.00019999260233135877, - "loss": 46.0, - "step": 24053 - }, - { - "epoch": 3.873706670960989, - "grad_norm": 0.017119865864515305, - "learning_rate": 0.0001999926017159837, - "loss": 46.0, - "step": 24054 - }, - { - "epoch": 3.8738677080397763, - "grad_norm": 0.01100144349038601, - "learning_rate": 0.00019999260110058306, - "loss": 46.0, - "step": 24055 - }, - { - "epoch": 3.8740287451185633, - "grad_norm": 0.007789523806422949, - "learning_rate": 0.0001999926004851568, - "loss": 46.0, - "step": 24056 - }, - { - "epoch": 3.8741897821973508, - "grad_norm": 0.0015500987647101283, - "learning_rate": 0.00019999259986970498, - "loss": 46.0, - "step": 24057 - }, - { - "epoch": 3.8743508192761382, - "grad_norm": 0.0019097868353128433, - "learning_rate": 0.00019999259925422756, - "loss": 46.0, - "step": 24058 - }, - { - "epoch": 3.8745118563549257, - "grad_norm": 0.004824074916541576, - "learning_rate": 0.00019999259863872452, - "loss": 46.0, - "step": 24059 - }, - { - "epoch": 3.874672893433713, - "grad_norm": 0.0035186184104532003, - "learning_rate": 0.0001999925980231959, - "loss": 46.0, - "step": 24060 - }, - { - "epoch": 3.8748339305125006, - "grad_norm": 0.005075378343462944, - "learning_rate": 0.0001999925974076417, - "loss": 46.0, - "step": 24061 - }, - { - "epoch": 3.874994967591288, - "grad_norm": 0.0016036900924518704, - "learning_rate": 0.00019999259679206191, - "loss": 46.0, - "step": 24062 - }, - { - "epoch": 3.8751560046700755, - "grad_norm": 0.009347415529191494, - "learning_rate": 0.0001999925961764565, - "loss": 46.0, - "step": 24063 - }, - { - "epoch": 3.875317041748863, - "grad_norm": 0.0012929226504638791, - "learning_rate": 0.0001999925955608255, - "loss": 46.0, - "step": 24064 - }, - { - "epoch": 3.87547807882765, - "grad_norm": 0.002047156449407339, - "learning_rate": 0.00019999259494516894, - "loss": 46.0, - "step": 24065 - }, - { - "epoch": 3.8756391159064374, - "grad_norm": 0.012128115631639957, - "learning_rate": 0.00019999259432948677, - "loss": 46.0, - "step": 24066 - }, - { - "epoch": 3.875800152985225, - "grad_norm": 0.0029642682056874037, - "learning_rate": 0.00019999259371377898, - "loss": 46.0, - "step": 24067 - }, - { - "epoch": 3.8759611900640123, - "grad_norm": 0.0023666848428547382, - "learning_rate": 0.00019999259309804563, - "loss": 46.0, - "step": 24068 - }, - { - "epoch": 3.8761222271427997, - "grad_norm": 0.008460167795419693, - "learning_rate": 0.00019999259248228667, - "loss": 46.0, - "step": 24069 - }, - { - "epoch": 3.8762832642215868, - "grad_norm": 0.004398632328957319, - "learning_rate": 0.00019999259186650212, - "loss": 46.0, - "step": 24070 - }, - { - "epoch": 3.876444301300374, - "grad_norm": 0.0014474081108346581, - "learning_rate": 0.000199992591250692, - "loss": 46.0, - "step": 24071 - }, - { - "epoch": 3.8766053383791617, - "grad_norm": 0.0017836528131738305, - "learning_rate": 0.00019999259063485628, - "loss": 46.0, - "step": 24072 - }, - { - "epoch": 3.876766375457949, - "grad_norm": 0.008612348698079586, - "learning_rate": 0.00019999259001899494, - "loss": 46.0, - "step": 24073 - }, - { - "epoch": 3.8769274125367366, - "grad_norm": 0.00445523951202631, - "learning_rate": 0.00019999258940310802, - "loss": 46.0, - "step": 24074 - }, - { - "epoch": 3.877088449615524, - "grad_norm": 0.0028079834301024675, - "learning_rate": 0.0001999925887871955, - "loss": 46.0, - "step": 24075 - }, - { - "epoch": 3.8772494866943115, - "grad_norm": 0.001897752983495593, - "learning_rate": 0.0001999925881712574, - "loss": 46.0, - "step": 24076 - }, - { - "epoch": 3.877410523773099, - "grad_norm": 0.0069461390376091, - "learning_rate": 0.00019999258755529372, - "loss": 46.0, - "step": 24077 - }, - { - "epoch": 3.8775715608518864, - "grad_norm": 0.0010076443431898952, - "learning_rate": 0.00019999258693930442, - "loss": 46.0, - "step": 24078 - }, - { - "epoch": 3.8777325979306734, - "grad_norm": 0.0057729510590434074, - "learning_rate": 0.00019999258632328953, - "loss": 46.0, - "step": 24079 - }, - { - "epoch": 3.877893635009461, - "grad_norm": 0.00535427313297987, - "learning_rate": 0.00019999258570724905, - "loss": 46.0, - "step": 24080 - }, - { - "epoch": 3.8780546720882483, - "grad_norm": 0.010499185882508755, - "learning_rate": 0.000199992585091183, - "loss": 46.0, - "step": 24081 - }, - { - "epoch": 3.8782157091670357, - "grad_norm": 0.012383539229631424, - "learning_rate": 0.00019999258447509134, - "loss": 46.0, - "step": 24082 - }, - { - "epoch": 3.878376746245823, - "grad_norm": 0.0036151634994894266, - "learning_rate": 0.00019999258385897407, - "loss": 46.0, - "step": 24083 - }, - { - "epoch": 3.8785377833246106, - "grad_norm": 0.0011932895286008716, - "learning_rate": 0.00019999258324283122, - "loss": 46.0, - "step": 24084 - }, - { - "epoch": 3.8786988204033976, - "grad_norm": 0.0017412648303434253, - "learning_rate": 0.00019999258262666278, - "loss": 46.0, - "step": 24085 - }, - { - "epoch": 3.878859857482185, - "grad_norm": 0.0036719886120408773, - "learning_rate": 0.00019999258201046876, - "loss": 46.0, - "step": 24086 - }, - { - "epoch": 3.8790208945609725, - "grad_norm": 0.004792661406099796, - "learning_rate": 0.00019999258139424912, - "loss": 46.0, - "step": 24087 - }, - { - "epoch": 3.87918193163976, - "grad_norm": 0.003962106071412563, - "learning_rate": 0.00019999258077800391, - "loss": 46.0, - "step": 24088 - }, - { - "epoch": 3.8793429687185474, - "grad_norm": 0.004261672031134367, - "learning_rate": 0.0001999925801617331, - "loss": 46.0, - "step": 24089 - }, - { - "epoch": 3.879504005797335, - "grad_norm": 0.01727946475148201, - "learning_rate": 0.0001999925795454367, - "loss": 46.0, - "step": 24090 - }, - { - "epoch": 3.8796650428761223, - "grad_norm": 0.0010890386765822768, - "learning_rate": 0.00019999257892911468, - "loss": 46.0, - "step": 24091 - }, - { - "epoch": 3.87982607995491, - "grad_norm": 0.00723354471847415, - "learning_rate": 0.0001999925783127671, - "loss": 46.0, - "step": 24092 - }, - { - "epoch": 3.8799871170336973, - "grad_norm": 0.004591763485223055, - "learning_rate": 0.00019999257769639391, - "loss": 46.0, - "step": 24093 - }, - { - "epoch": 3.8801481541124843, - "grad_norm": 0.0029744494240731, - "learning_rate": 0.00019999257707999514, - "loss": 46.0, - "step": 24094 - }, - { - "epoch": 3.8803091911912717, - "grad_norm": 0.008001652546226978, - "learning_rate": 0.00019999257646357077, - "loss": 46.0, - "step": 24095 - }, - { - "epoch": 3.880470228270059, - "grad_norm": 0.005045961122959852, - "learning_rate": 0.00019999257584712082, - "loss": 46.0, - "step": 24096 - }, - { - "epoch": 3.8806312653488466, - "grad_norm": 0.0023090653121471405, - "learning_rate": 0.00019999257523064525, - "loss": 46.0, - "step": 24097 - }, - { - "epoch": 3.880792302427634, - "grad_norm": 0.002473338507115841, - "learning_rate": 0.0001999925746141441, - "loss": 46.0, - "step": 24098 - }, - { - "epoch": 3.8809533395064215, - "grad_norm": 0.002843037713319063, - "learning_rate": 0.00019999257399761736, - "loss": 46.0, - "step": 24099 - }, - { - "epoch": 3.8811143765852085, - "grad_norm": 0.003936846740543842, - "learning_rate": 0.00019999257338106503, - "loss": 46.0, - "step": 24100 - }, - { - "epoch": 3.881275413663996, - "grad_norm": 0.0013656708179041743, - "learning_rate": 0.0001999925727644871, - "loss": 46.0, - "step": 24101 - }, - { - "epoch": 3.8814364507427834, - "grad_norm": 0.0062868306413292885, - "learning_rate": 0.00019999257214788356, - "loss": 46.0, - "step": 24102 - }, - { - "epoch": 3.881597487821571, - "grad_norm": 0.0016342218732461333, - "learning_rate": 0.00019999257153125447, - "loss": 46.0, - "step": 24103 - }, - { - "epoch": 3.8817585249003583, - "grad_norm": 0.004117118194699287, - "learning_rate": 0.00019999257091459974, - "loss": 46.0, - "step": 24104 - }, - { - "epoch": 3.881919561979146, - "grad_norm": 0.002345234854146838, - "learning_rate": 0.00019999257029791948, - "loss": 46.0, - "step": 24105 - }, - { - "epoch": 3.8820805990579332, - "grad_norm": 0.008430286310613155, - "learning_rate": 0.00019999256968121357, - "loss": 46.0, - "step": 24106 - }, - { - "epoch": 3.8822416361367207, - "grad_norm": 0.005393289960920811, - "learning_rate": 0.00019999256906448208, - "loss": 46.0, - "step": 24107 - }, - { - "epoch": 3.882402673215508, - "grad_norm": 0.0008756909519433975, - "learning_rate": 0.00019999256844772497, - "loss": 46.0, - "step": 24108 - }, - { - "epoch": 3.882563710294295, - "grad_norm": 0.010313312523066998, - "learning_rate": 0.0001999925678309423, - "loss": 46.0, - "step": 24109 - }, - { - "epoch": 3.8827247473730826, - "grad_norm": 0.0035547094885259867, - "learning_rate": 0.00019999256721413405, - "loss": 46.0, - "step": 24110 - }, - { - "epoch": 3.88288578445187, - "grad_norm": 0.004055333323776722, - "learning_rate": 0.00019999256659730018, - "loss": 46.0, - "step": 24111 - }, - { - "epoch": 3.8830468215306575, - "grad_norm": 0.0021817300003021955, - "learning_rate": 0.00019999256598044073, - "loss": 46.0, - "step": 24112 - }, - { - "epoch": 3.883207858609445, - "grad_norm": 0.006538146175444126, - "learning_rate": 0.00019999256536355571, - "loss": 46.0, - "step": 24113 - }, - { - "epoch": 3.883368895688232, - "grad_norm": 0.0006481777527369559, - "learning_rate": 0.00019999256474664508, - "loss": 46.0, - "step": 24114 - }, - { - "epoch": 3.8835299327670194, - "grad_norm": 0.0010808173101395369, - "learning_rate": 0.00019999256412970887, - "loss": 46.0, - "step": 24115 - }, - { - "epoch": 3.883690969845807, - "grad_norm": 0.007094038650393486, - "learning_rate": 0.000199992563512747, - "loss": 46.0, - "step": 24116 - }, - { - "epoch": 3.8838520069245943, - "grad_norm": 0.007232999429106712, - "learning_rate": 0.00019999256289575962, - "loss": 46.0, - "step": 24117 - }, - { - "epoch": 3.8840130440033818, - "grad_norm": 0.0025392400566488504, - "learning_rate": 0.00019999256227874659, - "loss": 46.0, - "step": 24118 - }, - { - "epoch": 3.884174081082169, - "grad_norm": 0.01313191931694746, - "learning_rate": 0.000199992561661708, - "loss": 46.0, - "step": 24119 - }, - { - "epoch": 3.8843351181609567, - "grad_norm": 0.002989828120917082, - "learning_rate": 0.0001999925610446438, - "loss": 46.0, - "step": 24120 - }, - { - "epoch": 3.884496155239744, - "grad_norm": 0.0030406485311686993, - "learning_rate": 0.00019999256042755402, - "loss": 46.0, - "step": 24121 - }, - { - "epoch": 3.8846571923185316, - "grad_norm": 0.001560604083351791, - "learning_rate": 0.00019999255981043864, - "loss": 46.0, - "step": 24122 - }, - { - "epoch": 3.8848182293973186, - "grad_norm": 0.003048016456887126, - "learning_rate": 0.00019999255919329767, - "loss": 46.0, - "step": 24123 - }, - { - "epoch": 3.884979266476106, - "grad_norm": 0.005632190499454737, - "learning_rate": 0.0001999925585761311, - "loss": 46.0, - "step": 24124 - }, - { - "epoch": 3.8851403035548935, - "grad_norm": 0.008868822827935219, - "learning_rate": 0.00019999255795893892, - "loss": 46.0, - "step": 24125 - }, - { - "epoch": 3.885301340633681, - "grad_norm": 0.0019880905747413635, - "learning_rate": 0.0001999925573417212, - "loss": 46.0, - "step": 24126 - }, - { - "epoch": 3.8854623777124684, - "grad_norm": 0.012027781456708908, - "learning_rate": 0.00019999255672447784, - "loss": 46.0, - "step": 24127 - }, - { - "epoch": 3.885623414791256, - "grad_norm": 0.0021413180511444807, - "learning_rate": 0.0001999925561072089, - "loss": 46.0, - "step": 24128 - }, - { - "epoch": 3.885784451870043, - "grad_norm": 0.0014022750547155738, - "learning_rate": 0.00019999255548991437, - "loss": 46.0, - "step": 24129 - }, - { - "epoch": 3.8859454889488303, - "grad_norm": 0.0031765063758939505, - "learning_rate": 0.00019999255487259423, - "loss": 46.0, - "step": 24130 - }, - { - "epoch": 3.8861065260276177, - "grad_norm": 0.009840128943324089, - "learning_rate": 0.00019999255425524854, - "loss": 46.0, - "step": 24131 - }, - { - "epoch": 3.886267563106405, - "grad_norm": 0.0036061331629753113, - "learning_rate": 0.00019999255363787723, - "loss": 46.0, - "step": 24132 - }, - { - "epoch": 3.8864286001851926, - "grad_norm": 0.00471268268302083, - "learning_rate": 0.0001999925530204803, - "loss": 46.0, - "step": 24133 - }, - { - "epoch": 3.88658963726398, - "grad_norm": 0.0011690089013427496, - "learning_rate": 0.00019999255240305783, - "loss": 46.0, - "step": 24134 - }, - { - "epoch": 3.8867506743427676, - "grad_norm": 0.004206649027764797, - "learning_rate": 0.00019999255178560974, - "loss": 46.0, - "step": 24135 - }, - { - "epoch": 3.886911711421555, - "grad_norm": 0.010350743308663368, - "learning_rate": 0.00019999255116813605, - "loss": 46.0, - "step": 24136 - }, - { - "epoch": 3.8870727485003425, - "grad_norm": 0.008397089317440987, - "learning_rate": 0.00019999255055063678, - "loss": 46.0, - "step": 24137 - }, - { - "epoch": 3.8872337855791295, - "grad_norm": 0.004742264281958342, - "learning_rate": 0.0001999925499331119, - "loss": 46.0, - "step": 24138 - }, - { - "epoch": 3.887394822657917, - "grad_norm": 0.007493521552532911, - "learning_rate": 0.00019999254931556143, - "loss": 46.0, - "step": 24139 - }, - { - "epoch": 3.8875558597367044, - "grad_norm": 0.002483305288478732, - "learning_rate": 0.0001999925486979854, - "loss": 46.0, - "step": 24140 - }, - { - "epoch": 3.887716896815492, - "grad_norm": 0.011176660656929016, - "learning_rate": 0.00019999254808038375, - "loss": 46.0, - "step": 24141 - }, - { - "epoch": 3.8878779338942793, - "grad_norm": 0.00583136361092329, - "learning_rate": 0.00019999254746275652, - "loss": 46.0, - "step": 24142 - }, - { - "epoch": 3.8880389709730663, - "grad_norm": 0.0037059015594422817, - "learning_rate": 0.00019999254684510367, - "loss": 46.0, - "step": 24143 - }, - { - "epoch": 3.8882000080518537, - "grad_norm": 0.013053884729743004, - "learning_rate": 0.00019999254622742524, - "loss": 46.0, - "step": 24144 - }, - { - "epoch": 3.888361045130641, - "grad_norm": 0.008035755716264248, - "learning_rate": 0.0001999925456097212, - "loss": 46.0, - "step": 24145 - }, - { - "epoch": 3.8885220822094286, - "grad_norm": 0.0010997587814927101, - "learning_rate": 0.00019999254499199163, - "loss": 46.0, - "step": 24146 - }, - { - "epoch": 3.888683119288216, - "grad_norm": 0.001482553081586957, - "learning_rate": 0.0001999925443742364, - "loss": 46.0, - "step": 24147 - }, - { - "epoch": 3.8888441563670035, - "grad_norm": 0.0019616500940173864, - "learning_rate": 0.00019999254375645563, - "loss": 46.0, - "step": 24148 - }, - { - "epoch": 3.889005193445791, - "grad_norm": 0.003192444331943989, - "learning_rate": 0.0001999925431386492, - "loss": 46.0, - "step": 24149 - }, - { - "epoch": 3.8891662305245784, - "grad_norm": 0.0011868590954691172, - "learning_rate": 0.00019999254252081724, - "loss": 46.0, - "step": 24150 - }, - { - "epoch": 3.889327267603366, - "grad_norm": 0.007910474203526974, - "learning_rate": 0.00019999254190295967, - "loss": 46.0, - "step": 24151 - }, - { - "epoch": 3.8894883046821533, - "grad_norm": 0.015386927872896194, - "learning_rate": 0.00019999254128507649, - "loss": 46.0, - "step": 24152 - }, - { - "epoch": 3.8896493417609403, - "grad_norm": 0.010213050059974194, - "learning_rate": 0.00019999254066716774, - "loss": 46.0, - "step": 24153 - }, - { - "epoch": 3.889810378839728, - "grad_norm": 0.003139836946502328, - "learning_rate": 0.00019999254004923338, - "loss": 46.0, - "step": 24154 - }, - { - "epoch": 3.8899714159185153, - "grad_norm": 0.0035195823293179274, - "learning_rate": 0.00019999253943127343, - "loss": 46.0, - "step": 24155 - }, - { - "epoch": 3.8901324529973027, - "grad_norm": 0.0009600928169675171, - "learning_rate": 0.0001999925388132879, - "loss": 46.0, - "step": 24156 - }, - { - "epoch": 3.89029349007609, - "grad_norm": 0.002001772169023752, - "learning_rate": 0.00019999253819527675, - "loss": 46.0, - "step": 24157 - }, - { - "epoch": 3.890454527154877, - "grad_norm": 0.0035237520933151245, - "learning_rate": 0.00019999253757724, - "loss": 46.0, - "step": 24158 - }, - { - "epoch": 3.8906155642336646, - "grad_norm": 0.0014567676698789, - "learning_rate": 0.0001999925369591777, - "loss": 46.0, - "step": 24159 - }, - { - "epoch": 3.890776601312452, - "grad_norm": 0.0017159712733700871, - "learning_rate": 0.0001999925363410898, - "loss": 46.0, - "step": 24160 - }, - { - "epoch": 3.8909376383912395, - "grad_norm": 0.005806478671729565, - "learning_rate": 0.00019999253572297628, - "loss": 46.0, - "step": 24161 - }, - { - "epoch": 3.891098675470027, - "grad_norm": 0.007622143253684044, - "learning_rate": 0.0001999925351048372, - "loss": 46.0, - "step": 24162 - }, - { - "epoch": 3.8912597125488144, - "grad_norm": 0.0026005215477198362, - "learning_rate": 0.0001999925344866725, - "loss": 46.0, - "step": 24163 - }, - { - "epoch": 3.891420749627602, - "grad_norm": 0.0015932057285681367, - "learning_rate": 0.0001999925338684822, - "loss": 46.0, - "step": 24164 - }, - { - "epoch": 3.8915817867063893, - "grad_norm": 0.007183215580880642, - "learning_rate": 0.00019999253325026634, - "loss": 46.0, - "step": 24165 - }, - { - "epoch": 3.8917428237851768, - "grad_norm": 0.0019393969560042024, - "learning_rate": 0.00019999253263202488, - "loss": 46.0, - "step": 24166 - }, - { - "epoch": 3.891903860863964, - "grad_norm": 0.004852348007261753, - "learning_rate": 0.0001999925320137578, - "loss": 46.0, - "step": 24167 - }, - { - "epoch": 3.8920648979427512, - "grad_norm": 0.007236093748360872, - "learning_rate": 0.00019999253139546514, - "loss": 46.0, - "step": 24168 - }, - { - "epoch": 3.8922259350215387, - "grad_norm": 0.007589172106236219, - "learning_rate": 0.0001999925307771469, - "loss": 46.0, - "step": 24169 - }, - { - "epoch": 3.892386972100326, - "grad_norm": 0.006314924452453852, - "learning_rate": 0.00019999253015880305, - "loss": 46.0, - "step": 24170 - }, - { - "epoch": 3.8925480091791136, - "grad_norm": 0.0028942995704710484, - "learning_rate": 0.0001999925295404336, - "loss": 46.0, - "step": 24171 - }, - { - "epoch": 3.892709046257901, - "grad_norm": 0.0020921321120113134, - "learning_rate": 0.0001999925289220386, - "loss": 46.0, - "step": 24172 - }, - { - "epoch": 3.892870083336688, - "grad_norm": 0.0008537573739886284, - "learning_rate": 0.00019999252830361797, - "loss": 46.0, - "step": 24173 - }, - { - "epoch": 3.8930311204154755, - "grad_norm": 0.002026214264333248, - "learning_rate": 0.00019999252768517175, - "loss": 46.0, - "step": 24174 - }, - { - "epoch": 3.893192157494263, - "grad_norm": 0.0033443926367908716, - "learning_rate": 0.00019999252706669996, - "loss": 46.0, - "step": 24175 - }, - { - "epoch": 3.8933531945730504, - "grad_norm": 0.0008840114460326731, - "learning_rate": 0.00019999252644820254, - "loss": 46.0, - "step": 24176 - }, - { - "epoch": 3.893514231651838, - "grad_norm": 0.0081992968916893, - "learning_rate": 0.00019999252582967957, - "loss": 46.0, - "step": 24177 - }, - { - "epoch": 3.8936752687306253, - "grad_norm": 0.0019973102025687695, - "learning_rate": 0.00019999252521113095, - "loss": 46.0, - "step": 24178 - }, - { - "epoch": 3.8938363058094128, - "grad_norm": 0.000560144369956106, - "learning_rate": 0.0001999925245925568, - "loss": 46.0, - "step": 24179 - }, - { - "epoch": 3.8939973428882, - "grad_norm": 0.006176150869578123, - "learning_rate": 0.00019999252397395702, - "loss": 46.0, - "step": 24180 - }, - { - "epoch": 3.8941583799669877, - "grad_norm": 0.00864804070442915, - "learning_rate": 0.00019999252335533164, - "loss": 46.0, - "step": 24181 - }, - { - "epoch": 3.8943194170457747, - "grad_norm": 0.009942607954144478, - "learning_rate": 0.0001999925227366807, - "loss": 46.0, - "step": 24182 - }, - { - "epoch": 3.894480454124562, - "grad_norm": 0.004692689515650272, - "learning_rate": 0.00019999252211800415, - "loss": 46.0, - "step": 24183 - }, - { - "epoch": 3.8946414912033496, - "grad_norm": 0.002124243415892124, - "learning_rate": 0.000199992521499302, - "loss": 46.0, - "step": 24184 - }, - { - "epoch": 3.894802528282137, - "grad_norm": 0.00414430582895875, - "learning_rate": 0.0001999925208805743, - "loss": 46.0, - "step": 24185 - }, - { - "epoch": 3.8949635653609245, - "grad_norm": 0.004519076552242041, - "learning_rate": 0.00019999252026182095, - "loss": 46.0, - "step": 24186 - }, - { - "epoch": 3.8951246024397115, - "grad_norm": 0.002755421446636319, - "learning_rate": 0.00019999251964304202, - "loss": 46.0, - "step": 24187 - }, - { - "epoch": 3.895285639518499, - "grad_norm": 0.00133071793243289, - "learning_rate": 0.0001999925190242375, - "loss": 46.0, - "step": 24188 - }, - { - "epoch": 3.8954466765972864, - "grad_norm": 0.003784241620451212, - "learning_rate": 0.0001999925184054074, - "loss": 46.0, - "step": 24189 - }, - { - "epoch": 3.895607713676074, - "grad_norm": 0.012115403078496456, - "learning_rate": 0.0001999925177865517, - "loss": 46.0, - "step": 24190 - }, - { - "epoch": 3.8957687507548613, - "grad_norm": 0.0018362364498898387, - "learning_rate": 0.0001999925171676704, - "loss": 46.0, - "step": 24191 - }, - { - "epoch": 3.8959297878336487, - "grad_norm": 0.005691426806151867, - "learning_rate": 0.0001999925165487635, - "loss": 46.0, - "step": 24192 - }, - { - "epoch": 3.896090824912436, - "grad_norm": 0.007418483030050993, - "learning_rate": 0.00019999251592983105, - "loss": 46.0, - "step": 24193 - }, - { - "epoch": 3.8962518619912236, - "grad_norm": 0.003390252124518156, - "learning_rate": 0.00019999251531087296, - "loss": 46.0, - "step": 24194 - }, - { - "epoch": 3.896412899070011, - "grad_norm": 0.0013939245836809278, - "learning_rate": 0.00019999251469188928, - "loss": 46.0, - "step": 24195 - }, - { - "epoch": 3.896573936148798, - "grad_norm": 0.0035515360068529844, - "learning_rate": 0.00019999251407288001, - "loss": 46.0, - "step": 24196 - }, - { - "epoch": 3.8967349732275856, - "grad_norm": 0.0044871107675135136, - "learning_rate": 0.0001999925134538452, - "loss": 46.0, - "step": 24197 - }, - { - "epoch": 3.896896010306373, - "grad_norm": 0.0023952689953148365, - "learning_rate": 0.00019999251283478472, - "loss": 46.0, - "step": 24198 - }, - { - "epoch": 3.8970570473851605, - "grad_norm": 0.002432761713862419, - "learning_rate": 0.0001999925122156987, - "loss": 46.0, - "step": 24199 - }, - { - "epoch": 3.897218084463948, - "grad_norm": 0.004769071005284786, - "learning_rate": 0.00019999251159658705, - "loss": 46.0, - "step": 24200 - }, - { - "epoch": 3.8973791215427354, - "grad_norm": 0.013915703631937504, - "learning_rate": 0.00019999251097744982, - "loss": 46.0, - "step": 24201 - }, - { - "epoch": 3.8975401586215224, - "grad_norm": 0.005264056380838156, - "learning_rate": 0.000199992510358287, - "loss": 46.0, - "step": 24202 - }, - { - "epoch": 3.89770119570031, - "grad_norm": 0.007863735780119896, - "learning_rate": 0.00019999250973909863, - "loss": 46.0, - "step": 24203 - }, - { - "epoch": 3.8978622327790973, - "grad_norm": 0.001511303591541946, - "learning_rate": 0.00019999250911988462, - "loss": 46.0, - "step": 24204 - }, - { - "epoch": 3.8980232698578847, - "grad_norm": 0.004624978173524141, - "learning_rate": 0.000199992508500645, - "loss": 46.0, - "step": 24205 - }, - { - "epoch": 3.898184306936672, - "grad_norm": 0.01257304847240448, - "learning_rate": 0.00019999250788137982, - "loss": 46.0, - "step": 24206 - }, - { - "epoch": 3.8983453440154596, - "grad_norm": 0.012954912148416042, - "learning_rate": 0.00019999250726208904, - "loss": 46.0, - "step": 24207 - }, - { - "epoch": 3.898506381094247, - "grad_norm": 0.0005362546071410179, - "learning_rate": 0.00019999250664277268, - "loss": 46.0, - "step": 24208 - }, - { - "epoch": 3.8986674181730345, - "grad_norm": 0.0005457444349303842, - "learning_rate": 0.0001999925060234307, - "loss": 46.0, - "step": 24209 - }, - { - "epoch": 3.898828455251822, - "grad_norm": 0.004380771424621344, - "learning_rate": 0.00019999250540406313, - "loss": 46.0, - "step": 24210 - }, - { - "epoch": 3.898989492330609, - "grad_norm": 0.0032124503049999475, - "learning_rate": 0.00019999250478466997, - "loss": 46.0, - "step": 24211 - }, - { - "epoch": 3.8991505294093964, - "grad_norm": 0.004542514681816101, - "learning_rate": 0.00019999250416525123, - "loss": 46.0, - "step": 24212 - }, - { - "epoch": 3.899311566488184, - "grad_norm": 0.0030122194439172745, - "learning_rate": 0.0001999925035458069, - "loss": 46.0, - "step": 24213 - }, - { - "epoch": 3.8994726035669713, - "grad_norm": 0.015683606266975403, - "learning_rate": 0.000199992502926337, - "loss": 46.0, - "step": 24214 - }, - { - "epoch": 3.899633640645759, - "grad_norm": 0.018528258427977562, - "learning_rate": 0.00019999250230684143, - "loss": 46.0, - "step": 24215 - }, - { - "epoch": 3.899794677724546, - "grad_norm": 0.005893013905733824, - "learning_rate": 0.00019999250168732031, - "loss": 46.0, - "step": 24216 - }, - { - "epoch": 3.8999557148033333, - "grad_norm": 0.008164685219526291, - "learning_rate": 0.0001999925010677736, - "loss": 46.0, - "step": 24217 - }, - { - "epoch": 3.9001167518821207, - "grad_norm": 0.005075582303106785, - "learning_rate": 0.0001999925004482013, - "loss": 46.0, - "step": 24218 - }, - { - "epoch": 3.900277788960908, - "grad_norm": 0.004707506392151117, - "learning_rate": 0.00019999249982860338, - "loss": 46.0, - "step": 24219 - }, - { - "epoch": 3.9004388260396956, - "grad_norm": 0.003324424382299185, - "learning_rate": 0.00019999249920897995, - "loss": 46.0, - "step": 24220 - }, - { - "epoch": 3.900599863118483, - "grad_norm": 0.00880496110767126, - "learning_rate": 0.00019999249858933084, - "loss": 46.0, - "step": 24221 - }, - { - "epoch": 3.9007609001972705, - "grad_norm": 0.0033119728323072195, - "learning_rate": 0.00019999249796965614, - "loss": 46.0, - "step": 24222 - }, - { - "epoch": 3.900921937276058, - "grad_norm": 0.0024106313940137625, - "learning_rate": 0.0001999924973499559, - "loss": 46.0, - "step": 24223 - }, - { - "epoch": 3.9010829743548454, - "grad_norm": 0.005441464018076658, - "learning_rate": 0.00019999249673023002, - "loss": 46.0, - "step": 24224 - }, - { - "epoch": 3.901244011433633, - "grad_norm": 0.002073982497677207, - "learning_rate": 0.00019999249611047856, - "loss": 46.0, - "step": 24225 - }, - { - "epoch": 3.90140504851242, - "grad_norm": 0.009327485226094723, - "learning_rate": 0.00019999249549070155, - "loss": 46.0, - "step": 24226 - }, - { - "epoch": 3.9015660855912073, - "grad_norm": 0.001985186478123069, - "learning_rate": 0.0001999924948708989, - "loss": 46.0, - "step": 24227 - }, - { - "epoch": 3.9017271226699948, - "grad_norm": 0.0021331205498427153, - "learning_rate": 0.00019999249425107065, - "loss": 46.0, - "step": 24228 - }, - { - "epoch": 3.9018881597487822, - "grad_norm": 0.0018735101912170649, - "learning_rate": 0.00019999249363121684, - "loss": 46.0, - "step": 24229 - }, - { - "epoch": 3.9020491968275697, - "grad_norm": 0.0019107733387500048, - "learning_rate": 0.0001999924930113374, - "loss": 46.0, - "step": 24230 - }, - { - "epoch": 3.9022102339063567, - "grad_norm": 0.012786966748535633, - "learning_rate": 0.0001999924923914324, - "loss": 46.0, - "step": 24231 - }, - { - "epoch": 3.902371270985144, - "grad_norm": 0.0006641882355324924, - "learning_rate": 0.0001999924917715018, - "loss": 46.0, - "step": 24232 - }, - { - "epoch": 3.9025323080639316, - "grad_norm": 0.0053881434723734856, - "learning_rate": 0.0001999924911515456, - "loss": 46.0, - "step": 24233 - }, - { - "epoch": 3.902693345142719, - "grad_norm": 0.0014949383912608027, - "learning_rate": 0.0001999924905315638, - "loss": 46.0, - "step": 24234 - }, - { - "epoch": 3.9028543822215065, - "grad_norm": 0.008667722344398499, - "learning_rate": 0.0001999924899115564, - "loss": 46.0, - "step": 24235 - }, - { - "epoch": 3.903015419300294, - "grad_norm": 0.003046006429940462, - "learning_rate": 0.00019999248929152344, - "loss": 46.0, - "step": 24236 - }, - { - "epoch": 3.9031764563790814, - "grad_norm": 0.0012053512036800385, - "learning_rate": 0.00019999248867146486, - "loss": 46.0, - "step": 24237 - }, - { - "epoch": 3.903337493457869, - "grad_norm": 0.010604547336697578, - "learning_rate": 0.0001999924880513807, - "loss": 46.0, - "step": 24238 - }, - { - "epoch": 3.9034985305366563, - "grad_norm": 0.00573544017970562, - "learning_rate": 0.00019999248743127096, - "loss": 46.0, - "step": 24239 - }, - { - "epoch": 3.9036595676154433, - "grad_norm": 0.002507448196411133, - "learning_rate": 0.00019999248681113558, - "loss": 46.0, - "step": 24240 - }, - { - "epoch": 3.9038206046942308, - "grad_norm": 0.0019390815868973732, - "learning_rate": 0.00019999248619097462, - "loss": 46.0, - "step": 24241 - }, - { - "epoch": 3.903981641773018, - "grad_norm": 0.007382815703749657, - "learning_rate": 0.0001999924855707881, - "loss": 46.0, - "step": 24242 - }, - { - "epoch": 3.9041426788518057, - "grad_norm": 0.001997357001528144, - "learning_rate": 0.000199992484950576, - "loss": 46.0, - "step": 24243 - }, - { - "epoch": 3.904303715930593, - "grad_norm": 0.0026674431283026934, - "learning_rate": 0.00019999248433033828, - "loss": 46.0, - "step": 24244 - }, - { - "epoch": 3.9044647530093806, - "grad_norm": 0.0023212411906570196, - "learning_rate": 0.00019999248371007494, - "loss": 46.0, - "step": 24245 - }, - { - "epoch": 3.9046257900881676, - "grad_norm": 0.0011796843027696013, - "learning_rate": 0.00019999248308978605, - "loss": 46.0, - "step": 24246 - }, - { - "epoch": 3.904786827166955, - "grad_norm": 0.0010502564255148172, - "learning_rate": 0.00019999248246947157, - "loss": 46.0, - "step": 24247 - }, - { - "epoch": 3.9049478642457425, - "grad_norm": 0.00227341684512794, - "learning_rate": 0.00019999248184913144, - "loss": 46.0, - "step": 24248 - }, - { - "epoch": 3.90510890132453, - "grad_norm": 0.0022540211211889982, - "learning_rate": 0.00019999248122876576, - "loss": 46.0, - "step": 24249 - }, - { - "epoch": 3.9052699384033174, - "grad_norm": 0.0015600745100528002, - "learning_rate": 0.0001999924806083745, - "loss": 46.0, - "step": 24250 - }, - { - "epoch": 3.905430975482105, - "grad_norm": 0.007016841322183609, - "learning_rate": 0.00019999247998795763, - "loss": 46.0, - "step": 24251 - }, - { - "epoch": 3.9055920125608923, - "grad_norm": 0.007967165671288967, - "learning_rate": 0.00019999247936751516, - "loss": 46.0, - "step": 24252 - }, - { - "epoch": 3.9057530496396797, - "grad_norm": 0.003511112881824374, - "learning_rate": 0.0001999924787470471, - "loss": 46.0, - "step": 24253 - }, - { - "epoch": 3.905914086718467, - "grad_norm": 0.0021445807069540024, - "learning_rate": 0.00019999247812655345, - "loss": 46.0, - "step": 24254 - }, - { - "epoch": 3.906075123797254, - "grad_norm": 0.0020800428465008736, - "learning_rate": 0.00019999247750603422, - "loss": 46.0, - "step": 24255 - }, - { - "epoch": 3.9062361608760416, - "grad_norm": 0.0035484626423567533, - "learning_rate": 0.00019999247688548937, - "loss": 46.0, - "step": 24256 - }, - { - "epoch": 3.906397197954829, - "grad_norm": 0.010253670625388622, - "learning_rate": 0.00019999247626491893, - "loss": 46.0, - "step": 24257 - }, - { - "epoch": 3.9065582350336165, - "grad_norm": 0.0008009191369637847, - "learning_rate": 0.0001999924756443229, - "loss": 46.0, - "step": 24258 - }, - { - "epoch": 3.906719272112404, - "grad_norm": 0.004874709993600845, - "learning_rate": 0.0001999924750237013, - "loss": 46.0, - "step": 24259 - }, - { - "epoch": 3.906880309191191, - "grad_norm": 0.006979524157941341, - "learning_rate": 0.00019999247440305408, - "loss": 46.0, - "step": 24260 - }, - { - "epoch": 3.9070413462699785, - "grad_norm": 0.008102000690996647, - "learning_rate": 0.0001999924737823813, - "loss": 46.0, - "step": 24261 - }, - { - "epoch": 3.907202383348766, - "grad_norm": 0.005288542248308659, - "learning_rate": 0.0001999924731616829, - "loss": 46.0, - "step": 24262 - }, - { - "epoch": 3.9073634204275534, - "grad_norm": 0.0025655077770352364, - "learning_rate": 0.0001999924725409589, - "loss": 46.0, - "step": 24263 - }, - { - "epoch": 3.907524457506341, - "grad_norm": 0.0030293199233710766, - "learning_rate": 0.00019999247192020934, - "loss": 46.0, - "step": 24264 - }, - { - "epoch": 3.9076854945851283, - "grad_norm": 0.005259111989289522, - "learning_rate": 0.00019999247129943415, - "loss": 46.0, - "step": 24265 - }, - { - "epoch": 3.9078465316639157, - "grad_norm": 0.006712483707815409, - "learning_rate": 0.0001999924706786334, - "loss": 46.0, - "step": 24266 - }, - { - "epoch": 3.908007568742703, - "grad_norm": 0.002392790513113141, - "learning_rate": 0.00019999247005780704, - "loss": 46.0, - "step": 24267 - }, - { - "epoch": 3.9081686058214906, - "grad_norm": 0.005893141031265259, - "learning_rate": 0.0001999924694369551, - "loss": 46.0, - "step": 24268 - }, - { - "epoch": 3.9083296429002776, - "grad_norm": 0.002989998087286949, - "learning_rate": 0.00019999246881607753, - "loss": 46.0, - "step": 24269 - }, - { - "epoch": 3.908490679979065, - "grad_norm": 0.0043844375759363174, - "learning_rate": 0.0001999924681951744, - "loss": 46.0, - "step": 24270 - }, - { - "epoch": 3.9086517170578525, - "grad_norm": 0.0005724730435758829, - "learning_rate": 0.00019999246757424567, - "loss": 46.0, - "step": 24271 - }, - { - "epoch": 3.90881275413664, - "grad_norm": 0.0026805636007338762, - "learning_rate": 0.00019999246695329135, - "loss": 46.0, - "step": 24272 - }, - { - "epoch": 3.9089737912154274, - "grad_norm": 0.006760813295841217, - "learning_rate": 0.00019999246633231144, - "loss": 46.0, - "step": 24273 - }, - { - "epoch": 3.909134828294215, - "grad_norm": 0.0062536257319152355, - "learning_rate": 0.0001999924657113059, - "loss": 46.0, - "step": 24274 - }, - { - "epoch": 3.909295865373002, - "grad_norm": 0.006175358314067125, - "learning_rate": 0.00019999246509027482, - "loss": 46.0, - "step": 24275 - }, - { - "epoch": 3.9094569024517893, - "grad_norm": 0.0011312743881717324, - "learning_rate": 0.00019999246446921812, - "loss": 46.0, - "step": 24276 - }, - { - "epoch": 3.909617939530577, - "grad_norm": 0.005588274449110031, - "learning_rate": 0.00019999246384813584, - "loss": 46.0, - "step": 24277 - }, - { - "epoch": 3.9097789766093642, - "grad_norm": 0.004909217823296785, - "learning_rate": 0.00019999246322702796, - "loss": 46.0, - "step": 24278 - }, - { - "epoch": 3.9099400136881517, - "grad_norm": 0.006424983497709036, - "learning_rate": 0.00019999246260589447, - "loss": 46.0, - "step": 24279 - }, - { - "epoch": 3.910101050766939, - "grad_norm": 0.0027578778099268675, - "learning_rate": 0.00019999246198473543, - "loss": 46.0, - "step": 24280 - }, - { - "epoch": 3.9102620878457266, - "grad_norm": 0.0024216067977249622, - "learning_rate": 0.00019999246136355076, - "loss": 46.0, - "step": 24281 - }, - { - "epoch": 3.910423124924514, - "grad_norm": 0.0019010824616998434, - "learning_rate": 0.0001999924607423405, - "loss": 46.0, - "step": 24282 - }, - { - "epoch": 3.9105841620033015, - "grad_norm": 0.008727259933948517, - "learning_rate": 0.00019999246012110468, - "loss": 46.0, - "step": 24283 - }, - { - "epoch": 3.9107451990820885, - "grad_norm": 0.003990017343312502, - "learning_rate": 0.00019999245949984322, - "loss": 46.0, - "step": 24284 - }, - { - "epoch": 3.910906236160876, - "grad_norm": 0.0037385544274002314, - "learning_rate": 0.00019999245887855618, - "loss": 46.0, - "step": 24285 - }, - { - "epoch": 3.9110672732396634, - "grad_norm": 0.0029327156953513622, - "learning_rate": 0.00019999245825724359, - "loss": 46.0, - "step": 24286 - }, - { - "epoch": 3.911228310318451, - "grad_norm": 0.001799881225451827, - "learning_rate": 0.00019999245763590535, - "loss": 46.0, - "step": 24287 - }, - { - "epoch": 3.9113893473972383, - "grad_norm": 0.009433192200958729, - "learning_rate": 0.00019999245701454155, - "loss": 46.0, - "step": 24288 - }, - { - "epoch": 3.9115503844760258, - "grad_norm": 0.0045669106766581535, - "learning_rate": 0.00019999245639315213, - "loss": 46.0, - "step": 24289 - }, - { - "epoch": 3.9117114215548128, - "grad_norm": 0.001397787593305111, - "learning_rate": 0.00019999245577173716, - "loss": 46.0, - "step": 24290 - }, - { - "epoch": 3.9118724586336002, - "grad_norm": 0.0020416441839188337, - "learning_rate": 0.00019999245515029657, - "loss": 46.0, - "step": 24291 - }, - { - "epoch": 3.9120334957123877, - "grad_norm": 0.002587964292615652, - "learning_rate": 0.00019999245452883036, - "loss": 46.0, - "step": 24292 - }, - { - "epoch": 3.912194532791175, - "grad_norm": 0.005450607743114233, - "learning_rate": 0.0001999924539073386, - "loss": 46.0, - "step": 24293 - }, - { - "epoch": 3.9123555698699626, - "grad_norm": 0.005157710053026676, - "learning_rate": 0.00019999245328582122, - "loss": 46.0, - "step": 24294 - }, - { - "epoch": 3.91251660694875, - "grad_norm": 0.004069255664944649, - "learning_rate": 0.00019999245266427826, - "loss": 46.0, - "step": 24295 - }, - { - "epoch": 3.9126776440275375, - "grad_norm": 0.010862830094993114, - "learning_rate": 0.0001999924520427097, - "loss": 46.0, - "step": 24296 - }, - { - "epoch": 3.912838681106325, - "grad_norm": 0.003951295278966427, - "learning_rate": 0.00019999245142111557, - "loss": 46.0, - "step": 24297 - }, - { - "epoch": 3.9129997181851124, - "grad_norm": 0.01030823215842247, - "learning_rate": 0.00019999245079949581, - "loss": 46.0, - "step": 24298 - }, - { - "epoch": 3.9131607552638994, - "grad_norm": 0.006517235189676285, - "learning_rate": 0.00019999245017785047, - "loss": 46.0, - "step": 24299 - }, - { - "epoch": 3.913321792342687, - "grad_norm": 0.008500062860548496, - "learning_rate": 0.00019999244955617957, - "loss": 46.0, - "step": 24300 - }, - { - "epoch": 3.9134828294214743, - "grad_norm": 0.0017892306204885244, - "learning_rate": 0.00019999244893448306, - "loss": 46.0, - "step": 24301 - }, - { - "epoch": 3.9136438665002617, - "grad_norm": 0.002678181976079941, - "learning_rate": 0.00019999244831276093, - "loss": 46.0, - "step": 24302 - }, - { - "epoch": 3.913804903579049, - "grad_norm": 0.01573646441102028, - "learning_rate": 0.00019999244769101324, - "loss": 46.0, - "step": 24303 - }, - { - "epoch": 3.913965940657836, - "grad_norm": 0.000764210766647011, - "learning_rate": 0.00019999244706923994, - "loss": 46.0, - "step": 24304 - }, - { - "epoch": 3.9141269777366237, - "grad_norm": 0.003422482404857874, - "learning_rate": 0.00019999244644744107, - "loss": 46.0, - "step": 24305 - }, - { - "epoch": 3.914288014815411, - "grad_norm": 0.008215613663196564, - "learning_rate": 0.00019999244582561657, - "loss": 46.0, - "step": 24306 - }, - { - "epoch": 3.9144490518941986, - "grad_norm": 0.0015661359066143632, - "learning_rate": 0.00019999244520376648, - "loss": 46.0, - "step": 24307 - }, - { - "epoch": 3.914610088972986, - "grad_norm": 0.007579847238957882, - "learning_rate": 0.00019999244458189082, - "loss": 46.0, - "step": 24308 - }, - { - "epoch": 3.9147711260517735, - "grad_norm": 0.0019457826856523752, - "learning_rate": 0.00019999244395998956, - "loss": 46.0, - "step": 24309 - }, - { - "epoch": 3.914932163130561, - "grad_norm": 0.0009566232329234481, - "learning_rate": 0.0001999924433380627, - "loss": 46.0, - "step": 24310 - }, - { - "epoch": 3.9150932002093484, - "grad_norm": 0.007234182208776474, - "learning_rate": 0.00019999244271611026, - "loss": 46.0, - "step": 24311 - }, - { - "epoch": 3.915254237288136, - "grad_norm": 0.004438658244907856, - "learning_rate": 0.0001999924420941322, - "loss": 46.0, - "step": 24312 - }, - { - "epoch": 3.915415274366923, - "grad_norm": 0.002196710556745529, - "learning_rate": 0.0001999924414721286, - "loss": 46.0, - "step": 24313 - }, - { - "epoch": 3.9155763114457103, - "grad_norm": 0.0016576610505580902, - "learning_rate": 0.00019999244085009936, - "loss": 46.0, - "step": 24314 - }, - { - "epoch": 3.9157373485244977, - "grad_norm": 0.013751697726547718, - "learning_rate": 0.00019999244022804452, - "loss": 46.0, - "step": 24315 - }, - { - "epoch": 3.915898385603285, - "grad_norm": 0.00655261380597949, - "learning_rate": 0.00019999243960596412, - "loss": 46.0, - "step": 24316 - }, - { - "epoch": 3.9160594226820726, - "grad_norm": 0.0025038616731762886, - "learning_rate": 0.0001999924389838581, - "loss": 46.0, - "step": 24317 - }, - { - "epoch": 3.91622045976086, - "grad_norm": 0.008263389579951763, - "learning_rate": 0.00019999243836172652, - "loss": 46.0, - "step": 24318 - }, - { - "epoch": 3.916381496839647, - "grad_norm": 0.00229754694737494, - "learning_rate": 0.00019999243773956933, - "loss": 46.0, - "step": 24319 - }, - { - "epoch": 3.9165425339184345, - "grad_norm": 0.004807076882570982, - "learning_rate": 0.00019999243711738655, - "loss": 46.0, - "step": 24320 - }, - { - "epoch": 3.916703570997222, - "grad_norm": 0.0017659561708569527, - "learning_rate": 0.00019999243649517816, - "loss": 46.0, - "step": 24321 - }, - { - "epoch": 3.9168646080760094, - "grad_norm": 0.0021025885362178087, - "learning_rate": 0.00019999243587294418, - "loss": 46.0, - "step": 24322 - }, - { - "epoch": 3.917025645154797, - "grad_norm": 0.013332934118807316, - "learning_rate": 0.00019999243525068463, - "loss": 46.0, - "step": 24323 - }, - { - "epoch": 3.9171866822335843, - "grad_norm": 0.0029037711210548878, - "learning_rate": 0.00019999243462839948, - "loss": 46.0, - "step": 24324 - }, - { - "epoch": 3.917347719312372, - "grad_norm": 0.01141924038529396, - "learning_rate": 0.0001999924340060887, - "loss": 46.0, - "step": 24325 - }, - { - "epoch": 3.9175087563911593, - "grad_norm": 0.004519221372902393, - "learning_rate": 0.0001999924333837524, - "loss": 46.0, - "step": 24326 - }, - { - "epoch": 3.9176697934699467, - "grad_norm": 0.0029633843805640936, - "learning_rate": 0.00019999243276139046, - "loss": 46.0, - "step": 24327 - }, - { - "epoch": 3.9178308305487337, - "grad_norm": 0.002741101896390319, - "learning_rate": 0.0001999924321390029, - "loss": 46.0, - "step": 24328 - }, - { - "epoch": 3.917991867627521, - "grad_norm": 0.003616885282099247, - "learning_rate": 0.0001999924315165898, - "loss": 46.0, - "step": 24329 - }, - { - "epoch": 3.9181529047063086, - "grad_norm": 0.0010131315793842077, - "learning_rate": 0.00019999243089415108, - "loss": 46.0, - "step": 24330 - }, - { - "epoch": 3.918313941785096, - "grad_norm": 0.0015564961358904839, - "learning_rate": 0.00019999243027168676, - "loss": 46.0, - "step": 24331 - }, - { - "epoch": 3.9184749788638835, - "grad_norm": 0.016632836312055588, - "learning_rate": 0.00019999242964919688, - "loss": 46.0, - "step": 24332 - }, - { - "epoch": 3.9186360159426705, - "grad_norm": 0.010603503324091434, - "learning_rate": 0.0001999924290266814, - "loss": 46.0, - "step": 24333 - }, - { - "epoch": 3.918797053021458, - "grad_norm": 0.0012143533676862717, - "learning_rate": 0.0001999924284041403, - "loss": 46.0, - "step": 24334 - }, - { - "epoch": 3.9189580901002454, - "grad_norm": 0.004277637228369713, - "learning_rate": 0.00019999242778157364, - "loss": 46.0, - "step": 24335 - }, - { - "epoch": 3.919119127179033, - "grad_norm": 0.0029652700759470463, - "learning_rate": 0.00019999242715898136, - "loss": 46.0, - "step": 24336 - }, - { - "epoch": 3.9192801642578203, - "grad_norm": 0.011836004443466663, - "learning_rate": 0.00019999242653636349, - "loss": 46.0, - "step": 24337 - }, - { - "epoch": 3.919441201336608, - "grad_norm": 0.0017603442538529634, - "learning_rate": 0.00019999242591372003, - "loss": 46.0, - "step": 24338 - }, - { - "epoch": 3.9196022384153952, - "grad_norm": 0.005851692520081997, - "learning_rate": 0.00019999242529105098, - "loss": 46.0, - "step": 24339 - }, - { - "epoch": 3.9197632754941827, - "grad_norm": 0.0037130527198314667, - "learning_rate": 0.00019999242466835635, - "loss": 46.0, - "step": 24340 - }, - { - "epoch": 3.91992431257297, - "grad_norm": 0.0024980197194963694, - "learning_rate": 0.0001999924240456361, - "loss": 46.0, - "step": 24341 - }, - { - "epoch": 3.9200853496517576, - "grad_norm": 0.0038946177810430527, - "learning_rate": 0.00019999242342289027, - "loss": 46.0, - "step": 24342 - }, - { - "epoch": 3.9202463867305446, - "grad_norm": 0.0033849331084638834, - "learning_rate": 0.00019999242280011886, - "loss": 46.0, - "step": 24343 - }, - { - "epoch": 3.920407423809332, - "grad_norm": 0.01179580483585596, - "learning_rate": 0.00019999242217732185, - "loss": 46.0, - "step": 24344 - }, - { - "epoch": 3.9205684608881195, - "grad_norm": 0.001581982127390802, - "learning_rate": 0.00019999242155449923, - "loss": 46.0, - "step": 24345 - }, - { - "epoch": 3.920729497966907, - "grad_norm": 0.00549320550635457, - "learning_rate": 0.00019999242093165102, - "loss": 46.0, - "step": 24346 - }, - { - "epoch": 3.9208905350456944, - "grad_norm": 0.0030619860626757145, - "learning_rate": 0.00019999242030877725, - "loss": 46.0, - "step": 24347 - }, - { - "epoch": 3.9210515721244814, - "grad_norm": 0.005441068671643734, - "learning_rate": 0.00019999241968587784, - "loss": 46.0, - "step": 24348 - }, - { - "epoch": 3.921212609203269, - "grad_norm": 0.0016338042914867401, - "learning_rate": 0.00019999241906295287, - "loss": 46.0, - "step": 24349 - }, - { - "epoch": 3.9213736462820563, - "grad_norm": 0.0029275708366185427, - "learning_rate": 0.00019999241844000228, - "loss": 46.0, - "step": 24350 - }, - { - "epoch": 3.9215346833608438, - "grad_norm": 0.0021279742941260338, - "learning_rate": 0.00019999241781702614, - "loss": 46.0, - "step": 24351 - }, - { - "epoch": 3.921695720439631, - "grad_norm": 0.002017433987930417, - "learning_rate": 0.00019999241719402438, - "loss": 46.0, - "step": 24352 - }, - { - "epoch": 3.9218567575184187, - "grad_norm": 0.0036244397051632404, - "learning_rate": 0.00019999241657099704, - "loss": 46.0, - "step": 24353 - }, - { - "epoch": 3.922017794597206, - "grad_norm": 0.0064416504465043545, - "learning_rate": 0.0001999924159479441, - "loss": 46.0, - "step": 24354 - }, - { - "epoch": 3.9221788316759936, - "grad_norm": 0.006330688018351793, - "learning_rate": 0.00019999241532486556, - "loss": 46.0, - "step": 24355 - }, - { - "epoch": 3.922339868754781, - "grad_norm": 0.003200170351192355, - "learning_rate": 0.00019999241470176142, - "loss": 46.0, - "step": 24356 - }, - { - "epoch": 3.922500905833568, - "grad_norm": 0.006174854934215546, - "learning_rate": 0.0001999924140786317, - "loss": 46.0, - "step": 24357 - }, - { - "epoch": 3.9226619429123555, - "grad_norm": 0.007807197980582714, - "learning_rate": 0.0001999924134554764, - "loss": 46.0, - "step": 24358 - }, - { - "epoch": 3.922822979991143, - "grad_norm": 0.005494216922670603, - "learning_rate": 0.00019999241283229547, - "loss": 46.0, - "step": 24359 - }, - { - "epoch": 3.9229840170699304, - "grad_norm": 0.009967494755983353, - "learning_rate": 0.00019999241220908896, - "loss": 46.0, - "step": 24360 - }, - { - "epoch": 3.923145054148718, - "grad_norm": 0.004544772207736969, - "learning_rate": 0.00019999241158585686, - "loss": 46.0, - "step": 24361 - }, - { - "epoch": 3.9233060912275053, - "grad_norm": 0.005755240563303232, - "learning_rate": 0.0001999924109625992, - "loss": 46.0, - "step": 24362 - }, - { - "epoch": 3.9234671283062923, - "grad_norm": 0.002001445507630706, - "learning_rate": 0.0001999924103393159, - "loss": 46.0, - "step": 24363 - }, - { - "epoch": 3.9236281653850797, - "grad_norm": 0.003334293607622385, - "learning_rate": 0.00019999240971600704, - "loss": 46.0, - "step": 24364 - }, - { - "epoch": 3.923789202463867, - "grad_norm": 0.0029219326097518206, - "learning_rate": 0.00019999240909267257, - "loss": 46.0, - "step": 24365 - }, - { - "epoch": 3.9239502395426546, - "grad_norm": 0.0026663679163903, - "learning_rate": 0.0001999924084693125, - "loss": 46.0, - "step": 24366 - }, - { - "epoch": 3.924111276621442, - "grad_norm": 0.002864918438717723, - "learning_rate": 0.00019999240784592686, - "loss": 46.0, - "step": 24367 - }, - { - "epoch": 3.9242723137002296, - "grad_norm": 0.01613437570631504, - "learning_rate": 0.00019999240722251563, - "loss": 46.0, - "step": 24368 - }, - { - "epoch": 3.924433350779017, - "grad_norm": 0.023917198181152344, - "learning_rate": 0.00019999240659907875, - "loss": 46.0, - "step": 24369 - }, - { - "epoch": 3.9245943878578045, - "grad_norm": 0.017110902816057205, - "learning_rate": 0.00019999240597561632, - "loss": 46.0, - "step": 24370 - }, - { - "epoch": 3.924755424936592, - "grad_norm": 0.001088926917873323, - "learning_rate": 0.00019999240535212832, - "loss": 46.0, - "step": 24371 - }, - { - "epoch": 3.924916462015379, - "grad_norm": 0.001707250252366066, - "learning_rate": 0.0001999924047286147, - "loss": 46.0, - "step": 24372 - }, - { - "epoch": 3.9250774990941664, - "grad_norm": 0.007229944691061974, - "learning_rate": 0.00019999240410507548, - "loss": 46.0, - "step": 24373 - }, - { - "epoch": 3.925238536172954, - "grad_norm": 0.0019223009003326297, - "learning_rate": 0.0001999924034815107, - "loss": 46.0, - "step": 24374 - }, - { - "epoch": 3.9253995732517413, - "grad_norm": 0.014989964663982391, - "learning_rate": 0.0001999924028579203, - "loss": 46.0, - "step": 24375 - }, - { - "epoch": 3.9255606103305287, - "grad_norm": 0.005130224861204624, - "learning_rate": 0.00019999240223430431, - "loss": 46.0, - "step": 24376 - }, - { - "epoch": 3.9257216474093157, - "grad_norm": 0.009179562330245972, - "learning_rate": 0.00019999240161066274, - "loss": 46.0, - "step": 24377 - }, - { - "epoch": 3.925882684488103, - "grad_norm": 0.003586171194911003, - "learning_rate": 0.00019999240098699555, - "loss": 46.0, - "step": 24378 - }, - { - "epoch": 3.9260437215668906, - "grad_norm": 0.001972099067643285, - "learning_rate": 0.00019999240036330278, - "loss": 46.0, - "step": 24379 - }, - { - "epoch": 3.926204758645678, - "grad_norm": 0.0045670573599636555, - "learning_rate": 0.00019999239973958442, - "loss": 46.0, - "step": 24380 - }, - { - "epoch": 3.9263657957244655, - "grad_norm": 0.003906788770109415, - "learning_rate": 0.00019999239911584047, - "loss": 46.0, - "step": 24381 - }, - { - "epoch": 3.926526832803253, - "grad_norm": 0.001660844311118126, - "learning_rate": 0.00019999239849207093, - "loss": 46.0, - "step": 24382 - }, - { - "epoch": 3.9266878698820404, - "grad_norm": 0.0007775754784233868, - "learning_rate": 0.0001999923978682758, - "loss": 46.0, - "step": 24383 - }, - { - "epoch": 3.926848906960828, - "grad_norm": 0.007136431522667408, - "learning_rate": 0.00019999239724445507, - "loss": 46.0, - "step": 24384 - }, - { - "epoch": 3.9270099440396153, - "grad_norm": 0.003681618720293045, - "learning_rate": 0.00019999239662060874, - "loss": 46.0, - "step": 24385 - }, - { - "epoch": 3.9271709811184023, - "grad_norm": 0.0019532719161361456, - "learning_rate": 0.00019999239599673683, - "loss": 46.0, - "step": 24386 - }, - { - "epoch": 3.92733201819719, - "grad_norm": 0.006563209462910891, - "learning_rate": 0.0001999923953728393, - "loss": 46.0, - "step": 24387 - }, - { - "epoch": 3.9274930552759773, - "grad_norm": 0.008773471228778362, - "learning_rate": 0.0001999923947489162, - "loss": 46.0, - "step": 24388 - }, - { - "epoch": 3.9276540923547647, - "grad_norm": 0.001396200037561357, - "learning_rate": 0.00019999239412496752, - "loss": 46.0, - "step": 24389 - }, - { - "epoch": 3.927815129433552, - "grad_norm": 0.007127637974917889, - "learning_rate": 0.0001999923935009932, - "loss": 46.0, - "step": 24390 - }, - { - "epoch": 3.9279761665123396, - "grad_norm": 0.017086312174797058, - "learning_rate": 0.00019999239287699333, - "loss": 46.0, - "step": 24391 - }, - { - "epoch": 3.9281372035911266, - "grad_norm": 0.005270370282232761, - "learning_rate": 0.00019999239225296784, - "loss": 46.0, - "step": 24392 - }, - { - "epoch": 3.928298240669914, - "grad_norm": 0.0016827555373311043, - "learning_rate": 0.0001999923916289168, - "loss": 46.0, - "step": 24393 - }, - { - "epoch": 3.9284592777487015, - "grad_norm": 0.003553206566721201, - "learning_rate": 0.00019999239100484012, - "loss": 46.0, - "step": 24394 - }, - { - "epoch": 3.928620314827489, - "grad_norm": 0.0048052165657281876, - "learning_rate": 0.00019999239038073787, - "loss": 46.0, - "step": 24395 - }, - { - "epoch": 3.9287813519062764, - "grad_norm": 0.004999968688935041, - "learning_rate": 0.00019999238975661004, - "loss": 46.0, - "step": 24396 - }, - { - "epoch": 3.928942388985064, - "grad_norm": 0.005583801306784153, - "learning_rate": 0.00019999238913245658, - "loss": 46.0, - "step": 24397 - }, - { - "epoch": 3.9291034260638513, - "grad_norm": 0.0017018058570101857, - "learning_rate": 0.00019999238850827754, - "loss": 46.0, - "step": 24398 - }, - { - "epoch": 3.9292644631426388, - "grad_norm": 0.00243575731292367, - "learning_rate": 0.00019999238788407292, - "loss": 46.0, - "step": 24399 - }, - { - "epoch": 3.9294255002214262, - "grad_norm": 0.003056165063753724, - "learning_rate": 0.0001999923872598427, - "loss": 46.0, - "step": 24400 - }, - { - "epoch": 3.9295865373002132, - "grad_norm": 0.005382818635553122, - "learning_rate": 0.0001999923866355869, - "loss": 46.0, - "step": 24401 - }, - { - "epoch": 3.9297475743790007, - "grad_norm": 0.0014281562762334943, - "learning_rate": 0.00019999238601130549, - "loss": 46.0, - "step": 24402 - }, - { - "epoch": 3.929908611457788, - "grad_norm": 0.0013499307679012418, - "learning_rate": 0.00019999238538699848, - "loss": 46.0, - "step": 24403 - }, - { - "epoch": 3.9300696485365756, - "grad_norm": 0.0035716991405934095, - "learning_rate": 0.00019999238476266592, - "loss": 46.0, - "step": 24404 - }, - { - "epoch": 3.930230685615363, - "grad_norm": 0.009319575503468513, - "learning_rate": 0.00019999238413830772, - "loss": 46.0, - "step": 24405 - }, - { - "epoch": 3.9303917226941505, - "grad_norm": 0.00225442205555737, - "learning_rate": 0.00019999238351392395, - "loss": 46.0, - "step": 24406 - }, - { - "epoch": 3.9305527597729375, - "grad_norm": 0.0031867618672549725, - "learning_rate": 0.00019999238288951457, - "loss": 46.0, - "step": 24407 - }, - { - "epoch": 3.930713796851725, - "grad_norm": 0.00477884104475379, - "learning_rate": 0.0001999923822650796, - "loss": 46.0, - "step": 24408 - }, - { - "epoch": 3.9308748339305124, - "grad_norm": 0.001682958216406405, - "learning_rate": 0.00019999238164061906, - "loss": 46.0, - "step": 24409 - }, - { - "epoch": 3.9310358710093, - "grad_norm": 0.002937573241069913, - "learning_rate": 0.00019999238101613292, - "loss": 46.0, - "step": 24410 - }, - { - "epoch": 3.9311969080880873, - "grad_norm": 0.004121054895222187, - "learning_rate": 0.0001999923803916212, - "loss": 46.0, - "step": 24411 - }, - { - "epoch": 3.9313579451668748, - "grad_norm": 0.006640677340328693, - "learning_rate": 0.00019999237976708385, - "loss": 46.0, - "step": 24412 - }, - { - "epoch": 3.931518982245662, - "grad_norm": 0.0036491509526968002, - "learning_rate": 0.00019999237914252092, - "loss": 46.0, - "step": 24413 - }, - { - "epoch": 3.9316800193244497, - "grad_norm": 0.0027274528983980417, - "learning_rate": 0.0001999923785179324, - "loss": 46.0, - "step": 24414 - }, - { - "epoch": 3.931841056403237, - "grad_norm": 0.004347385838627815, - "learning_rate": 0.00019999237789331827, - "loss": 46.0, - "step": 24415 - }, - { - "epoch": 3.932002093482024, - "grad_norm": 0.0015313193434849381, - "learning_rate": 0.00019999237726867856, - "loss": 46.0, - "step": 24416 - }, - { - "epoch": 3.9321631305608116, - "grad_norm": 0.027371564880013466, - "learning_rate": 0.00019999237664401328, - "loss": 46.0, - "step": 24417 - }, - { - "epoch": 3.932324167639599, - "grad_norm": 0.0009167072712443769, - "learning_rate": 0.0001999923760193224, - "loss": 46.0, - "step": 24418 - }, - { - "epoch": 3.9324852047183865, - "grad_norm": 0.003430148120969534, - "learning_rate": 0.00019999237539460588, - "loss": 46.0, - "step": 24419 - }, - { - "epoch": 3.932646241797174, - "grad_norm": 0.00242228084243834, - "learning_rate": 0.00019999237476986384, - "loss": 46.0, - "step": 24420 - }, - { - "epoch": 3.932807278875961, - "grad_norm": 0.010304367169737816, - "learning_rate": 0.00019999237414509616, - "loss": 46.0, - "step": 24421 - }, - { - "epoch": 3.9329683159547484, - "grad_norm": 0.006370606832206249, - "learning_rate": 0.00019999237352030292, - "loss": 46.0, - "step": 24422 - }, - { - "epoch": 3.933129353033536, - "grad_norm": 0.0054320283234119415, - "learning_rate": 0.00019999237289548404, - "loss": 46.0, - "step": 24423 - }, - { - "epoch": 3.9332903901123233, - "grad_norm": 0.0010298618581146002, - "learning_rate": 0.0001999923722706396, - "loss": 46.0, - "step": 24424 - }, - { - "epoch": 3.9334514271911107, - "grad_norm": 0.016750281676650047, - "learning_rate": 0.00019999237164576954, - "loss": 46.0, - "step": 24425 - }, - { - "epoch": 3.933612464269898, - "grad_norm": 0.0008130322676151991, - "learning_rate": 0.0001999923710208739, - "loss": 46.0, - "step": 24426 - }, - { - "epoch": 3.9337735013486856, - "grad_norm": 0.001588718849234283, - "learning_rate": 0.0001999923703959527, - "loss": 46.0, - "step": 24427 - }, - { - "epoch": 3.933934538427473, - "grad_norm": 0.006425799336284399, - "learning_rate": 0.00019999236977100585, - "loss": 46.0, - "step": 24428 - }, - { - "epoch": 3.9340955755062605, - "grad_norm": 0.005509865004569292, - "learning_rate": 0.00019999236914603345, - "loss": 46.0, - "step": 24429 - }, - { - "epoch": 3.9342566125850476, - "grad_norm": 0.0029557920061051846, - "learning_rate": 0.00019999236852103546, - "loss": 46.0, - "step": 24430 - }, - { - "epoch": 3.934417649663835, - "grad_norm": 0.0070237042382359505, - "learning_rate": 0.00019999236789601185, - "loss": 46.0, - "step": 24431 - }, - { - "epoch": 3.9345786867426225, - "grad_norm": 0.0018422477878630161, - "learning_rate": 0.00019999236727096266, - "loss": 46.0, - "step": 24432 - }, - { - "epoch": 3.93473972382141, - "grad_norm": 0.008246928453445435, - "learning_rate": 0.00019999236664588788, - "loss": 46.0, - "step": 24433 - }, - { - "epoch": 3.9349007609001974, - "grad_norm": 0.005493798293173313, - "learning_rate": 0.0001999923660207875, - "loss": 46.0, - "step": 24434 - }, - { - "epoch": 3.935061797978985, - "grad_norm": 0.0015698785427957773, - "learning_rate": 0.00019999236539566153, - "loss": 46.0, - "step": 24435 - }, - { - "epoch": 3.935222835057772, - "grad_norm": 0.005580568220466375, - "learning_rate": 0.00019999236477050999, - "loss": 46.0, - "step": 24436 - }, - { - "epoch": 3.9353838721365593, - "grad_norm": 0.006116837728768587, - "learning_rate": 0.00019999236414533283, - "loss": 46.0, - "step": 24437 - }, - { - "epoch": 3.9355449092153467, - "grad_norm": 0.0014182840241119266, - "learning_rate": 0.00019999236352013003, - "loss": 46.0, - "step": 24438 - }, - { - "epoch": 3.935705946294134, - "grad_norm": 0.007212547119706869, - "learning_rate": 0.00019999236289490173, - "loss": 46.0, - "step": 24439 - }, - { - "epoch": 3.9358669833729216, - "grad_norm": 0.002889544004574418, - "learning_rate": 0.00019999236226964778, - "loss": 46.0, - "step": 24440 - }, - { - "epoch": 3.936028020451709, - "grad_norm": 0.0021446554455906153, - "learning_rate": 0.00019999236164436825, - "loss": 46.0, - "step": 24441 - }, - { - "epoch": 3.9361890575304965, - "grad_norm": 0.009180703200399876, - "learning_rate": 0.00019999236101906313, - "loss": 46.0, - "step": 24442 - }, - { - "epoch": 3.936350094609284, - "grad_norm": 0.0017527807503938675, - "learning_rate": 0.00019999236039373243, - "loss": 46.0, - "step": 24443 - }, - { - "epoch": 3.9365111316880714, - "grad_norm": 0.00906195305287838, - "learning_rate": 0.0001999923597683761, - "loss": 46.0, - "step": 24444 - }, - { - "epoch": 3.9366721687668584, - "grad_norm": 0.01186458207666874, - "learning_rate": 0.0001999923591429942, - "loss": 46.0, - "step": 24445 - }, - { - "epoch": 3.936833205845646, - "grad_norm": 0.011776086874306202, - "learning_rate": 0.00019999235851758673, - "loss": 46.0, - "step": 24446 - }, - { - "epoch": 3.9369942429244333, - "grad_norm": 0.0017151961801573634, - "learning_rate": 0.00019999235789215365, - "loss": 46.0, - "step": 24447 - }, - { - "epoch": 3.937155280003221, - "grad_norm": 0.0012179887853562832, - "learning_rate": 0.00019999235726669495, - "loss": 46.0, - "step": 24448 - }, - { - "epoch": 3.9373163170820082, - "grad_norm": 0.006051843985915184, - "learning_rate": 0.0001999923566412107, - "loss": 46.0, - "step": 24449 - }, - { - "epoch": 3.9374773541607953, - "grad_norm": 0.00758137134835124, - "learning_rate": 0.00019999235601570083, - "loss": 46.0, - "step": 24450 - }, - { - "epoch": 3.9376383912395827, - "grad_norm": 0.006233640015125275, - "learning_rate": 0.00019999235539016537, - "loss": 46.0, - "step": 24451 - }, - { - "epoch": 3.93779942831837, - "grad_norm": 0.008672326803207397, - "learning_rate": 0.00019999235476460432, - "loss": 46.0, - "step": 24452 - }, - { - "epoch": 3.9379604653971576, - "grad_norm": 0.005385734140872955, - "learning_rate": 0.00019999235413901766, - "loss": 46.0, - "step": 24453 - }, - { - "epoch": 3.938121502475945, - "grad_norm": 0.0013677530223503709, - "learning_rate": 0.00019999235351340544, - "loss": 46.0, - "step": 24454 - }, - { - "epoch": 3.9382825395547325, - "grad_norm": 0.00402272492647171, - "learning_rate": 0.0001999923528877676, - "loss": 46.0, - "step": 24455 - }, - { - "epoch": 3.93844357663352, - "grad_norm": 0.004740627016872168, - "learning_rate": 0.0001999923522621042, - "loss": 46.0, - "step": 24456 - }, - { - "epoch": 3.9386046137123074, - "grad_norm": 0.009174207225441933, - "learning_rate": 0.00019999235163641515, - "loss": 46.0, - "step": 24457 - }, - { - "epoch": 3.938765650791095, - "grad_norm": 0.014905828982591629, - "learning_rate": 0.00019999235101070056, - "loss": 46.0, - "step": 24458 - }, - { - "epoch": 3.9389266878698823, - "grad_norm": 0.0029241740703582764, - "learning_rate": 0.00019999235038496035, - "loss": 46.0, - "step": 24459 - }, - { - "epoch": 3.9390877249486693, - "grad_norm": 0.00474179582670331, - "learning_rate": 0.00019999234975919455, - "loss": 46.0, - "step": 24460 - }, - { - "epoch": 3.9392487620274568, - "grad_norm": 0.004056048113852739, - "learning_rate": 0.00019999234913340317, - "loss": 46.0, - "step": 24461 - }, - { - "epoch": 3.9394097991062442, - "grad_norm": 0.005026824306696653, - "learning_rate": 0.00019999234850758623, - "loss": 46.0, - "step": 24462 - }, - { - "epoch": 3.9395708361850317, - "grad_norm": 0.0023352550342679024, - "learning_rate": 0.00019999234788174364, - "loss": 46.0, - "step": 24463 - }, - { - "epoch": 3.939731873263819, - "grad_norm": 0.0009777604136615992, - "learning_rate": 0.00019999234725587547, - "loss": 46.0, - "step": 24464 - }, - { - "epoch": 3.939892910342606, - "grad_norm": 0.006058571394532919, - "learning_rate": 0.0001999923466299817, - "loss": 46.0, - "step": 24465 - }, - { - "epoch": 3.9400539474213936, - "grad_norm": 0.004813102073967457, - "learning_rate": 0.00019999234600406236, - "loss": 46.0, - "step": 24466 - }, - { - "epoch": 3.940214984500181, - "grad_norm": 0.0020553087815642357, - "learning_rate": 0.00019999234537811743, - "loss": 46.0, - "step": 24467 - }, - { - "epoch": 3.9403760215789685, - "grad_norm": 0.0022767093032598495, - "learning_rate": 0.00019999234475214688, - "loss": 46.0, - "step": 24468 - }, - { - "epoch": 3.940537058657756, - "grad_norm": 0.012391808442771435, - "learning_rate": 0.00019999234412615074, - "loss": 46.0, - "step": 24469 - }, - { - "epoch": 3.9406980957365434, - "grad_norm": 0.005296351388096809, - "learning_rate": 0.00019999234350012902, - "loss": 46.0, - "step": 24470 - }, - { - "epoch": 3.940859132815331, - "grad_norm": 0.003683160524815321, - "learning_rate": 0.0001999923428740817, - "loss": 46.0, - "step": 24471 - }, - { - "epoch": 3.9410201698941183, - "grad_norm": 0.0030060908757150173, - "learning_rate": 0.0001999923422480088, - "loss": 46.0, - "step": 24472 - }, - { - "epoch": 3.9411812069729057, - "grad_norm": 0.003344297409057617, - "learning_rate": 0.0001999923416219103, - "loss": 46.0, - "step": 24473 - }, - { - "epoch": 3.9413422440516928, - "grad_norm": 0.005463795270770788, - "learning_rate": 0.0001999923409957862, - "loss": 46.0, - "step": 24474 - }, - { - "epoch": 3.94150328113048, - "grad_norm": 0.006249964702874422, - "learning_rate": 0.00019999234036963652, - "loss": 46.0, - "step": 24475 - }, - { - "epoch": 3.9416643182092677, - "grad_norm": 0.002506648190319538, - "learning_rate": 0.00019999233974346122, - "loss": 46.0, - "step": 24476 - }, - { - "epoch": 3.941825355288055, - "grad_norm": 0.0032582751009613276, - "learning_rate": 0.00019999233911726036, - "loss": 46.0, - "step": 24477 - }, - { - "epoch": 3.9419863923668426, - "grad_norm": 0.004647792316973209, - "learning_rate": 0.00019999233849103391, - "loss": 46.0, - "step": 24478 - }, - { - "epoch": 3.94214742944563, - "grad_norm": 0.009381710551679134, - "learning_rate": 0.00019999233786478185, - "loss": 46.0, - "step": 24479 - }, - { - "epoch": 3.942308466524417, - "grad_norm": 0.006575780920684338, - "learning_rate": 0.00019999233723850418, - "loss": 46.0, - "step": 24480 - }, - { - "epoch": 3.9424695036032045, - "grad_norm": 0.0029790003318339586, - "learning_rate": 0.00019999233661220097, - "loss": 46.0, - "step": 24481 - }, - { - "epoch": 3.942630540681992, - "grad_norm": 0.013919372111558914, - "learning_rate": 0.0001999923359858721, - "loss": 46.0, - "step": 24482 - }, - { - "epoch": 3.9427915777607794, - "grad_norm": 0.011686764657497406, - "learning_rate": 0.00019999233535951768, - "loss": 46.0, - "step": 24483 - }, - { - "epoch": 3.942952614839567, - "grad_norm": 0.008118031546473503, - "learning_rate": 0.00019999233473313766, - "loss": 46.0, - "step": 24484 - }, - { - "epoch": 3.9431136519183543, - "grad_norm": 0.00322963809594512, - "learning_rate": 0.00019999233410673205, - "loss": 46.0, - "step": 24485 - }, - { - "epoch": 3.9432746889971417, - "grad_norm": 0.003362247720360756, - "learning_rate": 0.00019999233348030085, - "loss": 46.0, - "step": 24486 - }, - { - "epoch": 3.943435726075929, - "grad_norm": 0.006708750035613775, - "learning_rate": 0.00019999233285384403, - "loss": 46.0, - "step": 24487 - }, - { - "epoch": 3.9435967631547166, - "grad_norm": 0.005984371528029442, - "learning_rate": 0.00019999233222736163, - "loss": 46.0, - "step": 24488 - }, - { - "epoch": 3.9437578002335036, - "grad_norm": 0.003902870463207364, - "learning_rate": 0.00019999233160085365, - "loss": 46.0, - "step": 24489 - }, - { - "epoch": 3.943918837312291, - "grad_norm": 0.005402877926826477, - "learning_rate": 0.00019999233097432007, - "loss": 46.0, - "step": 24490 - }, - { - "epoch": 3.9440798743910785, - "grad_norm": 0.004179893992841244, - "learning_rate": 0.00019999233034776088, - "loss": 46.0, - "step": 24491 - }, - { - "epoch": 3.944240911469866, - "grad_norm": 0.0032317708246409893, - "learning_rate": 0.00019999232972117614, - "loss": 46.0, - "step": 24492 - }, - { - "epoch": 3.9444019485486534, - "grad_norm": 0.003148757154121995, - "learning_rate": 0.00019999232909456577, - "loss": 46.0, - "step": 24493 - }, - { - "epoch": 3.9445629856274405, - "grad_norm": 0.001753403339534998, - "learning_rate": 0.00019999232846792982, - "loss": 46.0, - "step": 24494 - }, - { - "epoch": 3.944724022706228, - "grad_norm": 0.004594752565026283, - "learning_rate": 0.00019999232784126826, - "loss": 46.0, - "step": 24495 - }, - { - "epoch": 3.9448850597850154, - "grad_norm": 0.003652523271739483, - "learning_rate": 0.00019999232721458113, - "loss": 46.0, - "step": 24496 - }, - { - "epoch": 3.945046096863803, - "grad_norm": 0.0020665600895881653, - "learning_rate": 0.0001999923265878684, - "loss": 46.0, - "step": 24497 - }, - { - "epoch": 3.9452071339425903, - "grad_norm": 0.00592459924519062, - "learning_rate": 0.0001999923259611301, - "loss": 46.0, - "step": 24498 - }, - { - "epoch": 3.9453681710213777, - "grad_norm": 0.001933505991473794, - "learning_rate": 0.00019999232533436616, - "loss": 46.0, - "step": 24499 - }, - { - "epoch": 3.945529208100165, - "grad_norm": 0.0006409965571947396, - "learning_rate": 0.00019999232470757666, - "loss": 46.0, - "step": 24500 - }, - { - "epoch": 3.9456902451789526, - "grad_norm": 0.001832569600082934, - "learning_rate": 0.00019999232408076157, - "loss": 46.0, - "step": 24501 - }, - { - "epoch": 3.94585128225774, - "grad_norm": 0.004537877626717091, - "learning_rate": 0.00019999232345392084, - "loss": 46.0, - "step": 24502 - }, - { - "epoch": 3.946012319336527, - "grad_norm": 0.0005695093423128128, - "learning_rate": 0.00019999232282705455, - "loss": 46.0, - "step": 24503 - }, - { - "epoch": 3.9461733564153145, - "grad_norm": 0.0008433074108324945, - "learning_rate": 0.00019999232220016267, - "loss": 46.0, - "step": 24504 - }, - { - "epoch": 3.946334393494102, - "grad_norm": 0.0032459928188472986, - "learning_rate": 0.0001999923215732452, - "loss": 46.0, - "step": 24505 - }, - { - "epoch": 3.9464954305728894, - "grad_norm": 0.0019439419265836477, - "learning_rate": 0.00019999232094630213, - "loss": 46.0, - "step": 24506 - }, - { - "epoch": 3.946656467651677, - "grad_norm": 0.0017491135513409972, - "learning_rate": 0.00019999232031933347, - "loss": 46.0, - "step": 24507 - }, - { - "epoch": 3.9468175047304643, - "grad_norm": 0.005312608554959297, - "learning_rate": 0.00019999231969233922, - "loss": 46.0, - "step": 24508 - }, - { - "epoch": 3.9469785418092513, - "grad_norm": 0.0019482218194752932, - "learning_rate": 0.00019999231906531938, - "loss": 46.0, - "step": 24509 - }, - { - "epoch": 3.947139578888039, - "grad_norm": 0.0024019565898925066, - "learning_rate": 0.00019999231843827392, - "loss": 46.0, - "step": 24510 - }, - { - "epoch": 3.9473006159668262, - "grad_norm": 0.003191335592418909, - "learning_rate": 0.0001999923178112029, - "loss": 46.0, - "step": 24511 - }, - { - "epoch": 3.9474616530456137, - "grad_norm": 0.010148586705327034, - "learning_rate": 0.00019999231718410628, - "loss": 46.0, - "step": 24512 - }, - { - "epoch": 3.947622690124401, - "grad_norm": 0.005791592411696911, - "learning_rate": 0.00019999231655698407, - "loss": 46.0, - "step": 24513 - }, - { - "epoch": 3.9477837272031886, - "grad_norm": 0.02290402166545391, - "learning_rate": 0.00019999231592983624, - "loss": 46.0, - "step": 24514 - }, - { - "epoch": 3.947944764281976, - "grad_norm": 0.007506233640015125, - "learning_rate": 0.00019999231530266288, - "loss": 46.0, - "step": 24515 - }, - { - "epoch": 3.9481058013607635, - "grad_norm": 0.00459651043638587, - "learning_rate": 0.00019999231467546387, - "loss": 46.0, - "step": 24516 - }, - { - "epoch": 3.948266838439551, - "grad_norm": 0.013507161289453506, - "learning_rate": 0.00019999231404823928, - "loss": 46.0, - "step": 24517 - }, - { - "epoch": 3.948427875518338, - "grad_norm": 0.01366760116070509, - "learning_rate": 0.00019999231342098908, - "loss": 46.0, - "step": 24518 - }, - { - "epoch": 3.9485889125971254, - "grad_norm": 0.0063840365037322044, - "learning_rate": 0.0001999923127937133, - "loss": 46.0, - "step": 24519 - }, - { - "epoch": 3.948749949675913, - "grad_norm": 0.001921529765240848, - "learning_rate": 0.00019999231216641193, - "loss": 46.0, - "step": 24520 - }, - { - "epoch": 3.9489109867547003, - "grad_norm": 0.0056505585089325905, - "learning_rate": 0.00019999231153908497, - "loss": 46.0, - "step": 24521 - }, - { - "epoch": 3.9490720238334878, - "grad_norm": 0.005920773837715387, - "learning_rate": 0.00019999231091173244, - "loss": 46.0, - "step": 24522 - }, - { - "epoch": 3.949233060912275, - "grad_norm": 0.0006380933336913586, - "learning_rate": 0.0001999923102843543, - "loss": 46.0, - "step": 24523 - }, - { - "epoch": 3.9493940979910622, - "grad_norm": 0.005779617466032505, - "learning_rate": 0.00019999230965695055, - "loss": 46.0, - "step": 24524 - }, - { - "epoch": 3.9495551350698497, - "grad_norm": 0.0075210267677903175, - "learning_rate": 0.00019999230902952123, - "loss": 46.0, - "step": 24525 - }, - { - "epoch": 3.949716172148637, - "grad_norm": 0.0014395912876352668, - "learning_rate": 0.00019999230840206627, - "loss": 46.0, - "step": 24526 - }, - { - "epoch": 3.9498772092274246, - "grad_norm": 0.002967552747577429, - "learning_rate": 0.00019999230777458578, - "loss": 46.0, - "step": 24527 - }, - { - "epoch": 3.950038246306212, - "grad_norm": 0.0018580231117084622, - "learning_rate": 0.00019999230714707965, - "loss": 46.0, - "step": 24528 - }, - { - "epoch": 3.9501992833849995, - "grad_norm": 0.003680624533444643, - "learning_rate": 0.00019999230651954796, - "loss": 46.0, - "step": 24529 - }, - { - "epoch": 3.950360320463787, - "grad_norm": 0.004533518571406603, - "learning_rate": 0.00019999230589199068, - "loss": 46.0, - "step": 24530 - }, - { - "epoch": 3.9505213575425744, - "grad_norm": 0.0060940757393836975, - "learning_rate": 0.00019999230526440776, - "loss": 46.0, - "step": 24531 - }, - { - "epoch": 3.950682394621362, - "grad_norm": 0.0031006347853690386, - "learning_rate": 0.0001999923046367993, - "loss": 46.0, - "step": 24532 - }, - { - "epoch": 3.950843431700149, - "grad_norm": 0.0012043003225699067, - "learning_rate": 0.00019999230400916522, - "loss": 46.0, - "step": 24533 - }, - { - "epoch": 3.9510044687789363, - "grad_norm": 0.003294929163530469, - "learning_rate": 0.00019999230338150554, - "loss": 46.0, - "step": 24534 - }, - { - "epoch": 3.9511655058577237, - "grad_norm": 0.006164142396301031, - "learning_rate": 0.00019999230275382027, - "loss": 46.0, - "step": 24535 - }, - { - "epoch": 3.951326542936511, - "grad_norm": 0.008812782354652882, - "learning_rate": 0.00019999230212610941, - "loss": 46.0, - "step": 24536 - }, - { - "epoch": 3.9514875800152987, - "grad_norm": 0.007229702081531286, - "learning_rate": 0.00019999230149837297, - "loss": 46.0, - "step": 24537 - }, - { - "epoch": 3.9516486170940857, - "grad_norm": 0.014134583063423634, - "learning_rate": 0.00019999230087061094, - "loss": 46.0, - "step": 24538 - }, - { - "epoch": 3.951809654172873, - "grad_norm": 0.0012406106106936932, - "learning_rate": 0.00019999230024282333, - "loss": 46.0, - "step": 24539 - }, - { - "epoch": 3.9519706912516606, - "grad_norm": 0.000965129176620394, - "learning_rate": 0.0001999922996150101, - "loss": 46.0, - "step": 24540 - }, - { - "epoch": 3.952131728330448, - "grad_norm": 0.0019423657795414329, - "learning_rate": 0.00019999229898717128, - "loss": 46.0, - "step": 24541 - }, - { - "epoch": 3.9522927654092355, - "grad_norm": 0.0010240275878459215, - "learning_rate": 0.00019999229835930687, - "loss": 46.0, - "step": 24542 - }, - { - "epoch": 3.952453802488023, - "grad_norm": 0.006287777796387672, - "learning_rate": 0.00019999229773141685, - "loss": 46.0, - "step": 24543 - }, - { - "epoch": 3.9526148395668104, - "grad_norm": 0.004004526883363724, - "learning_rate": 0.00019999229710350124, - "loss": 46.0, - "step": 24544 - }, - { - "epoch": 3.952775876645598, - "grad_norm": 0.004683574661612511, - "learning_rate": 0.00019999229647556005, - "loss": 46.0, - "step": 24545 - }, - { - "epoch": 3.9529369137243853, - "grad_norm": 0.005028977990150452, - "learning_rate": 0.00019999229584759327, - "loss": 46.0, - "step": 24546 - }, - { - "epoch": 3.9530979508031723, - "grad_norm": 0.0026760261971503496, - "learning_rate": 0.0001999922952196009, - "loss": 46.0, - "step": 24547 - }, - { - "epoch": 3.9532589878819597, - "grad_norm": 0.005508644971996546, - "learning_rate": 0.00019999229459158292, - "loss": 46.0, - "step": 24548 - }, - { - "epoch": 3.953420024960747, - "grad_norm": 0.0029298197478055954, - "learning_rate": 0.00019999229396353938, - "loss": 46.0, - "step": 24549 - }, - { - "epoch": 3.9535810620395346, - "grad_norm": 0.0018909123027697206, - "learning_rate": 0.00019999229333547022, - "loss": 46.0, - "step": 24550 - }, - { - "epoch": 3.953742099118322, - "grad_norm": 0.009362361393868923, - "learning_rate": 0.00019999229270737547, - "loss": 46.0, - "step": 24551 - }, - { - "epoch": 3.9539031361971095, - "grad_norm": 0.003478787373751402, - "learning_rate": 0.00019999229207925514, - "loss": 46.0, - "step": 24552 - }, - { - "epoch": 3.9540641732758965, - "grad_norm": 0.004581904038786888, - "learning_rate": 0.0001999922914511092, - "loss": 46.0, - "step": 24553 - }, - { - "epoch": 3.954225210354684, - "grad_norm": 0.00929262489080429, - "learning_rate": 0.00019999229082293766, - "loss": 46.0, - "step": 24554 - }, - { - "epoch": 3.9543862474334714, - "grad_norm": 0.004574401304125786, - "learning_rate": 0.00019999229019474054, - "loss": 46.0, - "step": 24555 - }, - { - "epoch": 3.954547284512259, - "grad_norm": 0.0038825373630970716, - "learning_rate": 0.00019999228956651784, - "loss": 46.0, - "step": 24556 - }, - { - "epoch": 3.9547083215910463, - "grad_norm": 0.002296898514032364, - "learning_rate": 0.00019999228893826954, - "loss": 46.0, - "step": 24557 - }, - { - "epoch": 3.954869358669834, - "grad_norm": 0.0063306717202067375, - "learning_rate": 0.00019999228830999563, - "loss": 46.0, - "step": 24558 - }, - { - "epoch": 3.9550303957486213, - "grad_norm": 0.004092125222086906, - "learning_rate": 0.00019999228768169617, - "loss": 46.0, - "step": 24559 - }, - { - "epoch": 3.9551914328274087, - "grad_norm": 0.000549173797480762, - "learning_rate": 0.00019999228705337108, - "loss": 46.0, - "step": 24560 - }, - { - "epoch": 3.955352469906196, - "grad_norm": 0.00241836067289114, - "learning_rate": 0.00019999228642502036, - "loss": 46.0, - "step": 24561 - }, - { - "epoch": 3.955513506984983, - "grad_norm": 0.002404788276180625, - "learning_rate": 0.00019999228579664413, - "loss": 46.0, - "step": 24562 - }, - { - "epoch": 3.9556745440637706, - "grad_norm": 0.001841920311562717, - "learning_rate": 0.00019999228516824226, - "loss": 46.0, - "step": 24563 - }, - { - "epoch": 3.955835581142558, - "grad_norm": 0.0011428429279476404, - "learning_rate": 0.0001999922845398148, - "loss": 46.0, - "step": 24564 - }, - { - "epoch": 3.9559966182213455, - "grad_norm": 0.001579323667101562, - "learning_rate": 0.00019999228391136175, - "loss": 46.0, - "step": 24565 - }, - { - "epoch": 3.956157655300133, - "grad_norm": 0.004542314447462559, - "learning_rate": 0.00019999228328288312, - "loss": 46.0, - "step": 24566 - }, - { - "epoch": 3.95631869237892, - "grad_norm": 0.00798245519399643, - "learning_rate": 0.00019999228265437887, - "loss": 46.0, - "step": 24567 - }, - { - "epoch": 3.9564797294577074, - "grad_norm": 0.003039659233763814, - "learning_rate": 0.00019999228202584904, - "loss": 46.0, - "step": 24568 - }, - { - "epoch": 3.956640766536495, - "grad_norm": 0.0036243193317204714, - "learning_rate": 0.00019999228139729362, - "loss": 46.0, - "step": 24569 - }, - { - "epoch": 3.9568018036152823, - "grad_norm": 0.003200292820110917, - "learning_rate": 0.0001999922807687126, - "loss": 46.0, - "step": 24570 - }, - { - "epoch": 3.95696284069407, - "grad_norm": 0.003493676660582423, - "learning_rate": 0.000199992280140106, - "loss": 46.0, - "step": 24571 - }, - { - "epoch": 3.9571238777728572, - "grad_norm": 0.006319560110569, - "learning_rate": 0.0001999922795114738, - "loss": 46.0, - "step": 24572 - }, - { - "epoch": 3.9572849148516447, - "grad_norm": 0.0011464391136541963, - "learning_rate": 0.00019999227888281603, - "loss": 46.0, - "step": 24573 - }, - { - "epoch": 3.957445951930432, - "grad_norm": 0.0019397110445424914, - "learning_rate": 0.00019999227825413262, - "loss": 46.0, - "step": 24574 - }, - { - "epoch": 3.9576069890092196, - "grad_norm": 0.0020150644704699516, - "learning_rate": 0.00019999227762542367, - "loss": 46.0, - "step": 24575 - }, - { - "epoch": 3.957768026088007, - "grad_norm": 0.006844239309430122, - "learning_rate": 0.0001999922769966891, - "loss": 46.0, - "step": 24576 - }, - { - "epoch": 3.957929063166794, - "grad_norm": 0.0008079410181380808, - "learning_rate": 0.00019999227636792891, - "loss": 46.0, - "step": 24577 - }, - { - "epoch": 3.9580901002455815, - "grad_norm": 0.005341343581676483, - "learning_rate": 0.00019999227573914315, - "loss": 46.0, - "step": 24578 - }, - { - "epoch": 3.958251137324369, - "grad_norm": 0.007375328801572323, - "learning_rate": 0.0001999922751103318, - "loss": 46.0, - "step": 24579 - }, - { - "epoch": 3.9584121744031564, - "grad_norm": 0.0037683420814573765, - "learning_rate": 0.00019999227448149487, - "loss": 46.0, - "step": 24580 - }, - { - "epoch": 3.958573211481944, - "grad_norm": 0.0030840453691780567, - "learning_rate": 0.00019999227385263235, - "loss": 46.0, - "step": 24581 - }, - { - "epoch": 3.958734248560731, - "grad_norm": 0.0016249476466327906, - "learning_rate": 0.0001999922732237442, - "loss": 46.0, - "step": 24582 - }, - { - "epoch": 3.9588952856395183, - "grad_norm": 0.0052049485966563225, - "learning_rate": 0.0001999922725948305, - "loss": 46.0, - "step": 24583 - }, - { - "epoch": 3.9590563227183058, - "grad_norm": 0.003728102194145322, - "learning_rate": 0.00019999227196589118, - "loss": 46.0, - "step": 24584 - }, - { - "epoch": 3.959217359797093, - "grad_norm": 0.008609190583229065, - "learning_rate": 0.00019999227133692625, - "loss": 46.0, - "step": 24585 - }, - { - "epoch": 3.9593783968758807, - "grad_norm": 0.0010269450722262263, - "learning_rate": 0.00019999227070793577, - "loss": 46.0, - "step": 24586 - }, - { - "epoch": 3.959539433954668, - "grad_norm": 0.017943529412150383, - "learning_rate": 0.00019999227007891967, - "loss": 46.0, - "step": 24587 - }, - { - "epoch": 3.9597004710334556, - "grad_norm": 0.003252143505960703, - "learning_rate": 0.000199992269449878, - "loss": 46.0, - "step": 24588 - }, - { - "epoch": 3.959861508112243, - "grad_norm": 0.01167239248752594, - "learning_rate": 0.00019999226882081074, - "loss": 46.0, - "step": 24589 - }, - { - "epoch": 3.9600225451910305, - "grad_norm": 0.009695515036582947, - "learning_rate": 0.00019999226819171785, - "loss": 46.0, - "step": 24590 - }, - { - "epoch": 3.9601835822698175, - "grad_norm": 0.0008362962398678064, - "learning_rate": 0.0001999922675625994, - "loss": 46.0, - "step": 24591 - }, - { - "epoch": 3.960344619348605, - "grad_norm": 0.0036508759949356318, - "learning_rate": 0.00019999226693345534, - "loss": 46.0, - "step": 24592 - }, - { - "epoch": 3.9605056564273924, - "grad_norm": 0.003990464378148317, - "learning_rate": 0.0001999922663042857, - "loss": 46.0, - "step": 24593 - }, - { - "epoch": 3.96066669350618, - "grad_norm": 0.003227963810786605, - "learning_rate": 0.00019999226567509045, - "loss": 46.0, - "step": 24594 - }, - { - "epoch": 3.9608277305849673, - "grad_norm": 0.00426216097548604, - "learning_rate": 0.0001999922650458696, - "loss": 46.0, - "step": 24595 - }, - { - "epoch": 3.9609887676637547, - "grad_norm": 0.0027961486484855413, - "learning_rate": 0.00019999226441662316, - "loss": 46.0, - "step": 24596 - }, - { - "epoch": 3.9611498047425417, - "grad_norm": 0.000826875155325979, - "learning_rate": 0.00019999226378735117, - "loss": 46.0, - "step": 24597 - }, - { - "epoch": 3.961310841821329, - "grad_norm": 0.01634635031223297, - "learning_rate": 0.00019999226315805355, - "loss": 46.0, - "step": 24598 - }, - { - "epoch": 3.9614718789001166, - "grad_norm": 0.003508588531985879, - "learning_rate": 0.00019999226252873035, - "loss": 46.0, - "step": 24599 - }, - { - "epoch": 3.961632915978904, - "grad_norm": 0.0032635731622576714, - "learning_rate": 0.00019999226189938154, - "loss": 46.0, - "step": 24600 - }, - { - "epoch": 3.9617939530576916, - "grad_norm": 0.004523720126599073, - "learning_rate": 0.00019999226127000714, - "loss": 46.0, - "step": 24601 - }, - { - "epoch": 3.961954990136479, - "grad_norm": 0.0037904521450400352, - "learning_rate": 0.00019999226064060715, - "loss": 46.0, - "step": 24602 - }, - { - "epoch": 3.9621160272152665, - "grad_norm": 0.003829203313216567, - "learning_rate": 0.00019999226001118158, - "loss": 46.0, - "step": 24603 - }, - { - "epoch": 3.962277064294054, - "grad_norm": 0.015329036861658096, - "learning_rate": 0.00019999225938173041, - "loss": 46.0, - "step": 24604 - }, - { - "epoch": 3.9624381013728414, - "grad_norm": 0.001176963560283184, - "learning_rate": 0.00019999225875225364, - "loss": 46.0, - "step": 24605 - }, - { - "epoch": 3.9625991384516284, - "grad_norm": 0.008821663446724415, - "learning_rate": 0.0001999922581227513, - "loss": 46.0, - "step": 24606 - }, - { - "epoch": 3.962760175530416, - "grad_norm": 0.006941392086446285, - "learning_rate": 0.00019999225749322335, - "loss": 46.0, - "step": 24607 - }, - { - "epoch": 3.9629212126092033, - "grad_norm": 0.005642466712743044, - "learning_rate": 0.0001999922568636698, - "loss": 46.0, - "step": 24608 - }, - { - "epoch": 3.9630822496879907, - "grad_norm": 0.0016563436947762966, - "learning_rate": 0.00019999225623409066, - "loss": 46.0, - "step": 24609 - }, - { - "epoch": 3.963243286766778, - "grad_norm": 0.004143841564655304, - "learning_rate": 0.00019999225560448595, - "loss": 46.0, - "step": 24610 - }, - { - "epoch": 3.963404323845565, - "grad_norm": 0.008736458607017994, - "learning_rate": 0.00019999225497485562, - "loss": 46.0, - "step": 24611 - }, - { - "epoch": 3.9635653609243526, - "grad_norm": 0.005676708649843931, - "learning_rate": 0.0001999922543451997, - "loss": 46.0, - "step": 24612 - }, - { - "epoch": 3.96372639800314, - "grad_norm": 0.003004975849762559, - "learning_rate": 0.0001999922537155182, - "loss": 46.0, - "step": 24613 - }, - { - "epoch": 3.9638874350819275, - "grad_norm": 0.005604591220617294, - "learning_rate": 0.00019999225308581112, - "loss": 46.0, - "step": 24614 - }, - { - "epoch": 3.964048472160715, - "grad_norm": 0.011630786582827568, - "learning_rate": 0.0001999922524560784, - "loss": 46.0, - "step": 24615 - }, - { - "epoch": 3.9642095092395024, - "grad_norm": 0.00235688965767622, - "learning_rate": 0.0001999922518263201, - "loss": 46.0, - "step": 24616 - }, - { - "epoch": 3.96437054631829, - "grad_norm": 0.0023954608477652073, - "learning_rate": 0.00019999225119653625, - "loss": 46.0, - "step": 24617 - }, - { - "epoch": 3.9645315833970773, - "grad_norm": 0.002856738632544875, - "learning_rate": 0.00019999225056672676, - "loss": 46.0, - "step": 24618 - }, - { - "epoch": 3.964692620475865, - "grad_norm": 0.0027815313078463078, - "learning_rate": 0.0001999922499368917, - "loss": 46.0, - "step": 24619 - }, - { - "epoch": 3.964853657554652, - "grad_norm": 0.008760727010667324, - "learning_rate": 0.00019999224930703107, - "loss": 46.0, - "step": 24620 - }, - { - "epoch": 3.9650146946334393, - "grad_norm": 0.002955361269414425, - "learning_rate": 0.00019999224867714479, - "loss": 46.0, - "step": 24621 - }, - { - "epoch": 3.9651757317122267, - "grad_norm": 0.0180122759193182, - "learning_rate": 0.00019999224804723295, - "loss": 46.0, - "step": 24622 - }, - { - "epoch": 3.965336768791014, - "grad_norm": 0.002538575790822506, - "learning_rate": 0.00019999224741729552, - "loss": 46.0, - "step": 24623 - }, - { - "epoch": 3.9654978058698016, - "grad_norm": 0.0071937888860702515, - "learning_rate": 0.00019999224678733248, - "loss": 46.0, - "step": 24624 - }, - { - "epoch": 3.965658842948589, - "grad_norm": 0.009937465190887451, - "learning_rate": 0.00019999224615734388, - "loss": 46.0, - "step": 24625 - }, - { - "epoch": 3.965819880027376, - "grad_norm": 0.01640373282134533, - "learning_rate": 0.00019999224552732966, - "loss": 46.0, - "step": 24626 - }, - { - "epoch": 3.9659809171061635, - "grad_norm": 0.005190275143831968, - "learning_rate": 0.00019999224489728986, - "loss": 46.0, - "step": 24627 - }, - { - "epoch": 3.966141954184951, - "grad_norm": 0.0014798413030803204, - "learning_rate": 0.00019999224426722444, - "loss": 46.0, - "step": 24628 - }, - { - "epoch": 3.9663029912637384, - "grad_norm": 0.010416446253657341, - "learning_rate": 0.00019999224363713344, - "loss": 46.0, - "step": 24629 - }, - { - "epoch": 3.966464028342526, - "grad_norm": 0.00212200079113245, - "learning_rate": 0.00019999224300701687, - "loss": 46.0, - "step": 24630 - }, - { - "epoch": 3.9666250654213133, - "grad_norm": 0.005232592113316059, - "learning_rate": 0.0001999922423768747, - "loss": 46.0, - "step": 24631 - }, - { - "epoch": 3.9667861025001008, - "grad_norm": 0.002631609793752432, - "learning_rate": 0.00019999224174670693, - "loss": 46.0, - "step": 24632 - }, - { - "epoch": 3.9669471395788882, - "grad_norm": 0.0026403837837278843, - "learning_rate": 0.00019999224111651355, - "loss": 46.0, - "step": 24633 - }, - { - "epoch": 3.9671081766576757, - "grad_norm": 0.0011286140652373433, - "learning_rate": 0.0001999922404862946, - "loss": 46.0, - "step": 24634 - }, - { - "epoch": 3.9672692137364627, - "grad_norm": 0.0037339525297284126, - "learning_rate": 0.00019999223985605005, - "loss": 46.0, - "step": 24635 - }, - { - "epoch": 3.96743025081525, - "grad_norm": 0.0016117809573188424, - "learning_rate": 0.0001999922392257799, - "loss": 46.0, - "step": 24636 - }, - { - "epoch": 3.9675912878940376, - "grad_norm": 0.003521806327626109, - "learning_rate": 0.00019999223859548418, - "loss": 46.0, - "step": 24637 - }, - { - "epoch": 3.967752324972825, - "grad_norm": 0.002333017997443676, - "learning_rate": 0.00019999223796516284, - "loss": 46.0, - "step": 24638 - }, - { - "epoch": 3.9679133620516125, - "grad_norm": 0.0025561561342328787, - "learning_rate": 0.0001999922373348159, - "loss": 46.0, - "step": 24639 - }, - { - "epoch": 3.9680743991303995, - "grad_norm": 0.0010061677312478423, - "learning_rate": 0.0001999922367044434, - "loss": 46.0, - "step": 24640 - }, - { - "epoch": 3.968235436209187, - "grad_norm": 0.011254752986133099, - "learning_rate": 0.0001999922360740453, - "loss": 46.0, - "step": 24641 - }, - { - "epoch": 3.9683964732879744, - "grad_norm": 0.008519008755683899, - "learning_rate": 0.00019999223544362157, - "loss": 46.0, - "step": 24642 - }, - { - "epoch": 3.968557510366762, - "grad_norm": 0.00339347287081182, - "learning_rate": 0.0001999922348131723, - "loss": 46.0, - "step": 24643 - }, - { - "epoch": 3.9687185474455493, - "grad_norm": 0.0018427064642310143, - "learning_rate": 0.00019999223418269742, - "loss": 46.0, - "step": 24644 - }, - { - "epoch": 3.9688795845243368, - "grad_norm": 0.008539896458387375, - "learning_rate": 0.00019999223355219694, - "loss": 46.0, - "step": 24645 - }, - { - "epoch": 3.969040621603124, - "grad_norm": 0.0031247620936483145, - "learning_rate": 0.00019999223292167088, - "loss": 46.0, - "step": 24646 - }, - { - "epoch": 3.9692016586819117, - "grad_norm": 0.002595973666757345, - "learning_rate": 0.0001999922322911192, - "loss": 46.0, - "step": 24647 - }, - { - "epoch": 3.969362695760699, - "grad_norm": 0.003589972387999296, - "learning_rate": 0.00019999223166054195, - "loss": 46.0, - "step": 24648 - }, - { - "epoch": 3.9695237328394866, - "grad_norm": 0.008597678504884243, - "learning_rate": 0.0001999922310299391, - "loss": 46.0, - "step": 24649 - }, - { - "epoch": 3.9696847699182736, - "grad_norm": 0.0037852467503398657, - "learning_rate": 0.00019999223039931065, - "loss": 46.0, - "step": 24650 - }, - { - "epoch": 3.969845806997061, - "grad_norm": 0.016174133867025375, - "learning_rate": 0.00019999222976865662, - "loss": 46.0, - "step": 24651 - }, - { - "epoch": 3.9700068440758485, - "grad_norm": 0.0014885629061609507, - "learning_rate": 0.000199992229137977, - "loss": 46.0, - "step": 24652 - }, - { - "epoch": 3.970167881154636, - "grad_norm": 0.0023910885211080313, - "learning_rate": 0.00019999222850727177, - "loss": 46.0, - "step": 24653 - }, - { - "epoch": 3.9703289182334234, - "grad_norm": 0.0009760893881320953, - "learning_rate": 0.00019999222787654095, - "loss": 46.0, - "step": 24654 - }, - { - "epoch": 3.9704899553122104, - "grad_norm": 0.005014242138713598, - "learning_rate": 0.00019999222724578455, - "loss": 46.0, - "step": 24655 - }, - { - "epoch": 3.970650992390998, - "grad_norm": 0.004406902473419905, - "learning_rate": 0.00019999222661500255, - "loss": 46.0, - "step": 24656 - }, - { - "epoch": 3.9708120294697853, - "grad_norm": 0.010985944420099258, - "learning_rate": 0.00019999222598419494, - "loss": 46.0, - "step": 24657 - }, - { - "epoch": 3.9709730665485727, - "grad_norm": 0.000462591735413298, - "learning_rate": 0.00019999222535336175, - "loss": 46.0, - "step": 24658 - }, - { - "epoch": 3.97113410362736, - "grad_norm": 0.01707618683576584, - "learning_rate": 0.00019999222472250297, - "loss": 46.0, - "step": 24659 - }, - { - "epoch": 3.9712951407061476, - "grad_norm": 0.002642994513735175, - "learning_rate": 0.0001999922240916186, - "loss": 46.0, - "step": 24660 - }, - { - "epoch": 3.971456177784935, - "grad_norm": 0.004335297271609306, - "learning_rate": 0.00019999222346070864, - "loss": 46.0, - "step": 24661 - }, - { - "epoch": 3.9716172148637225, - "grad_norm": 0.020558614283800125, - "learning_rate": 0.00019999222282977307, - "loss": 46.0, - "step": 24662 - }, - { - "epoch": 3.97177825194251, - "grad_norm": 0.008380230516195297, - "learning_rate": 0.00019999222219881194, - "loss": 46.0, - "step": 24663 - }, - { - "epoch": 3.971939289021297, - "grad_norm": 0.004801325965672731, - "learning_rate": 0.0001999922215678252, - "loss": 46.0, - "step": 24664 - }, - { - "epoch": 3.9721003261000845, - "grad_norm": 0.0026345206424593925, - "learning_rate": 0.00019999222093681284, - "loss": 46.0, - "step": 24665 - }, - { - "epoch": 3.972261363178872, - "grad_norm": 0.0064878519624471664, - "learning_rate": 0.00019999222030577492, - "loss": 46.0, - "step": 24666 - }, - { - "epoch": 3.9724224002576594, - "grad_norm": 0.001823552418500185, - "learning_rate": 0.0001999922196747114, - "loss": 46.0, - "step": 24667 - }, - { - "epoch": 3.972583437336447, - "grad_norm": 0.004214639309793711, - "learning_rate": 0.0001999922190436223, - "loss": 46.0, - "step": 24668 - }, - { - "epoch": 3.9727444744152343, - "grad_norm": 0.01371967326849699, - "learning_rate": 0.00019999221841250758, - "loss": 46.0, - "step": 24669 - }, - { - "epoch": 3.9729055114940213, - "grad_norm": 0.007138080894947052, - "learning_rate": 0.00019999221778136729, - "loss": 46.0, - "step": 24670 - }, - { - "epoch": 3.9730665485728087, - "grad_norm": 0.002246492076665163, - "learning_rate": 0.00019999221715020138, - "loss": 46.0, - "step": 24671 - }, - { - "epoch": 3.973227585651596, - "grad_norm": 0.0008088405011221766, - "learning_rate": 0.00019999221651900988, - "loss": 46.0, - "step": 24672 - }, - { - "epoch": 3.9733886227303836, - "grad_norm": 0.0047655063681304455, - "learning_rate": 0.0001999922158877928, - "loss": 46.0, - "step": 24673 - }, - { - "epoch": 3.973549659809171, - "grad_norm": 0.0012428220361471176, - "learning_rate": 0.00019999221525655015, - "loss": 46.0, - "step": 24674 - }, - { - "epoch": 3.9737106968879585, - "grad_norm": 0.0027536784764379263, - "learning_rate": 0.00019999221462528187, - "loss": 46.0, - "step": 24675 - }, - { - "epoch": 3.973871733966746, - "grad_norm": 0.0021960961166769266, - "learning_rate": 0.000199992213993988, - "loss": 46.0, - "step": 24676 - }, - { - "epoch": 3.9740327710455334, - "grad_norm": 0.0020430022850632668, - "learning_rate": 0.00019999221336266856, - "loss": 46.0, - "step": 24677 - }, - { - "epoch": 3.974193808124321, - "grad_norm": 0.010779774747788906, - "learning_rate": 0.00019999221273132352, - "loss": 46.0, - "step": 24678 - }, - { - "epoch": 3.974354845203108, - "grad_norm": 0.0004967165878042579, - "learning_rate": 0.00019999221209995288, - "loss": 46.0, - "step": 24679 - }, - { - "epoch": 3.9745158822818953, - "grad_norm": 0.004149053245782852, - "learning_rate": 0.00019999221146855666, - "loss": 46.0, - "step": 24680 - }, - { - "epoch": 3.974676919360683, - "grad_norm": 0.01024789921939373, - "learning_rate": 0.00019999221083713483, - "loss": 46.0, - "step": 24681 - }, - { - "epoch": 3.9748379564394702, - "grad_norm": 0.004827845375984907, - "learning_rate": 0.0001999922102056874, - "loss": 46.0, - "step": 24682 - }, - { - "epoch": 3.9749989935182577, - "grad_norm": 0.0033513468224555254, - "learning_rate": 0.00019999220957421442, - "loss": 46.0, - "step": 24683 - }, - { - "epoch": 3.9751600305970447, - "grad_norm": 0.006375516764819622, - "learning_rate": 0.0001999922089427158, - "loss": 46.0, - "step": 24684 - }, - { - "epoch": 3.975321067675832, - "grad_norm": 0.0020544766448438168, - "learning_rate": 0.0001999922083111916, - "loss": 46.0, - "step": 24685 - }, - { - "epoch": 3.9754821047546196, - "grad_norm": 0.0003264143888372928, - "learning_rate": 0.0001999922076796418, - "loss": 46.0, - "step": 24686 - }, - { - "epoch": 3.975643141833407, - "grad_norm": 0.010692698881030083, - "learning_rate": 0.00019999220704806643, - "loss": 46.0, - "step": 24687 - }, - { - "epoch": 3.9758041789121945, - "grad_norm": 0.0098728621378541, - "learning_rate": 0.00019999220641646548, - "loss": 46.0, - "step": 24688 - }, - { - "epoch": 3.975965215990982, - "grad_norm": 0.007909636944532394, - "learning_rate": 0.0001999922057848389, - "loss": 46.0, - "step": 24689 - }, - { - "epoch": 3.9761262530697694, - "grad_norm": 0.0012305461568757892, - "learning_rate": 0.00019999220515318675, - "loss": 46.0, - "step": 24690 - }, - { - "epoch": 3.976287290148557, - "grad_norm": 0.010655613616108894, - "learning_rate": 0.000199992204521509, - "loss": 46.0, - "step": 24691 - }, - { - "epoch": 3.9764483272273443, - "grad_norm": 0.011089557781815529, - "learning_rate": 0.00019999220388980567, - "loss": 46.0, - "step": 24692 - }, - { - "epoch": 3.9766093643061313, - "grad_norm": 0.0222268495708704, - "learning_rate": 0.00019999220325807673, - "loss": 46.0, - "step": 24693 - }, - { - "epoch": 3.9767704013849188, - "grad_norm": 0.0025264096911996603, - "learning_rate": 0.00019999220262632218, - "loss": 46.0, - "step": 24694 - }, - { - "epoch": 3.9769314384637062, - "grad_norm": 0.004113536793738604, - "learning_rate": 0.00019999220199454207, - "loss": 46.0, - "step": 24695 - }, - { - "epoch": 3.9770924755424937, - "grad_norm": 0.012270519509911537, - "learning_rate": 0.00019999220136273635, - "loss": 46.0, - "step": 24696 - }, - { - "epoch": 3.977253512621281, - "grad_norm": 0.0021190920379012823, - "learning_rate": 0.00019999220073090503, - "loss": 46.0, - "step": 24697 - }, - { - "epoch": 3.9774145497000686, - "grad_norm": 0.0017293510027229786, - "learning_rate": 0.00019999220009904814, - "loss": 46.0, - "step": 24698 - }, - { - "epoch": 3.9775755867788556, - "grad_norm": 0.0018818129319697618, - "learning_rate": 0.00019999219946716562, - "loss": 46.0, - "step": 24699 - }, - { - "epoch": 3.977736623857643, - "grad_norm": 0.0004118960350751877, - "learning_rate": 0.00019999219883525755, - "loss": 46.0, - "step": 24700 - }, - { - "epoch": 3.9778976609364305, - "grad_norm": 0.0017546769231557846, - "learning_rate": 0.00019999219820332386, - "loss": 46.0, - "step": 24701 - }, - { - "epoch": 3.978058698015218, - "grad_norm": 0.0035819578915834427, - "learning_rate": 0.00019999219757136462, - "loss": 46.0, - "step": 24702 - }, - { - "epoch": 3.9782197350940054, - "grad_norm": 0.003807461354881525, - "learning_rate": 0.00019999219693937973, - "loss": 46.0, - "step": 24703 - }, - { - "epoch": 3.978380772172793, - "grad_norm": 0.006211885716766119, - "learning_rate": 0.00019999219630736928, - "loss": 46.0, - "step": 24704 - }, - { - "epoch": 3.9785418092515803, - "grad_norm": 0.0023693828843533993, - "learning_rate": 0.00019999219567533321, - "loss": 46.0, - "step": 24705 - }, - { - "epoch": 3.9787028463303677, - "grad_norm": 0.0019062282517552376, - "learning_rate": 0.0001999921950432716, - "loss": 46.0, - "step": 24706 - }, - { - "epoch": 3.978863883409155, - "grad_norm": 0.008016892708837986, - "learning_rate": 0.00019999219441118435, - "loss": 46.0, - "step": 24707 - }, - { - "epoch": 3.979024920487942, - "grad_norm": 0.005275571718811989, - "learning_rate": 0.0001999921937790715, - "loss": 46.0, - "step": 24708 - }, - { - "epoch": 3.9791859575667297, - "grad_norm": 0.002864799229428172, - "learning_rate": 0.00019999219314693306, - "loss": 46.0, - "step": 24709 - }, - { - "epoch": 3.979346994645517, - "grad_norm": 0.0022006540093570948, - "learning_rate": 0.00019999219251476907, - "loss": 46.0, - "step": 24710 - }, - { - "epoch": 3.9795080317243046, - "grad_norm": 0.0026308512315154076, - "learning_rate": 0.00019999219188257948, - "loss": 46.0, - "step": 24711 - }, - { - "epoch": 3.979669068803092, - "grad_norm": 0.0034603977110236883, - "learning_rate": 0.00019999219125036425, - "loss": 46.0, - "step": 24712 - }, - { - "epoch": 3.9798301058818795, - "grad_norm": 0.00779740558937192, - "learning_rate": 0.00019999219061812346, - "loss": 46.0, - "step": 24713 - }, - { - "epoch": 3.9799911429606665, - "grad_norm": 0.013969328254461288, - "learning_rate": 0.0001999921899858571, - "loss": 46.0, - "step": 24714 - }, - { - "epoch": 3.980152180039454, - "grad_norm": 0.0053104558028280735, - "learning_rate": 0.0001999921893535651, - "loss": 46.0, - "step": 24715 - }, - { - "epoch": 3.9803132171182414, - "grad_norm": 0.0016471610870212317, - "learning_rate": 0.00019999218872124752, - "loss": 46.0, - "step": 24716 - }, - { - "epoch": 3.980474254197029, - "grad_norm": 0.000960015517193824, - "learning_rate": 0.00019999218808890436, - "loss": 46.0, - "step": 24717 - }, - { - "epoch": 3.9806352912758163, - "grad_norm": 0.003994822036474943, - "learning_rate": 0.00019999218745653558, - "loss": 46.0, - "step": 24718 - }, - { - "epoch": 3.9807963283546037, - "grad_norm": 0.001035549445077777, - "learning_rate": 0.00019999218682414125, - "loss": 46.0, - "step": 24719 - }, - { - "epoch": 3.980957365433391, - "grad_norm": 0.009329813532531261, - "learning_rate": 0.0001999921861917213, - "loss": 46.0, - "step": 24720 - }, - { - "epoch": 3.9811184025121786, - "grad_norm": 0.0053340899758040905, - "learning_rate": 0.00019999218555927578, - "loss": 46.0, - "step": 24721 - }, - { - "epoch": 3.981279439590966, - "grad_norm": 0.011015500873327255, - "learning_rate": 0.00019999218492680463, - "loss": 46.0, - "step": 24722 - }, - { - "epoch": 3.981440476669753, - "grad_norm": 0.012094903737306595, - "learning_rate": 0.00019999218429430792, - "loss": 46.0, - "step": 24723 - }, - { - "epoch": 3.9816015137485405, - "grad_norm": 0.005665578413754702, - "learning_rate": 0.00019999218366178561, - "loss": 46.0, - "step": 24724 - }, - { - "epoch": 3.981762550827328, - "grad_norm": 0.0025657762307673693, - "learning_rate": 0.00019999218302923767, - "loss": 46.0, - "step": 24725 - }, - { - "epoch": 3.9819235879061154, - "grad_norm": 0.005956851411610842, - "learning_rate": 0.00019999218239666417, - "loss": 46.0, - "step": 24726 - }, - { - "epoch": 3.982084624984903, - "grad_norm": 0.0009998215828090906, - "learning_rate": 0.00019999218176406508, - "loss": 46.0, - "step": 24727 - }, - { - "epoch": 3.98224566206369, - "grad_norm": 0.015045387670397758, - "learning_rate": 0.0001999921811314404, - "loss": 46.0, - "step": 24728 - }, - { - "epoch": 3.9824066991424774, - "grad_norm": 0.0013141731033101678, - "learning_rate": 0.00019999218049879011, - "loss": 46.0, - "step": 24729 - }, - { - "epoch": 3.982567736221265, - "grad_norm": 0.002103073289617896, - "learning_rate": 0.00019999217986611424, - "loss": 46.0, - "step": 24730 - }, - { - "epoch": 3.9827287733000523, - "grad_norm": 0.0019091975409537554, - "learning_rate": 0.00019999217923341277, - "loss": 46.0, - "step": 24731 - }, - { - "epoch": 3.9828898103788397, - "grad_norm": 0.006263358052819967, - "learning_rate": 0.00019999217860068572, - "loss": 46.0, - "step": 24732 - }, - { - "epoch": 3.983050847457627, - "grad_norm": 0.0015347658190876245, - "learning_rate": 0.00019999217796793305, - "loss": 46.0, - "step": 24733 - }, - { - "epoch": 3.9832118845364146, - "grad_norm": 0.0006784264696761966, - "learning_rate": 0.00019999217733515483, - "loss": 46.0, - "step": 24734 - }, - { - "epoch": 3.983372921615202, - "grad_norm": 0.004705656319856644, - "learning_rate": 0.00019999217670235099, - "loss": 46.0, - "step": 24735 - }, - { - "epoch": 3.9835339586939895, - "grad_norm": 0.002923637628555298, - "learning_rate": 0.00019999217606952153, - "loss": 46.0, - "step": 24736 - }, - { - "epoch": 3.9836949957727765, - "grad_norm": 0.0025600737426429987, - "learning_rate": 0.00019999217543666654, - "loss": 46.0, - "step": 24737 - }, - { - "epoch": 3.983856032851564, - "grad_norm": 0.0014375726459547877, - "learning_rate": 0.0001999921748037859, - "loss": 46.0, - "step": 24738 - }, - { - "epoch": 3.9840170699303514, - "grad_norm": 0.0026932931505143642, - "learning_rate": 0.00019999217417087972, - "loss": 46.0, - "step": 24739 - }, - { - "epoch": 3.984178107009139, - "grad_norm": 0.0025541021022945642, - "learning_rate": 0.0001999921735379479, - "loss": 46.0, - "step": 24740 - }, - { - "epoch": 3.9843391440879263, - "grad_norm": 0.0012892563827335835, - "learning_rate": 0.0001999921729049905, - "loss": 46.0, - "step": 24741 - }, - { - "epoch": 3.984500181166714, - "grad_norm": 0.002662871964275837, - "learning_rate": 0.0001999921722720075, - "loss": 46.0, - "step": 24742 - }, - { - "epoch": 3.984661218245501, - "grad_norm": 0.010451670736074448, - "learning_rate": 0.00019999217163899893, - "loss": 46.0, - "step": 24743 - }, - { - "epoch": 3.9848222553242882, - "grad_norm": 0.016197005286812782, - "learning_rate": 0.00019999217100596478, - "loss": 46.0, - "step": 24744 - }, - { - "epoch": 3.9849832924030757, - "grad_norm": 0.001326465979218483, - "learning_rate": 0.00019999217037290499, - "loss": 46.0, - "step": 24745 - }, - { - "epoch": 3.985144329481863, - "grad_norm": 0.0023445934057235718, - "learning_rate": 0.00019999216973981963, - "loss": 46.0, - "step": 24746 - }, - { - "epoch": 3.9853053665606506, - "grad_norm": 0.00758154783397913, - "learning_rate": 0.0001999921691067087, - "loss": 46.0, - "step": 24747 - }, - { - "epoch": 3.985466403639438, - "grad_norm": 0.007461497560143471, - "learning_rate": 0.00019999216847357213, - "loss": 46.0, - "step": 24748 - }, - { - "epoch": 3.9856274407182255, - "grad_norm": 0.012365833856165409, - "learning_rate": 0.00019999216784041002, - "loss": 46.0, - "step": 24749 - }, - { - "epoch": 3.985788477797013, - "grad_norm": 0.0039055971428751945, - "learning_rate": 0.00019999216720722226, - "loss": 46.0, - "step": 24750 - }, - { - "epoch": 3.9859495148758004, - "grad_norm": 0.003098601708188653, - "learning_rate": 0.00019999216657400895, - "loss": 46.0, - "step": 24751 - }, - { - "epoch": 3.9861105519545874, - "grad_norm": 0.0011022333055734634, - "learning_rate": 0.00019999216594077004, - "loss": 46.0, - "step": 24752 - }, - { - "epoch": 3.986271589033375, - "grad_norm": 0.01449637021869421, - "learning_rate": 0.0001999921653075055, - "loss": 46.0, - "step": 24753 - }, - { - "epoch": 3.9864326261121623, - "grad_norm": 0.0024452845100313425, - "learning_rate": 0.00019999216467421542, - "loss": 46.0, - "step": 24754 - }, - { - "epoch": 3.9865936631909498, - "grad_norm": 0.012003865092992783, - "learning_rate": 0.0001999921640408997, - "loss": 46.0, - "step": 24755 - }, - { - "epoch": 3.986754700269737, - "grad_norm": 0.0013558976352214813, - "learning_rate": 0.00019999216340755845, - "loss": 46.0, - "step": 24756 - }, - { - "epoch": 3.9869157373485242, - "grad_norm": 0.004575369879603386, - "learning_rate": 0.00019999216277419155, - "loss": 46.0, - "step": 24757 - }, - { - "epoch": 3.9870767744273117, - "grad_norm": 0.006203664466738701, - "learning_rate": 0.0001999921621407991, - "loss": 46.0, - "step": 24758 - }, - { - "epoch": 3.987237811506099, - "grad_norm": 0.0019251321209594607, - "learning_rate": 0.00019999216150738103, - "loss": 46.0, - "step": 24759 - }, - { - "epoch": 3.9873988485848866, - "grad_norm": 0.009185073897242546, - "learning_rate": 0.00019999216087393737, - "loss": 46.0, - "step": 24760 - }, - { - "epoch": 3.987559885663674, - "grad_norm": 0.0031341221183538437, - "learning_rate": 0.0001999921602404681, - "loss": 46.0, - "step": 24761 - }, - { - "epoch": 3.9877209227424615, - "grad_norm": 0.004984932020306587, - "learning_rate": 0.00019999215960697327, - "loss": 46.0, - "step": 24762 - }, - { - "epoch": 3.987881959821249, - "grad_norm": 0.0034871690440922976, - "learning_rate": 0.00019999215897345283, - "loss": 46.0, - "step": 24763 - }, - { - "epoch": 3.9880429969000364, - "grad_norm": 0.0013955391477793455, - "learning_rate": 0.0001999921583399068, - "loss": 46.0, - "step": 24764 - }, - { - "epoch": 3.988204033978824, - "grad_norm": 0.009611187502741814, - "learning_rate": 0.00019999215770633518, - "loss": 46.0, - "step": 24765 - }, - { - "epoch": 3.9883650710576113, - "grad_norm": 0.001299932599067688, - "learning_rate": 0.00019999215707273795, - "loss": 46.0, - "step": 24766 - }, - { - "epoch": 3.9885261081363983, - "grad_norm": 0.0028664679266512394, - "learning_rate": 0.00019999215643911513, - "loss": 46.0, - "step": 24767 - }, - { - "epoch": 3.9886871452151857, - "grad_norm": 0.007663626689463854, - "learning_rate": 0.00019999215580546675, - "loss": 46.0, - "step": 24768 - }, - { - "epoch": 3.988848182293973, - "grad_norm": 0.0063698128797113895, - "learning_rate": 0.00019999215517179275, - "loss": 46.0, - "step": 24769 - }, - { - "epoch": 3.9890092193727607, - "grad_norm": 0.0028165553230792284, - "learning_rate": 0.00019999215453809317, - "loss": 46.0, - "step": 24770 - }, - { - "epoch": 3.989170256451548, - "grad_norm": 0.0068365428596735, - "learning_rate": 0.00019999215390436797, - "loss": 46.0, - "step": 24771 - }, - { - "epoch": 3.989331293530335, - "grad_norm": 0.0009981007315218449, - "learning_rate": 0.0001999921532706172, - "loss": 46.0, - "step": 24772 - }, - { - "epoch": 3.9894923306091226, - "grad_norm": 0.002645881613716483, - "learning_rate": 0.00019999215263684082, - "loss": 46.0, - "step": 24773 - }, - { - "epoch": 3.98965336768791, - "grad_norm": 0.001725579029880464, - "learning_rate": 0.00019999215200303886, - "loss": 46.0, - "step": 24774 - }, - { - "epoch": 3.9898144047666975, - "grad_norm": 0.00524672819301486, - "learning_rate": 0.00019999215136921132, - "loss": 46.0, - "step": 24775 - }, - { - "epoch": 3.989975441845485, - "grad_norm": 0.0017213233513757586, - "learning_rate": 0.0001999921507353582, - "loss": 46.0, - "step": 24776 - }, - { - "epoch": 3.9901364789242724, - "grad_norm": 0.003459772327914834, - "learning_rate": 0.00019999215010147941, - "loss": 46.0, - "step": 24777 - }, - { - "epoch": 3.99029751600306, - "grad_norm": 0.00529498141258955, - "learning_rate": 0.0001999921494675751, - "loss": 46.0, - "step": 24778 - }, - { - "epoch": 3.9904585530818473, - "grad_norm": 0.01653357408940792, - "learning_rate": 0.0001999921488336452, - "loss": 46.0, - "step": 24779 - }, - { - "epoch": 3.9906195901606347, - "grad_norm": 0.0013534490717574954, - "learning_rate": 0.00019999214819968968, - "loss": 46.0, - "step": 24780 - }, - { - "epoch": 3.9907806272394217, - "grad_norm": 0.004747649189084768, - "learning_rate": 0.00019999214756570853, - "loss": 46.0, - "step": 24781 - }, - { - "epoch": 3.990941664318209, - "grad_norm": 0.004367772489786148, - "learning_rate": 0.00019999214693170185, - "loss": 46.0, - "step": 24782 - }, - { - "epoch": 3.9911027013969966, - "grad_norm": 0.008277861401438713, - "learning_rate": 0.00019999214629766955, - "loss": 46.0, - "step": 24783 - }, - { - "epoch": 3.991263738475784, - "grad_norm": 0.006896821781992912, - "learning_rate": 0.00019999214566361167, - "loss": 46.0, - "step": 24784 - }, - { - "epoch": 3.9914247755545715, - "grad_norm": 0.0029220758005976677, - "learning_rate": 0.0001999921450295282, - "loss": 46.0, - "step": 24785 - }, - { - "epoch": 3.991585812633359, - "grad_norm": 0.003588699968531728, - "learning_rate": 0.00019999214439541914, - "loss": 46.0, - "step": 24786 - }, - { - "epoch": 3.991746849712146, - "grad_norm": 0.004981989040970802, - "learning_rate": 0.00019999214376128444, - "loss": 46.0, - "step": 24787 - }, - { - "epoch": 3.9919078867909334, - "grad_norm": 0.005065221339464188, - "learning_rate": 0.00019999214312712419, - "loss": 46.0, - "step": 24788 - }, - { - "epoch": 3.992068923869721, - "grad_norm": 0.0030243347864598036, - "learning_rate": 0.00019999214249293834, - "loss": 46.0, - "step": 24789 - }, - { - "epoch": 3.9922299609485083, - "grad_norm": 0.003149948548525572, - "learning_rate": 0.0001999921418587269, - "loss": 46.0, - "step": 24790 - }, - { - "epoch": 3.992390998027296, - "grad_norm": 0.0061769732274115086, - "learning_rate": 0.00019999214122448986, - "loss": 46.0, - "step": 24791 - }, - { - "epoch": 3.9925520351060833, - "grad_norm": 0.004211899358779192, - "learning_rate": 0.00019999214059022725, - "loss": 46.0, - "step": 24792 - }, - { - "epoch": 3.9927130721848707, - "grad_norm": 0.004260799381881952, - "learning_rate": 0.00019999213995593903, - "loss": 46.0, - "step": 24793 - }, - { - "epoch": 3.992874109263658, - "grad_norm": 0.0047255465760827065, - "learning_rate": 0.00019999213932162517, - "loss": 46.0, - "step": 24794 - }, - { - "epoch": 3.9930351463424456, - "grad_norm": 0.004821198061108589, - "learning_rate": 0.0001999921386872858, - "loss": 46.0, - "step": 24795 - }, - { - "epoch": 3.9931961834212326, - "grad_norm": 0.002056732075288892, - "learning_rate": 0.00019999213805292079, - "loss": 46.0, - "step": 24796 - }, - { - "epoch": 3.99335722050002, - "grad_norm": 0.001037733512930572, - "learning_rate": 0.0001999921374185302, - "loss": 46.0, - "step": 24797 - }, - { - "epoch": 3.9935182575788075, - "grad_norm": 0.006771755404770374, - "learning_rate": 0.000199992136784114, - "loss": 46.0, - "step": 24798 - }, - { - "epoch": 3.993679294657595, - "grad_norm": 0.006890963297337294, - "learning_rate": 0.0001999921361496722, - "loss": 46.0, - "step": 24799 - }, - { - "epoch": 3.9938403317363824, - "grad_norm": 0.004899971187114716, - "learning_rate": 0.00019999213551520484, - "loss": 46.0, - "step": 24800 - }, - { - "epoch": 3.9940013688151694, - "grad_norm": 0.0019697954412549734, - "learning_rate": 0.00019999213488071187, - "loss": 46.0, - "step": 24801 - }, - { - "epoch": 3.994162405893957, - "grad_norm": 0.0011517596431076527, - "learning_rate": 0.0001999921342461933, - "loss": 46.0, - "step": 24802 - }, - { - "epoch": 3.9943234429727443, - "grad_norm": 0.0033353553153574467, - "learning_rate": 0.00019999213361164916, - "loss": 46.0, - "step": 24803 - }, - { - "epoch": 3.994484480051532, - "grad_norm": 0.009231653064489365, - "learning_rate": 0.00019999213297707943, - "loss": 46.0, - "step": 24804 - }, - { - "epoch": 3.9946455171303192, - "grad_norm": 0.005704787094146013, - "learning_rate": 0.00019999213234248408, - "loss": 46.0, - "step": 24805 - }, - { - "epoch": 3.9948065542091067, - "grad_norm": 0.0023751973640173674, - "learning_rate": 0.00019999213170786314, - "loss": 46.0, - "step": 24806 - }, - { - "epoch": 3.994967591287894, - "grad_norm": 0.0035583116114139557, - "learning_rate": 0.00019999213107321662, - "loss": 46.0, - "step": 24807 - }, - { - "epoch": 3.9951286283666816, - "grad_norm": 0.004200700670480728, - "learning_rate": 0.0001999921304385445, - "loss": 46.0, - "step": 24808 - }, - { - "epoch": 3.995289665445469, - "grad_norm": 0.0019821873866021633, - "learning_rate": 0.00019999212980384678, - "loss": 46.0, - "step": 24809 - }, - { - "epoch": 3.995450702524256, - "grad_norm": 0.01348206214606762, - "learning_rate": 0.0001999921291691235, - "loss": 46.0, - "step": 24810 - }, - { - "epoch": 3.9956117396030435, - "grad_norm": 0.004082653671503067, - "learning_rate": 0.0001999921285343746, - "loss": 46.0, - "step": 24811 - }, - { - "epoch": 3.995772776681831, - "grad_norm": 0.008464104495942593, - "learning_rate": 0.0001999921278996001, - "loss": 46.0, - "step": 24812 - }, - { - "epoch": 3.9959338137606184, - "grad_norm": 0.0035004690289497375, - "learning_rate": 0.00019999212726480004, - "loss": 46.0, - "step": 24813 - }, - { - "epoch": 3.996094850839406, - "grad_norm": 0.002037183614447713, - "learning_rate": 0.00019999212662997435, - "loss": 46.0, - "step": 24814 - }, - { - "epoch": 3.9962558879181933, - "grad_norm": 0.001355841406621039, - "learning_rate": 0.0001999921259951231, - "loss": 46.0, - "step": 24815 - }, - { - "epoch": 3.9964169249969803, - "grad_norm": 0.0014639337314292789, - "learning_rate": 0.00019999212536024624, - "loss": 46.0, - "step": 24816 - }, - { - "epoch": 3.9965779620757678, - "grad_norm": 0.020203057676553726, - "learning_rate": 0.0001999921247253438, - "loss": 46.0, - "step": 24817 - }, - { - "epoch": 3.996738999154555, - "grad_norm": 0.002857648767530918, - "learning_rate": 0.00019999212409041572, - "loss": 46.0, - "step": 24818 - }, - { - "epoch": 3.9969000362333427, - "grad_norm": 0.0017090535257011652, - "learning_rate": 0.00019999212345546207, - "loss": 46.0, - "step": 24819 - }, - { - "epoch": 3.99706107331213, - "grad_norm": 0.0034088161773979664, - "learning_rate": 0.00019999212282048286, - "loss": 46.0, - "step": 24820 - }, - { - "epoch": 3.9972221103909176, - "grad_norm": 0.00397028774023056, - "learning_rate": 0.00019999212218547804, - "loss": 46.0, - "step": 24821 - }, - { - "epoch": 3.997383147469705, - "grad_norm": 0.002675285330042243, - "learning_rate": 0.00019999212155044762, - "loss": 46.0, - "step": 24822 - }, - { - "epoch": 3.9975441845484925, - "grad_norm": 0.0015281352680176497, - "learning_rate": 0.00019999212091539162, - "loss": 46.0, - "step": 24823 - }, - { - "epoch": 3.99770522162728, - "grad_norm": 0.001216026023030281, - "learning_rate": 0.00019999212028031, - "loss": 46.0, - "step": 24824 - }, - { - "epoch": 3.997866258706067, - "grad_norm": 0.0036450561601668596, - "learning_rate": 0.0001999921196452028, - "loss": 46.0, - "step": 24825 - }, - { - "epoch": 3.9980272957848544, - "grad_norm": 0.01008449587970972, - "learning_rate": 0.00019999211901007002, - "loss": 46.0, - "step": 24826 - }, - { - "epoch": 3.998188332863642, - "grad_norm": 0.005471523851156235, - "learning_rate": 0.00019999211837491164, - "loss": 46.0, - "step": 24827 - }, - { - "epoch": 3.9983493699424293, - "grad_norm": 0.002226640470325947, - "learning_rate": 0.00019999211773972765, - "loss": 46.0, - "step": 24828 - }, - { - "epoch": 3.9985104070212167, - "grad_norm": 0.012375475838780403, - "learning_rate": 0.0001999921171045181, - "loss": 46.0, - "step": 24829 - }, - { - "epoch": 3.998671444100004, - "grad_norm": 0.003869743784889579, - "learning_rate": 0.00019999211646928294, - "loss": 46.0, - "step": 24830 - }, - { - "epoch": 3.998832481178791, - "grad_norm": 0.0021538231521844864, - "learning_rate": 0.0001999921158340222, - "loss": 46.0, - "step": 24831 - }, - { - "epoch": 3.9989935182575786, - "grad_norm": 0.004209042061120272, - "learning_rate": 0.00019999211519873585, - "loss": 46.0, - "step": 24832 - }, - { - "epoch": 3.999154555336366, - "grad_norm": 0.004311444237828255, - "learning_rate": 0.0001999921145634239, - "loss": 46.0, - "step": 24833 - }, - { - "epoch": 3.9993155924151536, - "grad_norm": 0.004337720572948456, - "learning_rate": 0.00019999211392808639, - "loss": 46.0, - "step": 24834 - }, - { - "epoch": 3.999476629493941, - "grad_norm": 0.0011222240282222629, - "learning_rate": 0.00019999211329272326, - "loss": 46.0, - "step": 24835 - }, - { - "epoch": 3.9996376665727285, - "grad_norm": 0.012477749958634377, - "learning_rate": 0.00019999211265733452, - "loss": 46.0, - "step": 24836 - }, - { - "epoch": 3.999798703651516, - "grad_norm": 0.003405028721317649, - "learning_rate": 0.00019999211202192024, - "loss": 46.0, - "step": 24837 - }, - { - "epoch": 3.9999597407303034, - "grad_norm": 0.0043719857931137085, - "learning_rate": 0.00019999211138648033, - "loss": 46.0, - "step": 24838 - }, - { - "epoch": 3.9999597407303034, - "eval_loss": 11.5, - "eval_runtime": 14.8473, - "eval_samples_per_second": 176.126, - "eval_steps_per_second": 88.097, - "step": 24838 - }, - { - "epoch": 4.0001610370787875, - "grad_norm": 0.013840921223163605, - "learning_rate": 0.00019999211075101482, - "loss": 46.0, - "step": 24839 - }, - { - "epoch": 4.000322074157575, - "grad_norm": 0.0011507347226142883, - "learning_rate": 0.00019999211011552373, - "loss": 46.0, - "step": 24840 - }, - { - "epoch": 4.000483111236362, - "grad_norm": 0.01128339022397995, - "learning_rate": 0.00019999210948000706, - "loss": 46.0, - "step": 24841 - }, - { - "epoch": 4.00064414831515, - "grad_norm": 0.006644542794674635, - "learning_rate": 0.00019999210884446476, - "loss": 46.0, - "step": 24842 - }, - { - "epoch": 4.000805185393937, - "grad_norm": 0.005154333543032408, - "learning_rate": 0.0001999921082088969, - "loss": 46.0, - "step": 24843 - }, - { - "epoch": 4.000966222472725, - "grad_norm": 0.004086259752511978, - "learning_rate": 0.00019999210757330347, - "loss": 46.0, - "step": 24844 - }, - { - "epoch": 4.001127259551512, - "grad_norm": 0.0042493208311498165, - "learning_rate": 0.0001999921069376844, - "loss": 46.0, - "step": 24845 - }, - { - "epoch": 4.001288296630299, - "grad_norm": 0.004113773349672556, - "learning_rate": 0.00019999210630203975, - "loss": 46.0, - "step": 24846 - }, - { - "epoch": 4.001449333709086, - "grad_norm": 0.004083591513335705, - "learning_rate": 0.00019999210566636953, - "loss": 46.0, - "step": 24847 - }, - { - "epoch": 4.001610370787874, - "grad_norm": 0.010515168309211731, - "learning_rate": 0.00019999210503067368, - "loss": 46.0, - "step": 24848 - }, - { - "epoch": 4.001771407866661, - "grad_norm": 0.0006976528675295413, - "learning_rate": 0.00019999210439495228, - "loss": 46.0, - "step": 24849 - }, - { - "epoch": 4.0019324449454485, - "grad_norm": 0.00829688087105751, - "learning_rate": 0.00019999210375920524, - "loss": 46.0, - "step": 24850 - }, - { - "epoch": 4.002093482024236, - "grad_norm": 0.011194777674973011, - "learning_rate": 0.00019999210312343264, - "loss": 46.0, - "step": 24851 - }, - { - "epoch": 4.002254519103023, - "grad_norm": 0.0022454794961959124, - "learning_rate": 0.00019999210248763445, - "loss": 46.0, - "step": 24852 - }, - { - "epoch": 4.002415556181811, - "grad_norm": 0.0011609733337536454, - "learning_rate": 0.00019999210185181064, - "loss": 46.0, - "step": 24853 - }, - { - "epoch": 4.002576593260598, - "grad_norm": 0.0038015421014279127, - "learning_rate": 0.00019999210121596125, - "loss": 46.0, - "step": 24854 - }, - { - "epoch": 4.002737630339386, - "grad_norm": 0.001069945632480085, - "learning_rate": 0.00019999210058008627, - "loss": 46.0, - "step": 24855 - }, - { - "epoch": 4.002898667418173, - "grad_norm": 0.005756659433245659, - "learning_rate": 0.0001999920999441857, - "loss": 46.0, - "step": 24856 - }, - { - "epoch": 4.003059704496961, - "grad_norm": 0.012916953302919865, - "learning_rate": 0.00019999209930825952, - "loss": 46.0, - "step": 24857 - }, - { - "epoch": 4.003220741575748, - "grad_norm": 0.0026917133945971727, - "learning_rate": 0.00019999209867230778, - "loss": 46.0, - "step": 24858 - }, - { - "epoch": 4.003381778654536, - "grad_norm": 0.017080552875995636, - "learning_rate": 0.00019999209803633043, - "loss": 46.0, - "step": 24859 - }, - { - "epoch": 4.003542815733322, - "grad_norm": 0.010402707383036613, - "learning_rate": 0.0001999920974003275, - "loss": 46.0, - "step": 24860 - }, - { - "epoch": 4.00370385281211, - "grad_norm": 0.007182632572948933, - "learning_rate": 0.00019999209676429896, - "loss": 46.0, - "step": 24861 - }, - { - "epoch": 4.003864889890897, - "grad_norm": 0.005855188705027103, - "learning_rate": 0.00019999209612824484, - "loss": 46.0, - "step": 24862 - }, - { - "epoch": 4.0040259269696845, - "grad_norm": 0.0009144256473518908, - "learning_rate": 0.0001999920954921651, - "loss": 46.0, - "step": 24863 - }, - { - "epoch": 4.004186964048472, - "grad_norm": 0.0022613827604800463, - "learning_rate": 0.00019999209485605977, - "loss": 46.0, - "step": 24864 - }, - { - "epoch": 4.004348001127259, - "grad_norm": 0.002389657311141491, - "learning_rate": 0.0001999920942199289, - "loss": 46.0, - "step": 24865 - }, - { - "epoch": 4.004509038206047, - "grad_norm": 0.006755763199180365, - "learning_rate": 0.00019999209358377237, - "loss": 46.0, - "step": 24866 - }, - { - "epoch": 4.004670075284834, - "grad_norm": 0.0031629065051674843, - "learning_rate": 0.00019999209294759026, - "loss": 46.0, - "step": 24867 - }, - { - "epoch": 4.004831112363622, - "grad_norm": 0.0027789652813225985, - "learning_rate": 0.0001999920923113826, - "loss": 46.0, - "step": 24868 - }, - { - "epoch": 4.004992149442409, - "grad_norm": 0.006508123595267534, - "learning_rate": 0.0001999920916751493, - "loss": 46.0, - "step": 24869 - }, - { - "epoch": 4.005153186521197, - "grad_norm": 0.003923442680388689, - "learning_rate": 0.00019999209103889042, - "loss": 46.0, - "step": 24870 - }, - { - "epoch": 4.005314223599984, - "grad_norm": 0.004182691685855389, - "learning_rate": 0.000199992090402606, - "loss": 46.0, - "step": 24871 - }, - { - "epoch": 4.005475260678772, - "grad_norm": 0.01167905144393444, - "learning_rate": 0.0001999920897662959, - "loss": 46.0, - "step": 24872 - }, - { - "epoch": 4.005636297757559, - "grad_norm": 0.009988836012780666, - "learning_rate": 0.00019999208912996027, - "loss": 46.0, - "step": 24873 - }, - { - "epoch": 4.0057973348363465, - "grad_norm": 0.009333707392215729, - "learning_rate": 0.000199992088493599, - "loss": 46.0, - "step": 24874 - }, - { - "epoch": 4.005958371915133, - "grad_norm": 0.008291992358863354, - "learning_rate": 0.00019999208785721217, - "loss": 46.0, - "step": 24875 - }, - { - "epoch": 4.0061194089939205, - "grad_norm": 0.0009046962368302047, - "learning_rate": 0.00019999208722079975, - "loss": 46.0, - "step": 24876 - }, - { - "epoch": 4.006280446072708, - "grad_norm": 0.008579776622354984, - "learning_rate": 0.00019999208658436172, - "loss": 46.0, - "step": 24877 - }, - { - "epoch": 4.006441483151495, - "grad_norm": 0.0016432763077318668, - "learning_rate": 0.0001999920859478981, - "loss": 46.0, - "step": 24878 - }, - { - "epoch": 4.006602520230283, - "grad_norm": 0.008889056742191315, - "learning_rate": 0.0001999920853114089, - "loss": 46.0, - "step": 24879 - }, - { - "epoch": 4.00676355730907, - "grad_norm": 0.0012846224708482623, - "learning_rate": 0.00019999208467489407, - "loss": 46.0, - "step": 24880 - }, - { - "epoch": 4.006924594387858, - "grad_norm": 0.0062545230612158775, - "learning_rate": 0.0001999920840383537, - "loss": 46.0, - "step": 24881 - }, - { - "epoch": 4.007085631466645, - "grad_norm": 0.005658826325088739, - "learning_rate": 0.0001999920834017877, - "loss": 46.0, - "step": 24882 - }, - { - "epoch": 4.007246668545433, - "grad_norm": 0.0049173543229699135, - "learning_rate": 0.0001999920827651961, - "loss": 46.0, - "step": 24883 - }, - { - "epoch": 4.00740770562422, - "grad_norm": 0.006444412283599377, - "learning_rate": 0.00019999208212857897, - "loss": 46.0, - "step": 24884 - }, - { - "epoch": 4.007568742703008, - "grad_norm": 0.0014707319205626845, - "learning_rate": 0.00019999208149193618, - "loss": 46.0, - "step": 24885 - }, - { - "epoch": 4.007729779781795, - "grad_norm": 0.001380774425342679, - "learning_rate": 0.0001999920808552678, - "loss": 46.0, - "step": 24886 - }, - { - "epoch": 4.0078908168605825, - "grad_norm": 0.0029165069572627544, - "learning_rate": 0.00019999208021857388, - "loss": 46.0, - "step": 24887 - }, - { - "epoch": 4.00805185393937, - "grad_norm": 0.001213430194184184, - "learning_rate": 0.00019999207958185434, - "loss": 46.0, - "step": 24888 - }, - { - "epoch": 4.008212891018157, - "grad_norm": 0.0045651583932340145, - "learning_rate": 0.0001999920789451092, - "loss": 46.0, - "step": 24889 - }, - { - "epoch": 4.008373928096944, - "grad_norm": 0.005638986360281706, - "learning_rate": 0.00019999207830833848, - "loss": 46.0, - "step": 24890 - }, - { - "epoch": 4.008534965175731, - "grad_norm": 0.005020777229219675, - "learning_rate": 0.00019999207767154212, - "loss": 46.0, - "step": 24891 - }, - { - "epoch": 4.008696002254519, - "grad_norm": 0.0014963076682761312, - "learning_rate": 0.00019999207703472023, - "loss": 46.0, - "step": 24892 - }, - { - "epoch": 4.008857039333306, - "grad_norm": 0.006173077039420605, - "learning_rate": 0.00019999207639787272, - "loss": 46.0, - "step": 24893 - }, - { - "epoch": 4.009018076412094, - "grad_norm": 0.004271052777767181, - "learning_rate": 0.0001999920757609996, - "loss": 46.0, - "step": 24894 - }, - { - "epoch": 4.009179113490881, - "grad_norm": 0.00589071586728096, - "learning_rate": 0.00019999207512410092, - "loss": 46.0, - "step": 24895 - }, - { - "epoch": 4.009340150569669, - "grad_norm": 0.0028419415466487408, - "learning_rate": 0.00019999207448717665, - "loss": 46.0, - "step": 24896 - }, - { - "epoch": 4.009501187648456, - "grad_norm": 0.004104808904230595, - "learning_rate": 0.00019999207385022676, - "loss": 46.0, - "step": 24897 - }, - { - "epoch": 4.0096622247272435, - "grad_norm": 0.0016246193554252386, - "learning_rate": 0.0001999920732132513, - "loss": 46.0, - "step": 24898 - }, - { - "epoch": 4.009823261806031, - "grad_norm": 0.005655268207192421, - "learning_rate": 0.0001999920725762502, - "loss": 46.0, - "step": 24899 - }, - { - "epoch": 4.009984298884818, - "grad_norm": 0.007813077419996262, - "learning_rate": 0.00019999207193922356, - "loss": 46.0, - "step": 24900 - }, - { - "epoch": 4.010145335963606, - "grad_norm": 0.006974251940846443, - "learning_rate": 0.0001999920713021713, - "loss": 46.0, - "step": 24901 - }, - { - "epoch": 4.010306373042393, - "grad_norm": 0.002973036142066121, - "learning_rate": 0.00019999207066509348, - "loss": 46.0, - "step": 24902 - }, - { - "epoch": 4.010467410121181, - "grad_norm": 0.0018903139280155301, - "learning_rate": 0.00019999207002799002, - "loss": 46.0, - "step": 24903 - }, - { - "epoch": 4.010628447199967, - "grad_norm": 0.0013387489598244429, - "learning_rate": 0.000199992069390861, - "loss": 46.0, - "step": 24904 - }, - { - "epoch": 4.010789484278755, - "grad_norm": 0.00622066855430603, - "learning_rate": 0.0001999920687537064, - "loss": 46.0, - "step": 24905 - }, - { - "epoch": 4.010950521357542, - "grad_norm": 0.01253416296094656, - "learning_rate": 0.00019999206811652616, - "loss": 46.0, - "step": 24906 - }, - { - "epoch": 4.01111155843633, - "grad_norm": 0.0027325714472681284, - "learning_rate": 0.00019999206747932035, - "loss": 46.0, - "step": 24907 - }, - { - "epoch": 4.011272595515117, - "grad_norm": 0.006488606799393892, - "learning_rate": 0.00019999206684208893, - "loss": 46.0, - "step": 24908 - }, - { - "epoch": 4.011433632593905, - "grad_norm": 0.002637316007167101, - "learning_rate": 0.00019999206620483195, - "loss": 46.0, - "step": 24909 - }, - { - "epoch": 4.011594669672692, - "grad_norm": 0.004580279812216759, - "learning_rate": 0.00019999206556754937, - "loss": 46.0, - "step": 24910 - }, - { - "epoch": 4.0117557067514795, - "grad_norm": 0.0018053918611258268, - "learning_rate": 0.0001999920649302412, - "loss": 46.0, - "step": 24911 - }, - { - "epoch": 4.011916743830267, - "grad_norm": 0.004670732654631138, - "learning_rate": 0.0001999920642929074, - "loss": 46.0, - "step": 24912 - }, - { - "epoch": 4.012077780909054, - "grad_norm": 0.004771932028234005, - "learning_rate": 0.00019999206365554803, - "loss": 46.0, - "step": 24913 - }, - { - "epoch": 4.012238817987842, - "grad_norm": 0.0013259975239634514, - "learning_rate": 0.00019999206301816308, - "loss": 46.0, - "step": 24914 - }, - { - "epoch": 4.012399855066629, - "grad_norm": 0.004627016838639975, - "learning_rate": 0.00019999206238075255, - "loss": 46.0, - "step": 24915 - }, - { - "epoch": 4.012560892145417, - "grad_norm": 0.006728042382746935, - "learning_rate": 0.0001999920617433164, - "loss": 46.0, - "step": 24916 - }, - { - "epoch": 4.012721929224204, - "grad_norm": 0.0013815994607284665, - "learning_rate": 0.00019999206110585463, - "loss": 46.0, - "step": 24917 - }, - { - "epoch": 4.012882966302992, - "grad_norm": 0.008304712362587452, - "learning_rate": 0.00019999206046836734, - "loss": 46.0, - "step": 24918 - }, - { - "epoch": 4.013044003381778, - "grad_norm": 0.0037253228947520256, - "learning_rate": 0.0001999920598308544, - "loss": 46.0, - "step": 24919 - }, - { - "epoch": 4.013205040460566, - "grad_norm": 0.003998242784291506, - "learning_rate": 0.0001999920591933159, - "loss": 46.0, - "step": 24920 - }, - { - "epoch": 4.013366077539353, - "grad_norm": 0.0059352112002670765, - "learning_rate": 0.0001999920585557518, - "loss": 46.0, - "step": 24921 - }, - { - "epoch": 4.013527114618141, - "grad_norm": 0.004196269437670708, - "learning_rate": 0.0001999920579181621, - "loss": 46.0, - "step": 24922 - }, - { - "epoch": 4.013688151696928, - "grad_norm": 0.0044249314814805984, - "learning_rate": 0.0001999920572805468, - "loss": 46.0, - "step": 24923 - }, - { - "epoch": 4.0138491887757155, - "grad_norm": 0.001224488951265812, - "learning_rate": 0.0001999920566429059, - "loss": 46.0, - "step": 24924 - }, - { - "epoch": 4.014010225854503, - "grad_norm": 0.01328927744179964, - "learning_rate": 0.00019999205600523942, - "loss": 46.0, - "step": 24925 - }, - { - "epoch": 4.01417126293329, - "grad_norm": 0.0035148095339536667, - "learning_rate": 0.00019999205536754734, - "loss": 46.0, - "step": 24926 - }, - { - "epoch": 4.014332300012078, - "grad_norm": 0.0024550568778067827, - "learning_rate": 0.00019999205472982968, - "loss": 46.0, - "step": 24927 - }, - { - "epoch": 4.014493337090865, - "grad_norm": 0.002423433819785714, - "learning_rate": 0.0001999920540920864, - "loss": 46.0, - "step": 24928 - }, - { - "epoch": 4.014654374169653, - "grad_norm": 0.001050095190294087, - "learning_rate": 0.00019999205345431757, - "loss": 46.0, - "step": 24929 - }, - { - "epoch": 4.01481541124844, - "grad_norm": 0.005810847505927086, - "learning_rate": 0.00019999205281652315, - "loss": 46.0, - "step": 24930 - }, - { - "epoch": 4.014976448327228, - "grad_norm": 0.002049764385446906, - "learning_rate": 0.00019999205217870308, - "loss": 46.0, - "step": 24931 - }, - { - "epoch": 4.015137485406015, - "grad_norm": 0.0035755804274231195, - "learning_rate": 0.00019999205154085746, - "loss": 46.0, - "step": 24932 - }, - { - "epoch": 4.015298522484803, - "grad_norm": 0.0034430858213454485, - "learning_rate": 0.00019999205090298622, - "loss": 46.0, - "step": 24933 - }, - { - "epoch": 4.015459559563589, - "grad_norm": 0.021088844165205956, - "learning_rate": 0.00019999205026508942, - "loss": 46.0, - "step": 24934 - }, - { - "epoch": 4.015620596642377, - "grad_norm": 0.0028874108102172613, - "learning_rate": 0.00019999204962716703, - "loss": 46.0, - "step": 24935 - }, - { - "epoch": 4.015781633721164, - "grad_norm": 0.011025993153452873, - "learning_rate": 0.000199992048989219, - "loss": 46.0, - "step": 24936 - }, - { - "epoch": 4.0159426707999515, - "grad_norm": 0.02030302956700325, - "learning_rate": 0.00019999204835124542, - "loss": 46.0, - "step": 24937 - }, - { - "epoch": 4.016103707878739, - "grad_norm": 0.0024919710122048855, - "learning_rate": 0.00019999204771324624, - "loss": 46.0, - "step": 24938 - }, - { - "epoch": 4.016264744957526, - "grad_norm": 0.005026031751185656, - "learning_rate": 0.00019999204707522145, - "loss": 46.0, - "step": 24939 - }, - { - "epoch": 4.016425782036314, - "grad_norm": 0.005144732538610697, - "learning_rate": 0.00019999204643717108, - "loss": 46.0, - "step": 24940 - }, - { - "epoch": 4.016586819115101, - "grad_norm": 0.0009662075317464769, - "learning_rate": 0.0001999920457990951, - "loss": 46.0, - "step": 24941 - }, - { - "epoch": 4.016747856193889, - "grad_norm": 0.010334135964512825, - "learning_rate": 0.00019999204516099356, - "loss": 46.0, - "step": 24942 - }, - { - "epoch": 4.016908893272676, - "grad_norm": 0.0017033818876370788, - "learning_rate": 0.00019999204452286642, - "loss": 46.0, - "step": 24943 - }, - { - "epoch": 4.017069930351464, - "grad_norm": 0.007641301490366459, - "learning_rate": 0.0001999920438847137, - "loss": 46.0, - "step": 24944 - }, - { - "epoch": 4.017230967430251, - "grad_norm": 0.0037254299968481064, - "learning_rate": 0.00019999204324653533, - "loss": 46.0, - "step": 24945 - }, - { - "epoch": 4.0173920045090385, - "grad_norm": 0.002317138249054551, - "learning_rate": 0.0001999920426083314, - "loss": 46.0, - "step": 24946 - }, - { - "epoch": 4.017553041587826, - "grad_norm": 0.0012140922481194139, - "learning_rate": 0.0001999920419701019, - "loss": 46.0, - "step": 24947 - }, - { - "epoch": 4.017714078666613, - "grad_norm": 0.003595631802454591, - "learning_rate": 0.00019999204133184676, - "loss": 46.0, - "step": 24948 - }, - { - "epoch": 4.0178751157454, - "grad_norm": 0.015302340500056744, - "learning_rate": 0.00019999204069356608, - "loss": 46.0, - "step": 24949 - }, - { - "epoch": 4.0180361528241875, - "grad_norm": 0.003522982355207205, - "learning_rate": 0.00019999204005525975, - "loss": 46.0, - "step": 24950 - }, - { - "epoch": 4.018197189902975, - "grad_norm": 0.008157297968864441, - "learning_rate": 0.00019999203941692786, - "loss": 46.0, - "step": 24951 - }, - { - "epoch": 4.018358226981762, - "grad_norm": 0.004223973024636507, - "learning_rate": 0.00019999203877857038, - "loss": 46.0, - "step": 24952 - }, - { - "epoch": 4.01851926406055, - "grad_norm": 0.011615936644375324, - "learning_rate": 0.0001999920381401873, - "loss": 46.0, - "step": 24953 - }, - { - "epoch": 4.018680301139337, - "grad_norm": 0.0013157908106222749, - "learning_rate": 0.0001999920375017786, - "loss": 46.0, - "step": 24954 - }, - { - "epoch": 4.018841338218125, - "grad_norm": 0.0023194567766040564, - "learning_rate": 0.00019999203686334437, - "loss": 46.0, - "step": 24955 - }, - { - "epoch": 4.019002375296912, - "grad_norm": 0.0052051842212677, - "learning_rate": 0.00019999203622488452, - "loss": 46.0, - "step": 24956 - }, - { - "epoch": 4.0191634123757, - "grad_norm": 0.0013000270118936896, - "learning_rate": 0.00019999203558639908, - "loss": 46.0, - "step": 24957 - }, - { - "epoch": 4.019324449454487, - "grad_norm": 0.00223491876386106, - "learning_rate": 0.000199992034947888, - "loss": 46.0, - "step": 24958 - }, - { - "epoch": 4.0194854865332745, - "grad_norm": 0.0036144836340099573, - "learning_rate": 0.00019999203430935139, - "loss": 46.0, - "step": 24959 - }, - { - "epoch": 4.019646523612062, - "grad_norm": 0.0016257986426353455, - "learning_rate": 0.00019999203367078913, - "loss": 46.0, - "step": 24960 - }, - { - "epoch": 4.019807560690849, - "grad_norm": 0.0010201609693467617, - "learning_rate": 0.00019999203303220134, - "loss": 46.0, - "step": 24961 - }, - { - "epoch": 4.019968597769637, - "grad_norm": 0.001547725056298077, - "learning_rate": 0.0001999920323935879, - "loss": 46.0, - "step": 24962 - }, - { - "epoch": 4.0201296348484234, - "grad_norm": 0.006314681842923164, - "learning_rate": 0.0001999920317549489, - "loss": 46.0, - "step": 24963 - }, - { - "epoch": 4.020290671927211, - "grad_norm": 0.0016107544070109725, - "learning_rate": 0.00019999203111628432, - "loss": 46.0, - "step": 24964 - }, - { - "epoch": 4.020451709005998, - "grad_norm": 0.0024504787288606167, - "learning_rate": 0.00019999203047759413, - "loss": 46.0, - "step": 24965 - }, - { - "epoch": 4.020612746084786, - "grad_norm": 0.002583805937319994, - "learning_rate": 0.00019999202983887832, - "loss": 46.0, - "step": 24966 - }, - { - "epoch": 4.020773783163573, - "grad_norm": 0.004775282461196184, - "learning_rate": 0.00019999202920013696, - "loss": 46.0, - "step": 24967 - }, - { - "epoch": 4.020934820242361, - "grad_norm": 0.015760183334350586, - "learning_rate": 0.00019999202856136998, - "loss": 46.0, - "step": 24968 - }, - { - "epoch": 4.021095857321148, - "grad_norm": 0.0076279910281300545, - "learning_rate": 0.0001999920279225774, - "loss": 46.0, - "step": 24969 - }, - { - "epoch": 4.021256894399936, - "grad_norm": 0.002553044818341732, - "learning_rate": 0.00019999202728375926, - "loss": 46.0, - "step": 24970 - }, - { - "epoch": 4.021417931478723, - "grad_norm": 0.002563176676630974, - "learning_rate": 0.00019999202664491551, - "loss": 46.0, - "step": 24971 - }, - { - "epoch": 4.0215789685575105, - "grad_norm": 0.010500002652406693, - "learning_rate": 0.00019999202600604616, - "loss": 46.0, - "step": 24972 - }, - { - "epoch": 4.021740005636298, - "grad_norm": 0.0006657430785708129, - "learning_rate": 0.00019999202536715122, - "loss": 46.0, - "step": 24973 - }, - { - "epoch": 4.021901042715085, - "grad_norm": 0.0030671637505292892, - "learning_rate": 0.0001999920247282307, - "loss": 46.0, - "step": 24974 - }, - { - "epoch": 4.022062079793873, - "grad_norm": 0.00227194931358099, - "learning_rate": 0.00019999202408928457, - "loss": 46.0, - "step": 24975 - }, - { - "epoch": 4.02222311687266, - "grad_norm": 0.01887577958405018, - "learning_rate": 0.00019999202345031286, - "loss": 46.0, - "step": 24976 - }, - { - "epoch": 4.022384153951447, - "grad_norm": 0.002493244130164385, - "learning_rate": 0.00019999202281131555, - "loss": 46.0, - "step": 24977 - }, - { - "epoch": 4.022545191030234, - "grad_norm": 0.0019332539523020387, - "learning_rate": 0.00019999202217229267, - "loss": 46.0, - "step": 24978 - }, - { - "epoch": 4.022706228109022, - "grad_norm": 0.01769905537366867, - "learning_rate": 0.00019999202153324417, - "loss": 46.0, - "step": 24979 - }, - { - "epoch": 4.022867265187809, - "grad_norm": 0.001634084852412343, - "learning_rate": 0.0001999920208941701, - "loss": 46.0, - "step": 24980 - }, - { - "epoch": 4.023028302266597, - "grad_norm": 0.004199749790132046, - "learning_rate": 0.0001999920202550704, - "loss": 46.0, - "step": 24981 - }, - { - "epoch": 4.023189339345384, - "grad_norm": 0.0019532626029103994, - "learning_rate": 0.00019999201961594514, - "loss": 46.0, - "step": 24982 - }, - { - "epoch": 4.023350376424172, - "grad_norm": 0.002360276412218809, - "learning_rate": 0.00019999201897679428, - "loss": 46.0, - "step": 24983 - }, - { - "epoch": 4.023511413502959, - "grad_norm": 0.010797782801091671, - "learning_rate": 0.00019999201833761782, - "loss": 46.0, - "step": 24984 - }, - { - "epoch": 4.0236724505817465, - "grad_norm": 0.0034726497251540422, - "learning_rate": 0.00019999201769841575, - "loss": 46.0, - "step": 24985 - }, - { - "epoch": 4.023833487660534, - "grad_norm": 0.008773855865001678, - "learning_rate": 0.00019999201705918812, - "loss": 46.0, - "step": 24986 - }, - { - "epoch": 4.023994524739321, - "grad_norm": 0.0036492610815912485, - "learning_rate": 0.00019999201641993488, - "loss": 46.0, - "step": 24987 - }, - { - "epoch": 4.024155561818109, - "grad_norm": 0.005425607319921255, - "learning_rate": 0.00019999201578065607, - "loss": 46.0, - "step": 24988 - }, - { - "epoch": 4.024316598896896, - "grad_norm": 0.0035622103605419397, - "learning_rate": 0.00019999201514135165, - "loss": 46.0, - "step": 24989 - }, - { - "epoch": 4.024477635975684, - "grad_norm": 0.004134961403906345, - "learning_rate": 0.00019999201450202162, - "loss": 46.0, - "step": 24990 - }, - { - "epoch": 4.024638673054471, - "grad_norm": 0.009632492437958717, - "learning_rate": 0.00019999201386266603, - "loss": 46.0, - "step": 24991 - }, - { - "epoch": 4.024799710133258, - "grad_norm": 0.0043112351559102535, - "learning_rate": 0.00019999201322328482, - "loss": 46.0, - "step": 24992 - }, - { - "epoch": 4.024960747212045, - "grad_norm": 0.001084771123714745, - "learning_rate": 0.00019999201258387802, - "loss": 46.0, - "step": 24993 - }, - { - "epoch": 4.025121784290833, - "grad_norm": 0.011641322635114193, - "learning_rate": 0.00019999201194444567, - "loss": 46.0, - "step": 24994 - }, - { - "epoch": 4.02528282136962, - "grad_norm": 0.006057314109057188, - "learning_rate": 0.00019999201130498765, - "loss": 46.0, - "step": 24995 - }, - { - "epoch": 4.025443858448408, - "grad_norm": 0.0020053863991051912, - "learning_rate": 0.0001999920106655041, - "loss": 46.0, - "step": 24996 - }, - { - "epoch": 4.025604895527195, - "grad_norm": 0.005097714252769947, - "learning_rate": 0.00019999201002599492, - "loss": 46.0, - "step": 24997 - }, - { - "epoch": 4.0257659326059825, - "grad_norm": 0.00591740757226944, - "learning_rate": 0.0001999920093864602, - "loss": 46.0, - "step": 24998 - }, - { - "epoch": 4.02592696968477, - "grad_norm": 0.00429281173273921, - "learning_rate": 0.00019999200874689982, - "loss": 46.0, - "step": 24999 - }, - { - "epoch": 4.026088006763557, - "grad_norm": 0.009144529700279236, - "learning_rate": 0.00019999200810731388, - "loss": 46.0, - "step": 25000 - }, - { - "epoch": 4.026249043842345, - "grad_norm": 0.002104668179526925, - "learning_rate": 0.00019999200746770234, - "loss": 46.0, - "step": 25001 - }, - { - "epoch": 4.026410080921132, - "grad_norm": 0.0022079164627939463, - "learning_rate": 0.00019999200682806523, - "loss": 46.0, - "step": 25002 - }, - { - "epoch": 4.02657111799992, - "grad_norm": 0.0010310064535588026, - "learning_rate": 0.0001999920061884025, - "loss": 46.0, - "step": 25003 - }, - { - "epoch": 4.026732155078707, - "grad_norm": 0.0016190280439332128, - "learning_rate": 0.0001999920055487142, - "loss": 46.0, - "step": 25004 - }, - { - "epoch": 4.026893192157495, - "grad_norm": 0.0067587136290967464, - "learning_rate": 0.00019999200490900028, - "loss": 46.0, - "step": 25005 - }, - { - "epoch": 4.027054229236282, - "grad_norm": 0.005166187416762114, - "learning_rate": 0.00019999200426926077, - "loss": 46.0, - "step": 25006 - }, - { - "epoch": 4.027215266315069, - "grad_norm": 0.012126859277486801, - "learning_rate": 0.00019999200362949568, - "loss": 46.0, - "step": 25007 - }, - { - "epoch": 4.027376303393856, - "grad_norm": 0.0038047428242862225, - "learning_rate": 0.00019999200298970502, - "loss": 46.0, - "step": 25008 - }, - { - "epoch": 4.027537340472644, - "grad_norm": 0.008614459075033665, - "learning_rate": 0.00019999200234988872, - "loss": 46.0, - "step": 25009 - }, - { - "epoch": 4.027698377551431, - "grad_norm": 0.0038431184366345406, - "learning_rate": 0.00019999200171004686, - "loss": 46.0, - "step": 25010 - }, - { - "epoch": 4.0278594146302185, - "grad_norm": 0.004231929313391447, - "learning_rate": 0.00019999200107017942, - "loss": 46.0, - "step": 25011 - }, - { - "epoch": 4.028020451709006, - "grad_norm": 0.0010132197057828307, - "learning_rate": 0.00019999200043028633, - "loss": 46.0, - "step": 25012 - }, - { - "epoch": 4.028181488787793, - "grad_norm": 0.001272825407795608, - "learning_rate": 0.00019999199979036769, - "loss": 46.0, - "step": 25013 - }, - { - "epoch": 4.028342525866581, - "grad_norm": 0.0025782862212508917, - "learning_rate": 0.00019999199915042345, - "loss": 46.0, - "step": 25014 - }, - { - "epoch": 4.028503562945368, - "grad_norm": 0.0019017639569938183, - "learning_rate": 0.0001999919985104536, - "loss": 46.0, - "step": 25015 - }, - { - "epoch": 4.028664600024156, - "grad_norm": 0.0020026471465826035, - "learning_rate": 0.0001999919978704582, - "loss": 46.0, - "step": 25016 - }, - { - "epoch": 4.028825637102943, - "grad_norm": 0.0023075321223586798, - "learning_rate": 0.00019999199723043715, - "loss": 46.0, - "step": 25017 - }, - { - "epoch": 4.028986674181731, - "grad_norm": 0.002112357411533594, - "learning_rate": 0.00019999199659039056, - "loss": 46.0, - "step": 25018 - }, - { - "epoch": 4.029147711260518, - "grad_norm": 0.002465772209689021, - "learning_rate": 0.00019999199595031834, - "loss": 46.0, - "step": 25019 - }, - { - "epoch": 4.0293087483393055, - "grad_norm": 0.010843264870345592, - "learning_rate": 0.00019999199531022056, - "loss": 46.0, - "step": 25020 - }, - { - "epoch": 4.029469785418092, - "grad_norm": 0.0015449357451871037, - "learning_rate": 0.00019999199467009716, - "loss": 46.0, - "step": 25021 - }, - { - "epoch": 4.0296308224968795, - "grad_norm": 0.006690714042633772, - "learning_rate": 0.00019999199402994817, - "loss": 46.0, - "step": 25022 - }, - { - "epoch": 4.029791859575667, - "grad_norm": 0.006099675316363573, - "learning_rate": 0.0001999919933897736, - "loss": 46.0, - "step": 25023 - }, - { - "epoch": 4.029952896654454, - "grad_norm": 0.015103408135473728, - "learning_rate": 0.00019999199274957344, - "loss": 46.0, - "step": 25024 - }, - { - "epoch": 4.030113933733242, - "grad_norm": 0.004041378851979971, - "learning_rate": 0.00019999199210934767, - "loss": 46.0, - "step": 25025 - }, - { - "epoch": 4.030274970812029, - "grad_norm": 0.0014146839966997504, - "learning_rate": 0.00019999199146909633, - "loss": 46.0, - "step": 25026 - }, - { - "epoch": 4.030436007890817, - "grad_norm": 0.010885761119425297, - "learning_rate": 0.00019999199082881936, - "loss": 46.0, - "step": 25027 - }, - { - "epoch": 4.030597044969604, - "grad_norm": 0.002577926032245159, - "learning_rate": 0.00019999199018851682, - "loss": 46.0, - "step": 25028 - }, - { - "epoch": 4.030758082048392, - "grad_norm": 0.002156562637537718, - "learning_rate": 0.0001999919895481887, - "loss": 46.0, - "step": 25029 - }, - { - "epoch": 4.030919119127179, - "grad_norm": 0.006228687707334757, - "learning_rate": 0.00019999198890783496, - "loss": 46.0, - "step": 25030 - }, - { - "epoch": 4.031080156205967, - "grad_norm": 0.002927055349573493, - "learning_rate": 0.00019999198826745564, - "loss": 46.0, - "step": 25031 - }, - { - "epoch": 4.031241193284754, - "grad_norm": 0.015131741762161255, - "learning_rate": 0.00019999198762705073, - "loss": 46.0, - "step": 25032 - }, - { - "epoch": 4.0314022303635415, - "grad_norm": 0.008905814960598946, - "learning_rate": 0.00019999198698662023, - "loss": 46.0, - "step": 25033 - }, - { - "epoch": 4.031563267442329, - "grad_norm": 0.0007261548307724297, - "learning_rate": 0.00019999198634616412, - "loss": 46.0, - "step": 25034 - }, - { - "epoch": 4.031724304521116, - "grad_norm": 0.009940962307155132, - "learning_rate": 0.00019999198570568244, - "loss": 46.0, - "step": 25035 - }, - { - "epoch": 4.031885341599903, - "grad_norm": 0.0009617822943255305, - "learning_rate": 0.00019999198506517516, - "loss": 46.0, - "step": 25036 - }, - { - "epoch": 4.03204637867869, - "grad_norm": 0.002603802364319563, - "learning_rate": 0.00019999198442464228, - "loss": 46.0, - "step": 25037 - }, - { - "epoch": 4.032207415757478, - "grad_norm": 0.000773383944761008, - "learning_rate": 0.00019999198378408382, - "loss": 46.0, - "step": 25038 - }, - { - "epoch": 4.032368452836265, - "grad_norm": 0.0027535571716725826, - "learning_rate": 0.00019999198314349977, - "loss": 46.0, - "step": 25039 - }, - { - "epoch": 4.032529489915053, - "grad_norm": 0.0035872994922101498, - "learning_rate": 0.0001999919825028901, - "loss": 46.0, - "step": 25040 - }, - { - "epoch": 4.03269052699384, - "grad_norm": 0.008839412592351437, - "learning_rate": 0.00019999198186225486, - "loss": 46.0, - "step": 25041 - }, - { - "epoch": 4.032851564072628, - "grad_norm": 0.00111858278978616, - "learning_rate": 0.00019999198122159402, - "loss": 46.0, - "step": 25042 - }, - { - "epoch": 4.033012601151415, - "grad_norm": 0.0018319826340302825, - "learning_rate": 0.00019999198058090757, - "loss": 46.0, - "step": 25043 - }, - { - "epoch": 4.033173638230203, - "grad_norm": 0.003526741173118353, - "learning_rate": 0.00019999197994019556, - "loss": 46.0, - "step": 25044 - }, - { - "epoch": 4.03333467530899, - "grad_norm": 0.0012674665777012706, - "learning_rate": 0.0001999919792994579, - "loss": 46.0, - "step": 25045 - }, - { - "epoch": 4.0334957123877775, - "grad_norm": 0.0018450462957844138, - "learning_rate": 0.00019999197865869472, - "loss": 46.0, - "step": 25046 - }, - { - "epoch": 4.033656749466565, - "grad_norm": 0.004494448192417622, - "learning_rate": 0.0001999919780179059, - "loss": 46.0, - "step": 25047 - }, - { - "epoch": 4.033817786545352, - "grad_norm": 0.0012645210372284055, - "learning_rate": 0.0001999919773770915, - "loss": 46.0, - "step": 25048 - }, - { - "epoch": 4.03397882362414, - "grad_norm": 0.00114197819493711, - "learning_rate": 0.0001999919767362515, - "loss": 46.0, - "step": 25049 - }, - { - "epoch": 4.034139860702927, - "grad_norm": 0.0013513603480532765, - "learning_rate": 0.00019999197609538595, - "loss": 46.0, - "step": 25050 - }, - { - "epoch": 4.034300897781714, - "grad_norm": 0.006062310189008713, - "learning_rate": 0.00019999197545449477, - "loss": 46.0, - "step": 25051 - }, - { - "epoch": 4.034461934860501, - "grad_norm": 0.002342484425753355, - "learning_rate": 0.00019999197481357798, - "loss": 46.0, - "step": 25052 - }, - { - "epoch": 4.034622971939289, - "grad_norm": 0.00553166214376688, - "learning_rate": 0.00019999197417263566, - "loss": 46.0, - "step": 25053 - }, - { - "epoch": 4.034784009018076, - "grad_norm": 0.016203725710511208, - "learning_rate": 0.00019999197353166766, - "loss": 46.0, - "step": 25054 - }, - { - "epoch": 4.034945046096864, - "grad_norm": 0.008070418611168861, - "learning_rate": 0.00019999197289067414, - "loss": 46.0, - "step": 25055 - }, - { - "epoch": 4.035106083175651, - "grad_norm": 0.01761159673333168, - "learning_rate": 0.000199991972249655, - "loss": 46.0, - "step": 25056 - }, - { - "epoch": 4.035267120254439, - "grad_norm": 0.0008269277750514448, - "learning_rate": 0.00019999197160861028, - "loss": 46.0, - "step": 25057 - }, - { - "epoch": 4.035428157333226, - "grad_norm": 0.010538355447351933, - "learning_rate": 0.00019999197096753994, - "loss": 46.0, - "step": 25058 - }, - { - "epoch": 4.0355891944120135, - "grad_norm": 0.0006135565345175564, - "learning_rate": 0.00019999197032644403, - "loss": 46.0, - "step": 25059 - }, - { - "epoch": 4.035750231490801, - "grad_norm": 0.002369600348174572, - "learning_rate": 0.00019999196968532252, - "loss": 46.0, - "step": 25060 - }, - { - "epoch": 4.035911268569588, - "grad_norm": 0.0025276204105466604, - "learning_rate": 0.00019999196904417542, - "loss": 46.0, - "step": 25061 - }, - { - "epoch": 4.036072305648376, - "grad_norm": 0.007783102337270975, - "learning_rate": 0.00019999196840300273, - "loss": 46.0, - "step": 25062 - }, - { - "epoch": 4.036233342727163, - "grad_norm": 0.002237935084849596, - "learning_rate": 0.00019999196776180443, - "loss": 46.0, - "step": 25063 - }, - { - "epoch": 4.036394379805951, - "grad_norm": 0.0032588159665465355, - "learning_rate": 0.00019999196712058054, - "loss": 46.0, - "step": 25064 - }, - { - "epoch": 4.036555416884737, - "grad_norm": 0.0023829364217817783, - "learning_rate": 0.00019999196647933109, - "loss": 46.0, - "step": 25065 - }, - { - "epoch": 4.036716453963525, - "grad_norm": 0.020837897434830666, - "learning_rate": 0.000199991965838056, - "loss": 46.0, - "step": 25066 - }, - { - "epoch": 4.036877491042312, - "grad_norm": 0.004435418639332056, - "learning_rate": 0.00019999196519675534, - "loss": 46.0, - "step": 25067 - }, - { - "epoch": 4.0370385281211, - "grad_norm": 0.0006732947658747435, - "learning_rate": 0.00019999196455542908, - "loss": 46.0, - "step": 25068 - }, - { - "epoch": 4.037199565199887, - "grad_norm": 0.004389289300888777, - "learning_rate": 0.00019999196391407725, - "loss": 46.0, - "step": 25069 - }, - { - "epoch": 4.0373606022786745, - "grad_norm": 0.004719639662653208, - "learning_rate": 0.0001999919632726998, - "loss": 46.0, - "step": 25070 - }, - { - "epoch": 4.037521639357462, - "grad_norm": 0.006348883267492056, - "learning_rate": 0.00019999196263129675, - "loss": 46.0, - "step": 25071 - }, - { - "epoch": 4.0376826764362495, - "grad_norm": 0.0005346002290025353, - "learning_rate": 0.00019999196198986817, - "loss": 46.0, - "step": 25072 - }, - { - "epoch": 4.037843713515037, - "grad_norm": 0.01372213289141655, - "learning_rate": 0.00019999196134841394, - "loss": 46.0, - "step": 25073 - }, - { - "epoch": 4.038004750593824, - "grad_norm": 0.0025717909447848797, - "learning_rate": 0.00019999196070693412, - "loss": 46.0, - "step": 25074 - }, - { - "epoch": 4.038165787672612, - "grad_norm": 0.0019021675689145923, - "learning_rate": 0.00019999196006542872, - "loss": 46.0, - "step": 25075 - }, - { - "epoch": 4.038326824751399, - "grad_norm": 0.0017507259035483003, - "learning_rate": 0.0001999919594238977, - "loss": 46.0, - "step": 25076 - }, - { - "epoch": 4.038487861830187, - "grad_norm": 0.0019939723424613476, - "learning_rate": 0.00019999195878234112, - "loss": 46.0, - "step": 25077 - }, - { - "epoch": 4.038648898908974, - "grad_norm": 0.0016106757102534175, - "learning_rate": 0.00019999195814075896, - "loss": 46.0, - "step": 25078 - }, - { - "epoch": 4.038809935987762, - "grad_norm": 0.0068449340760707855, - "learning_rate": 0.00019999195749915115, - "loss": 46.0, - "step": 25079 - }, - { - "epoch": 4.038970973066548, - "grad_norm": 0.007759213447570801, - "learning_rate": 0.00019999195685751778, - "loss": 46.0, - "step": 25080 - }, - { - "epoch": 4.039132010145336, - "grad_norm": 0.0021500838920474052, - "learning_rate": 0.00019999195621585886, - "loss": 46.0, - "step": 25081 - }, - { - "epoch": 4.039293047224123, - "grad_norm": 0.0017113228095695376, - "learning_rate": 0.0001999919555741743, - "loss": 46.0, - "step": 25082 - }, - { - "epoch": 4.0394540843029105, - "grad_norm": 0.0013238689862191677, - "learning_rate": 0.00019999195493246413, - "loss": 46.0, - "step": 25083 - }, - { - "epoch": 4.039615121381698, - "grad_norm": 0.0016637734370306134, - "learning_rate": 0.00019999195429072842, - "loss": 46.0, - "step": 25084 - }, - { - "epoch": 4.039776158460485, - "grad_norm": 0.00834680162370205, - "learning_rate": 0.0001999919536489671, - "loss": 46.0, - "step": 25085 - }, - { - "epoch": 4.039937195539273, - "grad_norm": 0.0008648328948765993, - "learning_rate": 0.00019999195300718015, - "loss": 46.0, - "step": 25086 - }, - { - "epoch": 4.04009823261806, - "grad_norm": 0.0017787946853786707, - "learning_rate": 0.00019999195236536764, - "loss": 46.0, - "step": 25087 - }, - { - "epoch": 4.040259269696848, - "grad_norm": 0.003584889229387045, - "learning_rate": 0.00019999195172352952, - "loss": 46.0, - "step": 25088 - }, - { - "epoch": 4.040420306775635, - "grad_norm": 0.007776655722409487, - "learning_rate": 0.00019999195108166582, - "loss": 46.0, - "step": 25089 - }, - { - "epoch": 4.040581343854423, - "grad_norm": 0.0030605753418058157, - "learning_rate": 0.00019999195043977653, - "loss": 46.0, - "step": 25090 - }, - { - "epoch": 4.04074238093321, - "grad_norm": 0.0029180629644542933, - "learning_rate": 0.00019999194979786165, - "loss": 46.0, - "step": 25091 - }, - { - "epoch": 4.040903418011998, - "grad_norm": 0.001516191172413528, - "learning_rate": 0.00019999194915592115, - "loss": 46.0, - "step": 25092 - }, - { - "epoch": 4.041064455090785, - "grad_norm": 0.0032532494515180588, - "learning_rate": 0.00019999194851395507, - "loss": 46.0, - "step": 25093 - }, - { - "epoch": 4.041225492169572, - "grad_norm": 0.007593729067593813, - "learning_rate": 0.00019999194787196343, - "loss": 46.0, - "step": 25094 - }, - { - "epoch": 4.041386529248359, - "grad_norm": 0.003355221124365926, - "learning_rate": 0.00019999194722994618, - "loss": 46.0, - "step": 25095 - }, - { - "epoch": 4.0415475663271465, - "grad_norm": 0.008278166875243187, - "learning_rate": 0.0001999919465879033, - "loss": 46.0, - "step": 25096 - }, - { - "epoch": 4.041708603405934, - "grad_norm": 0.004897734150290489, - "learning_rate": 0.00019999194594583485, - "loss": 46.0, - "step": 25097 - }, - { - "epoch": 4.041869640484721, - "grad_norm": 0.001760690938681364, - "learning_rate": 0.00019999194530374083, - "loss": 46.0, - "step": 25098 - }, - { - "epoch": 4.042030677563509, - "grad_norm": 0.0021013529039919376, - "learning_rate": 0.0001999919446616212, - "loss": 46.0, - "step": 25099 - }, - { - "epoch": 4.042191714642296, - "grad_norm": 0.006535171065479517, - "learning_rate": 0.00019999194401947596, - "loss": 46.0, - "step": 25100 - }, - { - "epoch": 4.042352751721084, - "grad_norm": 0.0026875522453337908, - "learning_rate": 0.00019999194337730515, - "loss": 46.0, - "step": 25101 - }, - { - "epoch": 4.042513788799871, - "grad_norm": 0.002320497529581189, - "learning_rate": 0.00019999194273510876, - "loss": 46.0, - "step": 25102 - }, - { - "epoch": 4.042674825878659, - "grad_norm": 0.003637721762061119, - "learning_rate": 0.00019999194209288675, - "loss": 46.0, - "step": 25103 - }, - { - "epoch": 4.042835862957446, - "grad_norm": 0.0026988149620592594, - "learning_rate": 0.00019999194145063916, - "loss": 46.0, - "step": 25104 - }, - { - "epoch": 4.042996900036234, - "grad_norm": 0.006968206260353327, - "learning_rate": 0.00019999194080836598, - "loss": 46.0, - "step": 25105 - }, - { - "epoch": 4.043157937115021, - "grad_norm": 0.0019634298514574766, - "learning_rate": 0.00019999194016606718, - "loss": 46.0, - "step": 25106 - }, - { - "epoch": 4.0433189741938085, - "grad_norm": 0.004155104514211416, - "learning_rate": 0.0001999919395237428, - "loss": 46.0, - "step": 25107 - }, - { - "epoch": 4.043480011272596, - "grad_norm": 0.010871573351323605, - "learning_rate": 0.00019999193888139285, - "loss": 46.0, - "step": 25108 - }, - { - "epoch": 4.0436410483513825, - "grad_norm": 0.013126935809850693, - "learning_rate": 0.00019999193823901727, - "loss": 46.0, - "step": 25109 - }, - { - "epoch": 4.04380208543017, - "grad_norm": 0.0015862892614677548, - "learning_rate": 0.00019999193759661613, - "loss": 46.0, - "step": 25110 - }, - { - "epoch": 4.043963122508957, - "grad_norm": 0.03452477976679802, - "learning_rate": 0.0001999919369541894, - "loss": 46.0, - "step": 25111 - }, - { - "epoch": 4.044124159587745, - "grad_norm": 0.013962416909635067, - "learning_rate": 0.00019999193631173705, - "loss": 46.0, - "step": 25112 - }, - { - "epoch": 4.044285196666532, - "grad_norm": 0.0023345299996435642, - "learning_rate": 0.00019999193566925914, - "loss": 46.0, - "step": 25113 - }, - { - "epoch": 4.04444623374532, - "grad_norm": 0.005403893534094095, - "learning_rate": 0.00019999193502675562, - "loss": 46.0, - "step": 25114 - }, - { - "epoch": 4.044607270824107, - "grad_norm": 0.009967776946723461, - "learning_rate": 0.00019999193438422646, - "loss": 46.0, - "step": 25115 - }, - { - "epoch": 4.044768307902895, - "grad_norm": 0.005063614808022976, - "learning_rate": 0.00019999193374167177, - "loss": 46.0, - "step": 25116 - }, - { - "epoch": 4.044929344981682, - "grad_norm": 0.008629816584289074, - "learning_rate": 0.0001999919330990915, - "loss": 46.0, - "step": 25117 - }, - { - "epoch": 4.04509038206047, - "grad_norm": 0.0034128918778151274, - "learning_rate": 0.00019999193245648556, - "loss": 46.0, - "step": 25118 - }, - { - "epoch": 4.045251419139257, - "grad_norm": 0.0012624525697901845, - "learning_rate": 0.0001999919318138541, - "loss": 46.0, - "step": 25119 - }, - { - "epoch": 4.0454124562180445, - "grad_norm": 0.015965472906827927, - "learning_rate": 0.000199991931171197, - "loss": 46.0, - "step": 25120 - }, - { - "epoch": 4.045573493296832, - "grad_norm": 0.006691340357065201, - "learning_rate": 0.00019999193052851435, - "loss": 46.0, - "step": 25121 - }, - { - "epoch": 4.045734530375619, - "grad_norm": 0.0012196802999824286, - "learning_rate": 0.00019999192988580608, - "loss": 46.0, - "step": 25122 - }, - { - "epoch": 4.045895567454407, - "grad_norm": 0.0007714468520134687, - "learning_rate": 0.00019999192924307222, - "loss": 46.0, - "step": 25123 - }, - { - "epoch": 4.046056604533193, - "grad_norm": 0.008993716910481453, - "learning_rate": 0.00019999192860031278, - "loss": 46.0, - "step": 25124 - }, - { - "epoch": 4.046217641611981, - "grad_norm": 0.004828222095966339, - "learning_rate": 0.00019999192795752772, - "loss": 46.0, - "step": 25125 - }, - { - "epoch": 4.046378678690768, - "grad_norm": 0.0037623513489961624, - "learning_rate": 0.0001999919273147171, - "loss": 46.0, - "step": 25126 - }, - { - "epoch": 4.046539715769556, - "grad_norm": 0.004046284593641758, - "learning_rate": 0.00019999192667188086, - "loss": 46.0, - "step": 25127 - }, - { - "epoch": 4.046700752848343, - "grad_norm": 0.005544481333345175, - "learning_rate": 0.00019999192602901904, - "loss": 46.0, - "step": 25128 - }, - { - "epoch": 4.046861789927131, - "grad_norm": 0.008518224582076073, - "learning_rate": 0.00019999192538613163, - "loss": 46.0, - "step": 25129 - }, - { - "epoch": 4.047022827005918, - "grad_norm": 0.009400081820786, - "learning_rate": 0.0001999919247432186, - "loss": 46.0, - "step": 25130 - }, - { - "epoch": 4.0471838640847055, - "grad_norm": 0.00755870109423995, - "learning_rate": 0.00019999192410028002, - "loss": 46.0, - "step": 25131 - }, - { - "epoch": 4.047344901163493, - "grad_norm": 0.004427696578204632, - "learning_rate": 0.0001999919234573158, - "loss": 46.0, - "step": 25132 - }, - { - "epoch": 4.04750593824228, - "grad_norm": 0.003742228029295802, - "learning_rate": 0.00019999192281432604, - "loss": 46.0, - "step": 25133 - }, - { - "epoch": 4.047666975321068, - "grad_norm": 0.005632624961435795, - "learning_rate": 0.00019999192217131067, - "loss": 46.0, - "step": 25134 - }, - { - "epoch": 4.047828012399855, - "grad_norm": 0.0042465548031032085, - "learning_rate": 0.00019999192152826968, - "loss": 46.0, - "step": 25135 - }, - { - "epoch": 4.047989049478643, - "grad_norm": 0.0019142965320497751, - "learning_rate": 0.0001999919208852031, - "loss": 46.0, - "step": 25136 - }, - { - "epoch": 4.04815008655743, - "grad_norm": 0.006928254850208759, - "learning_rate": 0.00019999192024211095, - "loss": 46.0, - "step": 25137 - }, - { - "epoch": 4.048311123636217, - "grad_norm": 0.001466908841393888, - "learning_rate": 0.0001999919195989932, - "loss": 46.0, - "step": 25138 - }, - { - "epoch": 4.048472160715004, - "grad_norm": 0.001917113084346056, - "learning_rate": 0.00019999191895584987, - "loss": 46.0, - "step": 25139 - }, - { - "epoch": 4.048633197793792, - "grad_norm": 0.0011072380002588034, - "learning_rate": 0.00019999191831268095, - "loss": 46.0, - "step": 25140 - }, - { - "epoch": 4.048794234872579, - "grad_norm": 0.0024496724363416433, - "learning_rate": 0.00019999191766948638, - "loss": 46.0, - "step": 25141 - }, - { - "epoch": 4.048955271951367, - "grad_norm": 0.0028447695076465607, - "learning_rate": 0.0001999919170262663, - "loss": 46.0, - "step": 25142 - }, - { - "epoch": 4.049116309030154, - "grad_norm": 0.002107927342876792, - "learning_rate": 0.00019999191638302058, - "loss": 46.0, - "step": 25143 - }, - { - "epoch": 4.0492773461089415, - "grad_norm": 0.006156007759273052, - "learning_rate": 0.00019999191573974928, - "loss": 46.0, - "step": 25144 - }, - { - "epoch": 4.049438383187729, - "grad_norm": 0.0030636582523584366, - "learning_rate": 0.00019999191509645237, - "loss": 46.0, - "step": 25145 - }, - { - "epoch": 4.049599420266516, - "grad_norm": 0.0011971063213422894, - "learning_rate": 0.00019999191445312987, - "loss": 46.0, - "step": 25146 - }, - { - "epoch": 4.049760457345304, - "grad_norm": 0.0026841172948479652, - "learning_rate": 0.0001999919138097818, - "loss": 46.0, - "step": 25147 - }, - { - "epoch": 4.049921494424091, - "grad_norm": 0.00591725530102849, - "learning_rate": 0.00019999191316640813, - "loss": 46.0, - "step": 25148 - }, - { - "epoch": 4.050082531502879, - "grad_norm": 0.0026979632675647736, - "learning_rate": 0.00019999191252300885, - "loss": 46.0, - "step": 25149 - }, - { - "epoch": 4.050243568581666, - "grad_norm": 0.001508760149590671, - "learning_rate": 0.00019999191187958397, - "loss": 46.0, - "step": 25150 - }, - { - "epoch": 4.050404605660454, - "grad_norm": 0.0017535273218527436, - "learning_rate": 0.0001999919112361335, - "loss": 46.0, - "step": 25151 - }, - { - "epoch": 4.050565642739241, - "grad_norm": 0.009976713918149471, - "learning_rate": 0.00019999191059265746, - "loss": 46.0, - "step": 25152 - }, - { - "epoch": 4.050726679818028, - "grad_norm": 0.007307478692382574, - "learning_rate": 0.00019999190994915585, - "loss": 46.0, - "step": 25153 - }, - { - "epoch": 4.050887716896815, - "grad_norm": 0.002895388752222061, - "learning_rate": 0.0001999919093056286, - "loss": 46.0, - "step": 25154 - }, - { - "epoch": 4.051048753975603, - "grad_norm": 0.003612358821555972, - "learning_rate": 0.00019999190866207576, - "loss": 46.0, - "step": 25155 - }, - { - "epoch": 4.05120979105439, - "grad_norm": 0.006851687096059322, - "learning_rate": 0.00019999190801849736, - "loss": 46.0, - "step": 25156 - }, - { - "epoch": 4.0513708281331775, - "grad_norm": 0.0065048676915466785, - "learning_rate": 0.00019999190737489335, - "loss": 46.0, - "step": 25157 - }, - { - "epoch": 4.051531865211965, - "grad_norm": 0.002576471772044897, - "learning_rate": 0.00019999190673126372, - "loss": 46.0, - "step": 25158 - }, - { - "epoch": 4.051692902290752, - "grad_norm": 0.0021585344802588224, - "learning_rate": 0.00019999190608760854, - "loss": 46.0, - "step": 25159 - }, - { - "epoch": 4.05185393936954, - "grad_norm": 0.0053537990897893906, - "learning_rate": 0.00019999190544392774, - "loss": 46.0, - "step": 25160 - }, - { - "epoch": 4.052014976448327, - "grad_norm": 0.008653052151203156, - "learning_rate": 0.00019999190480022138, - "loss": 46.0, - "step": 25161 - }, - { - "epoch": 4.052176013527115, - "grad_norm": 0.0014121340354904532, - "learning_rate": 0.00019999190415648937, - "loss": 46.0, - "step": 25162 - }, - { - "epoch": 4.052337050605902, - "grad_norm": 0.0024111121892929077, - "learning_rate": 0.00019999190351273184, - "loss": 46.0, - "step": 25163 - }, - { - "epoch": 4.05249808768469, - "grad_norm": 0.013518192805349827, - "learning_rate": 0.00019999190286894863, - "loss": 46.0, - "step": 25164 - }, - { - "epoch": 4.052659124763477, - "grad_norm": 0.004137784242630005, - "learning_rate": 0.00019999190222513987, - "loss": 46.0, - "step": 25165 - }, - { - "epoch": 4.052820161842265, - "grad_norm": 0.0022434331476688385, - "learning_rate": 0.00019999190158130555, - "loss": 46.0, - "step": 25166 - }, - { - "epoch": 4.052981198921051, - "grad_norm": 0.007980064488947392, - "learning_rate": 0.0001999919009374456, - "loss": 46.0, - "step": 25167 - }, - { - "epoch": 4.053142235999839, - "grad_norm": 0.002234174171462655, - "learning_rate": 0.00019999190029356006, - "loss": 46.0, - "step": 25168 - }, - { - "epoch": 4.053303273078626, - "grad_norm": 0.00829023215919733, - "learning_rate": 0.00019999189964964894, - "loss": 46.0, - "step": 25169 - }, - { - "epoch": 4.0534643101574135, - "grad_norm": 0.002040259772911668, - "learning_rate": 0.00019999189900571222, - "loss": 46.0, - "step": 25170 - }, - { - "epoch": 4.053625347236201, - "grad_norm": 0.0046273223124444485, - "learning_rate": 0.00019999189836174993, - "loss": 46.0, - "step": 25171 - }, - { - "epoch": 4.053786384314988, - "grad_norm": 0.006136543117463589, - "learning_rate": 0.000199991897717762, - "loss": 46.0, - "step": 25172 - }, - { - "epoch": 4.053947421393776, - "grad_norm": 0.0066335368901491165, - "learning_rate": 0.0001999918970737485, - "loss": 46.0, - "step": 25173 - }, - { - "epoch": 4.054108458472563, - "grad_norm": 0.0004941087099723518, - "learning_rate": 0.0001999918964297094, - "loss": 46.0, - "step": 25174 - }, - { - "epoch": 4.054269495551351, - "grad_norm": 0.002243608236312866, - "learning_rate": 0.00019999189578564472, - "loss": 46.0, - "step": 25175 - }, - { - "epoch": 4.054430532630138, - "grad_norm": 0.007276506163179874, - "learning_rate": 0.00019999189514155444, - "loss": 46.0, - "step": 25176 - }, - { - "epoch": 4.054591569708926, - "grad_norm": 0.00997980684041977, - "learning_rate": 0.0001999918944974386, - "loss": 46.0, - "step": 25177 - }, - { - "epoch": 4.054752606787713, - "grad_norm": 0.0020957200322300196, - "learning_rate": 0.00019999189385329713, - "loss": 46.0, - "step": 25178 - }, - { - "epoch": 4.0549136438665005, - "grad_norm": 0.003410806180909276, - "learning_rate": 0.0001999918932091301, - "loss": 46.0, - "step": 25179 - }, - { - "epoch": 4.055074680945288, - "grad_norm": 0.004063980188220739, - "learning_rate": 0.0001999918925649374, - "loss": 46.0, - "step": 25180 - }, - { - "epoch": 4.0552357180240755, - "grad_norm": 0.0035752933472394943, - "learning_rate": 0.0001999918919207192, - "loss": 46.0, - "step": 25181 - }, - { - "epoch": 4.055396755102862, - "grad_norm": 0.006180457770824432, - "learning_rate": 0.00019999189127647534, - "loss": 46.0, - "step": 25182 - }, - { - "epoch": 4.0555577921816495, - "grad_norm": 0.002256970154121518, - "learning_rate": 0.00019999189063220593, - "loss": 46.0, - "step": 25183 - }, - { - "epoch": 4.055718829260437, - "grad_norm": 0.0028089622501283884, - "learning_rate": 0.00019999188998791093, - "loss": 46.0, - "step": 25184 - }, - { - "epoch": 4.055879866339224, - "grad_norm": 0.004492702428251505, - "learning_rate": 0.0001999918893435903, - "loss": 46.0, - "step": 25185 - }, - { - "epoch": 4.056040903418012, - "grad_norm": 0.009128935635089874, - "learning_rate": 0.0001999918886992441, - "loss": 46.0, - "step": 25186 - }, - { - "epoch": 4.056201940496799, - "grad_norm": 0.006036663427948952, - "learning_rate": 0.0001999918880548723, - "loss": 46.0, - "step": 25187 - }, - { - "epoch": 4.056362977575587, - "grad_norm": 0.0018259697826579213, - "learning_rate": 0.00019999188741047491, - "loss": 46.0, - "step": 25188 - }, - { - "epoch": 4.056524014654374, - "grad_norm": 0.0036418833769857883, - "learning_rate": 0.00019999188676605192, - "loss": 46.0, - "step": 25189 - }, - { - "epoch": 4.056685051733162, - "grad_norm": 0.004148347303271294, - "learning_rate": 0.00019999188612160334, - "loss": 46.0, - "step": 25190 - }, - { - "epoch": 4.056846088811949, - "grad_norm": 0.00329596851952374, - "learning_rate": 0.00019999188547712918, - "loss": 46.0, - "step": 25191 - }, - { - "epoch": 4.0570071258907365, - "grad_norm": 0.0062295617535710335, - "learning_rate": 0.00019999188483262942, - "loss": 46.0, - "step": 25192 - }, - { - "epoch": 4.057168162969524, - "grad_norm": 0.007119002752006054, - "learning_rate": 0.00019999188418810406, - "loss": 46.0, - "step": 25193 - }, - { - "epoch": 4.057329200048311, - "grad_norm": 0.001718393643386662, - "learning_rate": 0.0001999918835435531, - "loss": 46.0, - "step": 25194 - }, - { - "epoch": 4.057490237127099, - "grad_norm": 0.02157345972955227, - "learning_rate": 0.0001999918828989766, - "loss": 46.0, - "step": 25195 - }, - { - "epoch": 4.057651274205886, - "grad_norm": 0.0031764970626682043, - "learning_rate": 0.00019999188225437443, - "loss": 46.0, - "step": 25196 - }, - { - "epoch": 4.057812311284673, - "grad_norm": 0.0010836933506652713, - "learning_rate": 0.00019999188160974672, - "loss": 46.0, - "step": 25197 - }, - { - "epoch": 4.05797334836346, - "grad_norm": 0.009517136961221695, - "learning_rate": 0.0001999918809650934, - "loss": 46.0, - "step": 25198 - }, - { - "epoch": 4.058134385442248, - "grad_norm": 0.0042744833044707775, - "learning_rate": 0.0001999918803204145, - "loss": 46.0, - "step": 25199 - }, - { - "epoch": 4.058295422521035, - "grad_norm": 0.0010750008514150977, - "learning_rate": 0.00019999187967571, - "loss": 46.0, - "step": 25200 - }, - { - "epoch": 4.058456459599823, - "grad_norm": 0.0023556307423859835, - "learning_rate": 0.0001999918790309799, - "loss": 46.0, - "step": 25201 - }, - { - "epoch": 4.05861749667861, - "grad_norm": 0.0028152463492006063, - "learning_rate": 0.0001999918783862242, - "loss": 46.0, - "step": 25202 - }, - { - "epoch": 4.058778533757398, - "grad_norm": 0.001785579021088779, - "learning_rate": 0.00019999187774144293, - "loss": 46.0, - "step": 25203 - }, - { - "epoch": 4.058939570836185, - "grad_norm": 0.006174648180603981, - "learning_rate": 0.00019999187709663605, - "loss": 46.0, - "step": 25204 - }, - { - "epoch": 4.0591006079149725, - "grad_norm": 0.008724072016775608, - "learning_rate": 0.00019999187645180358, - "loss": 46.0, - "step": 25205 - }, - { - "epoch": 4.05926164499376, - "grad_norm": 0.00929219275712967, - "learning_rate": 0.00019999187580694555, - "loss": 46.0, - "step": 25206 - }, - { - "epoch": 4.059422682072547, - "grad_norm": 0.0054887025617063046, - "learning_rate": 0.00019999187516206188, - "loss": 46.0, - "step": 25207 - }, - { - "epoch": 4.059583719151335, - "grad_norm": 0.003904205746948719, - "learning_rate": 0.00019999187451715263, - "loss": 46.0, - "step": 25208 - }, - { - "epoch": 4.059744756230122, - "grad_norm": 0.0022380314767360687, - "learning_rate": 0.00019999187387221778, - "loss": 46.0, - "step": 25209 - }, - { - "epoch": 4.05990579330891, - "grad_norm": 0.0006175815942697227, - "learning_rate": 0.00019999187322725735, - "loss": 46.0, - "step": 25210 - }, - { - "epoch": 4.060066830387696, - "grad_norm": 0.005174033343791962, - "learning_rate": 0.00019999187258227133, - "loss": 46.0, - "step": 25211 - }, - { - "epoch": 4.060227867466484, - "grad_norm": 0.002573292935267091, - "learning_rate": 0.00019999187193725973, - "loss": 46.0, - "step": 25212 - }, - { - "epoch": 4.060388904545271, - "grad_norm": 0.003381407354027033, - "learning_rate": 0.0001999918712922225, - "loss": 46.0, - "step": 25213 - }, - { - "epoch": 4.060549941624059, - "grad_norm": 0.0033199312165379524, - "learning_rate": 0.0001999918706471597, - "loss": 46.0, - "step": 25214 - }, - { - "epoch": 4.060710978702846, - "grad_norm": 0.005421431735157967, - "learning_rate": 0.0001999918700020713, - "loss": 46.0, - "step": 25215 - }, - { - "epoch": 4.060872015781634, - "grad_norm": 0.005486281588673592, - "learning_rate": 0.0001999918693569573, - "loss": 46.0, - "step": 25216 - }, - { - "epoch": 4.061033052860421, - "grad_norm": 0.007144741248339415, - "learning_rate": 0.00019999186871181773, - "loss": 46.0, - "step": 25217 - }, - { - "epoch": 4.0611940899392085, - "grad_norm": 0.018770670518279076, - "learning_rate": 0.00019999186806665255, - "loss": 46.0, - "step": 25218 - }, - { - "epoch": 4.061355127017996, - "grad_norm": 0.0036431788466870785, - "learning_rate": 0.0001999918674214618, - "loss": 46.0, - "step": 25219 - }, - { - "epoch": 4.061516164096783, - "grad_norm": 0.0012442056322470307, - "learning_rate": 0.00019999186677624543, - "loss": 46.0, - "step": 25220 - }, - { - "epoch": 4.061677201175571, - "grad_norm": 0.0025082076899707317, - "learning_rate": 0.00019999186613100348, - "loss": 46.0, - "step": 25221 - }, - { - "epoch": 4.061838238254358, - "grad_norm": 0.006033538840711117, - "learning_rate": 0.00019999186548573595, - "loss": 46.0, - "step": 25222 - }, - { - "epoch": 4.061999275333146, - "grad_norm": 0.007237591315060854, - "learning_rate": 0.0001999918648404428, - "loss": 46.0, - "step": 25223 - }, - { - "epoch": 4.062160312411933, - "grad_norm": 0.011093477718532085, - "learning_rate": 0.00019999186419512408, - "loss": 46.0, - "step": 25224 - }, - { - "epoch": 4.062321349490721, - "grad_norm": 0.001368444412946701, - "learning_rate": 0.00019999186354977976, - "loss": 46.0, - "step": 25225 - }, - { - "epoch": 4.062482386569507, - "grad_norm": 0.00213375105522573, - "learning_rate": 0.00019999186290440982, - "loss": 46.0, - "step": 25226 - }, - { - "epoch": 4.062643423648295, - "grad_norm": 0.0029317731969058514, - "learning_rate": 0.00019999186225901433, - "loss": 46.0, - "step": 25227 - }, - { - "epoch": 4.062804460727082, - "grad_norm": 0.0009794340003281832, - "learning_rate": 0.00019999186161359322, - "loss": 46.0, - "step": 25228 - }, - { - "epoch": 4.06296549780587, - "grad_norm": 0.00515911728143692, - "learning_rate": 0.00019999186096814655, - "loss": 46.0, - "step": 25229 - }, - { - "epoch": 4.063126534884657, - "grad_norm": 0.010796592570841312, - "learning_rate": 0.00019999186032267424, - "loss": 46.0, - "step": 25230 - }, - { - "epoch": 4.0632875719634445, - "grad_norm": 0.0009916980052366853, - "learning_rate": 0.00019999185967717637, - "loss": 46.0, - "step": 25231 - }, - { - "epoch": 4.063448609042232, - "grad_norm": 0.002051148796454072, - "learning_rate": 0.0001999918590316529, - "loss": 46.0, - "step": 25232 - }, - { - "epoch": 4.063609646121019, - "grad_norm": 0.0016804755432531238, - "learning_rate": 0.00019999185838610385, - "loss": 46.0, - "step": 25233 - }, - { - "epoch": 4.063770683199807, - "grad_norm": 0.000529679236933589, - "learning_rate": 0.0001999918577405292, - "loss": 46.0, - "step": 25234 - }, - { - "epoch": 4.063931720278594, - "grad_norm": 0.0062715704552829266, - "learning_rate": 0.00019999185709492894, - "loss": 46.0, - "step": 25235 - }, - { - "epoch": 4.064092757357382, - "grad_norm": 0.0011910318862646818, - "learning_rate": 0.00019999185644930308, - "loss": 46.0, - "step": 25236 - }, - { - "epoch": 4.064253794436169, - "grad_norm": 0.009606339037418365, - "learning_rate": 0.00019999185580365164, - "loss": 46.0, - "step": 25237 - }, - { - "epoch": 4.064414831514957, - "grad_norm": 0.004720029886811972, - "learning_rate": 0.0001999918551579746, - "loss": 46.0, - "step": 25238 - }, - { - "epoch": 4.064575868593744, - "grad_norm": 0.008531820960342884, - "learning_rate": 0.000199991854512272, - "loss": 46.0, - "step": 25239 - }, - { - "epoch": 4.064736905672531, - "grad_norm": 0.001511663431301713, - "learning_rate": 0.00019999185386654377, - "loss": 46.0, - "step": 25240 - }, - { - "epoch": 4.064897942751318, - "grad_norm": 0.014855904504656792, - "learning_rate": 0.00019999185322078998, - "loss": 46.0, - "step": 25241 - }, - { - "epoch": 4.065058979830106, - "grad_norm": 0.0005880754324607551, - "learning_rate": 0.00019999185257501057, - "loss": 46.0, - "step": 25242 - }, - { - "epoch": 4.065220016908893, - "grad_norm": 0.0004652971983887255, - "learning_rate": 0.00019999185192920557, - "loss": 46.0, - "step": 25243 - }, - { - "epoch": 4.0653810539876805, - "grad_norm": 0.0011934635695070028, - "learning_rate": 0.00019999185128337499, - "loss": 46.0, - "step": 25244 - }, - { - "epoch": 4.065542091066468, - "grad_norm": 0.0019407463259994984, - "learning_rate": 0.00019999185063751881, - "loss": 46.0, - "step": 25245 - }, - { - "epoch": 4.065703128145255, - "grad_norm": 0.0012099232990294695, - "learning_rate": 0.00019999184999163703, - "loss": 46.0, - "step": 25246 - }, - { - "epoch": 4.065864165224043, - "grad_norm": 0.023715445771813393, - "learning_rate": 0.00019999184934572968, - "loss": 46.0, - "step": 25247 - }, - { - "epoch": 4.06602520230283, - "grad_norm": 0.014227522537112236, - "learning_rate": 0.0001999918486997967, - "loss": 46.0, - "step": 25248 - }, - { - "epoch": 4.066186239381618, - "grad_norm": 0.0035511336755007505, - "learning_rate": 0.00019999184805383818, - "loss": 46.0, - "step": 25249 - }, - { - "epoch": 4.066347276460405, - "grad_norm": 0.005493625998497009, - "learning_rate": 0.00019999184740785401, - "loss": 46.0, - "step": 25250 - }, - { - "epoch": 4.066508313539193, - "grad_norm": 0.005144867580384016, - "learning_rate": 0.0001999918467618443, - "loss": 46.0, - "step": 25251 - }, - { - "epoch": 4.06666935061798, - "grad_norm": 0.0035444507375359535, - "learning_rate": 0.00019999184611580896, - "loss": 46.0, - "step": 25252 - }, - { - "epoch": 4.0668303876967675, - "grad_norm": 0.0022409844677895308, - "learning_rate": 0.00019999184546974803, - "loss": 46.0, - "step": 25253 - }, - { - "epoch": 4.066991424775555, - "grad_norm": 0.0017127110622823238, - "learning_rate": 0.00019999184482366152, - "loss": 46.0, - "step": 25254 - }, - { - "epoch": 4.0671524618543415, - "grad_norm": 0.004753213841468096, - "learning_rate": 0.00019999184417754943, - "loss": 46.0, - "step": 25255 - }, - { - "epoch": 4.067313498933129, - "grad_norm": 0.0015547281363978982, - "learning_rate": 0.00019999184353141172, - "loss": 46.0, - "step": 25256 - }, - { - "epoch": 4.067474536011916, - "grad_norm": 0.0014243695186451077, - "learning_rate": 0.00019999184288524844, - "loss": 46.0, - "step": 25257 - }, - { - "epoch": 4.067635573090704, - "grad_norm": 0.0010112738236784935, - "learning_rate": 0.00019999184223905956, - "loss": 46.0, - "step": 25258 - }, - { - "epoch": 4.067796610169491, - "grad_norm": 0.013485829345881939, - "learning_rate": 0.00019999184159284506, - "loss": 46.0, - "step": 25259 - }, - { - "epoch": 4.067957647248279, - "grad_norm": 0.0068550691939890385, - "learning_rate": 0.000199991840946605, - "loss": 46.0, - "step": 25260 - }, - { - "epoch": 4.068118684327066, - "grad_norm": 0.000990360975265503, - "learning_rate": 0.00019999184030033932, - "loss": 46.0, - "step": 25261 - }, - { - "epoch": 4.068279721405854, - "grad_norm": 0.002246738411486149, - "learning_rate": 0.00019999183965404806, - "loss": 46.0, - "step": 25262 - }, - { - "epoch": 4.068440758484641, - "grad_norm": 0.005762267857789993, - "learning_rate": 0.00019999183900773124, - "loss": 46.0, - "step": 25263 - }, - { - "epoch": 4.068601795563429, - "grad_norm": 0.0022816245909780264, - "learning_rate": 0.0001999918383613888, - "loss": 46.0, - "step": 25264 - }, - { - "epoch": 4.068762832642216, - "grad_norm": 0.0005689717945642769, - "learning_rate": 0.00019999183771502075, - "loss": 46.0, - "step": 25265 - }, - { - "epoch": 4.0689238697210035, - "grad_norm": 0.0046620857901871204, - "learning_rate": 0.00019999183706862712, - "loss": 46.0, - "step": 25266 - }, - { - "epoch": 4.069084906799791, - "grad_norm": 0.002422595862299204, - "learning_rate": 0.0001999918364222079, - "loss": 46.0, - "step": 25267 - }, - { - "epoch": 4.069245943878578, - "grad_norm": 0.007057536393404007, - "learning_rate": 0.00019999183577576308, - "loss": 46.0, - "step": 25268 - }, - { - "epoch": 4.069406980957366, - "grad_norm": 0.003205882851034403, - "learning_rate": 0.00019999183512929268, - "loss": 46.0, - "step": 25269 - }, - { - "epoch": 4.069568018036152, - "grad_norm": 0.008235041052103043, - "learning_rate": 0.00019999183448279667, - "loss": 46.0, - "step": 25270 - }, - { - "epoch": 4.06972905511494, - "grad_norm": 0.0087369279935956, - "learning_rate": 0.0001999918338362751, - "loss": 46.0, - "step": 25271 - }, - { - "epoch": 4.069890092193727, - "grad_norm": 0.003213796764612198, - "learning_rate": 0.0001999918331897279, - "loss": 46.0, - "step": 25272 - }, - { - "epoch": 4.070051129272515, - "grad_norm": 0.0017283365596085787, - "learning_rate": 0.00019999183254315513, - "loss": 46.0, - "step": 25273 - }, - { - "epoch": 4.070212166351302, - "grad_norm": 0.004078635945916176, - "learning_rate": 0.00019999183189655677, - "loss": 46.0, - "step": 25274 - }, - { - "epoch": 4.07037320343009, - "grad_norm": 0.0010716464603319764, - "learning_rate": 0.0001999918312499328, - "loss": 46.0, - "step": 25275 - }, - { - "epoch": 4.070534240508877, - "grad_norm": 0.0020703410264104605, - "learning_rate": 0.00019999183060328326, - "loss": 46.0, - "step": 25276 - }, - { - "epoch": 4.070695277587665, - "grad_norm": 0.003401249647140503, - "learning_rate": 0.0001999918299566081, - "loss": 46.0, - "step": 25277 - }, - { - "epoch": 4.070856314666452, - "grad_norm": 0.002651548944413662, - "learning_rate": 0.00019999182930990737, - "loss": 46.0, - "step": 25278 - }, - { - "epoch": 4.0710173517452395, - "grad_norm": 0.0022555135656148195, - "learning_rate": 0.00019999182866318105, - "loss": 46.0, - "step": 25279 - }, - { - "epoch": 4.071178388824027, - "grad_norm": 0.0022942586801946163, - "learning_rate": 0.0001999918280164291, - "loss": 46.0, - "step": 25280 - }, - { - "epoch": 4.071339425902814, - "grad_norm": 0.006510575767606497, - "learning_rate": 0.00019999182736965158, - "loss": 46.0, - "step": 25281 - }, - { - "epoch": 4.071500462981602, - "grad_norm": 0.00912565365433693, - "learning_rate": 0.0001999918267228485, - "loss": 46.0, - "step": 25282 - }, - { - "epoch": 4.071661500060389, - "grad_norm": 0.009387057274580002, - "learning_rate": 0.00019999182607601977, - "loss": 46.0, - "step": 25283 - }, - { - "epoch": 4.071822537139177, - "grad_norm": 0.008505571633577347, - "learning_rate": 0.00019999182542916545, - "loss": 46.0, - "step": 25284 - }, - { - "epoch": 4.071983574217963, - "grad_norm": 0.011253810487687588, - "learning_rate": 0.00019999182478228558, - "loss": 46.0, - "step": 25285 - }, - { - "epoch": 4.072144611296751, - "grad_norm": 0.006155780982226133, - "learning_rate": 0.00019999182413538011, - "loss": 46.0, - "step": 25286 - }, - { - "epoch": 4.072305648375538, - "grad_norm": 0.003153882920742035, - "learning_rate": 0.00019999182348844904, - "loss": 46.0, - "step": 25287 - }, - { - "epoch": 4.072466685454326, - "grad_norm": 0.0036026728339493275, - "learning_rate": 0.00019999182284149235, - "loss": 46.0, - "step": 25288 - }, - { - "epoch": 4.072627722533113, - "grad_norm": 0.0011451862519606948, - "learning_rate": 0.0001999918221945101, - "loss": 46.0, - "step": 25289 - }, - { - "epoch": 4.072788759611901, - "grad_norm": 0.010938707739114761, - "learning_rate": 0.00019999182154750223, - "loss": 46.0, - "step": 25290 - }, - { - "epoch": 4.072949796690688, - "grad_norm": 0.004895662888884544, - "learning_rate": 0.0001999918209004688, - "loss": 46.0, - "step": 25291 - }, - { - "epoch": 4.0731108337694755, - "grad_norm": 0.01065182127058506, - "learning_rate": 0.00019999182025340977, - "loss": 46.0, - "step": 25292 - }, - { - "epoch": 4.073271870848263, - "grad_norm": 0.0026786313392221928, - "learning_rate": 0.0001999918196063251, - "loss": 46.0, - "step": 25293 - }, - { - "epoch": 4.07343290792705, - "grad_norm": 0.00778460456058383, - "learning_rate": 0.0001999918189592149, - "loss": 46.0, - "step": 25294 - }, - { - "epoch": 4.073593945005838, - "grad_norm": 0.003203046740964055, - "learning_rate": 0.00019999181831207907, - "loss": 46.0, - "step": 25295 - }, - { - "epoch": 4.073754982084625, - "grad_norm": 0.001484760083258152, - "learning_rate": 0.00019999181766491768, - "loss": 46.0, - "step": 25296 - }, - { - "epoch": 4.073916019163413, - "grad_norm": 0.006206273566931486, - "learning_rate": 0.00019999181701773065, - "loss": 46.0, - "step": 25297 - }, - { - "epoch": 4.0740770562422, - "grad_norm": 0.0030963770113885403, - "learning_rate": 0.00019999181637051806, - "loss": 46.0, - "step": 25298 - }, - { - "epoch": 4.074238093320987, - "grad_norm": 0.0029490015003830194, - "learning_rate": 0.0001999918157232799, - "loss": 46.0, - "step": 25299 - }, - { - "epoch": 4.074399130399774, - "grad_norm": 0.005219497252255678, - "learning_rate": 0.0001999918150760161, - "loss": 46.0, - "step": 25300 - }, - { - "epoch": 4.074560167478562, - "grad_norm": 0.0019559382926672697, - "learning_rate": 0.00019999181442872672, - "loss": 46.0, - "step": 25301 - }, - { - "epoch": 4.074721204557349, - "grad_norm": 0.0020601944997906685, - "learning_rate": 0.00019999181378141175, - "loss": 46.0, - "step": 25302 - }, - { - "epoch": 4.0748822416361365, - "grad_norm": 0.005165477283298969, - "learning_rate": 0.0001999918131340712, - "loss": 46.0, - "step": 25303 - }, - { - "epoch": 4.075043278714924, - "grad_norm": 0.002091160509735346, - "learning_rate": 0.00019999181248670503, - "loss": 46.0, - "step": 25304 - }, - { - "epoch": 4.0752043157937115, - "grad_norm": 0.012431016191840172, - "learning_rate": 0.0001999918118393133, - "loss": 46.0, - "step": 25305 - }, - { - "epoch": 4.075365352872499, - "grad_norm": 0.0012369583128020167, - "learning_rate": 0.00019999181119189597, - "loss": 46.0, - "step": 25306 - }, - { - "epoch": 4.075526389951286, - "grad_norm": 0.011434996500611305, - "learning_rate": 0.000199991810544453, - "loss": 46.0, - "step": 25307 - }, - { - "epoch": 4.075687427030074, - "grad_norm": 0.005263522733002901, - "learning_rate": 0.0001999918098969845, - "loss": 46.0, - "step": 25308 - }, - { - "epoch": 4.075848464108861, - "grad_norm": 0.0013070355635136366, - "learning_rate": 0.0001999918092494904, - "loss": 46.0, - "step": 25309 - }, - { - "epoch": 4.076009501187649, - "grad_norm": 0.001119088614359498, - "learning_rate": 0.00019999180860197068, - "loss": 46.0, - "step": 25310 - }, - { - "epoch": 4.076170538266436, - "grad_norm": 0.00222882186062634, - "learning_rate": 0.00019999180795442537, - "loss": 46.0, - "step": 25311 - }, - { - "epoch": 4.076331575345224, - "grad_norm": 0.0012359506217762828, - "learning_rate": 0.00019999180730685448, - "loss": 46.0, - "step": 25312 - }, - { - "epoch": 4.076492612424011, - "grad_norm": 0.012910140678286552, - "learning_rate": 0.000199991806659258, - "loss": 46.0, - "step": 25313 - }, - { - "epoch": 4.076653649502798, - "grad_norm": 0.003943998832255602, - "learning_rate": 0.0001999918060116359, - "loss": 46.0, - "step": 25314 - }, - { - "epoch": 4.076814686581585, - "grad_norm": 0.002708324696868658, - "learning_rate": 0.00019999180536398826, - "loss": 46.0, - "step": 25315 - }, - { - "epoch": 4.0769757236603725, - "grad_norm": 0.0022963974624872208, - "learning_rate": 0.000199991804716315, - "loss": 46.0, - "step": 25316 - }, - { - "epoch": 4.07713676073916, - "grad_norm": 0.003000213298946619, - "learning_rate": 0.00019999180406861614, - "loss": 46.0, - "step": 25317 - }, - { - "epoch": 4.077297797817947, - "grad_norm": 0.0036189432721585035, - "learning_rate": 0.00019999180342089167, - "loss": 46.0, - "step": 25318 - }, - { - "epoch": 4.077458834896735, - "grad_norm": 0.0041221617721021175, - "learning_rate": 0.0001999918027731416, - "loss": 46.0, - "step": 25319 - }, - { - "epoch": 4.077619871975522, - "grad_norm": 0.006861657369881868, - "learning_rate": 0.000199991802125366, - "loss": 46.0, - "step": 25320 - }, - { - "epoch": 4.07778090905431, - "grad_norm": 0.0033206609077751637, - "learning_rate": 0.00019999180147756477, - "loss": 46.0, - "step": 25321 - }, - { - "epoch": 4.077941946133097, - "grad_norm": 0.0022297140676528215, - "learning_rate": 0.00019999180082973795, - "loss": 46.0, - "step": 25322 - }, - { - "epoch": 4.078102983211885, - "grad_norm": 0.0053413985297083855, - "learning_rate": 0.00019999180018188552, - "loss": 46.0, - "step": 25323 - }, - { - "epoch": 4.078264020290672, - "grad_norm": 0.016457371413707733, - "learning_rate": 0.00019999179953400753, - "loss": 46.0, - "step": 25324 - }, - { - "epoch": 4.07842505736946, - "grad_norm": 0.01563914865255356, - "learning_rate": 0.00019999179888610392, - "loss": 46.0, - "step": 25325 - }, - { - "epoch": 4.078586094448247, - "grad_norm": 0.003230681410059333, - "learning_rate": 0.0001999917982381747, - "loss": 46.0, - "step": 25326 - }, - { - "epoch": 4.0787471315270345, - "grad_norm": 0.0024274399038404226, - "learning_rate": 0.00019999179759021992, - "loss": 46.0, - "step": 25327 - }, - { - "epoch": 4.078908168605821, - "grad_norm": 0.015444042161107063, - "learning_rate": 0.00019999179694223955, - "loss": 46.0, - "step": 25328 - }, - { - "epoch": 4.0790692056846085, - "grad_norm": 0.007372250314801931, - "learning_rate": 0.00019999179629423357, - "loss": 46.0, - "step": 25329 - }, - { - "epoch": 4.079230242763396, - "grad_norm": 0.002808860968798399, - "learning_rate": 0.00019999179564620203, - "loss": 46.0, - "step": 25330 - }, - { - "epoch": 4.079391279842183, - "grad_norm": 0.011987869627773762, - "learning_rate": 0.00019999179499814487, - "loss": 46.0, - "step": 25331 - }, - { - "epoch": 4.079552316920971, - "grad_norm": 0.0008494609501212835, - "learning_rate": 0.00019999179435006213, - "loss": 46.0, - "step": 25332 - }, - { - "epoch": 4.079713353999758, - "grad_norm": 0.002371512819081545, - "learning_rate": 0.00019999179370195375, - "loss": 46.0, - "step": 25333 - }, - { - "epoch": 4.079874391078546, - "grad_norm": 0.004722840152680874, - "learning_rate": 0.00019999179305381983, - "loss": 46.0, - "step": 25334 - }, - { - "epoch": 4.080035428157333, - "grad_norm": 0.0013415272114798427, - "learning_rate": 0.0001999917924056603, - "loss": 46.0, - "step": 25335 - }, - { - "epoch": 4.080196465236121, - "grad_norm": 0.0025180336087942123, - "learning_rate": 0.00019999179175747518, - "loss": 46.0, - "step": 25336 - }, - { - "epoch": 4.080357502314908, - "grad_norm": 0.005454804748296738, - "learning_rate": 0.00019999179110926447, - "loss": 46.0, - "step": 25337 - }, - { - "epoch": 4.080518539393696, - "grad_norm": 0.0020784582011401653, - "learning_rate": 0.00019999179046102815, - "loss": 46.0, - "step": 25338 - }, - { - "epoch": 4.080679576472483, - "grad_norm": 0.004487820900976658, - "learning_rate": 0.00019999178981276624, - "loss": 46.0, - "step": 25339 - }, - { - "epoch": 4.0808406135512705, - "grad_norm": 0.004285737406462431, - "learning_rate": 0.00019999178916447878, - "loss": 46.0, - "step": 25340 - }, - { - "epoch": 4.081001650630058, - "grad_norm": 0.002617442747578025, - "learning_rate": 0.0001999917885161657, - "loss": 46.0, - "step": 25341 - }, - { - "epoch": 4.081162687708845, - "grad_norm": 0.0018709114519879222, - "learning_rate": 0.00019999178786782703, - "loss": 46.0, - "step": 25342 - }, - { - "epoch": 4.081323724787632, - "grad_norm": 0.0020592729561030865, - "learning_rate": 0.00019999178721946272, - "loss": 46.0, - "step": 25343 - }, - { - "epoch": 4.081484761866419, - "grad_norm": 0.007648243103176355, - "learning_rate": 0.00019999178657107287, - "loss": 46.0, - "step": 25344 - }, - { - "epoch": 4.081645798945207, - "grad_norm": 0.006734162103384733, - "learning_rate": 0.00019999178592265741, - "loss": 46.0, - "step": 25345 - }, - { - "epoch": 4.081806836023994, - "grad_norm": 0.0062074181623756886, - "learning_rate": 0.00019999178527421637, - "loss": 46.0, - "step": 25346 - }, - { - "epoch": 4.081967873102782, - "grad_norm": 0.004024601075798273, - "learning_rate": 0.0001999917846257497, - "loss": 46.0, - "step": 25347 - }, - { - "epoch": 4.082128910181569, - "grad_norm": 0.011393807828426361, - "learning_rate": 0.00019999178397725752, - "loss": 46.0, - "step": 25348 - }, - { - "epoch": 4.082289947260357, - "grad_norm": 0.009684430435299873, - "learning_rate": 0.00019999178332873966, - "loss": 46.0, - "step": 25349 - }, - { - "epoch": 4.082450984339144, - "grad_norm": 0.005655083805322647, - "learning_rate": 0.00019999178268019624, - "loss": 46.0, - "step": 25350 - }, - { - "epoch": 4.082612021417932, - "grad_norm": 0.023631690070033073, - "learning_rate": 0.00019999178203162723, - "loss": 46.0, - "step": 25351 - }, - { - "epoch": 4.082773058496719, - "grad_norm": 0.005316827446222305, - "learning_rate": 0.00019999178138303264, - "loss": 46.0, - "step": 25352 - }, - { - "epoch": 4.0829340955755065, - "grad_norm": 0.008111649192869663, - "learning_rate": 0.00019999178073441243, - "loss": 46.0, - "step": 25353 - }, - { - "epoch": 4.083095132654294, - "grad_norm": 0.007422955706715584, - "learning_rate": 0.00019999178008576666, - "loss": 46.0, - "step": 25354 - }, - { - "epoch": 4.083256169733081, - "grad_norm": 0.005682788789272308, - "learning_rate": 0.00019999177943709527, - "loss": 46.0, - "step": 25355 - }, - { - "epoch": 4.083417206811869, - "grad_norm": 0.006704689469188452, - "learning_rate": 0.00019999177878839828, - "loss": 46.0, - "step": 25356 - }, - { - "epoch": 4.083578243890656, - "grad_norm": 0.006221194285899401, - "learning_rate": 0.00019999177813967572, - "loss": 46.0, - "step": 25357 - }, - { - "epoch": 4.083739280969443, - "grad_norm": 0.006843688897788525, - "learning_rate": 0.00019999177749092755, - "loss": 46.0, - "step": 25358 - }, - { - "epoch": 4.08390031804823, - "grad_norm": 0.00809916015714407, - "learning_rate": 0.00019999177684215379, - "loss": 46.0, - "step": 25359 - }, - { - "epoch": 4.084061355127018, - "grad_norm": 0.0032513048499822617, - "learning_rate": 0.00019999177619335444, - "loss": 46.0, - "step": 25360 - }, - { - "epoch": 4.084222392205805, - "grad_norm": 0.007452653255313635, - "learning_rate": 0.0001999917755445295, - "loss": 46.0, - "step": 25361 - }, - { - "epoch": 4.084383429284593, - "grad_norm": 0.004726361948996782, - "learning_rate": 0.00019999177489567899, - "loss": 46.0, - "step": 25362 - }, - { - "epoch": 4.08454446636338, - "grad_norm": 0.0025916057638823986, - "learning_rate": 0.00019999177424680285, - "loss": 46.0, - "step": 25363 - }, - { - "epoch": 4.0847055034421675, - "grad_norm": 0.0011292851995676756, - "learning_rate": 0.0001999917735979011, - "loss": 46.0, - "step": 25364 - }, - { - "epoch": 4.084866540520955, - "grad_norm": 0.0009537922451272607, - "learning_rate": 0.0001999917729489738, - "loss": 46.0, - "step": 25365 - }, - { - "epoch": 4.085027577599742, - "grad_norm": 0.002446504542604089, - "learning_rate": 0.0001999917723000209, - "loss": 46.0, - "step": 25366 - }, - { - "epoch": 4.08518861467853, - "grad_norm": 0.017476635053753853, - "learning_rate": 0.0001999917716510424, - "loss": 46.0, - "step": 25367 - }, - { - "epoch": 4.085349651757317, - "grad_norm": 0.0022909617982804775, - "learning_rate": 0.0001999917710020383, - "loss": 46.0, - "step": 25368 - }, - { - "epoch": 4.085510688836105, - "grad_norm": 0.0018288498977199197, - "learning_rate": 0.00019999177035300865, - "loss": 46.0, - "step": 25369 - }, - { - "epoch": 4.085671725914892, - "grad_norm": 0.0016076794127002358, - "learning_rate": 0.00019999176970395338, - "loss": 46.0, - "step": 25370 - }, - { - "epoch": 4.08583276299368, - "grad_norm": 0.000692850211635232, - "learning_rate": 0.0001999917690548725, - "loss": 46.0, - "step": 25371 - }, - { - "epoch": 4.085993800072466, - "grad_norm": 0.0034705435391515493, - "learning_rate": 0.00019999176840576602, - "loss": 46.0, - "step": 25372 - }, - { - "epoch": 4.086154837151254, - "grad_norm": 0.0033773507457226515, - "learning_rate": 0.000199991767756634, - "loss": 46.0, - "step": 25373 - }, - { - "epoch": 4.086315874230041, - "grad_norm": 0.00610406044870615, - "learning_rate": 0.00019999176710747634, - "loss": 46.0, - "step": 25374 - }, - { - "epoch": 4.086476911308829, - "grad_norm": 0.009026920422911644, - "learning_rate": 0.0001999917664582931, - "loss": 46.0, - "step": 25375 - }, - { - "epoch": 4.086637948387616, - "grad_norm": 0.003868024330586195, - "learning_rate": 0.00019999176580908428, - "loss": 46.0, - "step": 25376 - }, - { - "epoch": 4.0867989854664035, - "grad_norm": 0.003424036782234907, - "learning_rate": 0.00019999176515984982, - "loss": 46.0, - "step": 25377 - }, - { - "epoch": 4.086960022545191, - "grad_norm": 0.008421915583312511, - "learning_rate": 0.00019999176451058982, - "loss": 46.0, - "step": 25378 - }, - { - "epoch": 4.087121059623978, - "grad_norm": 0.005649816710501909, - "learning_rate": 0.0001999917638613042, - "loss": 46.0, - "step": 25379 - }, - { - "epoch": 4.087282096702766, - "grad_norm": 0.003014152869582176, - "learning_rate": 0.000199991763211993, - "loss": 46.0, - "step": 25380 - }, - { - "epoch": 4.087443133781553, - "grad_norm": 0.0066256653517484665, - "learning_rate": 0.00019999176256265623, - "loss": 46.0, - "step": 25381 - }, - { - "epoch": 4.087604170860341, - "grad_norm": 0.005814736243337393, - "learning_rate": 0.00019999176191329383, - "loss": 46.0, - "step": 25382 - }, - { - "epoch": 4.087765207939128, - "grad_norm": 0.0035878093913197517, - "learning_rate": 0.00019999176126390584, - "loss": 46.0, - "step": 25383 - }, - { - "epoch": 4.087926245017916, - "grad_norm": 0.00922172237187624, - "learning_rate": 0.00019999176061449226, - "loss": 46.0, - "step": 25384 - }, - { - "epoch": 4.088087282096703, - "grad_norm": 0.004588711075484753, - "learning_rate": 0.0001999917599650531, - "loss": 46.0, - "step": 25385 - }, - { - "epoch": 4.088248319175491, - "grad_norm": 0.00894121639430523, - "learning_rate": 0.00019999175931558835, - "loss": 46.0, - "step": 25386 - }, - { - "epoch": 4.088409356254277, - "grad_norm": 0.0024799557868391275, - "learning_rate": 0.000199991758666098, - "loss": 46.0, - "step": 25387 - }, - { - "epoch": 4.088570393333065, - "grad_norm": 0.007205348927527666, - "learning_rate": 0.00019999175801658204, - "loss": 46.0, - "step": 25388 - }, - { - "epoch": 4.088731430411852, - "grad_norm": 0.00122648268006742, - "learning_rate": 0.0001999917573670405, - "loss": 46.0, - "step": 25389 - }, - { - "epoch": 4.0888924674906395, - "grad_norm": 0.0010580611415207386, - "learning_rate": 0.00019999175671747338, - "loss": 46.0, - "step": 25390 - }, - { - "epoch": 4.089053504569427, - "grad_norm": 0.0029637455008924007, - "learning_rate": 0.00019999175606788064, - "loss": 46.0, - "step": 25391 - }, - { - "epoch": 4.089214541648214, - "grad_norm": 0.0019093838054686785, - "learning_rate": 0.00019999175541826234, - "loss": 46.0, - "step": 25392 - }, - { - "epoch": 4.089375578727002, - "grad_norm": 0.0026252414099872112, - "learning_rate": 0.00019999175476861846, - "loss": 46.0, - "step": 25393 - }, - { - "epoch": 4.089536615805789, - "grad_norm": 0.003417298197746277, - "learning_rate": 0.00019999175411894893, - "loss": 46.0, - "step": 25394 - }, - { - "epoch": 4.089697652884577, - "grad_norm": 0.004173026420176029, - "learning_rate": 0.00019999175346925382, - "loss": 46.0, - "step": 25395 - }, - { - "epoch": 4.089858689963364, - "grad_norm": 0.0015862321015447378, - "learning_rate": 0.00019999175281953314, - "loss": 46.0, - "step": 25396 - }, - { - "epoch": 4.090019727042152, - "grad_norm": 0.004064126871526241, - "learning_rate": 0.00019999175216978688, - "loss": 46.0, - "step": 25397 - }, - { - "epoch": 4.090180764120939, - "grad_norm": 0.0025165406987071037, - "learning_rate": 0.000199991751520015, - "loss": 46.0, - "step": 25398 - }, - { - "epoch": 4.090341801199727, - "grad_norm": 0.014682458713650703, - "learning_rate": 0.00019999175087021754, - "loss": 46.0, - "step": 25399 - }, - { - "epoch": 4.090502838278514, - "grad_norm": 0.0016353977844119072, - "learning_rate": 0.00019999175022039446, - "loss": 46.0, - "step": 25400 - }, - { - "epoch": 4.090663875357301, - "grad_norm": 0.002113655675202608, - "learning_rate": 0.0001999917495705458, - "loss": 46.0, - "step": 25401 - }, - { - "epoch": 4.090824912436088, - "grad_norm": 0.00485873082652688, - "learning_rate": 0.00019999174892067158, - "loss": 46.0, - "step": 25402 - }, - { - "epoch": 4.0909859495148755, - "grad_norm": 0.007050623185932636, - "learning_rate": 0.00019999174827077174, - "loss": 46.0, - "step": 25403 - }, - { - "epoch": 4.091146986593663, - "grad_norm": 0.013660944066941738, - "learning_rate": 0.0001999917476208463, - "loss": 46.0, - "step": 25404 - }, - { - "epoch": 4.09130802367245, - "grad_norm": 0.004856301471590996, - "learning_rate": 0.0001999917469708953, - "loss": 46.0, - "step": 25405 - }, - { - "epoch": 4.091469060751238, - "grad_norm": 0.002449101535603404, - "learning_rate": 0.00019999174632091867, - "loss": 46.0, - "step": 25406 - }, - { - "epoch": 4.091630097830025, - "grad_norm": 0.0032331738620996475, - "learning_rate": 0.00019999174567091648, - "loss": 46.0, - "step": 25407 - }, - { - "epoch": 4.091791134908813, - "grad_norm": 0.004462626297026873, - "learning_rate": 0.00019999174502088865, - "loss": 46.0, - "step": 25408 - }, - { - "epoch": 4.0919521719876, - "grad_norm": 0.0015092194080352783, - "learning_rate": 0.00019999174437083526, - "loss": 46.0, - "step": 25409 - }, - { - "epoch": 4.092113209066388, - "grad_norm": 0.012744955718517303, - "learning_rate": 0.0001999917437207563, - "loss": 46.0, - "step": 25410 - }, - { - "epoch": 4.092274246145175, - "grad_norm": 0.0034201527014374733, - "learning_rate": 0.00019999174307065173, - "loss": 46.0, - "step": 25411 - }, - { - "epoch": 4.0924352832239625, - "grad_norm": 0.0012307639699429274, - "learning_rate": 0.00019999174242052155, - "loss": 46.0, - "step": 25412 - }, - { - "epoch": 4.09259632030275, - "grad_norm": 0.0033559866715222597, - "learning_rate": 0.00019999174177036578, - "loss": 46.0, - "step": 25413 - }, - { - "epoch": 4.0927573573815375, - "grad_norm": 0.0020630250219255686, - "learning_rate": 0.0001999917411201844, - "loss": 46.0, - "step": 25414 - }, - { - "epoch": 4.092918394460325, - "grad_norm": 0.0012368483003228903, - "learning_rate": 0.00019999174046997747, - "loss": 46.0, - "step": 25415 - }, - { - "epoch": 4.0930794315391115, - "grad_norm": 0.0018089090008288622, - "learning_rate": 0.00019999173981974494, - "loss": 46.0, - "step": 25416 - }, - { - "epoch": 4.093240468617899, - "grad_norm": 0.013004492968320847, - "learning_rate": 0.0001999917391694868, - "loss": 46.0, - "step": 25417 - }, - { - "epoch": 4.093401505696686, - "grad_norm": 0.008340667001903057, - "learning_rate": 0.00019999173851920307, - "loss": 46.0, - "step": 25418 - }, - { - "epoch": 4.093562542775474, - "grad_norm": 0.002470900071784854, - "learning_rate": 0.00019999173786889376, - "loss": 46.0, - "step": 25419 - }, - { - "epoch": 4.093723579854261, - "grad_norm": 0.00087618303950876, - "learning_rate": 0.00019999173721855883, - "loss": 46.0, - "step": 25420 - }, - { - "epoch": 4.093884616933049, - "grad_norm": 0.004502859897911549, - "learning_rate": 0.00019999173656819834, - "loss": 46.0, - "step": 25421 - }, - { - "epoch": 4.094045654011836, - "grad_norm": 0.0018137659644708037, - "learning_rate": 0.00019999173591781223, - "loss": 46.0, - "step": 25422 - }, - { - "epoch": 4.094206691090624, - "grad_norm": 0.001247807638719678, - "learning_rate": 0.00019999173526740054, - "loss": 46.0, - "step": 25423 - }, - { - "epoch": 4.094367728169411, - "grad_norm": 0.003618153277784586, - "learning_rate": 0.00019999173461696327, - "loss": 46.0, - "step": 25424 - }, - { - "epoch": 4.0945287652481985, - "grad_norm": 0.0016069714911282063, - "learning_rate": 0.00019999173396650037, - "loss": 46.0, - "step": 25425 - }, - { - "epoch": 4.094689802326986, - "grad_norm": 0.0011741031194105744, - "learning_rate": 0.00019999173331601192, - "loss": 46.0, - "step": 25426 - }, - { - "epoch": 4.094850839405773, - "grad_norm": 0.001205502892844379, - "learning_rate": 0.00019999173266549785, - "loss": 46.0, - "step": 25427 - }, - { - "epoch": 4.095011876484561, - "grad_norm": 0.0017412963788956404, - "learning_rate": 0.0001999917320149582, - "loss": 46.0, - "step": 25428 - }, - { - "epoch": 4.095172913563348, - "grad_norm": 0.0013506341492757201, - "learning_rate": 0.00019999173136439296, - "loss": 46.0, - "step": 25429 - }, - { - "epoch": 4.095333950642136, - "grad_norm": 0.0027436986565589905, - "learning_rate": 0.00019999173071380208, - "loss": 46.0, - "step": 25430 - }, - { - "epoch": 4.095494987720922, - "grad_norm": 0.002620898885652423, - "learning_rate": 0.0001999917300631857, - "loss": 46.0, - "step": 25431 - }, - { - "epoch": 4.09565602479971, - "grad_norm": 0.01690833829343319, - "learning_rate": 0.00019999172941254366, - "loss": 46.0, - "step": 25432 - }, - { - "epoch": 4.095817061878497, - "grad_norm": 0.0021657529287040234, - "learning_rate": 0.00019999172876187602, - "loss": 46.0, - "step": 25433 - }, - { - "epoch": 4.095978098957285, - "grad_norm": 0.013728772290050983, - "learning_rate": 0.0001999917281111828, - "loss": 46.0, - "step": 25434 - }, - { - "epoch": 4.096139136036072, - "grad_norm": 0.008101335726678371, - "learning_rate": 0.000199991727460464, - "loss": 46.0, - "step": 25435 - }, - { - "epoch": 4.09630017311486, - "grad_norm": 0.0013107523554936051, - "learning_rate": 0.00019999172680971962, - "loss": 46.0, - "step": 25436 - }, - { - "epoch": 4.096461210193647, - "grad_norm": 0.003535193158313632, - "learning_rate": 0.00019999172615894962, - "loss": 46.0, - "step": 25437 - }, - { - "epoch": 4.0966222472724345, - "grad_norm": 0.009353358298540115, - "learning_rate": 0.00019999172550815405, - "loss": 46.0, - "step": 25438 - }, - { - "epoch": 4.096783284351222, - "grad_norm": 0.0039663491770625114, - "learning_rate": 0.00019999172485733285, - "loss": 46.0, - "step": 25439 - }, - { - "epoch": 4.096944321430009, - "grad_norm": 0.0015022153966128826, - "learning_rate": 0.0001999917242064861, - "loss": 46.0, - "step": 25440 - }, - { - "epoch": 4.097105358508797, - "grad_norm": 0.0021316735073924065, - "learning_rate": 0.00019999172355561373, - "loss": 46.0, - "step": 25441 - }, - { - "epoch": 4.097266395587584, - "grad_norm": 0.005879572592675686, - "learning_rate": 0.00019999172290471577, - "loss": 46.0, - "step": 25442 - }, - { - "epoch": 4.097427432666372, - "grad_norm": 0.005232840310782194, - "learning_rate": 0.00019999172225379223, - "loss": 46.0, - "step": 25443 - }, - { - "epoch": 4.097588469745159, - "grad_norm": 0.002854888793081045, - "learning_rate": 0.00019999172160284307, - "loss": 46.0, - "step": 25444 - }, - { - "epoch": 4.097749506823946, - "grad_norm": 0.010229691863059998, - "learning_rate": 0.00019999172095186836, - "loss": 46.0, - "step": 25445 - }, - { - "epoch": 4.097910543902733, - "grad_norm": 0.0035191874485462904, - "learning_rate": 0.00019999172030086803, - "loss": 46.0, - "step": 25446 - }, - { - "epoch": 4.098071580981521, - "grad_norm": 0.017856374382972717, - "learning_rate": 0.0001999917196498421, - "loss": 46.0, - "step": 25447 - }, - { - "epoch": 4.098232618060308, - "grad_norm": 0.0037848991341888905, - "learning_rate": 0.00019999171899879058, - "loss": 46.0, - "step": 25448 - }, - { - "epoch": 4.098393655139096, - "grad_norm": 0.01623384840786457, - "learning_rate": 0.0001999917183477135, - "loss": 46.0, - "step": 25449 - }, - { - "epoch": 4.098554692217883, - "grad_norm": 0.007264601066708565, - "learning_rate": 0.0001999917176966108, - "loss": 46.0, - "step": 25450 - }, - { - "epoch": 4.0987157292966705, - "grad_norm": 0.0008852232131175697, - "learning_rate": 0.0001999917170454825, - "loss": 46.0, - "step": 25451 - }, - { - "epoch": 4.098876766375458, - "grad_norm": 0.0022032661363482475, - "learning_rate": 0.00019999171639432863, - "loss": 46.0, - "step": 25452 - }, - { - "epoch": 4.099037803454245, - "grad_norm": 0.008992953225970268, - "learning_rate": 0.00019999171574314916, - "loss": 46.0, - "step": 25453 - }, - { - "epoch": 4.099198840533033, - "grad_norm": 0.00500809820368886, - "learning_rate": 0.0001999917150919441, - "loss": 46.0, - "step": 25454 - }, - { - "epoch": 4.09935987761182, - "grad_norm": 0.007882903330028057, - "learning_rate": 0.00019999171444071343, - "loss": 46.0, - "step": 25455 - }, - { - "epoch": 4.099520914690608, - "grad_norm": 0.011945423670113087, - "learning_rate": 0.00019999171378945718, - "loss": 46.0, - "step": 25456 - }, - { - "epoch": 4.099681951769395, - "grad_norm": 0.0037769211921840906, - "learning_rate": 0.00019999171313817536, - "loss": 46.0, - "step": 25457 - }, - { - "epoch": 4.099842988848183, - "grad_norm": 0.003988330252468586, - "learning_rate": 0.0001999917124868679, - "loss": 46.0, - "step": 25458 - }, - { - "epoch": 4.10000402592697, - "grad_norm": 0.004875501152127981, - "learning_rate": 0.00019999171183553486, - "loss": 46.0, - "step": 25459 - }, - { - "epoch": 4.100165063005757, - "grad_norm": 0.0023024508263915777, - "learning_rate": 0.00019999171118417625, - "loss": 46.0, - "step": 25460 - }, - { - "epoch": 4.100326100084544, - "grad_norm": 0.00654234504327178, - "learning_rate": 0.00019999171053279203, - "loss": 46.0, - "step": 25461 - }, - { - "epoch": 4.100487137163332, - "grad_norm": 0.002930715214461088, - "learning_rate": 0.00019999170988138222, - "loss": 46.0, - "step": 25462 - }, - { - "epoch": 4.100648174242119, - "grad_norm": 0.009244444780051708, - "learning_rate": 0.0001999917092299468, - "loss": 46.0, - "step": 25463 - }, - { - "epoch": 4.1008092113209065, - "grad_norm": 0.0007282950100488961, - "learning_rate": 0.00019999170857848585, - "loss": 46.0, - "step": 25464 - }, - { - "epoch": 4.100970248399694, - "grad_norm": 0.005571243353188038, - "learning_rate": 0.00019999170792699925, - "loss": 46.0, - "step": 25465 - }, - { - "epoch": 4.101131285478481, - "grad_norm": 0.00936428364366293, - "learning_rate": 0.00019999170727548707, - "loss": 46.0, - "step": 25466 - }, - { - "epoch": 4.101292322557269, - "grad_norm": 0.0033771838061511517, - "learning_rate": 0.0001999917066239493, - "loss": 46.0, - "step": 25467 - }, - { - "epoch": 4.101453359636056, - "grad_norm": 0.006844170391559601, - "learning_rate": 0.00019999170597238591, - "loss": 46.0, - "step": 25468 - }, - { - "epoch": 4.101614396714844, - "grad_norm": 0.0017905480926856399, - "learning_rate": 0.00019999170532079697, - "loss": 46.0, - "step": 25469 - }, - { - "epoch": 4.101775433793631, - "grad_norm": 0.0075187901966273785, - "learning_rate": 0.0001999917046691824, - "loss": 46.0, - "step": 25470 - }, - { - "epoch": 4.101936470872419, - "grad_norm": 0.005218985956162214, - "learning_rate": 0.00019999170401754226, - "loss": 46.0, - "step": 25471 - }, - { - "epoch": 4.102097507951206, - "grad_norm": 0.005367629695683718, - "learning_rate": 0.00019999170336587653, - "loss": 46.0, - "step": 25472 - }, - { - "epoch": 4.1022585450299935, - "grad_norm": 0.010221910662949085, - "learning_rate": 0.00019999170271418518, - "loss": 46.0, - "step": 25473 - }, - { - "epoch": 4.10241958210878, - "grad_norm": 0.004604964051395655, - "learning_rate": 0.00019999170206246828, - "loss": 46.0, - "step": 25474 - }, - { - "epoch": 4.102580619187568, - "grad_norm": 0.003356988076120615, - "learning_rate": 0.00019999170141072576, - "loss": 46.0, - "step": 25475 - }, - { - "epoch": 4.102741656266355, - "grad_norm": 0.0037764401640743017, - "learning_rate": 0.00019999170075895765, - "loss": 46.0, - "step": 25476 - }, - { - "epoch": 4.1029026933451425, - "grad_norm": 0.0042011430487036705, - "learning_rate": 0.00019999170010716398, - "loss": 46.0, - "step": 25477 - }, - { - "epoch": 4.10306373042393, - "grad_norm": 0.0023326058872044086, - "learning_rate": 0.00019999169945534467, - "loss": 46.0, - "step": 25478 - }, - { - "epoch": 4.103224767502717, - "grad_norm": 0.004028123803436756, - "learning_rate": 0.00019999169880349977, - "loss": 46.0, - "step": 25479 - }, - { - "epoch": 4.103385804581505, - "grad_norm": 0.004875359125435352, - "learning_rate": 0.0001999916981516293, - "loss": 46.0, - "step": 25480 - }, - { - "epoch": 4.103546841660292, - "grad_norm": 0.007139843422919512, - "learning_rate": 0.00019999169749973324, - "loss": 46.0, - "step": 25481 - }, - { - "epoch": 4.10370787873908, - "grad_norm": 0.008145545609295368, - "learning_rate": 0.00019999169684781158, - "loss": 46.0, - "step": 25482 - }, - { - "epoch": 4.103868915817867, - "grad_norm": 0.003824364859610796, - "learning_rate": 0.00019999169619586434, - "loss": 46.0, - "step": 25483 - }, - { - "epoch": 4.104029952896655, - "grad_norm": 0.0010947324335575104, - "learning_rate": 0.00019999169554389145, - "loss": 46.0, - "step": 25484 - }, - { - "epoch": 4.104190989975442, - "grad_norm": 0.001225115149281919, - "learning_rate": 0.00019999169489189303, - "loss": 46.0, - "step": 25485 - }, - { - "epoch": 4.1043520270542295, - "grad_norm": 0.004524054005742073, - "learning_rate": 0.000199991694239869, - "loss": 46.0, - "step": 25486 - }, - { - "epoch": 4.104513064133017, - "grad_norm": 0.004667694214731455, - "learning_rate": 0.00019999169358781934, - "loss": 46.0, - "step": 25487 - }, - { - "epoch": 4.104674101211804, - "grad_norm": 0.00201613362878561, - "learning_rate": 0.00019999169293574416, - "loss": 46.0, - "step": 25488 - }, - { - "epoch": 4.104835138290591, - "grad_norm": 0.007933225482702255, - "learning_rate": 0.00019999169228364334, - "loss": 46.0, - "step": 25489 - }, - { - "epoch": 4.104996175369378, - "grad_norm": 0.005335974041372538, - "learning_rate": 0.00019999169163151693, - "loss": 46.0, - "step": 25490 - }, - { - "epoch": 4.105157212448166, - "grad_norm": 0.001674870727583766, - "learning_rate": 0.00019999169097936493, - "loss": 46.0, - "step": 25491 - }, - { - "epoch": 4.105318249526953, - "grad_norm": 0.0024037370458245277, - "learning_rate": 0.00019999169032718732, - "loss": 46.0, - "step": 25492 - }, - { - "epoch": 4.105479286605741, - "grad_norm": 0.006767706014215946, - "learning_rate": 0.00019999168967498415, - "loss": 46.0, - "step": 25493 - }, - { - "epoch": 4.105640323684528, - "grad_norm": 0.002236138330772519, - "learning_rate": 0.00019999168902275539, - "loss": 46.0, - "step": 25494 - }, - { - "epoch": 4.105801360763316, - "grad_norm": 0.001654462656006217, - "learning_rate": 0.00019999168837050099, - "loss": 46.0, - "step": 25495 - }, - { - "epoch": 4.105962397842103, - "grad_norm": 0.014386680908501148, - "learning_rate": 0.00019999168771822102, - "loss": 46.0, - "step": 25496 - }, - { - "epoch": 4.106123434920891, - "grad_norm": 0.003681241301819682, - "learning_rate": 0.00019999168706591548, - "loss": 46.0, - "step": 25497 - }, - { - "epoch": 4.106284471999678, - "grad_norm": 0.0005037786322645843, - "learning_rate": 0.00019999168641358434, - "loss": 46.0, - "step": 25498 - }, - { - "epoch": 4.1064455090784655, - "grad_norm": 0.007998153567314148, - "learning_rate": 0.0001999916857612276, - "loss": 46.0, - "step": 25499 - }, - { - "epoch": 4.106606546157253, - "grad_norm": 0.006519208196550608, - "learning_rate": 0.00019999168510884528, - "loss": 46.0, - "step": 25500 - }, - { - "epoch": 4.10676758323604, - "grad_norm": 0.012437457218766212, - "learning_rate": 0.00019999168445643733, - "loss": 46.0, - "step": 25501 - }, - { - "epoch": 4.106928620314828, - "grad_norm": 0.0024969023652374744, - "learning_rate": 0.00019999168380400382, - "loss": 46.0, - "step": 25502 - }, - { - "epoch": 4.107089657393615, - "grad_norm": 0.007852721959352493, - "learning_rate": 0.0001999916831515447, - "loss": 46.0, - "step": 25503 - }, - { - "epoch": 4.107250694472402, - "grad_norm": 0.0032842422369867563, - "learning_rate": 0.00019999168249906, - "loss": 46.0, - "step": 25504 - }, - { - "epoch": 4.107411731551189, - "grad_norm": 0.001273187343031168, - "learning_rate": 0.0001999916818465497, - "loss": 46.0, - "step": 25505 - }, - { - "epoch": 4.107572768629977, - "grad_norm": 0.01275511085987091, - "learning_rate": 0.00019999168119401382, - "loss": 46.0, - "step": 25506 - }, - { - "epoch": 4.107733805708764, - "grad_norm": 0.004364375956356525, - "learning_rate": 0.00019999168054145235, - "loss": 46.0, - "step": 25507 - }, - { - "epoch": 4.107894842787552, - "grad_norm": 0.0128211984410882, - "learning_rate": 0.00019999167988886526, - "loss": 46.0, - "step": 25508 - }, - { - "epoch": 4.108055879866339, - "grad_norm": 0.014299626462161541, - "learning_rate": 0.0001999916792362526, - "loss": 46.0, - "step": 25509 - }, - { - "epoch": 4.108216916945127, - "grad_norm": 0.0030823044944554567, - "learning_rate": 0.00019999167858361435, - "loss": 46.0, - "step": 25510 - }, - { - "epoch": 4.108377954023914, - "grad_norm": 0.0015012375079095364, - "learning_rate": 0.00019999167793095048, - "loss": 46.0, - "step": 25511 - }, - { - "epoch": 4.1085389911027015, - "grad_norm": 0.0028856382705271244, - "learning_rate": 0.000199991677278261, - "loss": 46.0, - "step": 25512 - }, - { - "epoch": 4.108700028181489, - "grad_norm": 0.001829297747462988, - "learning_rate": 0.000199991676625546, - "loss": 46.0, - "step": 25513 - }, - { - "epoch": 4.108861065260276, - "grad_norm": 0.001995287137106061, - "learning_rate": 0.00019999167597280538, - "loss": 46.0, - "step": 25514 - }, - { - "epoch": 4.109022102339064, - "grad_norm": 0.004471911583095789, - "learning_rate": 0.00019999167532003912, - "loss": 46.0, - "step": 25515 - }, - { - "epoch": 4.109183139417851, - "grad_norm": 0.0019047876121476293, - "learning_rate": 0.00019999167466724734, - "loss": 46.0, - "step": 25516 - }, - { - "epoch": 4.109344176496639, - "grad_norm": 0.0047268345952034, - "learning_rate": 0.0001999916740144299, - "loss": 46.0, - "step": 25517 - }, - { - "epoch": 4.109505213575425, - "grad_norm": 0.0009277617791667581, - "learning_rate": 0.00019999167336158692, - "loss": 46.0, - "step": 25518 - }, - { - "epoch": 4.109666250654213, - "grad_norm": 0.006467899307608604, - "learning_rate": 0.00019999167270871832, - "loss": 46.0, - "step": 25519 - }, - { - "epoch": 4.109827287733, - "grad_norm": 0.00186252873390913, - "learning_rate": 0.00019999167205582413, - "loss": 46.0, - "step": 25520 - }, - { - "epoch": 4.109988324811788, - "grad_norm": 0.0011660184245556593, - "learning_rate": 0.00019999167140290436, - "loss": 46.0, - "step": 25521 - }, - { - "epoch": 4.110149361890575, - "grad_norm": 0.004667657893151045, - "learning_rate": 0.000199991670749959, - "loss": 46.0, - "step": 25522 - }, - { - "epoch": 4.110310398969363, - "grad_norm": 0.002636682940647006, - "learning_rate": 0.00019999167009698802, - "loss": 46.0, - "step": 25523 - }, - { - "epoch": 4.11047143604815, - "grad_norm": 0.002740677446126938, - "learning_rate": 0.00019999166944399146, - "loss": 46.0, - "step": 25524 - }, - { - "epoch": 4.1106324731269375, - "grad_norm": 0.001717817853204906, - "learning_rate": 0.0001999916687909693, - "loss": 46.0, - "step": 25525 - }, - { - "epoch": 4.110793510205725, - "grad_norm": 0.006547986064106226, - "learning_rate": 0.00019999166813792157, - "loss": 46.0, - "step": 25526 - }, - { - "epoch": 4.110954547284512, - "grad_norm": 0.0006603376241400838, - "learning_rate": 0.00019999166748484824, - "loss": 46.0, - "step": 25527 - }, - { - "epoch": 4.1111155843633, - "grad_norm": 0.013379744254052639, - "learning_rate": 0.00019999166683174927, - "loss": 46.0, - "step": 25528 - }, - { - "epoch": 4.111276621442087, - "grad_norm": 0.0016734004020690918, - "learning_rate": 0.00019999166617862477, - "loss": 46.0, - "step": 25529 - }, - { - "epoch": 4.111437658520875, - "grad_norm": 0.008375810459256172, - "learning_rate": 0.00019999166552547466, - "loss": 46.0, - "step": 25530 - }, - { - "epoch": 4.111598695599662, - "grad_norm": 0.0014235987327992916, - "learning_rate": 0.00019999166487229896, - "loss": 46.0, - "step": 25531 - }, - { - "epoch": 4.11175973267845, - "grad_norm": 0.005986319854855537, - "learning_rate": 0.00019999166421909764, - "loss": 46.0, - "step": 25532 - }, - { - "epoch": 4.111920769757236, - "grad_norm": 0.006131486501544714, - "learning_rate": 0.00019999166356587077, - "loss": 46.0, - "step": 25533 - }, - { - "epoch": 4.112081806836024, - "grad_norm": 0.008920717053115368, - "learning_rate": 0.00019999166291261825, - "loss": 46.0, - "step": 25534 - }, - { - "epoch": 4.112242843914811, - "grad_norm": 0.0019028628012165427, - "learning_rate": 0.00019999166225934017, - "loss": 46.0, - "step": 25535 - }, - { - "epoch": 4.1124038809935985, - "grad_norm": 0.002718705916777253, - "learning_rate": 0.00019999166160603653, - "loss": 46.0, - "step": 25536 - }, - { - "epoch": 4.112564918072386, - "grad_norm": 0.0020678136497735977, - "learning_rate": 0.00019999166095270726, - "loss": 46.0, - "step": 25537 - }, - { - "epoch": 4.1127259551511735, - "grad_norm": 0.0060973805375397205, - "learning_rate": 0.0001999916602993524, - "loss": 46.0, - "step": 25538 - }, - { - "epoch": 4.112886992229961, - "grad_norm": 0.00440619932487607, - "learning_rate": 0.00019999165964597194, - "loss": 46.0, - "step": 25539 - }, - { - "epoch": 4.113048029308748, - "grad_norm": 0.0037918188609182835, - "learning_rate": 0.0001999916589925659, - "loss": 46.0, - "step": 25540 - }, - { - "epoch": 4.113209066387536, - "grad_norm": 0.001046994817443192, - "learning_rate": 0.00019999165833913427, - "loss": 46.0, - "step": 25541 - }, - { - "epoch": 4.113370103466323, - "grad_norm": 0.006555836647748947, - "learning_rate": 0.00019999165768567703, - "loss": 46.0, - "step": 25542 - }, - { - "epoch": 4.113531140545111, - "grad_norm": 0.0010875486768782139, - "learning_rate": 0.00019999165703219423, - "loss": 46.0, - "step": 25543 - }, - { - "epoch": 4.113692177623898, - "grad_norm": 0.0016576419584453106, - "learning_rate": 0.0001999916563786858, - "loss": 46.0, - "step": 25544 - }, - { - "epoch": 4.113853214702686, - "grad_norm": 0.004397212993353605, - "learning_rate": 0.0001999916557251518, - "loss": 46.0, - "step": 25545 - }, - { - "epoch": 4.114014251781473, - "grad_norm": 0.003139763604849577, - "learning_rate": 0.00019999165507159222, - "loss": 46.0, - "step": 25546 - }, - { - "epoch": 4.11417528886026, - "grad_norm": 0.007025458384305239, - "learning_rate": 0.000199991654418007, - "loss": 46.0, - "step": 25547 - }, - { - "epoch": 4.114336325939047, - "grad_norm": 0.001014004461467266, - "learning_rate": 0.00019999165376439622, - "loss": 46.0, - "step": 25548 - }, - { - "epoch": 4.1144973630178345, - "grad_norm": 0.0013552741147577763, - "learning_rate": 0.00019999165311075984, - "loss": 46.0, - "step": 25549 - }, - { - "epoch": 4.114658400096622, - "grad_norm": 0.004622077569365501, - "learning_rate": 0.00019999165245709787, - "loss": 46.0, - "step": 25550 - }, - { - "epoch": 4.114819437175409, - "grad_norm": 0.004090077243745327, - "learning_rate": 0.0001999916518034103, - "loss": 46.0, - "step": 25551 - }, - { - "epoch": 4.114980474254197, - "grad_norm": 0.02914608083665371, - "learning_rate": 0.00019999165114969715, - "loss": 46.0, - "step": 25552 - }, - { - "epoch": 4.115141511332984, - "grad_norm": 0.000994830159470439, - "learning_rate": 0.00019999165049595843, - "loss": 46.0, - "step": 25553 - }, - { - "epoch": 4.115302548411772, - "grad_norm": 0.0025630900636315346, - "learning_rate": 0.00019999164984219408, - "loss": 46.0, - "step": 25554 - }, - { - "epoch": 4.115463585490559, - "grad_norm": 0.001853022607974708, - "learning_rate": 0.00019999164918840413, - "loss": 46.0, - "step": 25555 - }, - { - "epoch": 4.115624622569347, - "grad_norm": 0.00142704788595438, - "learning_rate": 0.0001999916485345886, - "loss": 46.0, - "step": 25556 - }, - { - "epoch": 4.115785659648134, - "grad_norm": 0.009458962827920914, - "learning_rate": 0.00019999164788074748, - "loss": 46.0, - "step": 25557 - }, - { - "epoch": 4.115946696726922, - "grad_norm": 0.0011380433570593596, - "learning_rate": 0.0001999916472268808, - "loss": 46.0, - "step": 25558 - }, - { - "epoch": 4.116107733805709, - "grad_norm": 0.0017686458304524422, - "learning_rate": 0.00019999164657298849, - "loss": 46.0, - "step": 25559 - }, - { - "epoch": 4.1162687708844965, - "grad_norm": 0.005422645248472691, - "learning_rate": 0.0001999916459190706, - "loss": 46.0, - "step": 25560 - }, - { - "epoch": 4.116429807963284, - "grad_norm": 0.0033317890483886003, - "learning_rate": 0.00019999164526512711, - "loss": 46.0, - "step": 25561 - }, - { - "epoch": 4.1165908450420705, - "grad_norm": 0.0007542499806731939, - "learning_rate": 0.000199991644611158, - "loss": 46.0, - "step": 25562 - }, - { - "epoch": 4.116751882120858, - "grad_norm": 0.015065771527588367, - "learning_rate": 0.0001999916439571633, - "loss": 46.0, - "step": 25563 - }, - { - "epoch": 4.116912919199645, - "grad_norm": 0.003225766122341156, - "learning_rate": 0.00019999164330314307, - "loss": 46.0, - "step": 25564 - }, - { - "epoch": 4.117073956278433, - "grad_norm": 0.0027500493451952934, - "learning_rate": 0.0001999916426490972, - "loss": 46.0, - "step": 25565 - }, - { - "epoch": 4.11723499335722, - "grad_norm": 0.0054383049719035625, - "learning_rate": 0.00019999164199502575, - "loss": 46.0, - "step": 25566 - }, - { - "epoch": 4.117396030436008, - "grad_norm": 0.00944253895431757, - "learning_rate": 0.00019999164134092872, - "loss": 46.0, - "step": 25567 - }, - { - "epoch": 4.117557067514795, - "grad_norm": 0.004202804993838072, - "learning_rate": 0.00019999164068680605, - "loss": 46.0, - "step": 25568 - }, - { - "epoch": 4.117718104593583, - "grad_norm": 0.0015304501866921782, - "learning_rate": 0.00019999164003265784, - "loss": 46.0, - "step": 25569 - }, - { - "epoch": 4.11787914167237, - "grad_norm": 0.011983905918896198, - "learning_rate": 0.00019999163937848403, - "loss": 46.0, - "step": 25570 - }, - { - "epoch": 4.118040178751158, - "grad_norm": 0.002110941568389535, - "learning_rate": 0.0001999916387242846, - "loss": 46.0, - "step": 25571 - }, - { - "epoch": 4.118201215829945, - "grad_norm": 0.006360162515193224, - "learning_rate": 0.00019999163807005957, - "loss": 46.0, - "step": 25572 - }, - { - "epoch": 4.1183622529087325, - "grad_norm": 0.0006603975198231637, - "learning_rate": 0.00019999163741580897, - "loss": 46.0, - "step": 25573 - }, - { - "epoch": 4.11852328998752, - "grad_norm": 0.007174971979111433, - "learning_rate": 0.00019999163676153277, - "loss": 46.0, - "step": 25574 - }, - { - "epoch": 4.118684327066307, - "grad_norm": 0.002847258932888508, - "learning_rate": 0.000199991636107231, - "loss": 46.0, - "step": 25575 - }, - { - "epoch": 4.118845364145095, - "grad_norm": 0.0034133384469896555, - "learning_rate": 0.00019999163545290362, - "loss": 46.0, - "step": 25576 - }, - { - "epoch": 4.119006401223881, - "grad_norm": 0.0021964781917631626, - "learning_rate": 0.00019999163479855064, - "loss": 46.0, - "step": 25577 - }, - { - "epoch": 4.119167438302669, - "grad_norm": 0.002158648334443569, - "learning_rate": 0.00019999163414417205, - "loss": 46.0, - "step": 25578 - }, - { - "epoch": 4.119328475381456, - "grad_norm": 0.002800311893224716, - "learning_rate": 0.00019999163348976792, - "loss": 46.0, - "step": 25579 - }, - { - "epoch": 4.119489512460244, - "grad_norm": 0.012722013518214226, - "learning_rate": 0.00019999163283533817, - "loss": 46.0, - "step": 25580 - }, - { - "epoch": 4.119650549539031, - "grad_norm": 0.013124719262123108, - "learning_rate": 0.00019999163218088281, - "loss": 46.0, - "step": 25581 - }, - { - "epoch": 4.119811586617819, - "grad_norm": 0.0015910544898360968, - "learning_rate": 0.00019999163152640187, - "loss": 46.0, - "step": 25582 - }, - { - "epoch": 4.119972623696606, - "grad_norm": 0.003991268575191498, - "learning_rate": 0.00019999163087189534, - "loss": 46.0, - "step": 25583 - }, - { - "epoch": 4.120133660775394, - "grad_norm": 0.005277678370475769, - "learning_rate": 0.00019999163021736322, - "loss": 46.0, - "step": 25584 - }, - { - "epoch": 4.120294697854181, - "grad_norm": 0.0029431104194372892, - "learning_rate": 0.00019999162956280554, - "loss": 46.0, - "step": 25585 - }, - { - "epoch": 4.1204557349329685, - "grad_norm": 0.006882273126393557, - "learning_rate": 0.00019999162890822222, - "loss": 46.0, - "step": 25586 - }, - { - "epoch": 4.120616772011756, - "grad_norm": 0.011436086148023605, - "learning_rate": 0.0001999916282536133, - "loss": 46.0, - "step": 25587 - }, - { - "epoch": 4.120777809090543, - "grad_norm": 0.005408705677837133, - "learning_rate": 0.00019999162759897884, - "loss": 46.0, - "step": 25588 - }, - { - "epoch": 4.120938846169331, - "grad_norm": 0.0021844564471393824, - "learning_rate": 0.00019999162694431873, - "loss": 46.0, - "step": 25589 - }, - { - "epoch": 4.121099883248118, - "grad_norm": 0.0036464945878833532, - "learning_rate": 0.00019999162628963303, - "loss": 46.0, - "step": 25590 - }, - { - "epoch": 4.121260920326906, - "grad_norm": 0.0017811524448916316, - "learning_rate": 0.00019999162563492178, - "loss": 46.0, - "step": 25591 - }, - { - "epoch": 4.121421957405692, - "grad_norm": 0.0027819517999887466, - "learning_rate": 0.0001999916249801849, - "loss": 46.0, - "step": 25592 - }, - { - "epoch": 4.12158299448448, - "grad_norm": 0.0021310080774128437, - "learning_rate": 0.00019999162432542247, - "loss": 46.0, - "step": 25593 - }, - { - "epoch": 4.121744031563267, - "grad_norm": 0.00754657294601202, - "learning_rate": 0.0001999916236706344, - "loss": 46.0, - "step": 25594 - }, - { - "epoch": 4.121905068642055, - "grad_norm": 0.002623029751703143, - "learning_rate": 0.00019999162301582074, - "loss": 46.0, - "step": 25595 - }, - { - "epoch": 4.122066105720842, - "grad_norm": 0.002840273082256317, - "learning_rate": 0.00019999162236098152, - "loss": 46.0, - "step": 25596 - }, - { - "epoch": 4.1222271427996295, - "grad_norm": 0.005038201343268156, - "learning_rate": 0.00019999162170611669, - "loss": 46.0, - "step": 25597 - }, - { - "epoch": 4.122388179878417, - "grad_norm": 0.016149956732988358, - "learning_rate": 0.00019999162105122626, - "loss": 46.0, - "step": 25598 - }, - { - "epoch": 4.122549216957204, - "grad_norm": 0.002412924077361822, - "learning_rate": 0.00019999162039631028, - "loss": 46.0, - "step": 25599 - }, - { - "epoch": 4.122710254035992, - "grad_norm": 0.0009178838809020817, - "learning_rate": 0.00019999161974136866, - "loss": 46.0, - "step": 25600 - }, - { - "epoch": 4.122871291114779, - "grad_norm": 0.0012937730643898249, - "learning_rate": 0.00019999161908640148, - "loss": 46.0, - "step": 25601 - }, - { - "epoch": 4.123032328193567, - "grad_norm": 0.0023727118968963623, - "learning_rate": 0.00019999161843140865, - "loss": 46.0, - "step": 25602 - }, - { - "epoch": 4.123193365272354, - "grad_norm": 0.0032714291010051966, - "learning_rate": 0.0001999916177763903, - "loss": 46.0, - "step": 25603 - }, - { - "epoch": 4.123354402351142, - "grad_norm": 0.00729818781837821, - "learning_rate": 0.0001999916171213463, - "loss": 46.0, - "step": 25604 - }, - { - "epoch": 4.123515439429929, - "grad_norm": 0.003525134874507785, - "learning_rate": 0.00019999161646627674, - "loss": 46.0, - "step": 25605 - }, - { - "epoch": 4.123676476508716, - "grad_norm": 0.005336896050721407, - "learning_rate": 0.00019999161581118157, - "loss": 46.0, - "step": 25606 - }, - { - "epoch": 4.123837513587503, - "grad_norm": 0.005608264822512865, - "learning_rate": 0.00019999161515606083, - "loss": 46.0, - "step": 25607 - }, - { - "epoch": 4.123998550666291, - "grad_norm": 0.001506253145635128, - "learning_rate": 0.00019999161450091446, - "loss": 46.0, - "step": 25608 - }, - { - "epoch": 4.124159587745078, - "grad_norm": 0.00699825631454587, - "learning_rate": 0.00019999161384574252, - "loss": 46.0, - "step": 25609 - }, - { - "epoch": 4.1243206248238655, - "grad_norm": 0.018045881763100624, - "learning_rate": 0.000199991613190545, - "loss": 46.0, - "step": 25610 - }, - { - "epoch": 4.124481661902653, - "grad_norm": 0.003668231889605522, - "learning_rate": 0.0001999916125353219, - "loss": 46.0, - "step": 25611 - }, - { - "epoch": 4.12464269898144, - "grad_norm": 0.00043198675848543644, - "learning_rate": 0.00019999161188007314, - "loss": 46.0, - "step": 25612 - }, - { - "epoch": 4.124803736060228, - "grad_norm": 0.0017844585236161947, - "learning_rate": 0.00019999161122479883, - "loss": 46.0, - "step": 25613 - }, - { - "epoch": 4.124964773139015, - "grad_norm": 0.007753214333206415, - "learning_rate": 0.00019999161056949893, - "loss": 46.0, - "step": 25614 - }, - { - "epoch": 4.125125810217803, - "grad_norm": 0.0009150505647994578, - "learning_rate": 0.00019999160991417342, - "loss": 46.0, - "step": 25615 - }, - { - "epoch": 4.12528684729659, - "grad_norm": 0.00484734121710062, - "learning_rate": 0.00019999160925882235, - "loss": 46.0, - "step": 25616 - }, - { - "epoch": 4.125447884375378, - "grad_norm": 0.0038678511045873165, - "learning_rate": 0.00019999160860344567, - "loss": 46.0, - "step": 25617 - }, - { - "epoch": 4.125608921454165, - "grad_norm": 0.003355953376740217, - "learning_rate": 0.0001999916079480434, - "loss": 46.0, - "step": 25618 - }, - { - "epoch": 4.125769958532953, - "grad_norm": 0.014977559447288513, - "learning_rate": 0.00019999160729261553, - "loss": 46.0, - "step": 25619 - }, - { - "epoch": 4.125930995611739, - "grad_norm": 0.0042245956137776375, - "learning_rate": 0.00019999160663716206, - "loss": 46.0, - "step": 25620 - }, - { - "epoch": 4.126092032690527, - "grad_norm": 0.002997392090037465, - "learning_rate": 0.000199991605981683, - "loss": 46.0, - "step": 25621 - }, - { - "epoch": 4.126253069769314, - "grad_norm": 0.009510383009910583, - "learning_rate": 0.00019999160532617835, - "loss": 46.0, - "step": 25622 - }, - { - "epoch": 4.1264141068481015, - "grad_norm": 0.0033083290327340364, - "learning_rate": 0.00019999160467064814, - "loss": 46.0, - "step": 25623 - }, - { - "epoch": 4.126575143926889, - "grad_norm": 0.006896724924445152, - "learning_rate": 0.00019999160401509229, - "loss": 46.0, - "step": 25624 - }, - { - "epoch": 4.126736181005676, - "grad_norm": 0.0031561029609292746, - "learning_rate": 0.00019999160335951085, - "loss": 46.0, - "step": 25625 - }, - { - "epoch": 4.126897218084464, - "grad_norm": 0.001359561923891306, - "learning_rate": 0.00019999160270390385, - "loss": 46.0, - "step": 25626 - }, - { - "epoch": 4.127058255163251, - "grad_norm": 0.008148791268467903, - "learning_rate": 0.00019999160204827124, - "loss": 46.0, - "step": 25627 - }, - { - "epoch": 4.127219292242039, - "grad_norm": 0.002030620351433754, - "learning_rate": 0.00019999160139261304, - "loss": 46.0, - "step": 25628 - }, - { - "epoch": 4.127380329320826, - "grad_norm": 0.009539092890918255, - "learning_rate": 0.00019999160073692925, - "loss": 46.0, - "step": 25629 - }, - { - "epoch": 4.127541366399614, - "grad_norm": 0.002409338718280196, - "learning_rate": 0.00019999160008121985, - "loss": 46.0, - "step": 25630 - }, - { - "epoch": 4.127702403478401, - "grad_norm": 0.0011922902194783092, - "learning_rate": 0.00019999159942548486, - "loss": 46.0, - "step": 25631 - }, - { - "epoch": 4.127863440557189, - "grad_norm": 0.003443419933319092, - "learning_rate": 0.0001999915987697243, - "loss": 46.0, - "step": 25632 - }, - { - "epoch": 4.128024477635976, - "grad_norm": 0.004027842544019222, - "learning_rate": 0.00019999159811393813, - "loss": 46.0, - "step": 25633 - }, - { - "epoch": 4.1281855147147635, - "grad_norm": 0.002964272629469633, - "learning_rate": 0.00019999159745812638, - "loss": 46.0, - "step": 25634 - }, - { - "epoch": 4.12834655179355, - "grad_norm": 0.00287226471118629, - "learning_rate": 0.00019999159680228901, - "loss": 46.0, - "step": 25635 - }, - { - "epoch": 4.1285075888723375, - "grad_norm": 0.0044133830815553665, - "learning_rate": 0.0001999915961464261, - "loss": 46.0, - "step": 25636 - }, - { - "epoch": 4.128668625951125, - "grad_norm": 0.0032337193842977285, - "learning_rate": 0.00019999159549053755, - "loss": 46.0, - "step": 25637 - }, - { - "epoch": 4.128829663029912, - "grad_norm": 0.007156878709793091, - "learning_rate": 0.00019999159483462343, - "loss": 46.0, - "step": 25638 - }, - { - "epoch": 4.1289907001087, - "grad_norm": 0.0004423867794685066, - "learning_rate": 0.00019999159417868366, - "loss": 46.0, - "step": 25639 - }, - { - "epoch": 4.129151737187487, - "grad_norm": 0.004037897102534771, - "learning_rate": 0.00019999159352271836, - "loss": 46.0, - "step": 25640 - }, - { - "epoch": 4.129312774266275, - "grad_norm": 0.0017991041531786323, - "learning_rate": 0.00019999159286672745, - "loss": 46.0, - "step": 25641 - }, - { - "epoch": 4.129473811345062, - "grad_norm": 0.007354862987995148, - "learning_rate": 0.00019999159221071095, - "loss": 46.0, - "step": 25642 - }, - { - "epoch": 4.12963484842385, - "grad_norm": 0.008874555118381977, - "learning_rate": 0.00019999159155466886, - "loss": 46.0, - "step": 25643 - }, - { - "epoch": 4.129795885502637, - "grad_norm": 0.001482005463913083, - "learning_rate": 0.0001999915908986012, - "loss": 46.0, - "step": 25644 - }, - { - "epoch": 4.1299569225814245, - "grad_norm": 0.005707482807338238, - "learning_rate": 0.00019999159024250792, - "loss": 46.0, - "step": 25645 - }, - { - "epoch": 4.130117959660212, - "grad_norm": 0.004640547558665276, - "learning_rate": 0.00019999158958638905, - "loss": 46.0, - "step": 25646 - }, - { - "epoch": 4.1302789967389995, - "grad_norm": 0.002107742242515087, - "learning_rate": 0.00019999158893024455, - "loss": 46.0, - "step": 25647 - }, - { - "epoch": 4.130440033817787, - "grad_norm": 0.006413899827748537, - "learning_rate": 0.0001999915882740745, - "loss": 46.0, - "step": 25648 - }, - { - "epoch": 4.130601070896574, - "grad_norm": 0.00154272117651999, - "learning_rate": 0.00019999158761787887, - "loss": 46.0, - "step": 25649 - }, - { - "epoch": 4.130762107975361, - "grad_norm": 0.002160134259611368, - "learning_rate": 0.00019999158696165764, - "loss": 46.0, - "step": 25650 - }, - { - "epoch": 4.130923145054148, - "grad_norm": 0.0016933969454839826, - "learning_rate": 0.0001999915863054108, - "loss": 46.0, - "step": 25651 - }, - { - "epoch": 4.131084182132936, - "grad_norm": 0.002816577907651663, - "learning_rate": 0.00019999158564913835, - "loss": 46.0, - "step": 25652 - }, - { - "epoch": 4.131245219211723, - "grad_norm": 0.0024627463426440954, - "learning_rate": 0.00019999158499284033, - "loss": 46.0, - "step": 25653 - }, - { - "epoch": 4.131406256290511, - "grad_norm": 0.0020824482198804617, - "learning_rate": 0.00019999158433651673, - "loss": 46.0, - "step": 25654 - }, - { - "epoch": 4.131567293369298, - "grad_norm": 0.00415445351973176, - "learning_rate": 0.00019999158368016752, - "loss": 46.0, - "step": 25655 - }, - { - "epoch": 4.131728330448086, - "grad_norm": 0.0021817239467054605, - "learning_rate": 0.0001999915830237927, - "loss": 46.0, - "step": 25656 - }, - { - "epoch": 4.131889367526873, - "grad_norm": 0.002095539588481188, - "learning_rate": 0.00019999158236739232, - "loss": 46.0, - "step": 25657 - }, - { - "epoch": 4.1320504046056605, - "grad_norm": 0.006299429573118687, - "learning_rate": 0.00019999158171096635, - "loss": 46.0, - "step": 25658 - }, - { - "epoch": 4.132211441684448, - "grad_norm": 0.0012190460693091154, - "learning_rate": 0.00019999158105451476, - "loss": 46.0, - "step": 25659 - }, - { - "epoch": 4.132372478763235, - "grad_norm": 0.0038476467598229647, - "learning_rate": 0.0001999915803980376, - "loss": 46.0, - "step": 25660 - }, - { - "epoch": 4.132533515842023, - "grad_norm": 0.004012128803879023, - "learning_rate": 0.00019999157974153484, - "loss": 46.0, - "step": 25661 - }, - { - "epoch": 4.13269455292081, - "grad_norm": 0.001898324117064476, - "learning_rate": 0.00019999157908500649, - "loss": 46.0, - "step": 25662 - }, - { - "epoch": 4.132855589999598, - "grad_norm": 0.002790663857012987, - "learning_rate": 0.00019999157842845252, - "loss": 46.0, - "step": 25663 - }, - { - "epoch": 4.133016627078385, - "grad_norm": 0.007841236889362335, - "learning_rate": 0.00019999157777187302, - "loss": 46.0, - "step": 25664 - }, - { - "epoch": 4.133177664157172, - "grad_norm": 0.010113999247550964, - "learning_rate": 0.00019999157711526785, - "loss": 46.0, - "step": 25665 - }, - { - "epoch": 4.133338701235959, - "grad_norm": 0.010194149799644947, - "learning_rate": 0.00019999157645863712, - "loss": 46.0, - "step": 25666 - }, - { - "epoch": 4.133499738314747, - "grad_norm": 0.0036781923845410347, - "learning_rate": 0.00019999157580198083, - "loss": 46.0, - "step": 25667 - }, - { - "epoch": 4.133660775393534, - "grad_norm": 0.005262341815978289, - "learning_rate": 0.0001999915751452989, - "loss": 46.0, - "step": 25668 - }, - { - "epoch": 4.133821812472322, - "grad_norm": 0.00517252879217267, - "learning_rate": 0.0001999915744885914, - "loss": 46.0, - "step": 25669 - }, - { - "epoch": 4.133982849551109, - "grad_norm": 0.00330323725938797, - "learning_rate": 0.00019999157383185828, - "loss": 46.0, - "step": 25670 - }, - { - "epoch": 4.1341438866298965, - "grad_norm": 0.0017552450299263, - "learning_rate": 0.00019999157317509962, - "loss": 46.0, - "step": 25671 - }, - { - "epoch": 4.134304923708684, - "grad_norm": 0.005632500629872084, - "learning_rate": 0.00019999157251831534, - "loss": 46.0, - "step": 25672 - }, - { - "epoch": 4.134465960787471, - "grad_norm": 0.004503052681684494, - "learning_rate": 0.00019999157186150545, - "loss": 46.0, - "step": 25673 - }, - { - "epoch": 4.134626997866259, - "grad_norm": 0.01812112331390381, - "learning_rate": 0.00019999157120467, - "loss": 46.0, - "step": 25674 - }, - { - "epoch": 4.134788034945046, - "grad_norm": 0.0025516224559396505, - "learning_rate": 0.00019999157054780895, - "loss": 46.0, - "step": 25675 - }, - { - "epoch": 4.134949072023834, - "grad_norm": 0.004877248778939247, - "learning_rate": 0.00019999156989092227, - "loss": 46.0, - "step": 25676 - }, - { - "epoch": 4.135110109102621, - "grad_norm": 0.0030104005709290504, - "learning_rate": 0.00019999156923401003, - "loss": 46.0, - "step": 25677 - }, - { - "epoch": 4.135271146181409, - "grad_norm": 0.0052670701406896114, - "learning_rate": 0.0001999915685770722, - "loss": 46.0, - "step": 25678 - }, - { - "epoch": 4.135432183260195, - "grad_norm": 0.004815518390387297, - "learning_rate": 0.00019999156792010876, - "loss": 46.0, - "step": 25679 - }, - { - "epoch": 4.135593220338983, - "grad_norm": 0.011307308450341225, - "learning_rate": 0.00019999156726311973, - "loss": 46.0, - "step": 25680 - }, - { - "epoch": 4.13575425741777, - "grad_norm": 0.0015974113484844565, - "learning_rate": 0.00019999156660610514, - "loss": 46.0, - "step": 25681 - }, - { - "epoch": 4.135915294496558, - "grad_norm": 0.0063629476353526115, - "learning_rate": 0.0001999915659490649, - "loss": 46.0, - "step": 25682 - }, - { - "epoch": 4.136076331575345, - "grad_norm": 0.006022559013217688, - "learning_rate": 0.0001999915652919991, - "loss": 46.0, - "step": 25683 - }, - { - "epoch": 4.1362373686541325, - "grad_norm": 0.0039491672068834305, - "learning_rate": 0.0001999915646349077, - "loss": 46.0, - "step": 25684 - }, - { - "epoch": 4.13639840573292, - "grad_norm": 0.004771003033965826, - "learning_rate": 0.00019999156397779074, - "loss": 46.0, - "step": 25685 - }, - { - "epoch": 4.136559442811707, - "grad_norm": 0.0039065321907401085, - "learning_rate": 0.00019999156332064816, - "loss": 46.0, - "step": 25686 - }, - { - "epoch": 4.136720479890495, - "grad_norm": 0.0047430070117115974, - "learning_rate": 0.00019999156266348, - "loss": 46.0, - "step": 25687 - }, - { - "epoch": 4.136881516969282, - "grad_norm": 0.00212700292468071, - "learning_rate": 0.0001999915620062862, - "loss": 46.0, - "step": 25688 - }, - { - "epoch": 4.13704255404807, - "grad_norm": 0.001397873042151332, - "learning_rate": 0.00019999156134906687, - "loss": 46.0, - "step": 25689 - }, - { - "epoch": 4.137203591126857, - "grad_norm": 0.000960984849371016, - "learning_rate": 0.0001999915606918219, - "loss": 46.0, - "step": 25690 - }, - { - "epoch": 4.137364628205645, - "grad_norm": 0.0022925168741494417, - "learning_rate": 0.00019999156003455134, - "loss": 46.0, - "step": 25691 - }, - { - "epoch": 4.137525665284432, - "grad_norm": 0.010183616541326046, - "learning_rate": 0.0001999915593772552, - "loss": 46.0, - "step": 25692 - }, - { - "epoch": 4.13768670236322, - "grad_norm": 0.009615905582904816, - "learning_rate": 0.00019999155871993352, - "loss": 46.0, - "step": 25693 - }, - { - "epoch": 4.137847739442006, - "grad_norm": 0.0008418922079727054, - "learning_rate": 0.0001999915580625862, - "loss": 46.0, - "step": 25694 - }, - { - "epoch": 4.138008776520794, - "grad_norm": 0.003879308234900236, - "learning_rate": 0.00019999155740521327, - "loss": 46.0, - "step": 25695 - }, - { - "epoch": 4.138169813599581, - "grad_norm": 0.003745253663510084, - "learning_rate": 0.00019999155674781476, - "loss": 46.0, - "step": 25696 - }, - { - "epoch": 4.1383308506783685, - "grad_norm": 0.002811028389260173, - "learning_rate": 0.00019999155609039067, - "loss": 46.0, - "step": 25697 - }, - { - "epoch": 4.138491887757156, - "grad_norm": 0.014671149663627148, - "learning_rate": 0.00019999155543294096, - "loss": 46.0, - "step": 25698 - }, - { - "epoch": 4.138652924835943, - "grad_norm": 0.003004190046340227, - "learning_rate": 0.00019999155477546567, - "loss": 46.0, - "step": 25699 - }, - { - "epoch": 4.138813961914731, - "grad_norm": 0.009994974359869957, - "learning_rate": 0.0001999915541179648, - "loss": 46.0, - "step": 25700 - }, - { - "epoch": 4.138974998993518, - "grad_norm": 0.003434967016801238, - "learning_rate": 0.00019999155346043832, - "loss": 46.0, - "step": 25701 - }, - { - "epoch": 4.139136036072306, - "grad_norm": 0.004942676518112421, - "learning_rate": 0.00019999155280288626, - "loss": 46.0, - "step": 25702 - }, - { - "epoch": 4.139297073151093, - "grad_norm": 0.0008012540056370199, - "learning_rate": 0.00019999155214530862, - "loss": 46.0, - "step": 25703 - }, - { - "epoch": 4.139458110229881, - "grad_norm": 0.007842950522899628, - "learning_rate": 0.00019999155148770536, - "loss": 46.0, - "step": 25704 - }, - { - "epoch": 4.139619147308668, - "grad_norm": 0.0017749844118952751, - "learning_rate": 0.00019999155083007651, - "loss": 46.0, - "step": 25705 - }, - { - "epoch": 4.1397801843874555, - "grad_norm": 0.010829790495336056, - "learning_rate": 0.00019999155017242208, - "loss": 46.0, - "step": 25706 - }, - { - "epoch": 4.139941221466243, - "grad_norm": 0.001926140277646482, - "learning_rate": 0.00019999154951474206, - "loss": 46.0, - "step": 25707 - }, - { - "epoch": 4.14010225854503, - "grad_norm": 0.001501739607192576, - "learning_rate": 0.00019999154885703643, - "loss": 46.0, - "step": 25708 - }, - { - "epoch": 4.140263295623817, - "grad_norm": 0.0071657029911875725, - "learning_rate": 0.00019999154819930524, - "loss": 46.0, - "step": 25709 - }, - { - "epoch": 4.1404243327026045, - "grad_norm": 0.0005854924093000591, - "learning_rate": 0.0001999915475415484, - "loss": 46.0, - "step": 25710 - }, - { - "epoch": 4.140585369781392, - "grad_norm": 0.008494430221617222, - "learning_rate": 0.00019999154688376603, - "loss": 46.0, - "step": 25711 - }, - { - "epoch": 4.140746406860179, - "grad_norm": 0.005708741489797831, - "learning_rate": 0.00019999154622595805, - "loss": 46.0, - "step": 25712 - }, - { - "epoch": 4.140907443938967, - "grad_norm": 0.001477561891078949, - "learning_rate": 0.00019999154556812445, - "loss": 46.0, - "step": 25713 - }, - { - "epoch": 4.141068481017754, - "grad_norm": 0.003391193225979805, - "learning_rate": 0.00019999154491026527, - "loss": 46.0, - "step": 25714 - }, - { - "epoch": 4.141229518096542, - "grad_norm": 0.010698160156607628, - "learning_rate": 0.0001999915442523805, - "loss": 46.0, - "step": 25715 - }, - { - "epoch": 4.141390555175329, - "grad_norm": 0.007413472048938274, - "learning_rate": 0.00019999154359447014, - "loss": 46.0, - "step": 25716 - }, - { - "epoch": 4.141551592254117, - "grad_norm": 0.005606756079941988, - "learning_rate": 0.0001999915429365342, - "loss": 46.0, - "step": 25717 - }, - { - "epoch": 4.141712629332904, - "grad_norm": 0.0029826743993908167, - "learning_rate": 0.00019999154227857264, - "loss": 46.0, - "step": 25718 - }, - { - "epoch": 4.1418736664116915, - "grad_norm": 0.0037592726293951273, - "learning_rate": 0.00019999154162058552, - "loss": 46.0, - "step": 25719 - }, - { - "epoch": 4.142034703490479, - "grad_norm": 0.001650240388698876, - "learning_rate": 0.00019999154096257278, - "loss": 46.0, - "step": 25720 - }, - { - "epoch": 4.142195740569266, - "grad_norm": 0.0015117531875148416, - "learning_rate": 0.00019999154030453443, - "loss": 46.0, - "step": 25721 - }, - { - "epoch": 4.142356777648054, - "grad_norm": 0.007537306286394596, - "learning_rate": 0.00019999153964647053, - "loss": 46.0, - "step": 25722 - }, - { - "epoch": 4.14251781472684, - "grad_norm": 0.007101949770003557, - "learning_rate": 0.00019999153898838103, - "loss": 46.0, - "step": 25723 - }, - { - "epoch": 4.142678851805628, - "grad_norm": 0.0075798360630869865, - "learning_rate": 0.00019999153833026592, - "loss": 46.0, - "step": 25724 - }, - { - "epoch": 4.142839888884415, - "grad_norm": 0.00363737135194242, - "learning_rate": 0.00019999153767212522, - "loss": 46.0, - "step": 25725 - }, - { - "epoch": 4.143000925963203, - "grad_norm": 0.0017418122151866555, - "learning_rate": 0.00019999153701395894, - "loss": 46.0, - "step": 25726 - }, - { - "epoch": 4.14316196304199, - "grad_norm": 0.004063683561980724, - "learning_rate": 0.00019999153635576704, - "loss": 46.0, - "step": 25727 - }, - { - "epoch": 4.143323000120778, - "grad_norm": 0.0010686945170164108, - "learning_rate": 0.00019999153569754958, - "loss": 46.0, - "step": 25728 - }, - { - "epoch": 4.143484037199565, - "grad_norm": 0.0055069816298782825, - "learning_rate": 0.0001999915350393065, - "loss": 46.0, - "step": 25729 - }, - { - "epoch": 4.143645074278353, - "grad_norm": 0.005126043688505888, - "learning_rate": 0.00019999153438103785, - "loss": 46.0, - "step": 25730 - }, - { - "epoch": 4.14380611135714, - "grad_norm": 0.004279494751244783, - "learning_rate": 0.0001999915337227436, - "loss": 46.0, - "step": 25731 - }, - { - "epoch": 4.1439671484359275, - "grad_norm": 0.00860437098890543, - "learning_rate": 0.00019999153306442374, - "loss": 46.0, - "step": 25732 - }, - { - "epoch": 4.144128185514715, - "grad_norm": 0.0013098431518301368, - "learning_rate": 0.00019999153240607832, - "loss": 46.0, - "step": 25733 - }, - { - "epoch": 4.144289222593502, - "grad_norm": 0.014905581250786781, - "learning_rate": 0.00019999153174770728, - "loss": 46.0, - "step": 25734 - }, - { - "epoch": 4.14445025967229, - "grad_norm": 0.004219368100166321, - "learning_rate": 0.00019999153108931066, - "loss": 46.0, - "step": 25735 - }, - { - "epoch": 4.144611296751077, - "grad_norm": 0.0006310707540251315, - "learning_rate": 0.00019999153043088845, - "loss": 46.0, - "step": 25736 - }, - { - "epoch": 4.144772333829865, - "grad_norm": 0.0008926604641601443, - "learning_rate": 0.00019999152977244065, - "loss": 46.0, - "step": 25737 - }, - { - "epoch": 4.144933370908651, - "grad_norm": 0.005972984712570906, - "learning_rate": 0.00019999152911396727, - "loss": 46.0, - "step": 25738 - }, - { - "epoch": 4.145094407987439, - "grad_norm": 0.0018255076138302684, - "learning_rate": 0.00019999152845546827, - "loss": 46.0, - "step": 25739 - }, - { - "epoch": 4.145255445066226, - "grad_norm": 0.001713650650344789, - "learning_rate": 0.00019999152779694366, - "loss": 46.0, - "step": 25740 - }, - { - "epoch": 4.145416482145014, - "grad_norm": 0.0027448616456240416, - "learning_rate": 0.00019999152713839349, - "loss": 46.0, - "step": 25741 - }, - { - "epoch": 4.145577519223801, - "grad_norm": 0.0012780750403180718, - "learning_rate": 0.00019999152647981773, - "loss": 46.0, - "step": 25742 - }, - { - "epoch": 4.145738556302589, - "grad_norm": 0.0037260556127876043, - "learning_rate": 0.00019999152582121635, - "loss": 46.0, - "step": 25743 - }, - { - "epoch": 4.145899593381376, - "grad_norm": 0.0010752436937764287, - "learning_rate": 0.00019999152516258942, - "loss": 46.0, - "step": 25744 - }, - { - "epoch": 4.1460606304601635, - "grad_norm": 0.003949676640331745, - "learning_rate": 0.00019999152450393687, - "loss": 46.0, - "step": 25745 - }, - { - "epoch": 4.146221667538951, - "grad_norm": 0.004104393068701029, - "learning_rate": 0.00019999152384525873, - "loss": 46.0, - "step": 25746 - }, - { - "epoch": 4.146382704617738, - "grad_norm": 0.006399809382855892, - "learning_rate": 0.00019999152318655496, - "loss": 46.0, - "step": 25747 - }, - { - "epoch": 4.146543741696526, - "grad_norm": 0.0018477010307833552, - "learning_rate": 0.00019999152252782564, - "loss": 46.0, - "step": 25748 - }, - { - "epoch": 4.146704778775313, - "grad_norm": 0.010605844669044018, - "learning_rate": 0.00019999152186907072, - "loss": 46.0, - "step": 25749 - }, - { - "epoch": 4.146865815854101, - "grad_norm": 0.005574748385697603, - "learning_rate": 0.0001999915212102902, - "loss": 46.0, - "step": 25750 - }, - { - "epoch": 4.147026852932888, - "grad_norm": 0.0009573210845701396, - "learning_rate": 0.00019999152055148408, - "loss": 46.0, - "step": 25751 - }, - { - "epoch": 4.147187890011675, - "grad_norm": 0.005036060698330402, - "learning_rate": 0.0001999915198926524, - "loss": 46.0, - "step": 25752 - }, - { - "epoch": 4.147348927090462, - "grad_norm": 0.005134321749210358, - "learning_rate": 0.00019999151923379512, - "loss": 46.0, - "step": 25753 - }, - { - "epoch": 4.14750996416925, - "grad_norm": 0.0035497955977916718, - "learning_rate": 0.00019999151857491224, - "loss": 46.0, - "step": 25754 - }, - { - "epoch": 4.147671001248037, - "grad_norm": 0.0021118242293596268, - "learning_rate": 0.00019999151791600376, - "loss": 46.0, - "step": 25755 - }, - { - "epoch": 4.147832038326825, - "grad_norm": 0.003920841496437788, - "learning_rate": 0.00019999151725706967, - "loss": 46.0, - "step": 25756 - }, - { - "epoch": 4.147993075405612, - "grad_norm": 0.003977254964411259, - "learning_rate": 0.00019999151659811002, - "loss": 46.0, - "step": 25757 - }, - { - "epoch": 4.1481541124843995, - "grad_norm": 0.0011857778299599886, - "learning_rate": 0.0001999915159391248, - "loss": 46.0, - "step": 25758 - }, - { - "epoch": 4.148315149563187, - "grad_norm": 0.004219596274197102, - "learning_rate": 0.0001999915152801139, - "loss": 46.0, - "step": 25759 - }, - { - "epoch": 4.148476186641974, - "grad_norm": 0.003712916513904929, - "learning_rate": 0.00019999151462107747, - "loss": 46.0, - "step": 25760 - }, - { - "epoch": 4.148637223720762, - "grad_norm": 0.01058608666062355, - "learning_rate": 0.00019999151396201545, - "loss": 46.0, - "step": 25761 - }, - { - "epoch": 4.148798260799549, - "grad_norm": 0.0012363122077658772, - "learning_rate": 0.00019999151330292784, - "loss": 46.0, - "step": 25762 - }, - { - "epoch": 4.148959297878337, - "grad_norm": 0.0029525563586503267, - "learning_rate": 0.00019999151264381458, - "loss": 46.0, - "step": 25763 - }, - { - "epoch": 4.149120334957124, - "grad_norm": 0.007817268371582031, - "learning_rate": 0.0001999915119846758, - "loss": 46.0, - "step": 25764 - }, - { - "epoch": 4.149281372035912, - "grad_norm": 0.00783733930438757, - "learning_rate": 0.0001999915113255114, - "loss": 46.0, - "step": 25765 - }, - { - "epoch": 4.149442409114699, - "grad_norm": 0.004292396828532219, - "learning_rate": 0.0001999915106663214, - "loss": 46.0, - "step": 25766 - }, - { - "epoch": 4.149603446193486, - "grad_norm": 0.005185423418879509, - "learning_rate": 0.0001999915100071058, - "loss": 46.0, - "step": 25767 - }, - { - "epoch": 4.149764483272273, - "grad_norm": 0.0025544508825987577, - "learning_rate": 0.00019999150934786464, - "loss": 46.0, - "step": 25768 - }, - { - "epoch": 4.1499255203510605, - "grad_norm": 0.019969942048192024, - "learning_rate": 0.00019999150868859787, - "loss": 46.0, - "step": 25769 - }, - { - "epoch": 4.150086557429848, - "grad_norm": 0.004226475954055786, - "learning_rate": 0.00019999150802930548, - "loss": 46.0, - "step": 25770 - }, - { - "epoch": 4.1502475945086355, - "grad_norm": 0.003954090178012848, - "learning_rate": 0.00019999150736998753, - "loss": 46.0, - "step": 25771 - }, - { - "epoch": 4.150408631587423, - "grad_norm": 0.011470174416899681, - "learning_rate": 0.00019999150671064396, - "loss": 46.0, - "step": 25772 - }, - { - "epoch": 4.15056966866621, - "grad_norm": 0.0014393340097740293, - "learning_rate": 0.00019999150605127484, - "loss": 46.0, - "step": 25773 - }, - { - "epoch": 4.150730705744998, - "grad_norm": 0.021243076771497726, - "learning_rate": 0.00019999150539188007, - "loss": 46.0, - "step": 25774 - }, - { - "epoch": 4.150891742823785, - "grad_norm": 0.011256651021540165, - "learning_rate": 0.00019999150473245974, - "loss": 46.0, - "step": 25775 - }, - { - "epoch": 4.151052779902573, - "grad_norm": 0.011080256663262844, - "learning_rate": 0.00019999150407301383, - "loss": 46.0, - "step": 25776 - }, - { - "epoch": 4.15121381698136, - "grad_norm": 0.004300955682992935, - "learning_rate": 0.0001999915034135423, - "loss": 46.0, - "step": 25777 - }, - { - "epoch": 4.151374854060148, - "grad_norm": 0.002081675687804818, - "learning_rate": 0.0001999915027540452, - "loss": 46.0, - "step": 25778 - }, - { - "epoch": 4.151535891138935, - "grad_norm": 0.002691699657589197, - "learning_rate": 0.0001999915020945225, - "loss": 46.0, - "step": 25779 - }, - { - "epoch": 4.1516969282177225, - "grad_norm": 0.0034728620667010546, - "learning_rate": 0.0001999915014349742, - "loss": 46.0, - "step": 25780 - }, - { - "epoch": 4.151857965296509, - "grad_norm": 0.004659638274461031, - "learning_rate": 0.00019999150077540034, - "loss": 46.0, - "step": 25781 - }, - { - "epoch": 4.1520190023752965, - "grad_norm": 0.0035389536060392857, - "learning_rate": 0.00019999150011580085, - "loss": 46.0, - "step": 25782 - }, - { - "epoch": 4.152180039454084, - "grad_norm": 0.003957580309361219, - "learning_rate": 0.0001999914994561758, - "loss": 46.0, - "step": 25783 - }, - { - "epoch": 4.152341076532871, - "grad_norm": 0.00241960771381855, - "learning_rate": 0.0001999914987965251, - "loss": 46.0, - "step": 25784 - }, - { - "epoch": 4.152502113611659, - "grad_norm": 0.008114400319755077, - "learning_rate": 0.00019999149813684886, - "loss": 46.0, - "step": 25785 - }, - { - "epoch": 4.152663150690446, - "grad_norm": 0.0025531696155667305, - "learning_rate": 0.00019999149747714702, - "loss": 46.0, - "step": 25786 - }, - { - "epoch": 4.152824187769234, - "grad_norm": 0.003837055992335081, - "learning_rate": 0.00019999149681741956, - "loss": 46.0, - "step": 25787 - }, - { - "epoch": 4.152985224848021, - "grad_norm": 0.00905930157750845, - "learning_rate": 0.00019999149615766652, - "loss": 46.0, - "step": 25788 - }, - { - "epoch": 4.153146261926809, - "grad_norm": 0.00726577453315258, - "learning_rate": 0.00019999149549788792, - "loss": 46.0, - "step": 25789 - }, - { - "epoch": 4.153307299005596, - "grad_norm": 0.002783138770610094, - "learning_rate": 0.00019999149483808368, - "loss": 46.0, - "step": 25790 - }, - { - "epoch": 4.153468336084384, - "grad_norm": 0.001132084522396326, - "learning_rate": 0.00019999149417825388, - "loss": 46.0, - "step": 25791 - }, - { - "epoch": 4.153629373163171, - "grad_norm": 0.0028803006280213594, - "learning_rate": 0.00019999149351839846, - "loss": 46.0, - "step": 25792 - }, - { - "epoch": 4.1537904102419585, - "grad_norm": 0.003320316318422556, - "learning_rate": 0.00019999149285851746, - "loss": 46.0, - "step": 25793 - }, - { - "epoch": 4.153951447320746, - "grad_norm": 0.011789404787123203, - "learning_rate": 0.00019999149219861087, - "loss": 46.0, - "step": 25794 - }, - { - "epoch": 4.154112484399533, - "grad_norm": 0.0020085216965526342, - "learning_rate": 0.0001999914915386787, - "loss": 46.0, - "step": 25795 - }, - { - "epoch": 4.15427352147832, - "grad_norm": 0.02010505087673664, - "learning_rate": 0.00019999149087872092, - "loss": 46.0, - "step": 25796 - }, - { - "epoch": 4.154434558557107, - "grad_norm": 0.0045603010803461075, - "learning_rate": 0.00019999149021873754, - "loss": 46.0, - "step": 25797 - }, - { - "epoch": 4.154595595635895, - "grad_norm": 0.0024973773397505283, - "learning_rate": 0.00019999148955872857, - "loss": 46.0, - "step": 25798 - }, - { - "epoch": 4.154756632714682, - "grad_norm": 0.0042458027601242065, - "learning_rate": 0.00019999148889869402, - "loss": 46.0, - "step": 25799 - }, - { - "epoch": 4.15491766979347, - "grad_norm": 0.0034933092538267374, - "learning_rate": 0.00019999148823863388, - "loss": 46.0, - "step": 25800 - }, - { - "epoch": 4.155078706872257, - "grad_norm": 0.005349744576960802, - "learning_rate": 0.00019999148757854812, - "loss": 46.0, - "step": 25801 - }, - { - "epoch": 4.155239743951045, - "grad_norm": 0.006178024224936962, - "learning_rate": 0.0001999914869184368, - "loss": 46.0, - "step": 25802 - }, - { - "epoch": 4.155400781029832, - "grad_norm": 0.00198973435908556, - "learning_rate": 0.00019999148625829988, - "loss": 46.0, - "step": 25803 - }, - { - "epoch": 4.15556181810862, - "grad_norm": 0.0013105213874951005, - "learning_rate": 0.00019999148559813733, - "loss": 46.0, - "step": 25804 - }, - { - "epoch": 4.155722855187407, - "grad_norm": 0.0008887567091733217, - "learning_rate": 0.00019999148493794926, - "loss": 46.0, - "step": 25805 - }, - { - "epoch": 4.1558838922661945, - "grad_norm": 0.00624180119484663, - "learning_rate": 0.00019999148427773556, - "loss": 46.0, - "step": 25806 - }, - { - "epoch": 4.156044929344982, - "grad_norm": 0.004844765178859234, - "learning_rate": 0.00019999148361749623, - "loss": 46.0, - "step": 25807 - }, - { - "epoch": 4.156205966423769, - "grad_norm": 0.006591136567294598, - "learning_rate": 0.00019999148295723137, - "loss": 46.0, - "step": 25808 - }, - { - "epoch": 4.156367003502557, - "grad_norm": 0.004222767893224955, - "learning_rate": 0.00019999148229694086, - "loss": 46.0, - "step": 25809 - }, - { - "epoch": 4.156528040581344, - "grad_norm": 0.0019136391347274184, - "learning_rate": 0.0001999914816366248, - "loss": 46.0, - "step": 25810 - }, - { - "epoch": 4.156689077660131, - "grad_norm": 0.001856248127296567, - "learning_rate": 0.00019999148097628314, - "loss": 46.0, - "step": 25811 - }, - { - "epoch": 4.156850114738918, - "grad_norm": 0.008216975256800652, - "learning_rate": 0.00019999148031591587, - "loss": 46.0, - "step": 25812 - }, - { - "epoch": 4.157011151817706, - "grad_norm": 0.006173674017190933, - "learning_rate": 0.00019999147965552304, - "loss": 46.0, - "step": 25813 - }, - { - "epoch": 4.157172188896493, - "grad_norm": 0.003741530468687415, - "learning_rate": 0.00019999147899510457, - "loss": 46.0, - "step": 25814 - }, - { - "epoch": 4.157333225975281, - "grad_norm": 0.005394606851041317, - "learning_rate": 0.00019999147833466054, - "loss": 46.0, - "step": 25815 - }, - { - "epoch": 4.157494263054068, - "grad_norm": 0.0011143069714307785, - "learning_rate": 0.0001999914776741909, - "loss": 46.0, - "step": 25816 - }, - { - "epoch": 4.157655300132856, - "grad_norm": 0.001310808351263404, - "learning_rate": 0.0001999914770136957, - "loss": 46.0, - "step": 25817 - }, - { - "epoch": 4.157816337211643, - "grad_norm": 0.002002993132919073, - "learning_rate": 0.00019999147635317484, - "loss": 46.0, - "step": 25818 - }, - { - "epoch": 4.1579773742904305, - "grad_norm": 0.003437912557274103, - "learning_rate": 0.00019999147569262844, - "loss": 46.0, - "step": 25819 - }, - { - "epoch": 4.158138411369218, - "grad_norm": 0.007866820320487022, - "learning_rate": 0.00019999147503205645, - "loss": 46.0, - "step": 25820 - }, - { - "epoch": 4.158299448448005, - "grad_norm": 0.00494722742587328, - "learning_rate": 0.00019999147437145887, - "loss": 46.0, - "step": 25821 - }, - { - "epoch": 4.158460485526793, - "grad_norm": 0.0016632643528282642, - "learning_rate": 0.00019999147371083567, - "loss": 46.0, - "step": 25822 - }, - { - "epoch": 4.15862152260558, - "grad_norm": 0.0009520756429992616, - "learning_rate": 0.0001999914730501869, - "loss": 46.0, - "step": 25823 - }, - { - "epoch": 4.158782559684368, - "grad_norm": 0.014313461259007454, - "learning_rate": 0.00019999147238951252, - "loss": 46.0, - "step": 25824 - }, - { - "epoch": 4.158943596763155, - "grad_norm": 0.0028570096474140882, - "learning_rate": 0.00019999147172881254, - "loss": 46.0, - "step": 25825 - }, - { - "epoch": 4.159104633841942, - "grad_norm": 0.002582564949989319, - "learning_rate": 0.000199991471068087, - "loss": 46.0, - "step": 25826 - }, - { - "epoch": 4.159265670920729, - "grad_norm": 0.003907115198671818, - "learning_rate": 0.00019999147040733584, - "loss": 46.0, - "step": 25827 - }, - { - "epoch": 4.159426707999517, - "grad_norm": 0.0027338529471307993, - "learning_rate": 0.00019999146974655912, - "loss": 46.0, - "step": 25828 - }, - { - "epoch": 4.159587745078304, - "grad_norm": 0.007554525509476662, - "learning_rate": 0.00019999146908575676, - "loss": 46.0, - "step": 25829 - }, - { - "epoch": 4.1597487821570915, - "grad_norm": 0.0034019004087895155, - "learning_rate": 0.00019999146842492884, - "loss": 46.0, - "step": 25830 - }, - { - "epoch": 4.159909819235879, - "grad_norm": 0.006143419072031975, - "learning_rate": 0.00019999146776407533, - "loss": 46.0, - "step": 25831 - }, - { - "epoch": 4.160070856314666, - "grad_norm": 0.0030050959903746843, - "learning_rate": 0.0001999914671031962, - "loss": 46.0, - "step": 25832 - }, - { - "epoch": 4.160231893393454, - "grad_norm": 0.004732530098408461, - "learning_rate": 0.0001999914664422915, - "loss": 46.0, - "step": 25833 - }, - { - "epoch": 4.160392930472241, - "grad_norm": 0.00803361739963293, - "learning_rate": 0.0001999914657813612, - "loss": 46.0, - "step": 25834 - }, - { - "epoch": 4.160553967551029, - "grad_norm": 0.0039372858591377735, - "learning_rate": 0.0001999914651204053, - "loss": 46.0, - "step": 25835 - }, - { - "epoch": 4.160715004629816, - "grad_norm": 0.006818860769271851, - "learning_rate": 0.00019999146445942383, - "loss": 46.0, - "step": 25836 - }, - { - "epoch": 4.160876041708604, - "grad_norm": 0.0009600259363651276, - "learning_rate": 0.00019999146379841672, - "loss": 46.0, - "step": 25837 - }, - { - "epoch": 4.161037078787391, - "grad_norm": 0.0019614805933088064, - "learning_rate": 0.00019999146313738408, - "loss": 46.0, - "step": 25838 - }, - { - "epoch": 4.161198115866179, - "grad_norm": 0.003422726644203067, - "learning_rate": 0.00019999146247632582, - "loss": 46.0, - "step": 25839 - }, - { - "epoch": 4.161359152944965, - "grad_norm": 0.01131085678935051, - "learning_rate": 0.00019999146181524195, - "loss": 46.0, - "step": 25840 - }, - { - "epoch": 4.161520190023753, - "grad_norm": 0.0016847657971084118, - "learning_rate": 0.00019999146115413252, - "loss": 46.0, - "step": 25841 - }, - { - "epoch": 4.16168122710254, - "grad_norm": 0.0014023496769368649, - "learning_rate": 0.00019999146049299747, - "loss": 46.0, - "step": 25842 - }, - { - "epoch": 4.1618422641813275, - "grad_norm": 0.0047528911381959915, - "learning_rate": 0.00019999145983183684, - "loss": 46.0, - "step": 25843 - }, - { - "epoch": 4.162003301260115, - "grad_norm": 0.002841240493580699, - "learning_rate": 0.00019999145917065062, - "loss": 46.0, - "step": 25844 - }, - { - "epoch": 4.162164338338902, - "grad_norm": 0.005046960897743702, - "learning_rate": 0.00019999145850943878, - "loss": 46.0, - "step": 25845 - }, - { - "epoch": 4.16232537541769, - "grad_norm": 0.001826892956160009, - "learning_rate": 0.00019999145784820139, - "loss": 46.0, - "step": 25846 - }, - { - "epoch": 4.162486412496477, - "grad_norm": 0.0008144687162712216, - "learning_rate": 0.0001999914571869384, - "loss": 46.0, - "step": 25847 - }, - { - "epoch": 4.162647449575265, - "grad_norm": 0.002259097760543227, - "learning_rate": 0.00019999145652564978, - "loss": 46.0, - "step": 25848 - }, - { - "epoch": 4.162808486654052, - "grad_norm": 0.008259464055299759, - "learning_rate": 0.0001999914558643356, - "loss": 46.0, - "step": 25849 - }, - { - "epoch": 4.16296952373284, - "grad_norm": 0.009847515262663364, - "learning_rate": 0.0001999914552029958, - "loss": 46.0, - "step": 25850 - }, - { - "epoch": 4.163130560811627, - "grad_norm": 0.0024707375559955835, - "learning_rate": 0.0001999914545416304, - "loss": 46.0, - "step": 25851 - }, - { - "epoch": 4.163291597890415, - "grad_norm": 0.001153877703472972, - "learning_rate": 0.00019999145388023947, - "loss": 46.0, - "step": 25852 - }, - { - "epoch": 4.163452634969202, - "grad_norm": 0.0018085596384480596, - "learning_rate": 0.0001999914532188229, - "loss": 46.0, - "step": 25853 - }, - { - "epoch": 4.163613672047989, - "grad_norm": 0.004764970857650042, - "learning_rate": 0.00019999145255738076, - "loss": 46.0, - "step": 25854 - }, - { - "epoch": 4.163774709126776, - "grad_norm": 0.0024147185031324625, - "learning_rate": 0.000199991451895913, - "loss": 46.0, - "step": 25855 - }, - { - "epoch": 4.1639357462055635, - "grad_norm": 0.005599981173872948, - "learning_rate": 0.00019999145123441968, - "loss": 46.0, - "step": 25856 - }, - { - "epoch": 4.164096783284351, - "grad_norm": 0.0013498428743332624, - "learning_rate": 0.00019999145057290074, - "loss": 46.0, - "step": 25857 - }, - { - "epoch": 4.164257820363138, - "grad_norm": 0.006182997953146696, - "learning_rate": 0.00019999144991135622, - "loss": 46.0, - "step": 25858 - }, - { - "epoch": 4.164418857441926, - "grad_norm": 0.0023755005095154047, - "learning_rate": 0.0001999914492497861, - "loss": 46.0, - "step": 25859 - }, - { - "epoch": 4.164579894520713, - "grad_norm": 0.008315110579133034, - "learning_rate": 0.0001999914485881904, - "loss": 46.0, - "step": 25860 - }, - { - "epoch": 4.164740931599501, - "grad_norm": 0.009556795470416546, - "learning_rate": 0.00019999144792656908, - "loss": 46.0, - "step": 25861 - }, - { - "epoch": 4.164901968678288, - "grad_norm": 0.002784101292490959, - "learning_rate": 0.0001999914472649222, - "loss": 46.0, - "step": 25862 - }, - { - "epoch": 4.165063005757076, - "grad_norm": 0.00457377266138792, - "learning_rate": 0.00019999144660324972, - "loss": 46.0, - "step": 25863 - }, - { - "epoch": 4.165224042835863, - "grad_norm": 0.002098704455420375, - "learning_rate": 0.00019999144594155165, - "loss": 46.0, - "step": 25864 - }, - { - "epoch": 4.165385079914651, - "grad_norm": 0.002842725021764636, - "learning_rate": 0.00019999144527982796, - "loss": 46.0, - "step": 25865 - }, - { - "epoch": 4.165546116993438, - "grad_norm": 0.0026035672053694725, - "learning_rate": 0.0001999914446180787, - "loss": 46.0, - "step": 25866 - }, - { - "epoch": 4.1657071540722255, - "grad_norm": 0.008512424305081367, - "learning_rate": 0.00019999144395630385, - "loss": 46.0, - "step": 25867 - }, - { - "epoch": 4.165868191151013, - "grad_norm": 0.0016673884820193052, - "learning_rate": 0.00019999144329450338, - "loss": 46.0, - "step": 25868 - }, - { - "epoch": 4.1660292282297995, - "grad_norm": 0.00853794626891613, - "learning_rate": 0.00019999144263267734, - "loss": 46.0, - "step": 25869 - }, - { - "epoch": 4.166190265308587, - "grad_norm": 0.005087920464575291, - "learning_rate": 0.00019999144197082572, - "loss": 46.0, - "step": 25870 - }, - { - "epoch": 4.166351302387374, - "grad_norm": 0.004212118219584227, - "learning_rate": 0.00019999144130894848, - "loss": 46.0, - "step": 25871 - }, - { - "epoch": 4.166512339466162, - "grad_norm": 0.005717687774449587, - "learning_rate": 0.00019999144064704566, - "loss": 46.0, - "step": 25872 - }, - { - "epoch": 4.166673376544949, - "grad_norm": 0.004749008920043707, - "learning_rate": 0.00019999143998511725, - "loss": 46.0, - "step": 25873 - }, - { - "epoch": 4.166834413623737, - "grad_norm": 0.003818746190518141, - "learning_rate": 0.00019999143932316322, - "loss": 46.0, - "step": 25874 - }, - { - "epoch": 4.166995450702524, - "grad_norm": 0.011723394505679607, - "learning_rate": 0.00019999143866118363, - "loss": 46.0, - "step": 25875 - }, - { - "epoch": 4.167156487781312, - "grad_norm": 0.003460391191765666, - "learning_rate": 0.00019999143799917843, - "loss": 46.0, - "step": 25876 - }, - { - "epoch": 4.167317524860099, - "grad_norm": 0.0020264191552996635, - "learning_rate": 0.00019999143733714765, - "loss": 46.0, - "step": 25877 - }, - { - "epoch": 4.1674785619388865, - "grad_norm": 0.0030912510119378567, - "learning_rate": 0.00019999143667509127, - "loss": 46.0, - "step": 25878 - }, - { - "epoch": 4.167639599017674, - "grad_norm": 0.016076941043138504, - "learning_rate": 0.0001999914360130093, - "loss": 46.0, - "step": 25879 - }, - { - "epoch": 4.1678006360964615, - "grad_norm": 0.002340406645089388, - "learning_rate": 0.00019999143535090173, - "loss": 46.0, - "step": 25880 - }, - { - "epoch": 4.167961673175249, - "grad_norm": 0.002248638542369008, - "learning_rate": 0.0001999914346887686, - "loss": 46.0, - "step": 25881 - }, - { - "epoch": 4.168122710254036, - "grad_norm": 0.004312028642743826, - "learning_rate": 0.00019999143402660985, - "loss": 46.0, - "step": 25882 - }, - { - "epoch": 4.168283747332824, - "grad_norm": 0.007768033538013697, - "learning_rate": 0.00019999143336442548, - "loss": 46.0, - "step": 25883 - }, - { - "epoch": 4.16844478441161, - "grad_norm": 0.008499770425260067, - "learning_rate": 0.00019999143270221556, - "loss": 46.0, - "step": 25884 - }, - { - "epoch": 4.168605821490398, - "grad_norm": 0.0028355568647384644, - "learning_rate": 0.00019999143203998005, - "loss": 46.0, - "step": 25885 - }, - { - "epoch": 4.168766858569185, - "grad_norm": 0.004336782265454531, - "learning_rate": 0.00019999143137771892, - "loss": 46.0, - "step": 25886 - }, - { - "epoch": 4.168927895647973, - "grad_norm": 0.0016734452219679952, - "learning_rate": 0.0001999914307154322, - "loss": 46.0, - "step": 25887 - }, - { - "epoch": 4.16908893272676, - "grad_norm": 0.005384510848671198, - "learning_rate": 0.0001999914300531199, - "loss": 46.0, - "step": 25888 - }, - { - "epoch": 4.169249969805548, - "grad_norm": 0.0032869975548237562, - "learning_rate": 0.00019999142939078202, - "loss": 46.0, - "step": 25889 - }, - { - "epoch": 4.169411006884335, - "grad_norm": 0.002316078171133995, - "learning_rate": 0.00019999142872841855, - "loss": 46.0, - "step": 25890 - }, - { - "epoch": 4.1695720439631225, - "grad_norm": 0.011938927695155144, - "learning_rate": 0.00019999142806602946, - "loss": 46.0, - "step": 25891 - }, - { - "epoch": 4.16973308104191, - "grad_norm": 0.002336437115445733, - "learning_rate": 0.00019999142740361475, - "loss": 46.0, - "step": 25892 - }, - { - "epoch": 4.169894118120697, - "grad_norm": 0.010293380357325077, - "learning_rate": 0.0001999914267411745, - "loss": 46.0, - "step": 25893 - }, - { - "epoch": 4.170055155199485, - "grad_norm": 0.006510206498205662, - "learning_rate": 0.00019999142607870864, - "loss": 46.0, - "step": 25894 - }, - { - "epoch": 4.170216192278272, - "grad_norm": 0.00653754873201251, - "learning_rate": 0.00019999142541621718, - "loss": 46.0, - "step": 25895 - }, - { - "epoch": 4.17037722935706, - "grad_norm": 0.0033379278611391783, - "learning_rate": 0.00019999142475370015, - "loss": 46.0, - "step": 25896 - }, - { - "epoch": 4.170538266435847, - "grad_norm": 0.00745510496199131, - "learning_rate": 0.0001999914240911575, - "loss": 46.0, - "step": 25897 - }, - { - "epoch": 4.170699303514635, - "grad_norm": 0.0014718774473294616, - "learning_rate": 0.0001999914234285893, - "loss": 46.0, - "step": 25898 - }, - { - "epoch": 4.170860340593421, - "grad_norm": 0.0015339652309194207, - "learning_rate": 0.00019999142276599545, - "loss": 46.0, - "step": 25899 - }, - { - "epoch": 4.171021377672209, - "grad_norm": 0.0006713454495184124, - "learning_rate": 0.00019999142210337602, - "loss": 46.0, - "step": 25900 - }, - { - "epoch": 4.171182414750996, - "grad_norm": 0.0007346144411712885, - "learning_rate": 0.00019999142144073103, - "loss": 46.0, - "step": 25901 - }, - { - "epoch": 4.171343451829784, - "grad_norm": 0.004603332839906216, - "learning_rate": 0.00019999142077806043, - "loss": 46.0, - "step": 25902 - }, - { - "epoch": 4.171504488908571, - "grad_norm": 0.023984026163816452, - "learning_rate": 0.0001999914201153642, - "loss": 46.0, - "step": 25903 - }, - { - "epoch": 4.1716655259873585, - "grad_norm": 0.002608900424093008, - "learning_rate": 0.00019999141945264244, - "loss": 46.0, - "step": 25904 - }, - { - "epoch": 4.171826563066146, - "grad_norm": 0.004634843673557043, - "learning_rate": 0.00019999141878989507, - "loss": 46.0, - "step": 25905 - }, - { - "epoch": 4.171987600144933, - "grad_norm": 0.00967329554259777, - "learning_rate": 0.0001999914181271221, - "loss": 46.0, - "step": 25906 - }, - { - "epoch": 4.172148637223721, - "grad_norm": 0.0016136339399963617, - "learning_rate": 0.00019999141746432356, - "loss": 46.0, - "step": 25907 - }, - { - "epoch": 4.172309674302508, - "grad_norm": 0.0045011527836322784, - "learning_rate": 0.00019999141680149938, - "loss": 46.0, - "step": 25908 - }, - { - "epoch": 4.172470711381296, - "grad_norm": 0.0010537662310525775, - "learning_rate": 0.0001999914161386496, - "loss": 46.0, - "step": 25909 - }, - { - "epoch": 4.172631748460083, - "grad_norm": 0.0017520738765597343, - "learning_rate": 0.00019999141547577426, - "loss": 46.0, - "step": 25910 - }, - { - "epoch": 4.172792785538871, - "grad_norm": 0.006552866194397211, - "learning_rate": 0.00019999141481287334, - "loss": 46.0, - "step": 25911 - }, - { - "epoch": 4.172953822617658, - "grad_norm": 0.004470761399716139, - "learning_rate": 0.00019999141414994682, - "loss": 46.0, - "step": 25912 - }, - { - "epoch": 4.173114859696445, - "grad_norm": 0.0013951323926448822, - "learning_rate": 0.00019999141348699467, - "loss": 46.0, - "step": 25913 - }, - { - "epoch": 4.173275896775232, - "grad_norm": 0.004247300326824188, - "learning_rate": 0.000199991412824017, - "loss": 46.0, - "step": 25914 - }, - { - "epoch": 4.17343693385402, - "grad_norm": 0.0028988532721996307, - "learning_rate": 0.00019999141216101368, - "loss": 46.0, - "step": 25915 - }, - { - "epoch": 4.173597970932807, - "grad_norm": 0.0037056265864521265, - "learning_rate": 0.0001999914114979848, - "loss": 46.0, - "step": 25916 - }, - { - "epoch": 4.1737590080115945, - "grad_norm": 0.0030603790655732155, - "learning_rate": 0.0001999914108349303, - "loss": 46.0, - "step": 25917 - }, - { - "epoch": 4.173920045090382, - "grad_norm": 0.004141510464251041, - "learning_rate": 0.0001999914101718502, - "loss": 46.0, - "step": 25918 - }, - { - "epoch": 4.174081082169169, - "grad_norm": 0.004250417463481426, - "learning_rate": 0.00019999140950874454, - "loss": 46.0, - "step": 25919 - }, - { - "epoch": 4.174242119247957, - "grad_norm": 0.012560942210257053, - "learning_rate": 0.00019999140884561327, - "loss": 46.0, - "step": 25920 - }, - { - "epoch": 4.174403156326744, - "grad_norm": 0.0010623267153277993, - "learning_rate": 0.0001999914081824564, - "loss": 46.0, - "step": 25921 - }, - { - "epoch": 4.174564193405532, - "grad_norm": 0.0036591319367289543, - "learning_rate": 0.00019999140751927394, - "loss": 46.0, - "step": 25922 - }, - { - "epoch": 4.174725230484319, - "grad_norm": 0.0076692369766533375, - "learning_rate": 0.0001999914068560659, - "loss": 46.0, - "step": 25923 - }, - { - "epoch": 4.174886267563107, - "grad_norm": 0.0024209062103182077, - "learning_rate": 0.00019999140619283225, - "loss": 46.0, - "step": 25924 - }, - { - "epoch": 4.175047304641894, - "grad_norm": 0.004220735281705856, - "learning_rate": 0.00019999140552957303, - "loss": 46.0, - "step": 25925 - }, - { - "epoch": 4.175208341720682, - "grad_norm": 0.010969248600304127, - "learning_rate": 0.0001999914048662882, - "loss": 46.0, - "step": 25926 - }, - { - "epoch": 4.175369378799468, - "grad_norm": 0.0036004185676574707, - "learning_rate": 0.0001999914042029778, - "loss": 46.0, - "step": 25927 - }, - { - "epoch": 4.175530415878256, - "grad_norm": 0.003311747219413519, - "learning_rate": 0.00019999140353964176, - "loss": 46.0, - "step": 25928 - }, - { - "epoch": 4.175691452957043, - "grad_norm": 0.0037359322886914015, - "learning_rate": 0.00019999140287628016, - "loss": 46.0, - "step": 25929 - }, - { - "epoch": 4.1758524900358305, - "grad_norm": 0.0011967142345383763, - "learning_rate": 0.00019999140221289296, - "loss": 46.0, - "step": 25930 - }, - { - "epoch": 4.176013527114618, - "grad_norm": 0.002566169947385788, - "learning_rate": 0.0001999914015494802, - "loss": 46.0, - "step": 25931 - }, - { - "epoch": 4.176174564193405, - "grad_norm": 0.0010997374774888158, - "learning_rate": 0.00019999140088604181, - "loss": 46.0, - "step": 25932 - }, - { - "epoch": 4.176335601272193, - "grad_norm": 0.004429088439792395, - "learning_rate": 0.00019999140022257782, - "loss": 46.0, - "step": 25933 - }, - { - "epoch": 4.17649663835098, - "grad_norm": 0.004189136903733015, - "learning_rate": 0.00019999139955908827, - "loss": 46.0, - "step": 25934 - }, - { - "epoch": 4.176657675429768, - "grad_norm": 0.0036843728739768267, - "learning_rate": 0.0001999913988955731, - "loss": 46.0, - "step": 25935 - }, - { - "epoch": 4.176818712508555, - "grad_norm": 0.002175193279981613, - "learning_rate": 0.00019999139823203234, - "loss": 46.0, - "step": 25936 - }, - { - "epoch": 4.176979749587343, - "grad_norm": 0.0028209525626152754, - "learning_rate": 0.000199991397568466, - "loss": 46.0, - "step": 25937 - }, - { - "epoch": 4.17714078666613, - "grad_norm": 0.0025836420245468616, - "learning_rate": 0.00019999139690487407, - "loss": 46.0, - "step": 25938 - }, - { - "epoch": 4.1773018237449175, - "grad_norm": 0.0052740322425961494, - "learning_rate": 0.00019999139624125653, - "loss": 46.0, - "step": 25939 - }, - { - "epoch": 4.177462860823705, - "grad_norm": 0.006183016579598188, - "learning_rate": 0.00019999139557761342, - "loss": 46.0, - "step": 25940 - }, - { - "epoch": 4.1776238979024924, - "grad_norm": 0.0026666871272027493, - "learning_rate": 0.0001999913949139447, - "loss": 46.0, - "step": 25941 - }, - { - "epoch": 4.177784934981279, - "grad_norm": 0.0018965437775477767, - "learning_rate": 0.0001999913942502504, - "loss": 46.0, - "step": 25942 - }, - { - "epoch": 4.1779459720600665, - "grad_norm": 0.0069977957755327225, - "learning_rate": 0.00019999139358653048, - "loss": 46.0, - "step": 25943 - }, - { - "epoch": 4.178107009138854, - "grad_norm": 0.0058150095865130424, - "learning_rate": 0.000199991392922785, - "loss": 46.0, - "step": 25944 - }, - { - "epoch": 4.178268046217641, - "grad_norm": 0.0025091199204325676, - "learning_rate": 0.0001999913922590139, - "loss": 46.0, - "step": 25945 - }, - { - "epoch": 4.178429083296429, - "grad_norm": 0.004277421161532402, - "learning_rate": 0.00019999139159521722, - "loss": 46.0, - "step": 25946 - }, - { - "epoch": 4.178590120375216, - "grad_norm": 0.002977653406560421, - "learning_rate": 0.00019999139093139495, - "loss": 46.0, - "step": 25947 - }, - { - "epoch": 4.178751157454004, - "grad_norm": 0.0017982794670388103, - "learning_rate": 0.0001999913902675471, - "loss": 46.0, - "step": 25948 - }, - { - "epoch": 4.178912194532791, - "grad_norm": 0.005572856403887272, - "learning_rate": 0.00019999138960367363, - "loss": 46.0, - "step": 25949 - }, - { - "epoch": 4.179073231611579, - "grad_norm": 0.001722762011922896, - "learning_rate": 0.00019999138893977457, - "loss": 46.0, - "step": 25950 - }, - { - "epoch": 4.179234268690366, - "grad_norm": 0.015083261765539646, - "learning_rate": 0.00019999138827584995, - "loss": 46.0, - "step": 25951 - }, - { - "epoch": 4.1793953057691535, - "grad_norm": 0.0018014807719737291, - "learning_rate": 0.0001999913876118997, - "loss": 46.0, - "step": 25952 - }, - { - "epoch": 4.179556342847941, - "grad_norm": 0.015364490449428558, - "learning_rate": 0.00019999138694792387, - "loss": 46.0, - "step": 25953 - }, - { - "epoch": 4.179717379926728, - "grad_norm": 0.002354520605877042, - "learning_rate": 0.00019999138628392247, - "loss": 46.0, - "step": 25954 - }, - { - "epoch": 4.179878417005516, - "grad_norm": 0.00257012783549726, - "learning_rate": 0.00019999138561989545, - "loss": 46.0, - "step": 25955 - }, - { - "epoch": 4.180039454084303, - "grad_norm": 0.006188462022691965, - "learning_rate": 0.00019999138495584284, - "loss": 46.0, - "step": 25956 - }, - { - "epoch": 4.18020049116309, - "grad_norm": 0.012042365036904812, - "learning_rate": 0.00019999138429176465, - "loss": 46.0, - "step": 25957 - }, - { - "epoch": 4.180361528241877, - "grad_norm": 0.004100468475371599, - "learning_rate": 0.00019999138362766086, - "loss": 46.0, - "step": 25958 - }, - { - "epoch": 4.180522565320665, - "grad_norm": 0.0027238535694777966, - "learning_rate": 0.00019999138296353147, - "loss": 46.0, - "step": 25959 - }, - { - "epoch": 4.180683602399452, - "grad_norm": 0.0014333584113046527, - "learning_rate": 0.0001999913822993765, - "loss": 46.0, - "step": 25960 - }, - { - "epoch": 4.18084463947824, - "grad_norm": 0.006552796810865402, - "learning_rate": 0.00019999138163519594, - "loss": 46.0, - "step": 25961 - }, - { - "epoch": 4.181005676557027, - "grad_norm": 0.001441031345166266, - "learning_rate": 0.00019999138097098978, - "loss": 46.0, - "step": 25962 - }, - { - "epoch": 4.181166713635815, - "grad_norm": 0.003725338028743863, - "learning_rate": 0.000199991380306758, - "loss": 46.0, - "step": 25963 - }, - { - "epoch": 4.181327750714602, - "grad_norm": 0.007430018857121468, - "learning_rate": 0.00019999137964250068, - "loss": 46.0, - "step": 25964 - }, - { - "epoch": 4.1814887877933895, - "grad_norm": 0.003349131438881159, - "learning_rate": 0.00019999137897821773, - "loss": 46.0, - "step": 25965 - }, - { - "epoch": 4.181649824872177, - "grad_norm": 0.004528583958745003, - "learning_rate": 0.0001999913783139092, - "loss": 46.0, - "step": 25966 - }, - { - "epoch": 4.181810861950964, - "grad_norm": 0.007112594787031412, - "learning_rate": 0.00019999137764957508, - "loss": 46.0, - "step": 25967 - }, - { - "epoch": 4.181971899029752, - "grad_norm": 0.011922754347324371, - "learning_rate": 0.00019999137698521535, - "loss": 46.0, - "step": 25968 - }, - { - "epoch": 4.182132936108539, - "grad_norm": 0.0030051604844629765, - "learning_rate": 0.00019999137632083005, - "loss": 46.0, - "step": 25969 - }, - { - "epoch": 4.182293973187327, - "grad_norm": 0.0033459635451436043, - "learning_rate": 0.00019999137565641914, - "loss": 46.0, - "step": 25970 - }, - { - "epoch": 4.182455010266114, - "grad_norm": 0.02054242603480816, - "learning_rate": 0.00019999137499198267, - "loss": 46.0, - "step": 25971 - }, - { - "epoch": 4.182616047344901, - "grad_norm": 0.004144029226154089, - "learning_rate": 0.00019999137432752056, - "loss": 46.0, - "step": 25972 - }, - { - "epoch": 4.182777084423688, - "grad_norm": 0.0021680439822375774, - "learning_rate": 0.00019999137366303286, - "loss": 46.0, - "step": 25973 - }, - { - "epoch": 4.182938121502476, - "grad_norm": 0.007987151853740215, - "learning_rate": 0.0001999913729985196, - "loss": 46.0, - "step": 25974 - }, - { - "epoch": 4.183099158581263, - "grad_norm": 0.007736022584140301, - "learning_rate": 0.00019999137233398073, - "loss": 46.0, - "step": 25975 - }, - { - "epoch": 4.183260195660051, - "grad_norm": 0.0018217575270682573, - "learning_rate": 0.00019999137166941628, - "loss": 46.0, - "step": 25976 - }, - { - "epoch": 4.183421232738838, - "grad_norm": 0.00365527905523777, - "learning_rate": 0.00019999137100482623, - "loss": 46.0, - "step": 25977 - }, - { - "epoch": 4.1835822698176255, - "grad_norm": 0.006252591032534838, - "learning_rate": 0.00019999137034021057, - "loss": 46.0, - "step": 25978 - }, - { - "epoch": 4.183743306896413, - "grad_norm": 0.0016039125621318817, - "learning_rate": 0.00019999136967556935, - "loss": 46.0, - "step": 25979 - }, - { - "epoch": 4.1839043439752, - "grad_norm": 0.002131569664925337, - "learning_rate": 0.0001999913690109025, - "loss": 46.0, - "step": 25980 - }, - { - "epoch": 4.184065381053988, - "grad_norm": 0.0071007730439305305, - "learning_rate": 0.00019999136834621012, - "loss": 46.0, - "step": 25981 - }, - { - "epoch": 4.184226418132775, - "grad_norm": 0.005144949536770582, - "learning_rate": 0.00019999136768149208, - "loss": 46.0, - "step": 25982 - }, - { - "epoch": 4.184387455211563, - "grad_norm": 0.0024578217417001724, - "learning_rate": 0.00019999136701674848, - "loss": 46.0, - "step": 25983 - }, - { - "epoch": 4.18454849229035, - "grad_norm": 0.0034769868943840265, - "learning_rate": 0.00019999136635197927, - "loss": 46.0, - "step": 25984 - }, - { - "epoch": 4.184709529369138, - "grad_norm": 0.0010151652386412024, - "learning_rate": 0.00019999136568718448, - "loss": 46.0, - "step": 25985 - }, - { - "epoch": 4.184870566447924, - "grad_norm": 0.0035235495306551456, - "learning_rate": 0.00019999136502236412, - "loss": 46.0, - "step": 25986 - }, - { - "epoch": 4.185031603526712, - "grad_norm": 0.0016252518398687243, - "learning_rate": 0.00019999136435751812, - "loss": 46.0, - "step": 25987 - }, - { - "epoch": 4.185192640605499, - "grad_norm": 0.007759832311421633, - "learning_rate": 0.00019999136369264656, - "loss": 46.0, - "step": 25988 - }, - { - "epoch": 4.185353677684287, - "grad_norm": 0.005069665610790253, - "learning_rate": 0.00019999136302774936, - "loss": 46.0, - "step": 25989 - }, - { - "epoch": 4.185514714763074, - "grad_norm": 0.0035398025065660477, - "learning_rate": 0.00019999136236282662, - "loss": 46.0, - "step": 25990 - }, - { - "epoch": 4.1856757518418615, - "grad_norm": 0.009738585911691189, - "learning_rate": 0.00019999136169787828, - "loss": 46.0, - "step": 25991 - }, - { - "epoch": 4.185836788920649, - "grad_norm": 0.0013918853364884853, - "learning_rate": 0.00019999136103290431, - "loss": 46.0, - "step": 25992 - }, - { - "epoch": 4.185997825999436, - "grad_norm": 0.0032958476804196835, - "learning_rate": 0.0001999913603679048, - "loss": 46.0, - "step": 25993 - }, - { - "epoch": 4.186158863078224, - "grad_norm": 0.0035923910327255726, - "learning_rate": 0.00019999135970287968, - "loss": 46.0, - "step": 25994 - }, - { - "epoch": 4.186319900157011, - "grad_norm": 0.00758514367043972, - "learning_rate": 0.00019999135903782898, - "loss": 46.0, - "step": 25995 - }, - { - "epoch": 4.186480937235799, - "grad_norm": 0.0036441378761082888, - "learning_rate": 0.00019999135837275265, - "loss": 46.0, - "step": 25996 - }, - { - "epoch": 4.186641974314586, - "grad_norm": 0.002245335839688778, - "learning_rate": 0.00019999135770765075, - "loss": 46.0, - "step": 25997 - }, - { - "epoch": 4.186803011393374, - "grad_norm": 0.0045206849463284016, - "learning_rate": 0.00019999135704252324, - "loss": 46.0, - "step": 25998 - }, - { - "epoch": 4.186964048472161, - "grad_norm": 0.0017208128701895475, - "learning_rate": 0.00019999135637737014, - "loss": 46.0, - "step": 25999 - }, - { - "epoch": 4.1871250855509485, - "grad_norm": 0.002556613879278302, - "learning_rate": 0.00019999135571219148, - "loss": 46.0, - "step": 26000 - }, - { - "epoch": 4.187286122629735, - "grad_norm": 0.0019640920218080282, - "learning_rate": 0.00019999135504698718, - "loss": 46.0, - "step": 26001 - }, - { - "epoch": 4.1874471597085225, - "grad_norm": 0.004104936961084604, - "learning_rate": 0.00019999135438175734, - "loss": 46.0, - "step": 26002 - }, - { - "epoch": 4.18760819678731, - "grad_norm": 0.005912536755204201, - "learning_rate": 0.00019999135371650187, - "loss": 46.0, - "step": 26003 - }, - { - "epoch": 4.1877692338660975, - "grad_norm": 0.005457097664475441, - "learning_rate": 0.00019999135305122083, - "loss": 46.0, - "step": 26004 - }, - { - "epoch": 4.187930270944885, - "grad_norm": 0.013931632973253727, - "learning_rate": 0.00019999135238591418, - "loss": 46.0, - "step": 26005 - }, - { - "epoch": 4.188091308023672, - "grad_norm": 0.007091052830219269, - "learning_rate": 0.00019999135172058194, - "loss": 46.0, - "step": 26006 - }, - { - "epoch": 4.18825234510246, - "grad_norm": 0.0008303801296278834, - "learning_rate": 0.0001999913510552241, - "loss": 46.0, - "step": 26007 - }, - { - "epoch": 4.188413382181247, - "grad_norm": 0.015890713781118393, - "learning_rate": 0.00019999135038984068, - "loss": 46.0, - "step": 26008 - }, - { - "epoch": 4.188574419260035, - "grad_norm": 0.0024414905346930027, - "learning_rate": 0.00019999134972443168, - "loss": 46.0, - "step": 26009 - }, - { - "epoch": 4.188735456338822, - "grad_norm": 0.0013518971391022205, - "learning_rate": 0.00019999134905899704, - "loss": 46.0, - "step": 26010 - }, - { - "epoch": 4.18889649341761, - "grad_norm": 0.006290055345743895, - "learning_rate": 0.00019999134839353687, - "loss": 46.0, - "step": 26011 - }, - { - "epoch": 4.189057530496397, - "grad_norm": 0.0018630078993737698, - "learning_rate": 0.00019999134772805106, - "loss": 46.0, - "step": 26012 - }, - { - "epoch": 4.1892185675751845, - "grad_norm": 0.008745655417442322, - "learning_rate": 0.00019999134706253968, - "loss": 46.0, - "step": 26013 - }, - { - "epoch": 4.189379604653972, - "grad_norm": 0.0027428437024354935, - "learning_rate": 0.00019999134639700267, - "loss": 46.0, - "step": 26014 - }, - { - "epoch": 4.1895406417327585, - "grad_norm": 0.0038365055806934834, - "learning_rate": 0.00019999134573144012, - "loss": 46.0, - "step": 26015 - }, - { - "epoch": 4.189701678811546, - "grad_norm": 0.0038948110304772854, - "learning_rate": 0.00019999134506585195, - "loss": 46.0, - "step": 26016 - }, - { - "epoch": 4.189862715890333, - "grad_norm": 0.0009821417042985559, - "learning_rate": 0.00019999134440023818, - "loss": 46.0, - "step": 26017 - }, - { - "epoch": 4.190023752969121, - "grad_norm": 0.001074650906957686, - "learning_rate": 0.00019999134373459884, - "loss": 46.0, - "step": 26018 - }, - { - "epoch": 4.190184790047908, - "grad_norm": 0.0015313818585127592, - "learning_rate": 0.0001999913430689339, - "loss": 46.0, - "step": 26019 - }, - { - "epoch": 4.190345827126696, - "grad_norm": 0.0018712281016632915, - "learning_rate": 0.00019999134240324335, - "loss": 46.0, - "step": 26020 - }, - { - "epoch": 4.190506864205483, - "grad_norm": 0.0031497799791395664, - "learning_rate": 0.00019999134173752725, - "loss": 46.0, - "step": 26021 - }, - { - "epoch": 4.190667901284271, - "grad_norm": 0.006303925067186356, - "learning_rate": 0.0001999913410717855, - "loss": 46.0, - "step": 26022 - }, - { - "epoch": 4.190828938363058, - "grad_norm": 0.0014620608417317271, - "learning_rate": 0.0001999913404060182, - "loss": 46.0, - "step": 26023 - }, - { - "epoch": 4.190989975441846, - "grad_norm": 0.004717608913779259, - "learning_rate": 0.00019999133974022527, - "loss": 46.0, - "step": 26024 - }, - { - "epoch": 4.191151012520633, - "grad_norm": 0.0011663367040455341, - "learning_rate": 0.00019999133907440682, - "loss": 46.0, - "step": 26025 - }, - { - "epoch": 4.1913120495994205, - "grad_norm": 0.0015338532393798232, - "learning_rate": 0.00019999133840856273, - "loss": 46.0, - "step": 26026 - }, - { - "epoch": 4.191473086678208, - "grad_norm": 0.0032446887344121933, - "learning_rate": 0.000199991337742693, - "loss": 46.0, - "step": 26027 - }, - { - "epoch": 4.191634123756995, - "grad_norm": 0.007691057864576578, - "learning_rate": 0.00019999133707679777, - "loss": 46.0, - "step": 26028 - }, - { - "epoch": 4.191795160835783, - "grad_norm": 0.0011235671117901802, - "learning_rate": 0.0001999913364108769, - "loss": 46.0, - "step": 26029 - }, - { - "epoch": 4.191956197914569, - "grad_norm": 0.011805804446339607, - "learning_rate": 0.00019999133574493042, - "loss": 46.0, - "step": 26030 - }, - { - "epoch": 4.192117234993357, - "grad_norm": 0.009845497086644173, - "learning_rate": 0.00019999133507895834, - "loss": 46.0, - "step": 26031 - }, - { - "epoch": 4.192278272072144, - "grad_norm": 0.005638534668833017, - "learning_rate": 0.0001999913344129607, - "loss": 46.0, - "step": 26032 - }, - { - "epoch": 4.192439309150932, - "grad_norm": 0.002307936316356063, - "learning_rate": 0.00019999133374693745, - "loss": 46.0, - "step": 26033 - }, - { - "epoch": 4.192600346229719, - "grad_norm": 0.0071575697511434555, - "learning_rate": 0.00019999133308088864, - "loss": 46.0, - "step": 26034 - }, - { - "epoch": 4.192761383308507, - "grad_norm": 0.000621053040958941, - "learning_rate": 0.0001999913324148142, - "loss": 46.0, - "step": 26035 - }, - { - "epoch": 4.192922420387294, - "grad_norm": 0.004900424275547266, - "learning_rate": 0.0001999913317487142, - "loss": 46.0, - "step": 26036 - }, - { - "epoch": 4.193083457466082, - "grad_norm": 0.0015247509581968188, - "learning_rate": 0.0001999913310825886, - "loss": 46.0, - "step": 26037 - }, - { - "epoch": 4.193244494544869, - "grad_norm": 0.005635363049805164, - "learning_rate": 0.00019999133041643738, - "loss": 46.0, - "step": 26038 - }, - { - "epoch": 4.1934055316236565, - "grad_norm": 0.006605396047234535, - "learning_rate": 0.00019999132975026057, - "loss": 46.0, - "step": 26039 - }, - { - "epoch": 4.193566568702444, - "grad_norm": 0.005611578933894634, - "learning_rate": 0.0001999913290840582, - "loss": 46.0, - "step": 26040 - }, - { - "epoch": 4.193727605781231, - "grad_norm": 0.004594260361045599, - "learning_rate": 0.0001999913284178302, - "loss": 46.0, - "step": 26041 - }, - { - "epoch": 4.193888642860019, - "grad_norm": 0.003198167309165001, - "learning_rate": 0.00019999132775157664, - "loss": 46.0, - "step": 26042 - }, - { - "epoch": 4.194049679938806, - "grad_norm": 0.00684497831389308, - "learning_rate": 0.00019999132708529746, - "loss": 46.0, - "step": 26043 - }, - { - "epoch": 4.194210717017594, - "grad_norm": 0.003949678037315607, - "learning_rate": 0.00019999132641899272, - "loss": 46.0, - "step": 26044 - }, - { - "epoch": 4.19437175409638, - "grad_norm": 0.0029609936755150557, - "learning_rate": 0.00019999132575266234, - "loss": 46.0, - "step": 26045 - }, - { - "epoch": 4.194532791175168, - "grad_norm": 0.013808994553983212, - "learning_rate": 0.00019999132508630643, - "loss": 46.0, - "step": 26046 - }, - { - "epoch": 4.194693828253955, - "grad_norm": 0.0022515649907290936, - "learning_rate": 0.00019999132441992487, - "loss": 46.0, - "step": 26047 - }, - { - "epoch": 4.194854865332743, - "grad_norm": 0.0062814923003315926, - "learning_rate": 0.00019999132375351776, - "loss": 46.0, - "step": 26048 - }, - { - "epoch": 4.19501590241153, - "grad_norm": 0.005903781391680241, - "learning_rate": 0.00019999132308708503, - "loss": 46.0, - "step": 26049 - }, - { - "epoch": 4.195176939490318, - "grad_norm": 0.014126066118478775, - "learning_rate": 0.0001999913224206267, - "loss": 46.0, - "step": 26050 - }, - { - "epoch": 4.195337976569105, - "grad_norm": 0.01271476037800312, - "learning_rate": 0.0001999913217541428, - "loss": 46.0, - "step": 26051 - }, - { - "epoch": 4.1954990136478925, - "grad_norm": 0.004971700720489025, - "learning_rate": 0.00019999132108763332, - "loss": 46.0, - "step": 26052 - }, - { - "epoch": 4.19566005072668, - "grad_norm": 0.00850827619433403, - "learning_rate": 0.00019999132042109824, - "loss": 46.0, - "step": 26053 - }, - { - "epoch": 4.195821087805467, - "grad_norm": 0.005059359595179558, - "learning_rate": 0.00019999131975453752, - "loss": 46.0, - "step": 26054 - }, - { - "epoch": 4.195982124884255, - "grad_norm": 0.0032999475952237844, - "learning_rate": 0.00019999131908795127, - "loss": 46.0, - "step": 26055 - }, - { - "epoch": 4.196143161963042, - "grad_norm": 0.002233864739537239, - "learning_rate": 0.00019999131842133938, - "loss": 46.0, - "step": 26056 - }, - { - "epoch": 4.19630419904183, - "grad_norm": 0.007805574685335159, - "learning_rate": 0.00019999131775470192, - "loss": 46.0, - "step": 26057 - }, - { - "epoch": 4.196465236120617, - "grad_norm": 0.00768780754879117, - "learning_rate": 0.00019999131708803885, - "loss": 46.0, - "step": 26058 - }, - { - "epoch": 4.196626273199404, - "grad_norm": 0.007372531108558178, - "learning_rate": 0.0001999913164213502, - "loss": 46.0, - "step": 26059 - }, - { - "epoch": 4.196787310278191, - "grad_norm": 0.0017058696830645204, - "learning_rate": 0.00019999131575463596, - "loss": 46.0, - "step": 26060 - }, - { - "epoch": 4.196948347356979, - "grad_norm": 0.0016624911222606897, - "learning_rate": 0.00019999131508789613, - "loss": 46.0, - "step": 26061 - }, - { - "epoch": 4.197109384435766, - "grad_norm": 0.0008786537800915539, - "learning_rate": 0.0001999913144211307, - "loss": 46.0, - "step": 26062 - }, - { - "epoch": 4.1972704215145535, - "grad_norm": 0.0029030819423496723, - "learning_rate": 0.0001999913137543397, - "loss": 46.0, - "step": 26063 - }, - { - "epoch": 4.197431458593341, - "grad_norm": 0.003672652179375291, - "learning_rate": 0.0001999913130875231, - "loss": 46.0, - "step": 26064 - }, - { - "epoch": 4.197592495672128, - "grad_norm": 0.009355917572975159, - "learning_rate": 0.00019999131242068088, - "loss": 46.0, - "step": 26065 - }, - { - "epoch": 4.197753532750916, - "grad_norm": 0.01713617891073227, - "learning_rate": 0.00019999131175381306, - "loss": 46.0, - "step": 26066 - }, - { - "epoch": 4.197914569829703, - "grad_norm": 0.0029010928701609373, - "learning_rate": 0.00019999131108691968, - "loss": 46.0, - "step": 26067 - }, - { - "epoch": 4.198075606908491, - "grad_norm": 0.0013409628299996257, - "learning_rate": 0.0001999913104200007, - "loss": 46.0, - "step": 26068 - }, - { - "epoch": 4.198236643987278, - "grad_norm": 0.004620407707989216, - "learning_rate": 0.00019999130975305613, - "loss": 46.0, - "step": 26069 - }, - { - "epoch": 4.198397681066066, - "grad_norm": 0.015187554992735386, - "learning_rate": 0.00019999130908608597, - "loss": 46.0, - "step": 26070 - }, - { - "epoch": 4.198558718144853, - "grad_norm": 0.002239885739982128, - "learning_rate": 0.00019999130841909018, - "loss": 46.0, - "step": 26071 - }, - { - "epoch": 4.198719755223641, - "grad_norm": 0.004163686186075211, - "learning_rate": 0.00019999130775206884, - "loss": 46.0, - "step": 26072 - }, - { - "epoch": 4.198880792302428, - "grad_norm": 0.0018952051177620888, - "learning_rate": 0.00019999130708502188, - "loss": 46.0, - "step": 26073 - }, - { - "epoch": 4.199041829381215, - "grad_norm": 0.0025391767267137766, - "learning_rate": 0.00019999130641794937, - "loss": 46.0, - "step": 26074 - }, - { - "epoch": 4.199202866460002, - "grad_norm": 0.0011277308221906424, - "learning_rate": 0.00019999130575085124, - "loss": 46.0, - "step": 26075 - }, - { - "epoch": 4.1993639035387895, - "grad_norm": 0.0027454053051769733, - "learning_rate": 0.00019999130508372752, - "loss": 46.0, - "step": 26076 - }, - { - "epoch": 4.199524940617577, - "grad_norm": 0.002370435744524002, - "learning_rate": 0.00019999130441657818, - "loss": 46.0, - "step": 26077 - }, - { - "epoch": 4.199685977696364, - "grad_norm": 0.0012260667281225324, - "learning_rate": 0.00019999130374940326, - "loss": 46.0, - "step": 26078 - }, - { - "epoch": 4.199847014775152, - "grad_norm": 0.005955003201961517, - "learning_rate": 0.00019999130308220278, - "loss": 46.0, - "step": 26079 - }, - { - "epoch": 4.200008051853939, - "grad_norm": 0.0014732145937159657, - "learning_rate": 0.0001999913024149767, - "loss": 46.0, - "step": 26080 - }, - { - "epoch": 4.200169088932727, - "grad_norm": 0.003810550319030881, - "learning_rate": 0.00019999130174772498, - "loss": 46.0, - "step": 26081 - }, - { - "epoch": 4.200330126011514, - "grad_norm": 0.01232217438519001, - "learning_rate": 0.0001999913010804477, - "loss": 46.0, - "step": 26082 - }, - { - "epoch": 4.200491163090302, - "grad_norm": 0.0014904760755598545, - "learning_rate": 0.00019999130041314486, - "loss": 46.0, - "step": 26083 - }, - { - "epoch": 4.200652200169089, - "grad_norm": 0.002004674170166254, - "learning_rate": 0.0001999912997458164, - "loss": 46.0, - "step": 26084 - }, - { - "epoch": 4.200813237247877, - "grad_norm": 0.00470479391515255, - "learning_rate": 0.00019999129907846233, - "loss": 46.0, - "step": 26085 - }, - { - "epoch": 4.200974274326664, - "grad_norm": 0.007371739484369755, - "learning_rate": 0.00019999129841108269, - "loss": 46.0, - "step": 26086 - }, - { - "epoch": 4.2011353114054515, - "grad_norm": 0.01456478238105774, - "learning_rate": 0.00019999129774367745, - "loss": 46.0, - "step": 26087 - }, - { - "epoch": 4.201296348484238, - "grad_norm": 0.0057002645917236805, - "learning_rate": 0.00019999129707624658, - "loss": 46.0, - "step": 26088 - }, - { - "epoch": 4.2014573855630255, - "grad_norm": 0.002759326249361038, - "learning_rate": 0.00019999129640879018, - "loss": 46.0, - "step": 26089 - }, - { - "epoch": 4.201618422641813, - "grad_norm": 0.0014946566661819816, - "learning_rate": 0.00019999129574130816, - "loss": 46.0, - "step": 26090 - }, - { - "epoch": 4.2017794597206, - "grad_norm": 0.0024217774625867605, - "learning_rate": 0.00019999129507380055, - "loss": 46.0, - "step": 26091 - }, - { - "epoch": 4.201940496799388, - "grad_norm": 0.0016628142911940813, - "learning_rate": 0.00019999129440626733, - "loss": 46.0, - "step": 26092 - }, - { - "epoch": 4.202101533878175, - "grad_norm": 0.006629998330026865, - "learning_rate": 0.00019999129373870852, - "loss": 46.0, - "step": 26093 - }, - { - "epoch": 4.202262570956963, - "grad_norm": 0.0035316506400704384, - "learning_rate": 0.00019999129307112412, - "loss": 46.0, - "step": 26094 - }, - { - "epoch": 4.20242360803575, - "grad_norm": 0.0024842265993356705, - "learning_rate": 0.00019999129240351417, - "loss": 46.0, - "step": 26095 - }, - { - "epoch": 4.202584645114538, - "grad_norm": 0.005518514197319746, - "learning_rate": 0.00019999129173587857, - "loss": 46.0, - "step": 26096 - }, - { - "epoch": 4.202745682193325, - "grad_norm": 0.0038072566967457533, - "learning_rate": 0.00019999129106821738, - "loss": 46.0, - "step": 26097 - }, - { - "epoch": 4.202906719272113, - "grad_norm": 0.007764378096908331, - "learning_rate": 0.00019999129040053064, - "loss": 46.0, - "step": 26098 - }, - { - "epoch": 4.2030677563509, - "grad_norm": 0.005074677523225546, - "learning_rate": 0.00019999128973281828, - "loss": 46.0, - "step": 26099 - }, - { - "epoch": 4.2032287934296875, - "grad_norm": 0.0011158455163240433, - "learning_rate": 0.00019999128906508033, - "loss": 46.0, - "step": 26100 - }, - { - "epoch": 4.203389830508475, - "grad_norm": 0.0019502454670146108, - "learning_rate": 0.0001999912883973168, - "loss": 46.0, - "step": 26101 - }, - { - "epoch": 4.203550867587262, - "grad_norm": 0.0078010293655097485, - "learning_rate": 0.00019999128772952765, - "loss": 46.0, - "step": 26102 - }, - { - "epoch": 4.203711904666049, - "grad_norm": 0.010709693655371666, - "learning_rate": 0.00019999128706171292, - "loss": 46.0, - "step": 26103 - }, - { - "epoch": 4.203872941744836, - "grad_norm": 0.005534814670681953, - "learning_rate": 0.00019999128639387262, - "loss": 46.0, - "step": 26104 - }, - { - "epoch": 4.204033978823624, - "grad_norm": 0.0022047837264835835, - "learning_rate": 0.00019999128572600671, - "loss": 46.0, - "step": 26105 - }, - { - "epoch": 4.204195015902411, - "grad_norm": 0.0052042207680642605, - "learning_rate": 0.0001999912850581152, - "loss": 46.0, - "step": 26106 - }, - { - "epoch": 4.204356052981199, - "grad_norm": 0.011884715408086777, - "learning_rate": 0.0001999912843901981, - "loss": 46.0, - "step": 26107 - }, - { - "epoch": 4.204517090059986, - "grad_norm": 0.0017527422169223428, - "learning_rate": 0.0001999912837222554, - "loss": 46.0, - "step": 26108 - }, - { - "epoch": 4.204678127138774, - "grad_norm": 0.005948703270405531, - "learning_rate": 0.00019999128305428715, - "loss": 46.0, - "step": 26109 - }, - { - "epoch": 4.204839164217561, - "grad_norm": 0.0041191610507667065, - "learning_rate": 0.00019999128238629328, - "loss": 46.0, - "step": 26110 - }, - { - "epoch": 4.2050002012963485, - "grad_norm": 0.007457006257027388, - "learning_rate": 0.00019999128171827382, - "loss": 46.0, - "step": 26111 - }, - { - "epoch": 4.205161238375136, - "grad_norm": 0.006035392638295889, - "learning_rate": 0.00019999128105022872, - "loss": 46.0, - "step": 26112 - }, - { - "epoch": 4.2053222754539235, - "grad_norm": 0.013747980818152428, - "learning_rate": 0.00019999128038215811, - "loss": 46.0, - "step": 26113 - }, - { - "epoch": 4.205483312532711, - "grad_norm": 0.0013991923769935966, - "learning_rate": 0.00019999127971406187, - "loss": 46.0, - "step": 26114 - }, - { - "epoch": 4.205644349611498, - "grad_norm": 0.0037634915206581354, - "learning_rate": 0.00019999127904594, - "loss": 46.0, - "step": 26115 - }, - { - "epoch": 4.205805386690286, - "grad_norm": 0.005118495784699917, - "learning_rate": 0.00019999127837779258, - "loss": 46.0, - "step": 26116 - }, - { - "epoch": 4.205966423769073, - "grad_norm": 0.00338145368732512, - "learning_rate": 0.00019999127770961957, - "loss": 46.0, - "step": 26117 - }, - { - "epoch": 4.20612746084786, - "grad_norm": 0.005637587048113346, - "learning_rate": 0.00019999127704142095, - "loss": 46.0, - "step": 26118 - }, - { - "epoch": 4.206288497926647, - "grad_norm": 0.007844589650630951, - "learning_rate": 0.0001999912763731967, - "loss": 46.0, - "step": 26119 - }, - { - "epoch": 4.206449535005435, - "grad_norm": 0.00894088577479124, - "learning_rate": 0.00019999127570494694, - "loss": 46.0, - "step": 26120 - }, - { - "epoch": 4.206610572084222, - "grad_norm": 0.0014346856623888016, - "learning_rate": 0.00019999127503667156, - "loss": 46.0, - "step": 26121 - }, - { - "epoch": 4.20677160916301, - "grad_norm": 0.004349898546934128, - "learning_rate": 0.00019999127436837056, - "loss": 46.0, - "step": 26122 - }, - { - "epoch": 4.206932646241797, - "grad_norm": 0.0029623203445225954, - "learning_rate": 0.00019999127370004397, - "loss": 46.0, - "step": 26123 - }, - { - "epoch": 4.2070936833205845, - "grad_norm": 0.0021699510980397463, - "learning_rate": 0.00019999127303169182, - "loss": 46.0, - "step": 26124 - }, - { - "epoch": 4.207254720399372, - "grad_norm": 0.004261610563844442, - "learning_rate": 0.00019999127236331406, - "loss": 46.0, - "step": 26125 - }, - { - "epoch": 4.207415757478159, - "grad_norm": 0.0021020250860601664, - "learning_rate": 0.0001999912716949107, - "loss": 46.0, - "step": 26126 - }, - { - "epoch": 4.207576794556947, - "grad_norm": 0.004452476277947426, - "learning_rate": 0.00019999127102648175, - "loss": 46.0, - "step": 26127 - }, - { - "epoch": 4.207737831635734, - "grad_norm": 0.002101828809827566, - "learning_rate": 0.0001999912703580272, - "loss": 46.0, - "step": 26128 - }, - { - "epoch": 4.207898868714522, - "grad_norm": 0.006327989976853132, - "learning_rate": 0.00019999126968954707, - "loss": 46.0, - "step": 26129 - }, - { - "epoch": 4.208059905793309, - "grad_norm": 0.010032350197434425, - "learning_rate": 0.00019999126902104134, - "loss": 46.0, - "step": 26130 - }, - { - "epoch": 4.208220942872097, - "grad_norm": 0.00930061750113964, - "learning_rate": 0.00019999126835251003, - "loss": 46.0, - "step": 26131 - }, - { - "epoch": 4.208381979950884, - "grad_norm": 0.005770361516624689, - "learning_rate": 0.0001999912676839531, - "loss": 46.0, - "step": 26132 - }, - { - "epoch": 4.208543017029671, - "grad_norm": 0.0010209655156359076, - "learning_rate": 0.00019999126701537062, - "loss": 46.0, - "step": 26133 - }, - { - "epoch": 4.208704054108458, - "grad_norm": 0.006891271099448204, - "learning_rate": 0.00019999126634676252, - "loss": 46.0, - "step": 26134 - }, - { - "epoch": 4.208865091187246, - "grad_norm": 0.002525690710172057, - "learning_rate": 0.00019999126567812883, - "loss": 46.0, - "step": 26135 - }, - { - "epoch": 4.209026128266033, - "grad_norm": 0.0023005539551377296, - "learning_rate": 0.00019999126500946953, - "loss": 46.0, - "step": 26136 - }, - { - "epoch": 4.2091871653448205, - "grad_norm": 0.004584599751979113, - "learning_rate": 0.00019999126434078467, - "loss": 46.0, - "step": 26137 - }, - { - "epoch": 4.209348202423608, - "grad_norm": 0.0007366443751379848, - "learning_rate": 0.00019999126367207422, - "loss": 46.0, - "step": 26138 - }, - { - "epoch": 4.209509239502395, - "grad_norm": 0.0011814075987786055, - "learning_rate": 0.00019999126300333813, - "loss": 46.0, - "step": 26139 - }, - { - "epoch": 4.209670276581183, - "grad_norm": 0.004154510796070099, - "learning_rate": 0.0001999912623345765, - "loss": 46.0, - "step": 26140 - }, - { - "epoch": 4.20983131365997, - "grad_norm": 0.00594858406111598, - "learning_rate": 0.00019999126166578925, - "loss": 46.0, - "step": 26141 - }, - { - "epoch": 4.209992350738758, - "grad_norm": 0.004756337963044643, - "learning_rate": 0.0001999912609969764, - "loss": 46.0, - "step": 26142 - }, - { - "epoch": 4.210153387817545, - "grad_norm": 0.015073360875248909, - "learning_rate": 0.00019999126032813798, - "loss": 46.0, - "step": 26143 - }, - { - "epoch": 4.210314424896333, - "grad_norm": 0.007323187310248613, - "learning_rate": 0.00019999125965927393, - "loss": 46.0, - "step": 26144 - }, - { - "epoch": 4.21047546197512, - "grad_norm": 0.003056993242353201, - "learning_rate": 0.00019999125899038434, - "loss": 46.0, - "step": 26145 - }, - { - "epoch": 4.210636499053908, - "grad_norm": 0.00145050382707268, - "learning_rate": 0.00019999125832146912, - "loss": 46.0, - "step": 26146 - }, - { - "epoch": 4.210797536132694, - "grad_norm": 0.004086241591721773, - "learning_rate": 0.00019999125765252833, - "loss": 46.0, - "step": 26147 - }, - { - "epoch": 4.210958573211482, - "grad_norm": 0.00959494523704052, - "learning_rate": 0.00019999125698356193, - "loss": 46.0, - "step": 26148 - }, - { - "epoch": 4.211119610290269, - "grad_norm": 0.0040459055453538895, - "learning_rate": 0.00019999125631456994, - "loss": 46.0, - "step": 26149 - }, - { - "epoch": 4.2112806473690565, - "grad_norm": 0.0015438953414559364, - "learning_rate": 0.00019999125564555237, - "loss": 46.0, - "step": 26150 - }, - { - "epoch": 4.211441684447844, - "grad_norm": 0.002931318012997508, - "learning_rate": 0.0001999912549765092, - "loss": 46.0, - "step": 26151 - }, - { - "epoch": 4.211602721526631, - "grad_norm": 0.0008940070983953774, - "learning_rate": 0.00019999125430744043, - "loss": 46.0, - "step": 26152 - }, - { - "epoch": 4.211763758605419, - "grad_norm": 0.0026036081835627556, - "learning_rate": 0.00019999125363834606, - "loss": 46.0, - "step": 26153 - }, - { - "epoch": 4.211924795684206, - "grad_norm": 0.008871166966855526, - "learning_rate": 0.0001999912529692261, - "loss": 46.0, - "step": 26154 - }, - { - "epoch": 4.212085832762994, - "grad_norm": 0.004649289418011904, - "learning_rate": 0.00019999125230008057, - "loss": 46.0, - "step": 26155 - }, - { - "epoch": 4.212246869841781, - "grad_norm": 0.0029622167348861694, - "learning_rate": 0.00019999125163090944, - "loss": 46.0, - "step": 26156 - }, - { - "epoch": 4.212407906920569, - "grad_norm": 0.009121979586780071, - "learning_rate": 0.0001999912509617127, - "loss": 46.0, - "step": 26157 - }, - { - "epoch": 4.212568943999356, - "grad_norm": 0.0007816595025360584, - "learning_rate": 0.0001999912502924904, - "loss": 46.0, - "step": 26158 - }, - { - "epoch": 4.212729981078144, - "grad_norm": 0.006691058166325092, - "learning_rate": 0.00019999124962324246, - "loss": 46.0, - "step": 26159 - }, - { - "epoch": 4.212891018156931, - "grad_norm": 0.008115086704492569, - "learning_rate": 0.000199991248953969, - "loss": 46.0, - "step": 26160 - }, - { - "epoch": 4.213052055235718, - "grad_norm": 0.005389650817960501, - "learning_rate": 0.00019999124828466987, - "loss": 46.0, - "step": 26161 - }, - { - "epoch": 4.213213092314505, - "grad_norm": 0.008649742230772972, - "learning_rate": 0.0001999912476153452, - "loss": 46.0, - "step": 26162 - }, - { - "epoch": 4.2133741293932925, - "grad_norm": 0.002736145630478859, - "learning_rate": 0.00019999124694599488, - "loss": 46.0, - "step": 26163 - }, - { - "epoch": 4.21353516647208, - "grad_norm": 0.0196493249386549, - "learning_rate": 0.00019999124627661903, - "loss": 46.0, - "step": 26164 - }, - { - "epoch": 4.213696203550867, - "grad_norm": 0.005836357828229666, - "learning_rate": 0.00019999124560721756, - "loss": 46.0, - "step": 26165 - }, - { - "epoch": 4.213857240629655, - "grad_norm": 0.008302832953631878, - "learning_rate": 0.00019999124493779049, - "loss": 46.0, - "step": 26166 - }, - { - "epoch": 4.214018277708442, - "grad_norm": 0.0025642337277531624, - "learning_rate": 0.00019999124426833787, - "loss": 46.0, - "step": 26167 - }, - { - "epoch": 4.21417931478723, - "grad_norm": 0.0027002268470823765, - "learning_rate": 0.00019999124359885962, - "loss": 46.0, - "step": 26168 - }, - { - "epoch": 4.214340351866017, - "grad_norm": 0.002399461343884468, - "learning_rate": 0.00019999124292935575, - "loss": 46.0, - "step": 26169 - }, - { - "epoch": 4.214501388944805, - "grad_norm": 0.0019487167010083795, - "learning_rate": 0.00019999124225982633, - "loss": 46.0, - "step": 26170 - }, - { - "epoch": 4.214662426023592, - "grad_norm": 0.0008998162229545414, - "learning_rate": 0.00019999124159027128, - "loss": 46.0, - "step": 26171 - }, - { - "epoch": 4.2148234631023795, - "grad_norm": 0.005393853411078453, - "learning_rate": 0.00019999124092069068, - "loss": 46.0, - "step": 26172 - }, - { - "epoch": 4.214984500181167, - "grad_norm": 0.0061567011289298534, - "learning_rate": 0.0001999912402510845, - "loss": 46.0, - "step": 26173 - }, - { - "epoch": 4.2151455372599544, - "grad_norm": 0.006997460033744574, - "learning_rate": 0.00019999123958145266, - "loss": 46.0, - "step": 26174 - }, - { - "epoch": 4.215306574338742, - "grad_norm": 0.004063728731125593, - "learning_rate": 0.00019999123891179527, - "loss": 46.0, - "step": 26175 - }, - { - "epoch": 4.2154676114175285, - "grad_norm": 0.001859088079072535, - "learning_rate": 0.0001999912382421123, - "loss": 46.0, - "step": 26176 - }, - { - "epoch": 4.215628648496316, - "grad_norm": 0.0017855068435892463, - "learning_rate": 0.0001999912375724037, - "loss": 46.0, - "step": 26177 - }, - { - "epoch": 4.215789685575103, - "grad_norm": 0.0018491818336769938, - "learning_rate": 0.00019999123690266955, - "loss": 46.0, - "step": 26178 - }, - { - "epoch": 4.215950722653891, - "grad_norm": 0.004011351149529219, - "learning_rate": 0.00019999123623290976, - "loss": 46.0, - "step": 26179 - }, - { - "epoch": 4.216111759732678, - "grad_norm": 0.0011792851146310568, - "learning_rate": 0.00019999123556312443, - "loss": 46.0, - "step": 26180 - }, - { - "epoch": 4.216272796811466, - "grad_norm": 0.01074582152068615, - "learning_rate": 0.00019999123489331346, - "loss": 46.0, - "step": 26181 - }, - { - "epoch": 4.216433833890253, - "grad_norm": 0.013922140933573246, - "learning_rate": 0.0001999912342234769, - "loss": 46.0, - "step": 26182 - }, - { - "epoch": 4.216594870969041, - "grad_norm": 0.002625813940539956, - "learning_rate": 0.0001999912335536148, - "loss": 46.0, - "step": 26183 - }, - { - "epoch": 4.216755908047828, - "grad_norm": 0.01154703926295042, - "learning_rate": 0.00019999123288372706, - "loss": 46.0, - "step": 26184 - }, - { - "epoch": 4.2169169451266155, - "grad_norm": 0.0023028203286230564, - "learning_rate": 0.00019999123221381372, - "loss": 46.0, - "step": 26185 - }, - { - "epoch": 4.217077982205403, - "grad_norm": 0.003878034418448806, - "learning_rate": 0.00019999123154387482, - "loss": 46.0, - "step": 26186 - }, - { - "epoch": 4.21723901928419, - "grad_norm": 0.0031337235122919083, - "learning_rate": 0.00019999123087391033, - "loss": 46.0, - "step": 26187 - }, - { - "epoch": 4.217400056362978, - "grad_norm": 0.003292187349870801, - "learning_rate": 0.00019999123020392022, - "loss": 46.0, - "step": 26188 - }, - { - "epoch": 4.217561093441765, - "grad_norm": 0.003771548392251134, - "learning_rate": 0.00019999122953390455, - "loss": 46.0, - "step": 26189 - }, - { - "epoch": 4.217722130520553, - "grad_norm": 0.005378633737564087, - "learning_rate": 0.00019999122886386322, - "loss": 46.0, - "step": 26190 - }, - { - "epoch": 4.217883167599339, - "grad_norm": 0.004960936959832907, - "learning_rate": 0.00019999122819379635, - "loss": 46.0, - "step": 26191 - }, - { - "epoch": 4.218044204678127, - "grad_norm": 0.004270308651030064, - "learning_rate": 0.00019999122752370393, - "loss": 46.0, - "step": 26192 - }, - { - "epoch": 4.218205241756914, - "grad_norm": 0.0016056271269917488, - "learning_rate": 0.00019999122685358583, - "loss": 46.0, - "step": 26193 - }, - { - "epoch": 4.218366278835702, - "grad_norm": 0.01219590101391077, - "learning_rate": 0.0001999912261834422, - "loss": 46.0, - "step": 26194 - }, - { - "epoch": 4.218527315914489, - "grad_norm": 0.002686714520677924, - "learning_rate": 0.00019999122551327294, - "loss": 46.0, - "step": 26195 - }, - { - "epoch": 4.218688352993277, - "grad_norm": 0.0009120117174461484, - "learning_rate": 0.0001999912248430781, - "loss": 46.0, - "step": 26196 - }, - { - "epoch": 4.218849390072064, - "grad_norm": 0.002591249532997608, - "learning_rate": 0.00019999122417285766, - "loss": 46.0, - "step": 26197 - }, - { - "epoch": 4.2190104271508515, - "grad_norm": 0.003160852240398526, - "learning_rate": 0.00019999122350261163, - "loss": 46.0, - "step": 26198 - }, - { - "epoch": 4.219171464229639, - "grad_norm": 0.0018156197620555758, - "learning_rate": 0.00019999122283234, - "loss": 46.0, - "step": 26199 - }, - { - "epoch": 4.219332501308426, - "grad_norm": 0.0076890247873961926, - "learning_rate": 0.0001999912221620428, - "loss": 46.0, - "step": 26200 - }, - { - "epoch": 4.219493538387214, - "grad_norm": 0.002638919046148658, - "learning_rate": 0.00019999122149172002, - "loss": 46.0, - "step": 26201 - }, - { - "epoch": 4.219654575466001, - "grad_norm": 0.008569651283323765, - "learning_rate": 0.00019999122082137164, - "loss": 46.0, - "step": 26202 - }, - { - "epoch": 4.219815612544789, - "grad_norm": 0.008760536089539528, - "learning_rate": 0.00019999122015099761, - "loss": 46.0, - "step": 26203 - }, - { - "epoch": 4.219976649623576, - "grad_norm": 0.00193566654343158, - "learning_rate": 0.00019999121948059803, - "loss": 46.0, - "step": 26204 - }, - { - "epoch": 4.220137686702364, - "grad_norm": 0.006570394616574049, - "learning_rate": 0.00019999121881017286, - "loss": 46.0, - "step": 26205 - }, - { - "epoch": 4.22029872378115, - "grad_norm": 0.0019867916125804186, - "learning_rate": 0.0001999912181397221, - "loss": 46.0, - "step": 26206 - }, - { - "epoch": 4.220459760859938, - "grad_norm": 0.0013249969342723489, - "learning_rate": 0.00019999121746924574, - "loss": 46.0, - "step": 26207 - }, - { - "epoch": 4.220620797938725, - "grad_norm": 0.002329988870769739, - "learning_rate": 0.0001999912167987438, - "loss": 46.0, - "step": 26208 - }, - { - "epoch": 4.220781835017513, - "grad_norm": 0.0028690302278846502, - "learning_rate": 0.00019999121612821624, - "loss": 46.0, - "step": 26209 - }, - { - "epoch": 4.2209428720963, - "grad_norm": 0.00662878667935729, - "learning_rate": 0.00019999121545766313, - "loss": 46.0, - "step": 26210 - }, - { - "epoch": 4.2211039091750875, - "grad_norm": 0.004774064291268587, - "learning_rate": 0.0001999912147870844, - "loss": 46.0, - "step": 26211 - }, - { - "epoch": 4.221264946253875, - "grad_norm": 0.004695198033004999, - "learning_rate": 0.00019999121411648006, - "loss": 46.0, - "step": 26212 - }, - { - "epoch": 4.221425983332662, - "grad_norm": 0.0052057174034416676, - "learning_rate": 0.00019999121344585014, - "loss": 46.0, - "step": 26213 - }, - { - "epoch": 4.22158702041145, - "grad_norm": 0.006722983904182911, - "learning_rate": 0.00019999121277519463, - "loss": 46.0, - "step": 26214 - }, - { - "epoch": 4.221748057490237, - "grad_norm": 0.002662076847627759, - "learning_rate": 0.00019999121210451354, - "loss": 46.0, - "step": 26215 - }, - { - "epoch": 4.221909094569025, - "grad_norm": 0.005133450962603092, - "learning_rate": 0.00019999121143380685, - "loss": 46.0, - "step": 26216 - }, - { - "epoch": 4.222070131647812, - "grad_norm": 0.010997754521667957, - "learning_rate": 0.00019999121076307456, - "loss": 46.0, - "step": 26217 - }, - { - "epoch": 4.2222311687266, - "grad_norm": 0.001192996627651155, - "learning_rate": 0.0001999912100923167, - "loss": 46.0, - "step": 26218 - }, - { - "epoch": 4.222392205805387, - "grad_norm": 0.0022864823695272207, - "learning_rate": 0.00019999120942153323, - "loss": 46.0, - "step": 26219 - }, - { - "epoch": 4.222553242884174, - "grad_norm": 0.0033758634235709906, - "learning_rate": 0.00019999120875072418, - "loss": 46.0, - "step": 26220 - }, - { - "epoch": 4.222714279962961, - "grad_norm": 0.005932088941335678, - "learning_rate": 0.0001999912080798895, - "loss": 46.0, - "step": 26221 - }, - { - "epoch": 4.222875317041749, - "grad_norm": 0.0035091752652078867, - "learning_rate": 0.00019999120740902925, - "loss": 46.0, - "step": 26222 - }, - { - "epoch": 4.223036354120536, - "grad_norm": 0.0006309850141406059, - "learning_rate": 0.0001999912067381434, - "loss": 46.0, - "step": 26223 - }, - { - "epoch": 4.2231973911993235, - "grad_norm": 0.008238116279244423, - "learning_rate": 0.000199991206067232, - "loss": 46.0, - "step": 26224 - }, - { - "epoch": 4.223358428278111, - "grad_norm": 0.0032019237987697124, - "learning_rate": 0.00019999120539629495, - "loss": 46.0, - "step": 26225 - }, - { - "epoch": 4.223519465356898, - "grad_norm": 0.005181713495403528, - "learning_rate": 0.00019999120472533234, - "loss": 46.0, - "step": 26226 - }, - { - "epoch": 4.223680502435686, - "grad_norm": 0.0021451578941196203, - "learning_rate": 0.00019999120405434412, - "loss": 46.0, - "step": 26227 - }, - { - "epoch": 4.223841539514473, - "grad_norm": 0.007014904171228409, - "learning_rate": 0.0001999912033833303, - "loss": 46.0, - "step": 26228 - }, - { - "epoch": 4.224002576593261, - "grad_norm": 0.002018124796450138, - "learning_rate": 0.0001999912027122909, - "loss": 46.0, - "step": 26229 - }, - { - "epoch": 4.224163613672048, - "grad_norm": 0.003496370278298855, - "learning_rate": 0.00019999120204122593, - "loss": 46.0, - "step": 26230 - }, - { - "epoch": 4.224324650750836, - "grad_norm": 0.0058142924681305885, - "learning_rate": 0.00019999120137013536, - "loss": 46.0, - "step": 26231 - }, - { - "epoch": 4.224485687829623, - "grad_norm": 0.006493426393717527, - "learning_rate": 0.00019999120069901917, - "loss": 46.0, - "step": 26232 - }, - { - "epoch": 4.2246467249084105, - "grad_norm": 0.002277251798659563, - "learning_rate": 0.0001999912000278774, - "loss": 46.0, - "step": 26233 - }, - { - "epoch": 4.224807761987197, - "grad_norm": 0.008645284920930862, - "learning_rate": 0.00019999119935671004, - "loss": 46.0, - "step": 26234 - }, - { - "epoch": 4.2249687990659845, - "grad_norm": 0.0012503694742918015, - "learning_rate": 0.0001999911986855171, - "loss": 46.0, - "step": 26235 - }, - { - "epoch": 4.225129836144772, - "grad_norm": 0.0045026084408164024, - "learning_rate": 0.00019999119801429856, - "loss": 46.0, - "step": 26236 - }, - { - "epoch": 4.2252908732235595, - "grad_norm": 0.0014557895483449101, - "learning_rate": 0.00019999119734305442, - "loss": 46.0, - "step": 26237 - }, - { - "epoch": 4.225451910302347, - "grad_norm": 0.008168893866240978, - "learning_rate": 0.00019999119667178468, - "loss": 46.0, - "step": 26238 - }, - { - "epoch": 4.225612947381134, - "grad_norm": 0.0018320472445338964, - "learning_rate": 0.00019999119600048936, - "loss": 46.0, - "step": 26239 - }, - { - "epoch": 4.225773984459922, - "grad_norm": 0.006885674316436052, - "learning_rate": 0.00019999119532916842, - "loss": 46.0, - "step": 26240 - }, - { - "epoch": 4.225935021538709, - "grad_norm": 0.00178134860470891, - "learning_rate": 0.00019999119465782196, - "loss": 46.0, - "step": 26241 - }, - { - "epoch": 4.226096058617497, - "grad_norm": 0.0034408732317388058, - "learning_rate": 0.00019999119398644985, - "loss": 46.0, - "step": 26242 - }, - { - "epoch": 4.226257095696284, - "grad_norm": 0.009279229678213596, - "learning_rate": 0.00019999119331505215, - "loss": 46.0, - "step": 26243 - }, - { - "epoch": 4.226418132775072, - "grad_norm": 0.002921992912888527, - "learning_rate": 0.00019999119264362886, - "loss": 46.0, - "step": 26244 - }, - { - "epoch": 4.226579169853859, - "grad_norm": 0.00111789267975837, - "learning_rate": 0.00019999119197217996, - "loss": 46.0, - "step": 26245 - }, - { - "epoch": 4.2267402069326465, - "grad_norm": 0.00583791546523571, - "learning_rate": 0.0001999911913007055, - "loss": 46.0, - "step": 26246 - }, - { - "epoch": 4.226901244011434, - "grad_norm": 0.0016119221691042185, - "learning_rate": 0.00019999119062920543, - "loss": 46.0, - "step": 26247 - }, - { - "epoch": 4.227062281090221, - "grad_norm": 0.004340555984526873, - "learning_rate": 0.0001999911899576798, - "loss": 46.0, - "step": 26248 - }, - { - "epoch": 4.227223318169008, - "grad_norm": 0.012931088916957378, - "learning_rate": 0.00019999118928612855, - "loss": 46.0, - "step": 26249 - }, - { - "epoch": 4.227384355247795, - "grad_norm": 0.0026871436275541782, - "learning_rate": 0.00019999118861455171, - "loss": 46.0, - "step": 26250 - }, - { - "epoch": 4.227545392326583, - "grad_norm": 0.0041232905350625515, - "learning_rate": 0.00019999118794294927, - "loss": 46.0, - "step": 26251 - }, - { - "epoch": 4.22770642940537, - "grad_norm": 0.0015007088659331203, - "learning_rate": 0.00019999118727132126, - "loss": 46.0, - "step": 26252 - }, - { - "epoch": 4.227867466484158, - "grad_norm": 0.004195880610495806, - "learning_rate": 0.00019999118659966763, - "loss": 46.0, - "step": 26253 - }, - { - "epoch": 4.228028503562945, - "grad_norm": 0.008949180133640766, - "learning_rate": 0.00019999118592798842, - "loss": 46.0, - "step": 26254 - }, - { - "epoch": 4.228189540641733, - "grad_norm": 0.002593415090814233, - "learning_rate": 0.0001999911852562836, - "loss": 46.0, - "step": 26255 - }, - { - "epoch": 4.22835057772052, - "grad_norm": 0.0029092226177453995, - "learning_rate": 0.0001999911845845532, - "loss": 46.0, - "step": 26256 - }, - { - "epoch": 4.228511614799308, - "grad_norm": 0.0051793926395475864, - "learning_rate": 0.0001999911839127972, - "loss": 46.0, - "step": 26257 - }, - { - "epoch": 4.228672651878095, - "grad_norm": 0.0020929318852722645, - "learning_rate": 0.00019999118324101565, - "loss": 46.0, - "step": 26258 - }, - { - "epoch": 4.2288336889568825, - "grad_norm": 0.01367963757365942, - "learning_rate": 0.00019999118256920845, - "loss": 46.0, - "step": 26259 - }, - { - "epoch": 4.22899472603567, - "grad_norm": 0.005756871309131384, - "learning_rate": 0.0001999911818973757, - "loss": 46.0, - "step": 26260 - }, - { - "epoch": 4.229155763114457, - "grad_norm": 0.010185593739151955, - "learning_rate": 0.00019999118122551734, - "loss": 46.0, - "step": 26261 - }, - { - "epoch": 4.229316800193245, - "grad_norm": 0.0005250861286185682, - "learning_rate": 0.00019999118055363338, - "loss": 46.0, - "step": 26262 - }, - { - "epoch": 4.229477837272032, - "grad_norm": 0.005665406119078398, - "learning_rate": 0.00019999117988172383, - "loss": 46.0, - "step": 26263 - }, - { - "epoch": 4.229638874350819, - "grad_norm": 0.00416199816390872, - "learning_rate": 0.0001999911792097887, - "loss": 46.0, - "step": 26264 - }, - { - "epoch": 4.229799911429606, - "grad_norm": 0.004688936751335859, - "learning_rate": 0.00019999117853782797, - "loss": 46.0, - "step": 26265 - }, - { - "epoch": 4.229960948508394, - "grad_norm": 0.003609878709539771, - "learning_rate": 0.00019999117786584166, - "loss": 46.0, - "step": 26266 - }, - { - "epoch": 4.230121985587181, - "grad_norm": 0.001283125951886177, - "learning_rate": 0.00019999117719382974, - "loss": 46.0, - "step": 26267 - }, - { - "epoch": 4.230283022665969, - "grad_norm": 0.00844255369156599, - "learning_rate": 0.00019999117652179222, - "loss": 46.0, - "step": 26268 - }, - { - "epoch": 4.230444059744756, - "grad_norm": 0.0014399484498426318, - "learning_rate": 0.00019999117584972912, - "loss": 46.0, - "step": 26269 - }, - { - "epoch": 4.230605096823544, - "grad_norm": 0.0013790338998660445, - "learning_rate": 0.00019999117517764044, - "loss": 46.0, - "step": 26270 - }, - { - "epoch": 4.230766133902331, - "grad_norm": 0.0026121370028704405, - "learning_rate": 0.00019999117450552616, - "loss": 46.0, - "step": 26271 - }, - { - "epoch": 4.2309271709811185, - "grad_norm": 0.008186489343643188, - "learning_rate": 0.00019999117383338625, - "loss": 46.0, - "step": 26272 - }, - { - "epoch": 4.231088208059906, - "grad_norm": 0.007265111431479454, - "learning_rate": 0.00019999117316122077, - "loss": 46.0, - "step": 26273 - }, - { - "epoch": 4.231249245138693, - "grad_norm": 0.0024882478173822165, - "learning_rate": 0.0001999911724890297, - "loss": 46.0, - "step": 26274 - }, - { - "epoch": 4.231410282217481, - "grad_norm": 0.005082158371806145, - "learning_rate": 0.00019999117181681309, - "loss": 46.0, - "step": 26275 - }, - { - "epoch": 4.231571319296268, - "grad_norm": 0.00590904988348484, - "learning_rate": 0.00019999117114457082, - "loss": 46.0, - "step": 26276 - }, - { - "epoch": 4.231732356375056, - "grad_norm": 0.00529545359313488, - "learning_rate": 0.00019999117047230297, - "loss": 46.0, - "step": 26277 - }, - { - "epoch": 4.231893393453843, - "grad_norm": 0.0015442437725141644, - "learning_rate": 0.00019999116980000956, - "loss": 46.0, - "step": 26278 - }, - { - "epoch": 4.23205443053263, - "grad_norm": 0.0037009220104664564, - "learning_rate": 0.0001999911691276905, - "loss": 46.0, - "step": 26279 - }, - { - "epoch": 4.232215467611417, - "grad_norm": 0.005697970744222403, - "learning_rate": 0.0001999911684553459, - "loss": 46.0, - "step": 26280 - }, - { - "epoch": 4.232376504690205, - "grad_norm": 0.0007133521139621735, - "learning_rate": 0.0001999911677829757, - "loss": 46.0, - "step": 26281 - }, - { - "epoch": 4.232537541768992, - "grad_norm": 0.005868204869329929, - "learning_rate": 0.00019999116711057988, - "loss": 46.0, - "step": 26282 - }, - { - "epoch": 4.23269857884778, - "grad_norm": 0.0053249988704919815, - "learning_rate": 0.0001999911664381585, - "loss": 46.0, - "step": 26283 - }, - { - "epoch": 4.232859615926567, - "grad_norm": 0.0036060435231775045, - "learning_rate": 0.00019999116576571152, - "loss": 46.0, - "step": 26284 - }, - { - "epoch": 4.2330206530053545, - "grad_norm": 0.005285861436277628, - "learning_rate": 0.0001999911650932389, - "loss": 46.0, - "step": 26285 - }, - { - "epoch": 4.233181690084142, - "grad_norm": 0.0017002781387418509, - "learning_rate": 0.00019999116442074075, - "loss": 46.0, - "step": 26286 - }, - { - "epoch": 4.233342727162929, - "grad_norm": 0.0013269941555336118, - "learning_rate": 0.00019999116374821697, - "loss": 46.0, - "step": 26287 - }, - { - "epoch": 4.233503764241717, - "grad_norm": 0.001127561554312706, - "learning_rate": 0.0001999911630756676, - "loss": 46.0, - "step": 26288 - }, - { - "epoch": 4.233664801320504, - "grad_norm": 0.003587308106943965, - "learning_rate": 0.00019999116240309266, - "loss": 46.0, - "step": 26289 - }, - { - "epoch": 4.233825838399292, - "grad_norm": 0.0031553911976516247, - "learning_rate": 0.0001999911617304921, - "loss": 46.0, - "step": 26290 - }, - { - "epoch": 4.233986875478079, - "grad_norm": 0.0026676699053496122, - "learning_rate": 0.00019999116105786597, - "loss": 46.0, - "step": 26291 - }, - { - "epoch": 4.234147912556867, - "grad_norm": 0.012290775775909424, - "learning_rate": 0.00019999116038521423, - "loss": 46.0, - "step": 26292 - }, - { - "epoch": 4.234308949635653, - "grad_norm": 0.013185273855924606, - "learning_rate": 0.00019999115971253693, - "loss": 46.0, - "step": 26293 - }, - { - "epoch": 4.234469986714441, - "grad_norm": 0.0036340963561087847, - "learning_rate": 0.000199991159039834, - "loss": 46.0, - "step": 26294 - }, - { - "epoch": 4.234631023793228, - "grad_norm": 0.002658189507201314, - "learning_rate": 0.00019999115836710548, - "loss": 46.0, - "step": 26295 - }, - { - "epoch": 4.2347920608720155, - "grad_norm": 0.004227503202855587, - "learning_rate": 0.0001999911576943514, - "loss": 46.0, - "step": 26296 - }, - { - "epoch": 4.234953097950803, - "grad_norm": 0.013710539788007736, - "learning_rate": 0.0001999911570215717, - "loss": 46.0, - "step": 26297 - }, - { - "epoch": 4.23511413502959, - "grad_norm": 0.008512726984918118, - "learning_rate": 0.00019999115634876643, - "loss": 46.0, - "step": 26298 - }, - { - "epoch": 4.235275172108378, - "grad_norm": 0.005582069046795368, - "learning_rate": 0.00019999115567593552, - "loss": 46.0, - "step": 26299 - }, - { - "epoch": 4.235436209187165, - "grad_norm": 0.007138532120734453, - "learning_rate": 0.00019999115500307906, - "loss": 46.0, - "step": 26300 - }, - { - "epoch": 4.235597246265953, - "grad_norm": 0.003104856237769127, - "learning_rate": 0.00019999115433019698, - "loss": 46.0, - "step": 26301 - }, - { - "epoch": 4.23575828334474, - "grad_norm": 0.0017345809610560536, - "learning_rate": 0.00019999115365728934, - "loss": 46.0, - "step": 26302 - }, - { - "epoch": 4.235919320423528, - "grad_norm": 0.005846432875841856, - "learning_rate": 0.0001999911529843561, - "loss": 46.0, - "step": 26303 - }, - { - "epoch": 4.236080357502315, - "grad_norm": 0.009179968386888504, - "learning_rate": 0.00019999115231139725, - "loss": 46.0, - "step": 26304 - }, - { - "epoch": 4.236241394581103, - "grad_norm": 0.004852697718888521, - "learning_rate": 0.0001999911516384128, - "loss": 46.0, - "step": 26305 - }, - { - "epoch": 4.23640243165989, - "grad_norm": 0.0015597400488331914, - "learning_rate": 0.0001999911509654028, - "loss": 46.0, - "step": 26306 - }, - { - "epoch": 4.2365634687386775, - "grad_norm": 0.004277390893548727, - "learning_rate": 0.00019999115029236718, - "loss": 46.0, - "step": 26307 - }, - { - "epoch": 4.236724505817464, - "grad_norm": 0.002068681875243783, - "learning_rate": 0.00019999114961930596, - "loss": 46.0, - "step": 26308 - }, - { - "epoch": 4.2368855428962515, - "grad_norm": 0.008431944064795971, - "learning_rate": 0.00019999114894621913, - "loss": 46.0, - "step": 26309 - }, - { - "epoch": 4.237046579975039, - "grad_norm": 0.0029291450046002865, - "learning_rate": 0.00019999114827310674, - "loss": 46.0, - "step": 26310 - }, - { - "epoch": 4.237207617053826, - "grad_norm": 0.0039008574094623327, - "learning_rate": 0.00019999114759996876, - "loss": 46.0, - "step": 26311 - }, - { - "epoch": 4.237368654132614, - "grad_norm": 0.0014049317687749863, - "learning_rate": 0.00019999114692680517, - "loss": 46.0, - "step": 26312 - }, - { - "epoch": 4.237529691211401, - "grad_norm": 0.0016699208645150065, - "learning_rate": 0.00019999114625361602, - "loss": 46.0, - "step": 26313 - }, - { - "epoch": 4.237690728290189, - "grad_norm": 0.00632148003205657, - "learning_rate": 0.00019999114558040125, - "loss": 46.0, - "step": 26314 - }, - { - "epoch": 4.237851765368976, - "grad_norm": 0.0028135075699537992, - "learning_rate": 0.00019999114490716087, - "loss": 46.0, - "step": 26315 - }, - { - "epoch": 4.238012802447764, - "grad_norm": 0.012456709519028664, - "learning_rate": 0.0001999911442338949, - "loss": 46.0, - "step": 26316 - }, - { - "epoch": 4.238173839526551, - "grad_norm": 0.0014022429240867496, - "learning_rate": 0.00019999114356060335, - "loss": 46.0, - "step": 26317 - }, - { - "epoch": 4.238334876605339, - "grad_norm": 0.0030872586648911238, - "learning_rate": 0.0001999911428872862, - "loss": 46.0, - "step": 26318 - }, - { - "epoch": 4.238495913684126, - "grad_norm": 0.002695930888876319, - "learning_rate": 0.0001999911422139435, - "loss": 46.0, - "step": 26319 - }, - { - "epoch": 4.2386569507629135, - "grad_norm": 0.005902216769754887, - "learning_rate": 0.00019999114154057516, - "loss": 46.0, - "step": 26320 - }, - { - "epoch": 4.238817987841701, - "grad_norm": 0.002503668423742056, - "learning_rate": 0.00019999114086718126, - "loss": 46.0, - "step": 26321 - }, - { - "epoch": 4.2389790249204875, - "grad_norm": 0.006871354300528765, - "learning_rate": 0.00019999114019376174, - "loss": 46.0, - "step": 26322 - }, - { - "epoch": 4.239140061999275, - "grad_norm": 0.001992875011637807, - "learning_rate": 0.00019999113952031666, - "loss": 46.0, - "step": 26323 - }, - { - "epoch": 4.239301099078062, - "grad_norm": 0.008072340860962868, - "learning_rate": 0.00019999113884684597, - "loss": 46.0, - "step": 26324 - }, - { - "epoch": 4.23946213615685, - "grad_norm": 0.002895774319767952, - "learning_rate": 0.00019999113817334966, - "loss": 46.0, - "step": 26325 - }, - { - "epoch": 4.239623173235637, - "grad_norm": 0.008577320724725723, - "learning_rate": 0.0001999911374998278, - "loss": 46.0, - "step": 26326 - }, - { - "epoch": 4.239784210314425, - "grad_norm": 0.00791993085294962, - "learning_rate": 0.0001999911368262803, - "loss": 46.0, - "step": 26327 - }, - { - "epoch": 4.239945247393212, - "grad_norm": 0.0024015309754759073, - "learning_rate": 0.00019999113615270725, - "loss": 46.0, - "step": 26328 - }, - { - "epoch": 4.240106284472, - "grad_norm": 0.003368641948327422, - "learning_rate": 0.0001999911354791086, - "loss": 46.0, - "step": 26329 - }, - { - "epoch": 4.240267321550787, - "grad_norm": 0.005208450369536877, - "learning_rate": 0.00019999113480548435, - "loss": 46.0, - "step": 26330 - }, - { - "epoch": 4.240428358629575, - "grad_norm": 0.004403963685035706, - "learning_rate": 0.0001999911341318345, - "loss": 46.0, - "step": 26331 - }, - { - "epoch": 4.240589395708362, - "grad_norm": 0.01280742883682251, - "learning_rate": 0.00019999113345815905, - "loss": 46.0, - "step": 26332 - }, - { - "epoch": 4.2407504327871495, - "grad_norm": 0.014430827461183071, - "learning_rate": 0.00019999113278445805, - "loss": 46.0, - "step": 26333 - }, - { - "epoch": 4.240911469865937, - "grad_norm": 0.004967774264514446, - "learning_rate": 0.00019999113211073143, - "loss": 46.0, - "step": 26334 - }, - { - "epoch": 4.241072506944724, - "grad_norm": 0.0009713007602840662, - "learning_rate": 0.0001999911314369792, - "loss": 46.0, - "step": 26335 - }, - { - "epoch": 4.241233544023512, - "grad_norm": 0.004461093805730343, - "learning_rate": 0.0001999911307632014, - "loss": 46.0, - "step": 26336 - }, - { - "epoch": 4.241394581102298, - "grad_norm": 0.006361424457281828, - "learning_rate": 0.000199991130089398, - "loss": 46.0, - "step": 26337 - }, - { - "epoch": 4.241555618181086, - "grad_norm": 0.005571140442043543, - "learning_rate": 0.000199991129415569, - "loss": 46.0, - "step": 26338 - }, - { - "epoch": 4.241716655259873, - "grad_norm": 0.005587654188275337, - "learning_rate": 0.00019999112874171445, - "loss": 46.0, - "step": 26339 - }, - { - "epoch": 4.241877692338661, - "grad_norm": 0.007114875130355358, - "learning_rate": 0.00019999112806783425, - "loss": 46.0, - "step": 26340 - }, - { - "epoch": 4.242038729417448, - "grad_norm": 0.002650391310453415, - "learning_rate": 0.0001999911273939285, - "loss": 46.0, - "step": 26341 - }, - { - "epoch": 4.242199766496236, - "grad_norm": 0.0023981230333447456, - "learning_rate": 0.0001999911267199971, - "loss": 46.0, - "step": 26342 - }, - { - "epoch": 4.242360803575023, - "grad_norm": 0.003958090674132109, - "learning_rate": 0.00019999112604604017, - "loss": 46.0, - "step": 26343 - }, - { - "epoch": 4.2425218406538105, - "grad_norm": 0.0031815425027161837, - "learning_rate": 0.00019999112537205763, - "loss": 46.0, - "step": 26344 - }, - { - "epoch": 4.242682877732598, - "grad_norm": 0.003761272644624114, - "learning_rate": 0.00019999112469804947, - "loss": 46.0, - "step": 26345 - }, - { - "epoch": 4.2428439148113855, - "grad_norm": 0.003021727781742811, - "learning_rate": 0.00019999112402401575, - "loss": 46.0, - "step": 26346 - }, - { - "epoch": 4.243004951890173, - "grad_norm": 0.0035204461310058832, - "learning_rate": 0.00019999112334995642, - "loss": 46.0, - "step": 26347 - }, - { - "epoch": 4.24316598896896, - "grad_norm": 0.003963842056691647, - "learning_rate": 0.0001999911226758715, - "loss": 46.0, - "step": 26348 - }, - { - "epoch": 4.243327026047748, - "grad_norm": 0.006535484455525875, - "learning_rate": 0.000199991122001761, - "loss": 46.0, - "step": 26349 - }, - { - "epoch": 4.243488063126535, - "grad_norm": 0.0019848307128995657, - "learning_rate": 0.00019999112132762493, - "loss": 46.0, - "step": 26350 - }, - { - "epoch": 4.243649100205323, - "grad_norm": 0.014494430273771286, - "learning_rate": 0.00019999112065346322, - "loss": 46.0, - "step": 26351 - }, - { - "epoch": 4.243810137284109, - "grad_norm": 0.0017849645810201764, - "learning_rate": 0.00019999111997927592, - "loss": 46.0, - "step": 26352 - }, - { - "epoch": 4.243971174362897, - "grad_norm": 0.004987181629985571, - "learning_rate": 0.00019999111930506304, - "loss": 46.0, - "step": 26353 - }, - { - "epoch": 4.244132211441684, - "grad_norm": 0.0034145419485867023, - "learning_rate": 0.00019999111863082457, - "loss": 46.0, - "step": 26354 - }, - { - "epoch": 4.244293248520472, - "grad_norm": 0.002710689790546894, - "learning_rate": 0.0001999911179565605, - "loss": 46.0, - "step": 26355 - }, - { - "epoch": 4.244454285599259, - "grad_norm": 0.004202722106128931, - "learning_rate": 0.00019999111728227084, - "loss": 46.0, - "step": 26356 - }, - { - "epoch": 4.2446153226780465, - "grad_norm": 0.006209149956703186, - "learning_rate": 0.0001999911166079556, - "loss": 46.0, - "step": 26357 - }, - { - "epoch": 4.244776359756834, - "grad_norm": 0.0010890814010053873, - "learning_rate": 0.00019999111593361474, - "loss": 46.0, - "step": 26358 - }, - { - "epoch": 4.244937396835621, - "grad_norm": 0.00703685637563467, - "learning_rate": 0.00019999111525924833, - "loss": 46.0, - "step": 26359 - }, - { - "epoch": 4.245098433914409, - "grad_norm": 0.00873405672609806, - "learning_rate": 0.00019999111458485628, - "loss": 46.0, - "step": 26360 - }, - { - "epoch": 4.245259470993196, - "grad_norm": 0.0009693785104900599, - "learning_rate": 0.00019999111391043868, - "loss": 46.0, - "step": 26361 - }, - { - "epoch": 4.245420508071984, - "grad_norm": 0.008365047164261341, - "learning_rate": 0.00019999111323599545, - "loss": 46.0, - "step": 26362 - }, - { - "epoch": 4.245581545150771, - "grad_norm": 0.004657451994717121, - "learning_rate": 0.00019999111256152664, - "loss": 46.0, - "step": 26363 - }, - { - "epoch": 4.245742582229559, - "grad_norm": 0.007005263119935989, - "learning_rate": 0.00019999111188703225, - "loss": 46.0, - "step": 26364 - }, - { - "epoch": 4.245903619308346, - "grad_norm": 0.008059971034526825, - "learning_rate": 0.00019999111121251227, - "loss": 46.0, - "step": 26365 - }, - { - "epoch": 4.246064656387133, - "grad_norm": 0.009876109659671783, - "learning_rate": 0.0001999911105379667, - "loss": 46.0, - "step": 26366 - }, - { - "epoch": 4.24622569346592, - "grad_norm": 0.001157525461167097, - "learning_rate": 0.0001999911098633955, - "loss": 46.0, - "step": 26367 - }, - { - "epoch": 4.246386730544708, - "grad_norm": 0.004886473994702101, - "learning_rate": 0.00019999110918879877, - "loss": 46.0, - "step": 26368 - }, - { - "epoch": 4.246547767623495, - "grad_norm": 0.0037689292803406715, - "learning_rate": 0.00019999110851417638, - "loss": 46.0, - "step": 26369 - }, - { - "epoch": 4.2467088047022825, - "grad_norm": 0.006638484075665474, - "learning_rate": 0.0001999911078395284, - "loss": 46.0, - "step": 26370 - }, - { - "epoch": 4.24686984178107, - "grad_norm": 0.0046528177335858345, - "learning_rate": 0.00019999110716485487, - "loss": 46.0, - "step": 26371 - }, - { - "epoch": 4.247030878859857, - "grad_norm": 0.010144357569515705, - "learning_rate": 0.00019999110649015573, - "loss": 46.0, - "step": 26372 - }, - { - "epoch": 4.247191915938645, - "grad_norm": 0.0026838178746402264, - "learning_rate": 0.000199991105815431, - "loss": 46.0, - "step": 26373 - }, - { - "epoch": 4.247352953017432, - "grad_norm": 0.003250395180657506, - "learning_rate": 0.0001999911051406807, - "loss": 46.0, - "step": 26374 - }, - { - "epoch": 4.24751399009622, - "grad_norm": 0.010543163865804672, - "learning_rate": 0.00019999110446590476, - "loss": 46.0, - "step": 26375 - }, - { - "epoch": 4.247675027175007, - "grad_norm": 0.002511736936867237, - "learning_rate": 0.00019999110379110327, - "loss": 46.0, - "step": 26376 - }, - { - "epoch": 4.247836064253795, - "grad_norm": 0.00756703270599246, - "learning_rate": 0.00019999110311627615, - "loss": 46.0, - "step": 26377 - }, - { - "epoch": 4.247997101332582, - "grad_norm": 0.015099585056304932, - "learning_rate": 0.00019999110244142348, - "loss": 46.0, - "step": 26378 - }, - { - "epoch": 4.24815813841137, - "grad_norm": 0.008893853984773159, - "learning_rate": 0.00019999110176654517, - "loss": 46.0, - "step": 26379 - }, - { - "epoch": 4.248319175490157, - "grad_norm": 0.008545872755348682, - "learning_rate": 0.0001999911010916413, - "loss": 46.0, - "step": 26380 - }, - { - "epoch": 4.248480212568944, - "grad_norm": 0.0007415362633764744, - "learning_rate": 0.00019999110041671184, - "loss": 46.0, - "step": 26381 - }, - { - "epoch": 4.248641249647731, - "grad_norm": 0.004860542248934507, - "learning_rate": 0.00019999109974175677, - "loss": 46.0, - "step": 26382 - }, - { - "epoch": 4.2488022867265185, - "grad_norm": 0.007643924560397863, - "learning_rate": 0.0001999910990667761, - "loss": 46.0, - "step": 26383 - }, - { - "epoch": 4.248963323805306, - "grad_norm": 0.0016655635554343462, - "learning_rate": 0.00019999109839176983, - "loss": 46.0, - "step": 26384 - }, - { - "epoch": 4.249124360884093, - "grad_norm": 0.0036033783107995987, - "learning_rate": 0.000199991097716738, - "loss": 46.0, - "step": 26385 - }, - { - "epoch": 4.249285397962881, - "grad_norm": 0.005626278929412365, - "learning_rate": 0.00019999109704168058, - "loss": 46.0, - "step": 26386 - }, - { - "epoch": 4.249446435041668, - "grad_norm": 0.004101132042706013, - "learning_rate": 0.00019999109636659754, - "loss": 46.0, - "step": 26387 - }, - { - "epoch": 4.249607472120456, - "grad_norm": 0.003996509127318859, - "learning_rate": 0.00019999109569148892, - "loss": 46.0, - "step": 26388 - }, - { - "epoch": 4.249768509199243, - "grad_norm": 0.0023707302752882242, - "learning_rate": 0.0001999910950163547, - "loss": 46.0, - "step": 26389 - }, - { - "epoch": 4.249929546278031, - "grad_norm": 0.00349828670732677, - "learning_rate": 0.0001999910943411949, - "loss": 46.0, - "step": 26390 - }, - { - "epoch": 4.250090583356818, - "grad_norm": 0.00982285849750042, - "learning_rate": 0.00019999109366600952, - "loss": 46.0, - "step": 26391 - }, - { - "epoch": 4.250251620435606, - "grad_norm": 0.008573968894779682, - "learning_rate": 0.00019999109299079852, - "loss": 46.0, - "step": 26392 - }, - { - "epoch": 4.250412657514393, - "grad_norm": 0.013444820418953896, - "learning_rate": 0.00019999109231556194, - "loss": 46.0, - "step": 26393 - }, - { - "epoch": 4.2505736945931805, - "grad_norm": 0.011791357770562172, - "learning_rate": 0.00019999109164029974, - "loss": 46.0, - "step": 26394 - }, - { - "epoch": 4.250734731671967, - "grad_norm": 0.0014449275331571698, - "learning_rate": 0.00019999109096501197, - "loss": 46.0, - "step": 26395 - }, - { - "epoch": 4.2508957687507545, - "grad_norm": 0.0008376318146474659, - "learning_rate": 0.00019999109028969863, - "loss": 46.0, - "step": 26396 - }, - { - "epoch": 4.251056805829542, - "grad_norm": 0.0035114444326609373, - "learning_rate": 0.00019999108961435966, - "loss": 46.0, - "step": 26397 - }, - { - "epoch": 4.251217842908329, - "grad_norm": 0.003312203101813793, - "learning_rate": 0.00019999108893899514, - "loss": 46.0, - "step": 26398 - }, - { - "epoch": 4.251378879987117, - "grad_norm": 0.015752699226140976, - "learning_rate": 0.000199991088263605, - "loss": 46.0, - "step": 26399 - }, - { - "epoch": 4.251539917065904, - "grad_norm": 0.007732367608696222, - "learning_rate": 0.00019999108758818925, - "loss": 46.0, - "step": 26400 - }, - { - "epoch": 4.251700954144692, - "grad_norm": 0.007054062094539404, - "learning_rate": 0.00019999108691274792, - "loss": 46.0, - "step": 26401 - }, - { - "epoch": 4.251861991223479, - "grad_norm": 0.0006912920507602394, - "learning_rate": 0.000199991086237281, - "loss": 46.0, - "step": 26402 - }, - { - "epoch": 4.252023028302267, - "grad_norm": 0.007961279712617397, - "learning_rate": 0.0001999910855617885, - "loss": 46.0, - "step": 26403 - }, - { - "epoch": 4.252184065381054, - "grad_norm": 0.004229597747325897, - "learning_rate": 0.0001999910848862704, - "loss": 46.0, - "step": 26404 - }, - { - "epoch": 4.2523451024598415, - "grad_norm": 0.017234614118933678, - "learning_rate": 0.0001999910842107267, - "loss": 46.0, - "step": 26405 - }, - { - "epoch": 4.252506139538629, - "grad_norm": 0.014713821932673454, - "learning_rate": 0.00019999108353515742, - "loss": 46.0, - "step": 26406 - }, - { - "epoch": 4.2526671766174164, - "grad_norm": 0.008469310589134693, - "learning_rate": 0.00019999108285956253, - "loss": 46.0, - "step": 26407 - }, - { - "epoch": 4.252828213696204, - "grad_norm": 0.005367763806134462, - "learning_rate": 0.00019999108218394208, - "loss": 46.0, - "step": 26408 - }, - { - "epoch": 4.252989250774991, - "grad_norm": 0.0026243410538882017, - "learning_rate": 0.00019999108150829602, - "loss": 46.0, - "step": 26409 - }, - { - "epoch": 4.253150287853778, - "grad_norm": 0.0024996225256472826, - "learning_rate": 0.00019999108083262434, - "loss": 46.0, - "step": 26410 - }, - { - "epoch": 4.253311324932565, - "grad_norm": 0.0036348735447973013, - "learning_rate": 0.00019999108015692708, - "loss": 46.0, - "step": 26411 - }, - { - "epoch": 4.253472362011353, - "grad_norm": 0.016380146145820618, - "learning_rate": 0.00019999107948120426, - "loss": 46.0, - "step": 26412 - }, - { - "epoch": 4.25363339909014, - "grad_norm": 0.0023258281871676445, - "learning_rate": 0.00019999107880545582, - "loss": 46.0, - "step": 26413 - }, - { - "epoch": 4.253794436168928, - "grad_norm": 0.0019040658371523023, - "learning_rate": 0.0001999910781296818, - "loss": 46.0, - "step": 26414 - }, - { - "epoch": 4.253955473247715, - "grad_norm": 0.012818894349038601, - "learning_rate": 0.00019999107745388218, - "loss": 46.0, - "step": 26415 - }, - { - "epoch": 4.254116510326503, - "grad_norm": 0.006248683203011751, - "learning_rate": 0.00019999107677805698, - "loss": 46.0, - "step": 26416 - }, - { - "epoch": 4.25427754740529, - "grad_norm": 0.004687626846134663, - "learning_rate": 0.00019999107610220616, - "loss": 46.0, - "step": 26417 - }, - { - "epoch": 4.2544385844840775, - "grad_norm": 0.006617632694542408, - "learning_rate": 0.00019999107542632976, - "loss": 46.0, - "step": 26418 - }, - { - "epoch": 4.254599621562865, - "grad_norm": 0.001413870370015502, - "learning_rate": 0.00019999107475042775, - "loss": 46.0, - "step": 26419 - }, - { - "epoch": 4.254760658641652, - "grad_norm": 0.013979134149849415, - "learning_rate": 0.00019999107407450017, - "loss": 46.0, - "step": 26420 - }, - { - "epoch": 4.25492169572044, - "grad_norm": 0.010164640843868256, - "learning_rate": 0.000199991073398547, - "loss": 46.0, - "step": 26421 - }, - { - "epoch": 4.255082732799227, - "grad_norm": 0.003486220259219408, - "learning_rate": 0.00019999107272256823, - "loss": 46.0, - "step": 26422 - }, - { - "epoch": 4.255243769878015, - "grad_norm": 0.018868697807192802, - "learning_rate": 0.00019999107204656387, - "loss": 46.0, - "step": 26423 - }, - { - "epoch": 4.255404806956802, - "grad_norm": 0.010871385224163532, - "learning_rate": 0.00019999107137053392, - "loss": 46.0, - "step": 26424 - }, - { - "epoch": 4.255565844035589, - "grad_norm": 0.007027751766145229, - "learning_rate": 0.00019999107069447838, - "loss": 46.0, - "step": 26425 - }, - { - "epoch": 4.255726881114376, - "grad_norm": 0.0068663256242871284, - "learning_rate": 0.00019999107001839725, - "loss": 46.0, - "step": 26426 - }, - { - "epoch": 4.255887918193164, - "grad_norm": 0.0010301386937499046, - "learning_rate": 0.0001999910693422905, - "loss": 46.0, - "step": 26427 - }, - { - "epoch": 4.256048955271951, - "grad_norm": 0.005545147228986025, - "learning_rate": 0.00019999106866615818, - "loss": 46.0, - "step": 26428 - }, - { - "epoch": 4.256209992350739, - "grad_norm": 0.00392505619674921, - "learning_rate": 0.00019999106799000027, - "loss": 46.0, - "step": 26429 - }, - { - "epoch": 4.256371029429526, - "grad_norm": 0.0008495452348142862, - "learning_rate": 0.00019999106731381677, - "loss": 46.0, - "step": 26430 - }, - { - "epoch": 4.2565320665083135, - "grad_norm": 0.005274081602692604, - "learning_rate": 0.00019999106663760765, - "loss": 46.0, - "step": 26431 - }, - { - "epoch": 4.256693103587101, - "grad_norm": 0.003307618200778961, - "learning_rate": 0.00019999106596137298, - "loss": 46.0, - "step": 26432 - }, - { - "epoch": 4.256854140665888, - "grad_norm": 0.004555467516183853, - "learning_rate": 0.0001999910652851127, - "loss": 46.0, - "step": 26433 - }, - { - "epoch": 4.257015177744676, - "grad_norm": 0.004250813741236925, - "learning_rate": 0.00019999106460882678, - "loss": 46.0, - "step": 26434 - }, - { - "epoch": 4.257176214823463, - "grad_norm": 0.0038257124833762646, - "learning_rate": 0.00019999106393251532, - "loss": 46.0, - "step": 26435 - }, - { - "epoch": 4.257337251902251, - "grad_norm": 0.0037351020146161318, - "learning_rate": 0.00019999106325617824, - "loss": 46.0, - "step": 26436 - }, - { - "epoch": 4.257498288981038, - "grad_norm": 0.004851972684264183, - "learning_rate": 0.0001999910625798156, - "loss": 46.0, - "step": 26437 - }, - { - "epoch": 4.257659326059826, - "grad_norm": 0.004382154904305935, - "learning_rate": 0.00019999106190342735, - "loss": 46.0, - "step": 26438 - }, - { - "epoch": 4.257820363138613, - "grad_norm": 0.0042005497962236404, - "learning_rate": 0.0001999910612270135, - "loss": 46.0, - "step": 26439 - }, - { - "epoch": 4.2579814002174, - "grad_norm": 0.007045915815979242, - "learning_rate": 0.00019999106055057408, - "loss": 46.0, - "step": 26440 - }, - { - "epoch": 4.258142437296187, - "grad_norm": 0.005091259256005287, - "learning_rate": 0.00019999105987410904, - "loss": 46.0, - "step": 26441 - }, - { - "epoch": 4.258303474374975, - "grad_norm": 0.0036288921255618334, - "learning_rate": 0.00019999105919761844, - "loss": 46.0, - "step": 26442 - }, - { - "epoch": 4.258464511453762, - "grad_norm": 0.004293323494493961, - "learning_rate": 0.00019999105852110222, - "loss": 46.0, - "step": 26443 - }, - { - "epoch": 4.2586255485325495, - "grad_norm": 0.0037275587674230337, - "learning_rate": 0.00019999105784456042, - "loss": 46.0, - "step": 26444 - }, - { - "epoch": 4.258786585611337, - "grad_norm": 0.002928992733359337, - "learning_rate": 0.000199991057167993, - "loss": 46.0, - "step": 26445 - }, - { - "epoch": 4.258947622690124, - "grad_norm": 0.007753681857138872, - "learning_rate": 0.00019999105649140005, - "loss": 46.0, - "step": 26446 - }, - { - "epoch": 4.259108659768912, - "grad_norm": 0.005893183872103691, - "learning_rate": 0.00019999105581478146, - "loss": 46.0, - "step": 26447 - }, - { - "epoch": 4.259269696847699, - "grad_norm": 0.0024961617309600115, - "learning_rate": 0.00019999105513813725, - "loss": 46.0, - "step": 26448 - }, - { - "epoch": 4.259430733926487, - "grad_norm": 0.0051794699393212795, - "learning_rate": 0.0001999910544614675, - "loss": 46.0, - "step": 26449 - }, - { - "epoch": 4.259591771005274, - "grad_norm": 0.004589755553752184, - "learning_rate": 0.00019999105378477213, - "loss": 46.0, - "step": 26450 - }, - { - "epoch": 4.259752808084062, - "grad_norm": 0.0026035637129098177, - "learning_rate": 0.0001999910531080512, - "loss": 46.0, - "step": 26451 - }, - { - "epoch": 4.259913845162849, - "grad_norm": 0.0014852768508717418, - "learning_rate": 0.00019999105243130464, - "loss": 46.0, - "step": 26452 - }, - { - "epoch": 4.2600748822416366, - "grad_norm": 0.007767904549837112, - "learning_rate": 0.0001999910517545325, - "loss": 46.0, - "step": 26453 - }, - { - "epoch": 4.260235919320423, - "grad_norm": 0.0043710567988455296, - "learning_rate": 0.00019999105107773476, - "loss": 46.0, - "step": 26454 - }, - { - "epoch": 4.260396956399211, - "grad_norm": 0.0039058702532202005, - "learning_rate": 0.00019999105040091145, - "loss": 46.0, - "step": 26455 - }, - { - "epoch": 4.260557993477998, - "grad_norm": 0.003990390338003635, - "learning_rate": 0.00019999104972406252, - "loss": 46.0, - "step": 26456 - }, - { - "epoch": 4.2607190305567855, - "grad_norm": 0.0023479436058551073, - "learning_rate": 0.00019999104904718803, - "loss": 46.0, - "step": 26457 - }, - { - "epoch": 4.260880067635573, - "grad_norm": 0.005221907049417496, - "learning_rate": 0.00019999104837028792, - "loss": 46.0, - "step": 26458 - }, - { - "epoch": 4.26104110471436, - "grad_norm": 0.007145897950977087, - "learning_rate": 0.00019999104769336223, - "loss": 46.0, - "step": 26459 - }, - { - "epoch": 4.261202141793148, - "grad_norm": 0.015062019228935242, - "learning_rate": 0.00019999104701641095, - "loss": 46.0, - "step": 26460 - }, - { - "epoch": 4.261363178871935, - "grad_norm": 0.004843233153223991, - "learning_rate": 0.00019999104633943406, - "loss": 46.0, - "step": 26461 - }, - { - "epoch": 4.261524215950723, - "grad_norm": 0.0019011307740584016, - "learning_rate": 0.00019999104566243158, - "loss": 46.0, - "step": 26462 - }, - { - "epoch": 4.26168525302951, - "grad_norm": 0.002416796749457717, - "learning_rate": 0.00019999104498540354, - "loss": 46.0, - "step": 26463 - }, - { - "epoch": 4.261846290108298, - "grad_norm": 0.004971110727638006, - "learning_rate": 0.00019999104430834988, - "loss": 46.0, - "step": 26464 - }, - { - "epoch": 4.262007327187085, - "grad_norm": 0.008334905840456486, - "learning_rate": 0.00019999104363127064, - "loss": 46.0, - "step": 26465 - }, - { - "epoch": 4.2621683642658725, - "grad_norm": 0.0036073343362659216, - "learning_rate": 0.00019999104295416578, - "loss": 46.0, - "step": 26466 - }, - { - "epoch": 4.26232940134466, - "grad_norm": 0.002912555355578661, - "learning_rate": 0.00019999104227703534, - "loss": 46.0, - "step": 26467 - }, - { - "epoch": 4.2624904384234465, - "grad_norm": 0.003374774008989334, - "learning_rate": 0.0001999910415998793, - "loss": 46.0, - "step": 26468 - }, - { - "epoch": 4.262651475502234, - "grad_norm": 0.0019413164118304849, - "learning_rate": 0.0001999910409226977, - "loss": 46.0, - "step": 26469 - }, - { - "epoch": 4.2628125125810215, - "grad_norm": 0.00401551416143775, - "learning_rate": 0.00019999104024549048, - "loss": 46.0, - "step": 26470 - }, - { - "epoch": 4.262973549659809, - "grad_norm": 0.004251905716955662, - "learning_rate": 0.0001999910395682577, - "loss": 46.0, - "step": 26471 - }, - { - "epoch": 4.263134586738596, - "grad_norm": 0.0020622832234948874, - "learning_rate": 0.00019999103889099928, - "loss": 46.0, - "step": 26472 - }, - { - "epoch": 4.263295623817384, - "grad_norm": 0.0013198304222896695, - "learning_rate": 0.0001999910382137153, - "loss": 46.0, - "step": 26473 - }, - { - "epoch": 4.263456660896171, - "grad_norm": 0.005101666320115328, - "learning_rate": 0.0001999910375364057, - "loss": 46.0, - "step": 26474 - }, - { - "epoch": 4.263617697974959, - "grad_norm": 0.013275950215756893, - "learning_rate": 0.00019999103685907054, - "loss": 46.0, - "step": 26475 - }, - { - "epoch": 4.263778735053746, - "grad_norm": 0.010259171016514301, - "learning_rate": 0.00019999103618170976, - "loss": 46.0, - "step": 26476 - }, - { - "epoch": 4.263939772132534, - "grad_norm": 0.001979172695428133, - "learning_rate": 0.00019999103550432342, - "loss": 46.0, - "step": 26477 - }, - { - "epoch": 4.264100809211321, - "grad_norm": 0.009595053270459175, - "learning_rate": 0.00019999103482691143, - "loss": 46.0, - "step": 26478 - }, - { - "epoch": 4.2642618462901085, - "grad_norm": 0.002920767292380333, - "learning_rate": 0.00019999103414947392, - "loss": 46.0, - "step": 26479 - }, - { - "epoch": 4.264422883368896, - "grad_norm": 0.000645426451228559, - "learning_rate": 0.00019999103347201076, - "loss": 46.0, - "step": 26480 - }, - { - "epoch": 4.264583920447683, - "grad_norm": 0.002456387970596552, - "learning_rate": 0.00019999103279452204, - "loss": 46.0, - "step": 26481 - }, - { - "epoch": 4.264744957526471, - "grad_norm": 0.006661789491772652, - "learning_rate": 0.0001999910321170077, - "loss": 46.0, - "step": 26482 - }, - { - "epoch": 4.264905994605257, - "grad_norm": 0.02166179195046425, - "learning_rate": 0.0001999910314394678, - "loss": 46.0, - "step": 26483 - }, - { - "epoch": 4.265067031684045, - "grad_norm": 0.004659612663090229, - "learning_rate": 0.0001999910307619023, - "loss": 46.0, - "step": 26484 - }, - { - "epoch": 4.265228068762832, - "grad_norm": 0.004583099391311407, - "learning_rate": 0.00019999103008431118, - "loss": 46.0, - "step": 26485 - }, - { - "epoch": 4.26538910584162, - "grad_norm": 0.007043486926704645, - "learning_rate": 0.0001999910294066945, - "loss": 46.0, - "step": 26486 - }, - { - "epoch": 4.265550142920407, - "grad_norm": 0.006036832928657532, - "learning_rate": 0.0001999910287290522, - "loss": 46.0, - "step": 26487 - }, - { - "epoch": 4.265711179999195, - "grad_norm": 0.010575943626463413, - "learning_rate": 0.00019999102805138433, - "loss": 46.0, - "step": 26488 - }, - { - "epoch": 4.265872217077982, - "grad_norm": 0.011251966468989849, - "learning_rate": 0.00019999102737369086, - "loss": 46.0, - "step": 26489 - }, - { - "epoch": 4.26603325415677, - "grad_norm": 0.002662515500560403, - "learning_rate": 0.0001999910266959718, - "loss": 46.0, - "step": 26490 - }, - { - "epoch": 4.266194291235557, - "grad_norm": 0.0024956930428743362, - "learning_rate": 0.00019999102601822715, - "loss": 46.0, - "step": 26491 - }, - { - "epoch": 4.2663553283143445, - "grad_norm": 0.001442663837224245, - "learning_rate": 0.0001999910253404569, - "loss": 46.0, - "step": 26492 - }, - { - "epoch": 4.266516365393132, - "grad_norm": 0.0015323837287724018, - "learning_rate": 0.00019999102466266108, - "loss": 46.0, - "step": 26493 - }, - { - "epoch": 4.266677402471919, - "grad_norm": 0.003055414417758584, - "learning_rate": 0.00019999102398483962, - "loss": 46.0, - "step": 26494 - }, - { - "epoch": 4.266838439550707, - "grad_norm": 0.0038292952813208103, - "learning_rate": 0.0001999910233069926, - "loss": 46.0, - "step": 26495 - }, - { - "epoch": 4.266999476629494, - "grad_norm": 0.009217841550707817, - "learning_rate": 0.00019999102262911996, - "loss": 46.0, - "step": 26496 - }, - { - "epoch": 4.267160513708282, - "grad_norm": 0.0015858605038374662, - "learning_rate": 0.00019999102195122177, - "loss": 46.0, - "step": 26497 - }, - { - "epoch": 4.267321550787068, - "grad_norm": 0.00275441468693316, - "learning_rate": 0.00019999102127329796, - "loss": 46.0, - "step": 26498 - }, - { - "epoch": 4.267482587865856, - "grad_norm": 0.0003706343413796276, - "learning_rate": 0.00019999102059534856, - "loss": 46.0, - "step": 26499 - }, - { - "epoch": 4.267643624944643, - "grad_norm": 0.002610041992738843, - "learning_rate": 0.00019999101991737358, - "loss": 46.0, - "step": 26500 - }, - { - "epoch": 4.267804662023431, - "grad_norm": 0.0008353313314728439, - "learning_rate": 0.00019999101923937298, - "loss": 46.0, - "step": 26501 - }, - { - "epoch": 4.267965699102218, - "grad_norm": 0.002252095378935337, - "learning_rate": 0.0001999910185613468, - "loss": 46.0, - "step": 26502 - }, - { - "epoch": 4.268126736181006, - "grad_norm": 0.0016725927125662565, - "learning_rate": 0.00019999101788329505, - "loss": 46.0, - "step": 26503 - }, - { - "epoch": 4.268287773259793, - "grad_norm": 0.0031853329855948687, - "learning_rate": 0.0001999910172052177, - "loss": 46.0, - "step": 26504 - }, - { - "epoch": 4.2684488103385805, - "grad_norm": 0.0007579732919111848, - "learning_rate": 0.00019999101652711475, - "loss": 46.0, - "step": 26505 - }, - { - "epoch": 4.268609847417368, - "grad_norm": 0.002940551843494177, - "learning_rate": 0.00019999101584898622, - "loss": 46.0, - "step": 26506 - }, - { - "epoch": 4.268770884496155, - "grad_norm": 0.001293775043450296, - "learning_rate": 0.00019999101517083204, - "loss": 46.0, - "step": 26507 - }, - { - "epoch": 4.268931921574943, - "grad_norm": 0.00363354803994298, - "learning_rate": 0.0001999910144926523, - "loss": 46.0, - "step": 26508 - }, - { - "epoch": 4.26909295865373, - "grad_norm": 0.0032102023251354694, - "learning_rate": 0.000199991013814447, - "loss": 46.0, - "step": 26509 - }, - { - "epoch": 4.269253995732518, - "grad_norm": 0.005243254359811544, - "learning_rate": 0.00019999101313621608, - "loss": 46.0, - "step": 26510 - }, - { - "epoch": 4.269415032811305, - "grad_norm": 0.005274489521980286, - "learning_rate": 0.00019999101245795958, - "loss": 46.0, - "step": 26511 - }, - { - "epoch": 4.269576069890093, - "grad_norm": 0.008864698931574821, - "learning_rate": 0.0001999910117796775, - "loss": 46.0, - "step": 26512 - }, - { - "epoch": 4.269737106968879, - "grad_norm": 0.0021092614624649286, - "learning_rate": 0.00019999101110136977, - "loss": 46.0, - "step": 26513 - }, - { - "epoch": 4.269898144047667, - "grad_norm": 0.010687408968806267, - "learning_rate": 0.00019999101042303651, - "loss": 46.0, - "step": 26514 - }, - { - "epoch": 4.270059181126454, - "grad_norm": 0.008976457640528679, - "learning_rate": 0.00019999100974467764, - "loss": 46.0, - "step": 26515 - }, - { - "epoch": 4.270220218205242, - "grad_norm": 0.006882063113152981, - "learning_rate": 0.00019999100906629316, - "loss": 46.0, - "step": 26516 - }, - { - "epoch": 4.270381255284029, - "grad_norm": 0.002696617506444454, - "learning_rate": 0.00019999100838788308, - "loss": 46.0, - "step": 26517 - }, - { - "epoch": 4.2705422923628165, - "grad_norm": 0.0024096437264233828, - "learning_rate": 0.00019999100770944742, - "loss": 46.0, - "step": 26518 - }, - { - "epoch": 4.270703329441604, - "grad_norm": 0.0037499365862458944, - "learning_rate": 0.00019999100703098618, - "loss": 46.0, - "step": 26519 - }, - { - "epoch": 4.270864366520391, - "grad_norm": 0.0049476902931928635, - "learning_rate": 0.00019999100635249934, - "loss": 46.0, - "step": 26520 - }, - { - "epoch": 4.271025403599179, - "grad_norm": 0.003276398405432701, - "learning_rate": 0.00019999100567398692, - "loss": 46.0, - "step": 26521 - }, - { - "epoch": 4.271186440677966, - "grad_norm": 0.009021335281431675, - "learning_rate": 0.00019999100499544888, - "loss": 46.0, - "step": 26522 - }, - { - "epoch": 4.271347477756754, - "grad_norm": 0.0009816967649385333, - "learning_rate": 0.00019999100431688526, - "loss": 46.0, - "step": 26523 - }, - { - "epoch": 4.271508514835541, - "grad_norm": 0.0010538423666730523, - "learning_rate": 0.00019999100363829605, - "loss": 46.0, - "step": 26524 - }, - { - "epoch": 4.271669551914329, - "grad_norm": 0.005478078033775091, - "learning_rate": 0.00019999100295968125, - "loss": 46.0, - "step": 26525 - }, - { - "epoch": 4.271830588993116, - "grad_norm": 0.0025303044822067022, - "learning_rate": 0.00019999100228104087, - "loss": 46.0, - "step": 26526 - }, - { - "epoch": 4.2719916260719035, - "grad_norm": 0.0023073945194482803, - "learning_rate": 0.00019999100160237487, - "loss": 46.0, - "step": 26527 - }, - { - "epoch": 4.27215266315069, - "grad_norm": 0.007825607433915138, - "learning_rate": 0.0001999910009236833, - "loss": 46.0, - "step": 26528 - }, - { - "epoch": 4.2723137002294775, - "grad_norm": 0.003671275684610009, - "learning_rate": 0.0001999910002449661, - "loss": 46.0, - "step": 26529 - }, - { - "epoch": 4.272474737308265, - "grad_norm": 0.0028167180716991425, - "learning_rate": 0.00019999099956622335, - "loss": 46.0, - "step": 26530 - }, - { - "epoch": 4.272635774387052, - "grad_norm": 0.0026043830439448357, - "learning_rate": 0.00019999099888745497, - "loss": 46.0, - "step": 26531 - }, - { - "epoch": 4.27279681146584, - "grad_norm": 0.006476242560893297, - "learning_rate": 0.00019999099820866106, - "loss": 46.0, - "step": 26532 - }, - { - "epoch": 4.272957848544627, - "grad_norm": 0.011442218907177448, - "learning_rate": 0.00019999099752984149, - "loss": 46.0, - "step": 26533 - }, - { - "epoch": 4.273118885623415, - "grad_norm": 0.004513797350227833, - "learning_rate": 0.00019999099685099635, - "loss": 46.0, - "step": 26534 - }, - { - "epoch": 4.273279922702202, - "grad_norm": 0.007934493012726307, - "learning_rate": 0.00019999099617212563, - "loss": 46.0, - "step": 26535 - }, - { - "epoch": 4.27344095978099, - "grad_norm": 0.0025239503011107445, - "learning_rate": 0.00019999099549322931, - "loss": 46.0, - "step": 26536 - }, - { - "epoch": 4.273601996859777, - "grad_norm": 0.002945407759398222, - "learning_rate": 0.0001999909948143074, - "loss": 46.0, - "step": 26537 - }, - { - "epoch": 4.273763033938565, - "grad_norm": 0.010019524022936821, - "learning_rate": 0.00019999099413535988, - "loss": 46.0, - "step": 26538 - }, - { - "epoch": 4.273924071017352, - "grad_norm": 0.005350045394152403, - "learning_rate": 0.00019999099345638678, - "loss": 46.0, - "step": 26539 - }, - { - "epoch": 4.2740851080961395, - "grad_norm": 0.002620598068460822, - "learning_rate": 0.00019999099277738812, - "loss": 46.0, - "step": 26540 - }, - { - "epoch": 4.274246145174926, - "grad_norm": 0.011673769913613796, - "learning_rate": 0.00019999099209836382, - "loss": 46.0, - "step": 26541 - }, - { - "epoch": 4.2744071822537135, - "grad_norm": 0.004122912418097258, - "learning_rate": 0.00019999099141931393, - "loss": 46.0, - "step": 26542 - }, - { - "epoch": 4.274568219332501, - "grad_norm": 0.0028292795177549124, - "learning_rate": 0.00019999099074023845, - "loss": 46.0, - "step": 26543 - }, - { - "epoch": 4.274729256411288, - "grad_norm": 0.0029606770258396864, - "learning_rate": 0.0001999909900611374, - "loss": 46.0, - "step": 26544 - }, - { - "epoch": 4.274890293490076, - "grad_norm": 0.004221840295940638, - "learning_rate": 0.00019999098938201074, - "loss": 46.0, - "step": 26545 - }, - { - "epoch": 4.275051330568863, - "grad_norm": 0.0016699066618457437, - "learning_rate": 0.0001999909887028585, - "loss": 46.0, - "step": 26546 - }, - { - "epoch": 4.275212367647651, - "grad_norm": 0.005708546377718449, - "learning_rate": 0.00019999098802368065, - "loss": 46.0, - "step": 26547 - }, - { - "epoch": 4.275373404726438, - "grad_norm": 0.003101825015619397, - "learning_rate": 0.00019999098734447724, - "loss": 46.0, - "step": 26548 - }, - { - "epoch": 4.275534441805226, - "grad_norm": 0.006787819787859917, - "learning_rate": 0.00019999098666524822, - "loss": 46.0, - "step": 26549 - }, - { - "epoch": 4.275695478884013, - "grad_norm": 0.003859987249597907, - "learning_rate": 0.0001999909859859936, - "loss": 46.0, - "step": 26550 - }, - { - "epoch": 4.275856515962801, - "grad_norm": 0.004384852945804596, - "learning_rate": 0.00019999098530671335, - "loss": 46.0, - "step": 26551 - }, - { - "epoch": 4.276017553041588, - "grad_norm": 0.014640344306826591, - "learning_rate": 0.00019999098462740756, - "loss": 46.0, - "step": 26552 - }, - { - "epoch": 4.2761785901203755, - "grad_norm": 0.0023615416139364243, - "learning_rate": 0.00019999098394807616, - "loss": 46.0, - "step": 26553 - }, - { - "epoch": 4.276339627199163, - "grad_norm": 0.00537398224696517, - "learning_rate": 0.0001999909832687192, - "loss": 46.0, - "step": 26554 - }, - { - "epoch": 4.27650066427795, - "grad_norm": 0.007670649327337742, - "learning_rate": 0.0001999909825893366, - "loss": 46.0, - "step": 26555 - }, - { - "epoch": 4.276661701356737, - "grad_norm": 0.004995721857994795, - "learning_rate": 0.0001999909819099284, - "loss": 46.0, - "step": 26556 - }, - { - "epoch": 4.276822738435524, - "grad_norm": 0.004026542883366346, - "learning_rate": 0.00019999098123049466, - "loss": 46.0, - "step": 26557 - }, - { - "epoch": 4.276983775514312, - "grad_norm": 0.002549421042203903, - "learning_rate": 0.00019999098055103532, - "loss": 46.0, - "step": 26558 - }, - { - "epoch": 4.277144812593099, - "grad_norm": 0.0008053979836404324, - "learning_rate": 0.00019999097987155034, - "loss": 46.0, - "step": 26559 - }, - { - "epoch": 4.277305849671887, - "grad_norm": 0.015385954640805721, - "learning_rate": 0.00019999097919203983, - "loss": 46.0, - "step": 26560 - }, - { - "epoch": 4.277466886750674, - "grad_norm": 0.0027865078300237656, - "learning_rate": 0.00019999097851250368, - "loss": 46.0, - "step": 26561 - }, - { - "epoch": 4.277627923829462, - "grad_norm": 0.0029884926043450832, - "learning_rate": 0.00019999097783294194, - "loss": 46.0, - "step": 26562 - }, - { - "epoch": 4.277788960908249, - "grad_norm": 0.002812744118273258, - "learning_rate": 0.0001999909771533546, - "loss": 46.0, - "step": 26563 - }, - { - "epoch": 4.277949997987037, - "grad_norm": 0.0017508711898699403, - "learning_rate": 0.00019999097647374172, - "loss": 46.0, - "step": 26564 - }, - { - "epoch": 4.278111035065824, - "grad_norm": 0.008622458204627037, - "learning_rate": 0.0001999909757941032, - "loss": 46.0, - "step": 26565 - }, - { - "epoch": 4.2782720721446115, - "grad_norm": 0.0008098440011963248, - "learning_rate": 0.0001999909751144391, - "loss": 46.0, - "step": 26566 - }, - { - "epoch": 4.278433109223399, - "grad_norm": 0.005315616726875305, - "learning_rate": 0.0001999909744347494, - "loss": 46.0, - "step": 26567 - }, - { - "epoch": 4.278594146302186, - "grad_norm": 0.0026556290686130524, - "learning_rate": 0.00019999097375503414, - "loss": 46.0, - "step": 26568 - }, - { - "epoch": 4.278755183380974, - "grad_norm": 0.004052278585731983, - "learning_rate": 0.00019999097307529326, - "loss": 46.0, - "step": 26569 - }, - { - "epoch": 4.278916220459761, - "grad_norm": 0.017848588526248932, - "learning_rate": 0.0001999909723955268, - "loss": 46.0, - "step": 26570 - }, - { - "epoch": 4.279077257538548, - "grad_norm": 0.0021919936407357454, - "learning_rate": 0.00019999097171573472, - "loss": 46.0, - "step": 26571 - }, - { - "epoch": 4.279238294617335, - "grad_norm": 0.004026074893772602, - "learning_rate": 0.00019999097103591708, - "loss": 46.0, - "step": 26572 - }, - { - "epoch": 4.279399331696123, - "grad_norm": 0.0068419137969613075, - "learning_rate": 0.00019999097035607383, - "loss": 46.0, - "step": 26573 - }, - { - "epoch": 4.27956036877491, - "grad_norm": 0.007841920480132103, - "learning_rate": 0.000199990969676205, - "loss": 46.0, - "step": 26574 - }, - { - "epoch": 4.279721405853698, - "grad_norm": 0.0017415997572243214, - "learning_rate": 0.00019999096899631056, - "loss": 46.0, - "step": 26575 - }, - { - "epoch": 4.279882442932485, - "grad_norm": 0.0034500970505177975, - "learning_rate": 0.00019999096831639052, - "loss": 46.0, - "step": 26576 - }, - { - "epoch": 4.2800434800112725, - "grad_norm": 0.004743452183902264, - "learning_rate": 0.00019999096763644492, - "loss": 46.0, - "step": 26577 - }, - { - "epoch": 4.28020451709006, - "grad_norm": 0.001275358721613884, - "learning_rate": 0.0001999909669564737, - "loss": 46.0, - "step": 26578 - }, - { - "epoch": 4.2803655541688475, - "grad_norm": 0.003845681669190526, - "learning_rate": 0.0001999909662764769, - "loss": 46.0, - "step": 26579 - }, - { - "epoch": 4.280526591247635, - "grad_norm": 0.0025372994132339954, - "learning_rate": 0.0001999909655964545, - "loss": 46.0, - "step": 26580 - }, - { - "epoch": 4.280687628326422, - "grad_norm": 0.014806030318140984, - "learning_rate": 0.00019999096491640653, - "loss": 46.0, - "step": 26581 - }, - { - "epoch": 4.28084866540521, - "grad_norm": 0.006652695592492819, - "learning_rate": 0.00019999096423633294, - "loss": 46.0, - "step": 26582 - }, - { - "epoch": 4.281009702483997, - "grad_norm": 0.0010407832451164722, - "learning_rate": 0.00019999096355623376, - "loss": 46.0, - "step": 26583 - }, - { - "epoch": 4.281170739562785, - "grad_norm": 0.011185847222805023, - "learning_rate": 0.00019999096287610902, - "loss": 46.0, - "step": 26584 - }, - { - "epoch": 4.281331776641572, - "grad_norm": 0.004261564929038286, - "learning_rate": 0.00019999096219595867, - "loss": 46.0, - "step": 26585 - }, - { - "epoch": 4.281492813720359, - "grad_norm": 0.002568823052570224, - "learning_rate": 0.00019999096151578273, - "loss": 46.0, - "step": 26586 - }, - { - "epoch": 4.281653850799146, - "grad_norm": 0.0023710024543106556, - "learning_rate": 0.00019999096083558114, - "loss": 46.0, - "step": 26587 - }, - { - "epoch": 4.281814887877934, - "grad_norm": 0.0030266176909208298, - "learning_rate": 0.000199990960155354, - "loss": 46.0, - "step": 26588 - }, - { - "epoch": 4.281975924956721, - "grad_norm": 0.007652450352907181, - "learning_rate": 0.0001999909594751013, - "loss": 46.0, - "step": 26589 - }, - { - "epoch": 4.2821369620355085, - "grad_norm": 0.005180449690669775, - "learning_rate": 0.00019999095879482298, - "loss": 46.0, - "step": 26590 - }, - { - "epoch": 4.282297999114296, - "grad_norm": 0.0025742785073816776, - "learning_rate": 0.00019999095811451908, - "loss": 46.0, - "step": 26591 - }, - { - "epoch": 4.282459036193083, - "grad_norm": 0.008738684467971325, - "learning_rate": 0.00019999095743418956, - "loss": 46.0, - "step": 26592 - }, - { - "epoch": 4.282620073271871, - "grad_norm": 0.001745415385812521, - "learning_rate": 0.00019999095675383446, - "loss": 46.0, - "step": 26593 - }, - { - "epoch": 4.282781110350658, - "grad_norm": 0.004507759120315313, - "learning_rate": 0.00019999095607345377, - "loss": 46.0, - "step": 26594 - }, - { - "epoch": 4.282942147429446, - "grad_norm": 0.002628570655360818, - "learning_rate": 0.00019999095539304749, - "loss": 46.0, - "step": 26595 - }, - { - "epoch": 4.283103184508233, - "grad_norm": 0.002590820426121354, - "learning_rate": 0.00019999095471261562, - "loss": 46.0, - "step": 26596 - }, - { - "epoch": 4.283264221587021, - "grad_norm": 0.00393479922786355, - "learning_rate": 0.00019999095403215814, - "loss": 46.0, - "step": 26597 - }, - { - "epoch": 4.283425258665808, - "grad_norm": 0.005723224487155676, - "learning_rate": 0.00019999095335167507, - "loss": 46.0, - "step": 26598 - }, - { - "epoch": 4.283586295744596, - "grad_norm": 0.010294840671122074, - "learning_rate": 0.00019999095267116644, - "loss": 46.0, - "step": 26599 - }, - { - "epoch": 4.283747332823383, - "grad_norm": 0.0033415881916880608, - "learning_rate": 0.00019999095199063217, - "loss": 46.0, - "step": 26600 - }, - { - "epoch": 4.28390836990217, - "grad_norm": 0.0013351988745853305, - "learning_rate": 0.00019999095131007234, - "loss": 46.0, - "step": 26601 - }, - { - "epoch": 4.284069406980957, - "grad_norm": 0.003475687000900507, - "learning_rate": 0.00019999095062948693, - "loss": 46.0, - "step": 26602 - }, - { - "epoch": 4.2842304440597445, - "grad_norm": 0.017013397067785263, - "learning_rate": 0.0001999909499488759, - "loss": 46.0, - "step": 26603 - }, - { - "epoch": 4.284391481138532, - "grad_norm": 0.012152256444096565, - "learning_rate": 0.00019999094926823928, - "loss": 46.0, - "step": 26604 - }, - { - "epoch": 4.284552518217319, - "grad_norm": 0.003864055033773184, - "learning_rate": 0.00019999094858757705, - "loss": 46.0, - "step": 26605 - }, - { - "epoch": 4.284713555296107, - "grad_norm": 0.002758234040811658, - "learning_rate": 0.00019999094790688928, - "loss": 46.0, - "step": 26606 - }, - { - "epoch": 4.284874592374894, - "grad_norm": 0.00917613785713911, - "learning_rate": 0.00019999094722617587, - "loss": 46.0, - "step": 26607 - }, - { - "epoch": 4.285035629453682, - "grad_norm": 0.0014187961351126432, - "learning_rate": 0.00019999094654543688, - "loss": 46.0, - "step": 26608 - }, - { - "epoch": 4.285196666532469, - "grad_norm": 0.006978214252740145, - "learning_rate": 0.0001999909458646723, - "loss": 46.0, - "step": 26609 - }, - { - "epoch": 4.285357703611257, - "grad_norm": 0.006249192636460066, - "learning_rate": 0.00019999094518388213, - "loss": 46.0, - "step": 26610 - }, - { - "epoch": 4.285518740690044, - "grad_norm": 0.004516422748565674, - "learning_rate": 0.00019999094450306638, - "loss": 46.0, - "step": 26611 - }, - { - "epoch": 4.285679777768832, - "grad_norm": 0.00408960273489356, - "learning_rate": 0.00019999094382222503, - "loss": 46.0, - "step": 26612 - }, - { - "epoch": 4.285840814847619, - "grad_norm": 0.008581865578889847, - "learning_rate": 0.00019999094314135808, - "loss": 46.0, - "step": 26613 - }, - { - "epoch": 4.286001851926406, - "grad_norm": 0.0033353972248733044, - "learning_rate": 0.00019999094246046553, - "loss": 46.0, - "step": 26614 - }, - { - "epoch": 4.286162889005193, - "grad_norm": 0.005265991669148207, - "learning_rate": 0.0001999909417795474, - "loss": 46.0, - "step": 26615 - }, - { - "epoch": 4.2863239260839805, - "grad_norm": 0.0015589615795761347, - "learning_rate": 0.00019999094109860366, - "loss": 46.0, - "step": 26616 - }, - { - "epoch": 4.286484963162768, - "grad_norm": 0.006723297759890556, - "learning_rate": 0.00019999094041763435, - "loss": 46.0, - "step": 26617 - }, - { - "epoch": 4.286646000241555, - "grad_norm": 0.005083768162876368, - "learning_rate": 0.00019999093973663943, - "loss": 46.0, - "step": 26618 - }, - { - "epoch": 4.286807037320343, - "grad_norm": 0.005982173141092062, - "learning_rate": 0.00019999093905561895, - "loss": 46.0, - "step": 26619 - }, - { - "epoch": 4.28696807439913, - "grad_norm": 0.0018009596969932318, - "learning_rate": 0.00019999093837457283, - "loss": 46.0, - "step": 26620 - }, - { - "epoch": 4.287129111477918, - "grad_norm": 0.0015570831019431353, - "learning_rate": 0.00019999093769350115, - "loss": 46.0, - "step": 26621 - }, - { - "epoch": 4.287290148556705, - "grad_norm": 0.0029178354889154434, - "learning_rate": 0.00019999093701240388, - "loss": 46.0, - "step": 26622 - }, - { - "epoch": 4.287451185635493, - "grad_norm": 0.0030241538770496845, - "learning_rate": 0.000199990936331281, - "loss": 46.0, - "step": 26623 - }, - { - "epoch": 4.28761222271428, - "grad_norm": 0.02319498546421528, - "learning_rate": 0.00019999093565013253, - "loss": 46.0, - "step": 26624 - }, - { - "epoch": 4.287773259793068, - "grad_norm": 0.0027693198062479496, - "learning_rate": 0.00019999093496895847, - "loss": 46.0, - "step": 26625 - }, - { - "epoch": 4.287934296871855, - "grad_norm": 0.0031212191097438335, - "learning_rate": 0.00019999093428775883, - "loss": 46.0, - "step": 26626 - }, - { - "epoch": 4.2880953339506425, - "grad_norm": 0.0008244689088314772, - "learning_rate": 0.00019999093360653357, - "loss": 46.0, - "step": 26627 - }, - { - "epoch": 4.28825637102943, - "grad_norm": 0.004984673112630844, - "learning_rate": 0.00019999093292528275, - "loss": 46.0, - "step": 26628 - }, - { - "epoch": 4.2884174081082165, - "grad_norm": 0.0034086136147379875, - "learning_rate": 0.0001999909322440063, - "loss": 46.0, - "step": 26629 - }, - { - "epoch": 4.288578445187004, - "grad_norm": 0.01127166859805584, - "learning_rate": 0.00019999093156270427, - "loss": 46.0, - "step": 26630 - }, - { - "epoch": 4.288739482265791, - "grad_norm": 0.011049147695302963, - "learning_rate": 0.0001999909308813767, - "loss": 46.0, - "step": 26631 - }, - { - "epoch": 4.288900519344579, - "grad_norm": 0.0017384352395310998, - "learning_rate": 0.00019999093020002347, - "loss": 46.0, - "step": 26632 - }, - { - "epoch": 4.289061556423366, - "grad_norm": 0.002624780172482133, - "learning_rate": 0.00019999092951864466, - "loss": 46.0, - "step": 26633 - }, - { - "epoch": 4.289222593502154, - "grad_norm": 0.006158050149679184, - "learning_rate": 0.00019999092883724026, - "loss": 46.0, - "step": 26634 - }, - { - "epoch": 4.289383630580941, - "grad_norm": 0.007796927355229855, - "learning_rate": 0.00019999092815581028, - "loss": 46.0, - "step": 26635 - }, - { - "epoch": 4.289544667659729, - "grad_norm": 0.0029653809033334255, - "learning_rate": 0.0001999909274743547, - "loss": 46.0, - "step": 26636 - }, - { - "epoch": 4.289705704738516, - "grad_norm": 0.0018333265325054526, - "learning_rate": 0.00019999092679287355, - "loss": 46.0, - "step": 26637 - }, - { - "epoch": 4.2898667418173035, - "grad_norm": 0.00667930580675602, - "learning_rate": 0.00019999092611136678, - "loss": 46.0, - "step": 26638 - }, - { - "epoch": 4.290027778896091, - "grad_norm": 0.0036499658599495888, - "learning_rate": 0.00019999092542983442, - "loss": 46.0, - "step": 26639 - }, - { - "epoch": 4.2901888159748784, - "grad_norm": 0.004853421822190285, - "learning_rate": 0.00019999092474827648, - "loss": 46.0, - "step": 26640 - }, - { - "epoch": 4.290349853053666, - "grad_norm": 0.003092606784775853, - "learning_rate": 0.00019999092406669294, - "loss": 46.0, - "step": 26641 - }, - { - "epoch": 4.290510890132453, - "grad_norm": 0.01001638825982809, - "learning_rate": 0.0001999909233850838, - "loss": 46.0, - "step": 26642 - }, - { - "epoch": 4.290671927211241, - "grad_norm": 0.00549060944467783, - "learning_rate": 0.00019999092270344906, - "loss": 46.0, - "step": 26643 - }, - { - "epoch": 4.290832964290027, - "grad_norm": 0.00893006008118391, - "learning_rate": 0.00019999092202178874, - "loss": 46.0, - "step": 26644 - }, - { - "epoch": 4.290994001368815, - "grad_norm": 0.0012227289844304323, - "learning_rate": 0.00019999092134010283, - "loss": 46.0, - "step": 26645 - }, - { - "epoch": 4.291155038447602, - "grad_norm": 0.00520157627761364, - "learning_rate": 0.00019999092065839134, - "loss": 46.0, - "step": 26646 - }, - { - "epoch": 4.29131607552639, - "grad_norm": 0.0033616607543081045, - "learning_rate": 0.00019999091997665425, - "loss": 46.0, - "step": 26647 - }, - { - "epoch": 4.291477112605177, - "grad_norm": 0.0013342745369300246, - "learning_rate": 0.00019999091929489155, - "loss": 46.0, - "step": 26648 - }, - { - "epoch": 4.291638149683965, - "grad_norm": 0.00819773506373167, - "learning_rate": 0.00019999091861310327, - "loss": 46.0, - "step": 26649 - }, - { - "epoch": 4.291799186762752, - "grad_norm": 0.008123735897243023, - "learning_rate": 0.00019999091793128942, - "loss": 46.0, - "step": 26650 - }, - { - "epoch": 4.2919602238415395, - "grad_norm": 0.005695602390915155, - "learning_rate": 0.00019999091724944994, - "loss": 46.0, - "step": 26651 - }, - { - "epoch": 4.292121260920327, - "grad_norm": 0.002651038346812129, - "learning_rate": 0.0001999909165675849, - "loss": 46.0, - "step": 26652 - }, - { - "epoch": 4.292282297999114, - "grad_norm": 0.003819429548457265, - "learning_rate": 0.00019999091588569423, - "loss": 46.0, - "step": 26653 - }, - { - "epoch": 4.292443335077902, - "grad_norm": 0.006745167542248964, - "learning_rate": 0.00019999091520377798, - "loss": 46.0, - "step": 26654 - }, - { - "epoch": 4.292604372156689, - "grad_norm": 0.0028554874006658792, - "learning_rate": 0.00019999091452183615, - "loss": 46.0, - "step": 26655 - }, - { - "epoch": 4.292765409235477, - "grad_norm": 0.005565503612160683, - "learning_rate": 0.00019999091383986873, - "loss": 46.0, - "step": 26656 - }, - { - "epoch": 4.292926446314264, - "grad_norm": 0.003558178199455142, - "learning_rate": 0.0001999909131578757, - "loss": 46.0, - "step": 26657 - }, - { - "epoch": 4.293087483393052, - "grad_norm": 0.006929673254489899, - "learning_rate": 0.0001999909124758571, - "loss": 46.0, - "step": 26658 - }, - { - "epoch": 4.293248520471838, - "grad_norm": 0.012406129390001297, - "learning_rate": 0.00019999091179381288, - "loss": 46.0, - "step": 26659 - }, - { - "epoch": 4.293409557550626, - "grad_norm": 0.0014742364874109626, - "learning_rate": 0.0001999909111117431, - "loss": 46.0, - "step": 26660 - }, - { - "epoch": 4.293570594629413, - "grad_norm": 0.0027932864613831043, - "learning_rate": 0.0001999909104296477, - "loss": 46.0, - "step": 26661 - }, - { - "epoch": 4.293731631708201, - "grad_norm": 0.006052783690392971, - "learning_rate": 0.00019999090974752673, - "loss": 46.0, - "step": 26662 - }, - { - "epoch": 4.293892668786988, - "grad_norm": 0.002156896749511361, - "learning_rate": 0.00019999090906538014, - "loss": 46.0, - "step": 26663 - }, - { - "epoch": 4.2940537058657755, - "grad_norm": 0.004768858663737774, - "learning_rate": 0.00019999090838320797, - "loss": 46.0, - "step": 26664 - }, - { - "epoch": 4.294214742944563, - "grad_norm": 0.0023126413580030203, - "learning_rate": 0.0001999909077010102, - "loss": 46.0, - "step": 26665 - }, - { - "epoch": 4.29437578002335, - "grad_norm": 0.005086436867713928, - "learning_rate": 0.00019999090701878686, - "loss": 46.0, - "step": 26666 - }, - { - "epoch": 4.294536817102138, - "grad_norm": 0.003239473793655634, - "learning_rate": 0.0001999909063365379, - "loss": 46.0, - "step": 26667 - }, - { - "epoch": 4.294697854180925, - "grad_norm": 0.009535335004329681, - "learning_rate": 0.00019999090565426335, - "loss": 46.0, - "step": 26668 - }, - { - "epoch": 4.294858891259713, - "grad_norm": 0.0024792891927063465, - "learning_rate": 0.0001999909049719632, - "loss": 46.0, - "step": 26669 - }, - { - "epoch": 4.2950199283385, - "grad_norm": 0.0019711507484316826, - "learning_rate": 0.00019999090428963752, - "loss": 46.0, - "step": 26670 - }, - { - "epoch": 4.295180965417288, - "grad_norm": 0.0029612237121909857, - "learning_rate": 0.00019999090360728618, - "loss": 46.0, - "step": 26671 - }, - { - "epoch": 4.295342002496075, - "grad_norm": 0.013222194276750088, - "learning_rate": 0.00019999090292490928, - "loss": 46.0, - "step": 26672 - }, - { - "epoch": 4.295503039574863, - "grad_norm": 0.00529817771166563, - "learning_rate": 0.0001999909022425068, - "loss": 46.0, - "step": 26673 - }, - { - "epoch": 4.295664076653649, - "grad_norm": 0.017362484708428383, - "learning_rate": 0.0001999909015600787, - "loss": 46.0, - "step": 26674 - }, - { - "epoch": 4.295825113732437, - "grad_norm": 0.005261871498078108, - "learning_rate": 0.000199990900877625, - "loss": 46.0, - "step": 26675 - }, - { - "epoch": 4.295986150811224, - "grad_norm": 0.005306694190949202, - "learning_rate": 0.00019999090019514574, - "loss": 46.0, - "step": 26676 - }, - { - "epoch": 4.2961471878900115, - "grad_norm": 0.007739758584648371, - "learning_rate": 0.00019999089951264085, - "loss": 46.0, - "step": 26677 - }, - { - "epoch": 4.296308224968799, - "grad_norm": 0.0015890548238530755, - "learning_rate": 0.00019999089883011037, - "loss": 46.0, - "step": 26678 - }, - { - "epoch": 4.296469262047586, - "grad_norm": 0.017876725643873215, - "learning_rate": 0.0001999908981475543, - "loss": 46.0, - "step": 26679 - }, - { - "epoch": 4.296630299126374, - "grad_norm": 0.0012373109348118305, - "learning_rate": 0.0001999908974649727, - "loss": 46.0, - "step": 26680 - }, - { - "epoch": 4.296791336205161, - "grad_norm": 0.0043030050583183765, - "learning_rate": 0.00019999089678236545, - "loss": 46.0, - "step": 26681 - }, - { - "epoch": 4.296952373283949, - "grad_norm": 0.008796032518148422, - "learning_rate": 0.0001999908960997326, - "loss": 46.0, - "step": 26682 - }, - { - "epoch": 4.297113410362736, - "grad_norm": 0.016864174976944923, - "learning_rate": 0.00019999089541707416, - "loss": 46.0, - "step": 26683 - }, - { - "epoch": 4.297274447441524, - "grad_norm": 0.001528097433038056, - "learning_rate": 0.00019999089473439016, - "loss": 46.0, - "step": 26684 - }, - { - "epoch": 4.297435484520311, - "grad_norm": 0.0013680357951670885, - "learning_rate": 0.00019999089405168055, - "loss": 46.0, - "step": 26685 - }, - { - "epoch": 4.2975965215990986, - "grad_norm": 0.00332814222201705, - "learning_rate": 0.00019999089336894535, - "loss": 46.0, - "step": 26686 - }, - { - "epoch": 4.297757558677885, - "grad_norm": 0.0034406413324177265, - "learning_rate": 0.00019999089268618456, - "loss": 46.0, - "step": 26687 - }, - { - "epoch": 4.297918595756673, - "grad_norm": 0.0029496520292013884, - "learning_rate": 0.00019999089200339816, - "loss": 46.0, - "step": 26688 - }, - { - "epoch": 4.29807963283546, - "grad_norm": 0.006994324736297131, - "learning_rate": 0.00019999089132058617, - "loss": 46.0, - "step": 26689 - }, - { - "epoch": 4.2982406699142475, - "grad_norm": 0.008817637339234352, - "learning_rate": 0.0001999908906377486, - "loss": 46.0, - "step": 26690 - }, - { - "epoch": 4.298401706993035, - "grad_norm": 0.005084660369902849, - "learning_rate": 0.0001999908899548854, - "loss": 46.0, - "step": 26691 - }, - { - "epoch": 4.298562744071822, - "grad_norm": 0.0020231306552886963, - "learning_rate": 0.00019999088927199669, - "loss": 46.0, - "step": 26692 - }, - { - "epoch": 4.29872378115061, - "grad_norm": 0.013756733387708664, - "learning_rate": 0.0001999908885890823, - "loss": 46.0, - "step": 26693 - }, - { - "epoch": 4.298884818229397, - "grad_norm": 0.005016148090362549, - "learning_rate": 0.00019999088790614237, - "loss": 46.0, - "step": 26694 - }, - { - "epoch": 4.299045855308185, - "grad_norm": 0.002878783503547311, - "learning_rate": 0.00019999088722317683, - "loss": 46.0, - "step": 26695 - }, - { - "epoch": 4.299206892386972, - "grad_norm": 0.016500653699040413, - "learning_rate": 0.0001999908865401857, - "loss": 46.0, - "step": 26696 - }, - { - "epoch": 4.29936792946576, - "grad_norm": 0.003728938288986683, - "learning_rate": 0.000199990885857169, - "loss": 46.0, - "step": 26697 - }, - { - "epoch": 4.299528966544547, - "grad_norm": 0.003530869958922267, - "learning_rate": 0.00019999088517412667, - "loss": 46.0, - "step": 26698 - }, - { - "epoch": 4.2996900036233345, - "grad_norm": 0.0028254776261746883, - "learning_rate": 0.00019999088449105875, - "loss": 46.0, - "step": 26699 - }, - { - "epoch": 4.299851040702122, - "grad_norm": 0.0009359674877487123, - "learning_rate": 0.00019999088380796528, - "loss": 46.0, - "step": 26700 - }, - { - "epoch": 4.300012077780909, - "grad_norm": 0.0008102217689156532, - "learning_rate": 0.00019999088312484616, - "loss": 46.0, - "step": 26701 - }, - { - "epoch": 4.300173114859696, - "grad_norm": 0.0013406844809651375, - "learning_rate": 0.00019999088244170149, - "loss": 46.0, - "step": 26702 - }, - { - "epoch": 4.3003341519384835, - "grad_norm": 0.003481874242424965, - "learning_rate": 0.0001999908817585312, - "loss": 46.0, - "step": 26703 - }, - { - "epoch": 4.300495189017271, - "grad_norm": 0.0012658028863370419, - "learning_rate": 0.00019999088107533535, - "loss": 46.0, - "step": 26704 - }, - { - "epoch": 4.300656226096058, - "grad_norm": 0.0047179111279547215, - "learning_rate": 0.00019999088039211385, - "loss": 46.0, - "step": 26705 - }, - { - "epoch": 4.300817263174846, - "grad_norm": 0.004426430445164442, - "learning_rate": 0.0001999908797088668, - "loss": 46.0, - "step": 26706 - }, - { - "epoch": 4.300978300253633, - "grad_norm": 0.0029422191437333822, - "learning_rate": 0.00019999087902559416, - "loss": 46.0, - "step": 26707 - }, - { - "epoch": 4.301139337332421, - "grad_norm": 0.016356289386749268, - "learning_rate": 0.00019999087834229594, - "loss": 46.0, - "step": 26708 - }, - { - "epoch": 4.301300374411208, - "grad_norm": 0.0074377222917973995, - "learning_rate": 0.0001999908776589721, - "loss": 46.0, - "step": 26709 - }, - { - "epoch": 4.301461411489996, - "grad_norm": 0.0068101114593446255, - "learning_rate": 0.0001999908769756227, - "loss": 46.0, - "step": 26710 - }, - { - "epoch": 4.301622448568783, - "grad_norm": 0.002824036404490471, - "learning_rate": 0.00019999087629224766, - "loss": 46.0, - "step": 26711 - }, - { - "epoch": 4.3017834856475705, - "grad_norm": 0.005177484359592199, - "learning_rate": 0.00019999087560884705, - "loss": 46.0, - "step": 26712 - }, - { - "epoch": 4.301944522726358, - "grad_norm": 0.0039574452675879, - "learning_rate": 0.00019999087492542086, - "loss": 46.0, - "step": 26713 - }, - { - "epoch": 4.302105559805145, - "grad_norm": 0.003296489827334881, - "learning_rate": 0.00019999087424196906, - "loss": 46.0, - "step": 26714 - }, - { - "epoch": 4.302266596883933, - "grad_norm": 0.001527283457107842, - "learning_rate": 0.00019999087355849167, - "loss": 46.0, - "step": 26715 - }, - { - "epoch": 4.30242763396272, - "grad_norm": 0.0034816754050552845, - "learning_rate": 0.00019999087287498867, - "loss": 46.0, - "step": 26716 - }, - { - "epoch": 4.302588671041507, - "grad_norm": 0.006492525804787874, - "learning_rate": 0.0001999908721914601, - "loss": 46.0, - "step": 26717 - }, - { - "epoch": 4.302749708120294, - "grad_norm": 0.005838372744619846, - "learning_rate": 0.00019999087150790595, - "loss": 46.0, - "step": 26718 - }, - { - "epoch": 4.302910745199082, - "grad_norm": 0.013641547411680222, - "learning_rate": 0.00019999087082432618, - "loss": 46.0, - "step": 26719 - }, - { - "epoch": 4.303071782277869, - "grad_norm": 0.0020314499270170927, - "learning_rate": 0.00019999087014072083, - "loss": 46.0, - "step": 26720 - }, - { - "epoch": 4.303232819356657, - "grad_norm": 0.0036813009064644575, - "learning_rate": 0.0001999908694570899, - "loss": 46.0, - "step": 26721 - }, - { - "epoch": 4.303393856435444, - "grad_norm": 0.0015707028796896338, - "learning_rate": 0.00019999086877343336, - "loss": 46.0, - "step": 26722 - }, - { - "epoch": 4.303554893514232, - "grad_norm": 0.0005612073582597077, - "learning_rate": 0.00019999086808975122, - "loss": 46.0, - "step": 26723 - }, - { - "epoch": 4.303715930593019, - "grad_norm": 0.0027560086455196142, - "learning_rate": 0.00019999086740604352, - "loss": 46.0, - "step": 26724 - }, - { - "epoch": 4.3038769676718065, - "grad_norm": 0.0025809984654188156, - "learning_rate": 0.00019999086672231017, - "loss": 46.0, - "step": 26725 - }, - { - "epoch": 4.304038004750594, - "grad_norm": 0.0029472627211362123, - "learning_rate": 0.00019999086603855127, - "loss": 46.0, - "step": 26726 - }, - { - "epoch": 4.304199041829381, - "grad_norm": 0.007135242223739624, - "learning_rate": 0.00019999086535476678, - "loss": 46.0, - "step": 26727 - }, - { - "epoch": 4.304360078908169, - "grad_norm": 0.000985802966170013, - "learning_rate": 0.0001999908646709567, - "loss": 46.0, - "step": 26728 - }, - { - "epoch": 4.304521115986956, - "grad_norm": 0.0017725750803947449, - "learning_rate": 0.000199990863987121, - "loss": 46.0, - "step": 26729 - }, - { - "epoch": 4.304682153065744, - "grad_norm": 0.002109637251123786, - "learning_rate": 0.00019999086330325973, - "loss": 46.0, - "step": 26730 - }, - { - "epoch": 4.304843190144531, - "grad_norm": 0.0025911603588610888, - "learning_rate": 0.00019999086261937287, - "loss": 46.0, - "step": 26731 - }, - { - "epoch": 4.305004227223318, - "grad_norm": 0.0013003478525206447, - "learning_rate": 0.0001999908619354604, - "loss": 46.0, - "step": 26732 - }, - { - "epoch": 4.305165264302105, - "grad_norm": 0.005003707483410835, - "learning_rate": 0.00019999086125152234, - "loss": 46.0, - "step": 26733 - }, - { - "epoch": 4.305326301380893, - "grad_norm": 0.0056510204449296, - "learning_rate": 0.00019999086056755872, - "loss": 46.0, - "step": 26734 - }, - { - "epoch": 4.30548733845968, - "grad_norm": 0.0020021649543195963, - "learning_rate": 0.00019999085988356947, - "loss": 46.0, - "step": 26735 - }, - { - "epoch": 4.305648375538468, - "grad_norm": 0.001760657294653356, - "learning_rate": 0.00019999085919955464, - "loss": 46.0, - "step": 26736 - }, - { - "epoch": 4.305809412617255, - "grad_norm": 0.01574433594942093, - "learning_rate": 0.00019999085851551423, - "loss": 46.0, - "step": 26737 - }, - { - "epoch": 4.3059704496960425, - "grad_norm": 0.004249979741871357, - "learning_rate": 0.0001999908578314482, - "loss": 46.0, - "step": 26738 - }, - { - "epoch": 4.30613148677483, - "grad_norm": 0.02154911682009697, - "learning_rate": 0.00019999085714735658, - "loss": 46.0, - "step": 26739 - }, - { - "epoch": 4.306292523853617, - "grad_norm": 0.0044340468011796474, - "learning_rate": 0.00019999085646323937, - "loss": 46.0, - "step": 26740 - }, - { - "epoch": 4.306453560932405, - "grad_norm": 0.009184077382087708, - "learning_rate": 0.0001999908557790966, - "loss": 46.0, - "step": 26741 - }, - { - "epoch": 4.306614598011192, - "grad_norm": 0.0012312076287344098, - "learning_rate": 0.0001999908550949282, - "loss": 46.0, - "step": 26742 - }, - { - "epoch": 4.30677563508998, - "grad_norm": 0.0018530371598899364, - "learning_rate": 0.00019999085441073424, - "loss": 46.0, - "step": 26743 - }, - { - "epoch": 4.306936672168767, - "grad_norm": 0.002606758149340749, - "learning_rate": 0.00019999085372651468, - "loss": 46.0, - "step": 26744 - }, - { - "epoch": 4.307097709247555, - "grad_norm": 0.0008032356854528189, - "learning_rate": 0.00019999085304226949, - "loss": 46.0, - "step": 26745 - }, - { - "epoch": 4.307258746326342, - "grad_norm": 0.003036564914509654, - "learning_rate": 0.00019999085235799876, - "loss": 46.0, - "step": 26746 - }, - { - "epoch": 4.307419783405129, - "grad_norm": 0.00476501788944006, - "learning_rate": 0.0001999908516737024, - "loss": 46.0, - "step": 26747 - }, - { - "epoch": 4.307580820483916, - "grad_norm": 0.006617344915866852, - "learning_rate": 0.00019999085098938043, - "loss": 46.0, - "step": 26748 - }, - { - "epoch": 4.307741857562704, - "grad_norm": 0.007015538401901722, - "learning_rate": 0.00019999085030503292, - "loss": 46.0, - "step": 26749 - }, - { - "epoch": 4.307902894641491, - "grad_norm": 0.004610313102602959, - "learning_rate": 0.00019999084962065976, - "loss": 46.0, - "step": 26750 - }, - { - "epoch": 4.3080639317202785, - "grad_norm": 0.0031711666379123926, - "learning_rate": 0.00019999084893626107, - "loss": 46.0, - "step": 26751 - }, - { - "epoch": 4.308224968799066, - "grad_norm": 0.006278675980865955, - "learning_rate": 0.00019999084825183676, - "loss": 46.0, - "step": 26752 - }, - { - "epoch": 4.308386005877853, - "grad_norm": 0.0010566464625298977, - "learning_rate": 0.00019999084756738684, - "loss": 46.0, - "step": 26753 - }, - { - "epoch": 4.308547042956641, - "grad_norm": 0.006965600419789553, - "learning_rate": 0.00019999084688291133, - "loss": 46.0, - "step": 26754 - }, - { - "epoch": 4.308708080035428, - "grad_norm": 0.005000283941626549, - "learning_rate": 0.00019999084619841027, - "loss": 46.0, - "step": 26755 - }, - { - "epoch": 4.308869117114216, - "grad_norm": 0.005617290735244751, - "learning_rate": 0.00019999084551388359, - "loss": 46.0, - "step": 26756 - }, - { - "epoch": 4.309030154193003, - "grad_norm": 0.008802504278719425, - "learning_rate": 0.0001999908448293313, - "loss": 46.0, - "step": 26757 - }, - { - "epoch": 4.309191191271791, - "grad_norm": 0.0014073667116463184, - "learning_rate": 0.00019999084414475343, - "loss": 46.0, - "step": 26758 - }, - { - "epoch": 4.309352228350578, - "grad_norm": 0.003529355162754655, - "learning_rate": 0.00019999084346015, - "loss": 46.0, - "step": 26759 - }, - { - "epoch": 4.3095132654293655, - "grad_norm": 0.0074369837529957294, - "learning_rate": 0.0001999908427755209, - "loss": 46.0, - "step": 26760 - }, - { - "epoch": 4.309674302508152, - "grad_norm": 0.011568040587008, - "learning_rate": 0.00019999084209086626, - "loss": 46.0, - "step": 26761 - }, - { - "epoch": 4.3098353395869395, - "grad_norm": 0.0017909719608724117, - "learning_rate": 0.00019999084140618606, - "loss": 46.0, - "step": 26762 - }, - { - "epoch": 4.309996376665727, - "grad_norm": 0.001900325994938612, - "learning_rate": 0.00019999084072148021, - "loss": 46.0, - "step": 26763 - }, - { - "epoch": 4.310157413744514, - "grad_norm": 0.005974930711090565, - "learning_rate": 0.00019999084003674878, - "loss": 46.0, - "step": 26764 - }, - { - "epoch": 4.310318450823302, - "grad_norm": 0.00584437744691968, - "learning_rate": 0.00019999083935199176, - "loss": 46.0, - "step": 26765 - }, - { - "epoch": 4.310479487902089, - "grad_norm": 0.003897067392244935, - "learning_rate": 0.00019999083866720918, - "loss": 46.0, - "step": 26766 - }, - { - "epoch": 4.310640524980877, - "grad_norm": 0.0007997590582817793, - "learning_rate": 0.00019999083798240096, - "loss": 46.0, - "step": 26767 - }, - { - "epoch": 4.310801562059664, - "grad_norm": 0.0026893229223787785, - "learning_rate": 0.00019999083729756718, - "loss": 46.0, - "step": 26768 - }, - { - "epoch": 4.310962599138452, - "grad_norm": 0.008987111039459705, - "learning_rate": 0.0001999908366127078, - "loss": 46.0, - "step": 26769 - }, - { - "epoch": 4.311123636217239, - "grad_norm": 0.0015317542711272836, - "learning_rate": 0.00019999083592782283, - "loss": 46.0, - "step": 26770 - }, - { - "epoch": 4.311284673296027, - "grad_norm": 0.003954547457396984, - "learning_rate": 0.00019999083524291226, - "loss": 46.0, - "step": 26771 - }, - { - "epoch": 4.311445710374814, - "grad_norm": 0.012025341391563416, - "learning_rate": 0.0001999908345579761, - "loss": 46.0, - "step": 26772 - }, - { - "epoch": 4.3116067474536015, - "grad_norm": 0.004102442879229784, - "learning_rate": 0.00019999083387301433, - "loss": 46.0, - "step": 26773 - }, - { - "epoch": 4.311767784532389, - "grad_norm": 0.0034901387989521027, - "learning_rate": 0.000199990833188027, - "loss": 46.0, - "step": 26774 - }, - { - "epoch": 4.3119288216111755, - "grad_norm": 0.011448844335973263, - "learning_rate": 0.00019999083250301405, - "loss": 46.0, - "step": 26775 - }, - { - "epoch": 4.312089858689963, - "grad_norm": 0.007840665057301521, - "learning_rate": 0.00019999083181797552, - "loss": 46.0, - "step": 26776 - }, - { - "epoch": 4.31225089576875, - "grad_norm": 0.005458701401948929, - "learning_rate": 0.0001999908311329114, - "loss": 46.0, - "step": 26777 - }, - { - "epoch": 4.312411932847538, - "grad_norm": 0.0008306221570819616, - "learning_rate": 0.00019999083044782166, - "loss": 46.0, - "step": 26778 - }, - { - "epoch": 4.312572969926325, - "grad_norm": 0.011773806065320969, - "learning_rate": 0.00019999082976270637, - "loss": 46.0, - "step": 26779 - }, - { - "epoch": 4.312734007005113, - "grad_norm": 0.0029615748208016157, - "learning_rate": 0.00019999082907756546, - "loss": 46.0, - "step": 26780 - }, - { - "epoch": 4.3128950440839, - "grad_norm": 0.0031708742026239634, - "learning_rate": 0.000199990828392399, - "loss": 46.0, - "step": 26781 - }, - { - "epoch": 4.313056081162688, - "grad_norm": 0.0038983039557933807, - "learning_rate": 0.00019999082770720688, - "loss": 46.0, - "step": 26782 - }, - { - "epoch": 4.313217118241475, - "grad_norm": 0.0013287054607644677, - "learning_rate": 0.00019999082702198918, - "loss": 46.0, - "step": 26783 - }, - { - "epoch": 4.313378155320263, - "grad_norm": 0.002075494732707739, - "learning_rate": 0.00019999082633674592, - "loss": 46.0, - "step": 26784 - }, - { - "epoch": 4.31353919239905, - "grad_norm": 0.007053275592625141, - "learning_rate": 0.00019999082565147705, - "loss": 46.0, - "step": 26785 - }, - { - "epoch": 4.3137002294778375, - "grad_norm": 0.004405648913234472, - "learning_rate": 0.0001999908249661826, - "loss": 46.0, - "step": 26786 - }, - { - "epoch": 4.313861266556625, - "grad_norm": 0.002393609844148159, - "learning_rate": 0.00019999082428086255, - "loss": 46.0, - "step": 26787 - }, - { - "epoch": 4.314022303635412, - "grad_norm": 0.0010131018934771419, - "learning_rate": 0.00019999082359551691, - "loss": 46.0, - "step": 26788 - }, - { - "epoch": 4.3141833407142, - "grad_norm": 0.005945413839071989, - "learning_rate": 0.0001999908229101457, - "loss": 46.0, - "step": 26789 - }, - { - "epoch": 4.314344377792986, - "grad_norm": 0.002798818750306964, - "learning_rate": 0.00019999082222474886, - "loss": 46.0, - "step": 26790 - }, - { - "epoch": 4.314505414871774, - "grad_norm": 0.002516764448955655, - "learning_rate": 0.0001999908215393264, - "loss": 46.0, - "step": 26791 - }, - { - "epoch": 4.314666451950561, - "grad_norm": 0.002391841495409608, - "learning_rate": 0.0001999908208538784, - "loss": 46.0, - "step": 26792 - }, - { - "epoch": 4.314827489029349, - "grad_norm": 0.010734972544014454, - "learning_rate": 0.00019999082016840483, - "loss": 46.0, - "step": 26793 - }, - { - "epoch": 4.314988526108136, - "grad_norm": 0.002970040775835514, - "learning_rate": 0.00019999081948290562, - "loss": 46.0, - "step": 26794 - }, - { - "epoch": 4.315149563186924, - "grad_norm": 0.0029096766375005245, - "learning_rate": 0.00019999081879738082, - "loss": 46.0, - "step": 26795 - }, - { - "epoch": 4.315310600265711, - "grad_norm": 0.003329734317958355, - "learning_rate": 0.00019999081811183044, - "loss": 46.0, - "step": 26796 - }, - { - "epoch": 4.315471637344499, - "grad_norm": 0.0018089211080223322, - "learning_rate": 0.00019999081742625444, - "loss": 46.0, - "step": 26797 - }, - { - "epoch": 4.315632674423286, - "grad_norm": 0.0014738165773451328, - "learning_rate": 0.0001999908167406529, - "loss": 46.0, - "step": 26798 - }, - { - "epoch": 4.3157937115020735, - "grad_norm": 0.002472248859703541, - "learning_rate": 0.00019999081605502573, - "loss": 46.0, - "step": 26799 - }, - { - "epoch": 4.315954748580861, - "grad_norm": 0.0014111031778156757, - "learning_rate": 0.00019999081536937297, - "loss": 46.0, - "step": 26800 - }, - { - "epoch": 4.316115785659648, - "grad_norm": 0.007189365569502115, - "learning_rate": 0.00019999081468369463, - "loss": 46.0, - "step": 26801 - }, - { - "epoch": 4.316276822738436, - "grad_norm": 0.002926510525867343, - "learning_rate": 0.0001999908139979907, - "loss": 46.0, - "step": 26802 - }, - { - "epoch": 4.316437859817223, - "grad_norm": 0.009579833596944809, - "learning_rate": 0.00019999081331226117, - "loss": 46.0, - "step": 26803 - }, - { - "epoch": 4.316598896896011, - "grad_norm": 0.00287226727232337, - "learning_rate": 0.00019999081262650606, - "loss": 46.0, - "step": 26804 - }, - { - "epoch": 4.316759933974797, - "grad_norm": 0.011395324021577835, - "learning_rate": 0.00019999081194072533, - "loss": 46.0, - "step": 26805 - }, - { - "epoch": 4.316920971053585, - "grad_norm": 0.0065136500634253025, - "learning_rate": 0.00019999081125491902, - "loss": 46.0, - "step": 26806 - }, - { - "epoch": 4.317082008132372, - "grad_norm": 0.0027885367162525654, - "learning_rate": 0.00019999081056908713, - "loss": 46.0, - "step": 26807 - }, - { - "epoch": 4.31724304521116, - "grad_norm": 0.003528163768351078, - "learning_rate": 0.00019999080988322961, - "loss": 46.0, - "step": 26808 - }, - { - "epoch": 4.317404082289947, - "grad_norm": 0.0013375038979575038, - "learning_rate": 0.00019999080919734654, - "loss": 46.0, - "step": 26809 - }, - { - "epoch": 4.3175651193687345, - "grad_norm": 0.004187578801065683, - "learning_rate": 0.00019999080851143788, - "loss": 46.0, - "step": 26810 - }, - { - "epoch": 4.317726156447522, - "grad_norm": 0.004302739631384611, - "learning_rate": 0.00019999080782550358, - "loss": 46.0, - "step": 26811 - }, - { - "epoch": 4.3178871935263095, - "grad_norm": 0.0018789521418511868, - "learning_rate": 0.00019999080713954372, - "loss": 46.0, - "step": 26812 - }, - { - "epoch": 4.318048230605097, - "grad_norm": 0.00590970553457737, - "learning_rate": 0.00019999080645355827, - "loss": 46.0, - "step": 26813 - }, - { - "epoch": 4.318209267683884, - "grad_norm": 0.011279669590294361, - "learning_rate": 0.00019999080576754724, - "loss": 46.0, - "step": 26814 - }, - { - "epoch": 4.318370304762672, - "grad_norm": 0.020957602187991142, - "learning_rate": 0.00019999080508151061, - "loss": 46.0, - "step": 26815 - }, - { - "epoch": 4.318531341841459, - "grad_norm": 0.004916503094136715, - "learning_rate": 0.00019999080439544838, - "loss": 46.0, - "step": 26816 - }, - { - "epoch": 4.318692378920247, - "grad_norm": 0.0006075624260120094, - "learning_rate": 0.00019999080370936053, - "loss": 46.0, - "step": 26817 - }, - { - "epoch": 4.318853415999034, - "grad_norm": 0.0012110215611755848, - "learning_rate": 0.00019999080302324714, - "loss": 46.0, - "step": 26818 - }, - { - "epoch": 4.319014453077822, - "grad_norm": 0.005387540906667709, - "learning_rate": 0.00019999080233710812, - "loss": 46.0, - "step": 26819 - }, - { - "epoch": 4.319175490156608, - "grad_norm": 0.0017627552151679993, - "learning_rate": 0.0001999908016509435, - "loss": 46.0, - "step": 26820 - }, - { - "epoch": 4.319336527235396, - "grad_norm": 0.0028475935105234385, - "learning_rate": 0.0001999908009647533, - "loss": 46.0, - "step": 26821 - }, - { - "epoch": 4.319497564314183, - "grad_norm": 0.0012488632928580046, - "learning_rate": 0.00019999080027853752, - "loss": 46.0, - "step": 26822 - }, - { - "epoch": 4.3196586013929705, - "grad_norm": 0.0023156683892011642, - "learning_rate": 0.00019999079959229614, - "loss": 46.0, - "step": 26823 - }, - { - "epoch": 4.319819638471758, - "grad_norm": 0.001889853272587061, - "learning_rate": 0.00019999079890602916, - "loss": 46.0, - "step": 26824 - }, - { - "epoch": 4.319980675550545, - "grad_norm": 0.002063898602500558, - "learning_rate": 0.0001999907982197366, - "loss": 46.0, - "step": 26825 - }, - { - "epoch": 4.320141712629333, - "grad_norm": 0.0012143782805651426, - "learning_rate": 0.00019999079753341844, - "loss": 46.0, - "step": 26826 - }, - { - "epoch": 4.32030274970812, - "grad_norm": 0.0013486681273207068, - "learning_rate": 0.0001999907968470747, - "loss": 46.0, - "step": 26827 - }, - { - "epoch": 4.320463786786908, - "grad_norm": 0.008054655976593494, - "learning_rate": 0.00019999079616070536, - "loss": 46.0, - "step": 26828 - }, - { - "epoch": 4.320624823865695, - "grad_norm": 0.0014415632467716932, - "learning_rate": 0.0001999907954743104, - "loss": 46.0, - "step": 26829 - }, - { - "epoch": 4.320785860944483, - "grad_norm": 0.0015651858411729336, - "learning_rate": 0.0001999907947878899, - "loss": 46.0, - "step": 26830 - }, - { - "epoch": 4.32094689802327, - "grad_norm": 0.003934207838028669, - "learning_rate": 0.00019999079410144374, - "loss": 46.0, - "step": 26831 - }, - { - "epoch": 4.321107935102058, - "grad_norm": 0.0015107926446944475, - "learning_rate": 0.00019999079341497206, - "loss": 46.0, - "step": 26832 - }, - { - "epoch": 4.321268972180845, - "grad_norm": 0.002687514293938875, - "learning_rate": 0.00019999079272847473, - "loss": 46.0, - "step": 26833 - }, - { - "epoch": 4.3214300092596325, - "grad_norm": 0.0033835256472229958, - "learning_rate": 0.00019999079204195184, - "loss": 46.0, - "step": 26834 - }, - { - "epoch": 4.321591046338419, - "grad_norm": 0.005868611391633749, - "learning_rate": 0.00019999079135540334, - "loss": 46.0, - "step": 26835 - }, - { - "epoch": 4.3217520834172065, - "grad_norm": 0.006264748051762581, - "learning_rate": 0.00019999079066882928, - "loss": 46.0, - "step": 26836 - }, - { - "epoch": 4.321913120495994, - "grad_norm": 0.002757747657597065, - "learning_rate": 0.0001999907899822296, - "loss": 46.0, - "step": 26837 - }, - { - "epoch": 4.322074157574781, - "grad_norm": 0.005958610214293003, - "learning_rate": 0.00019999078929560434, - "loss": 46.0, - "step": 26838 - }, - { - "epoch": 4.322235194653569, - "grad_norm": 0.004291116259992123, - "learning_rate": 0.00019999078860895346, - "loss": 46.0, - "step": 26839 - }, - { - "epoch": 4.322396231732356, - "grad_norm": 0.0012658450286835432, - "learning_rate": 0.000199990787922277, - "loss": 46.0, - "step": 26840 - }, - { - "epoch": 4.322557268811144, - "grad_norm": 0.0006600506021641195, - "learning_rate": 0.00019999078723557497, - "loss": 46.0, - "step": 26841 - }, - { - "epoch": 4.322718305889931, - "grad_norm": 0.006315393839031458, - "learning_rate": 0.00019999078654884734, - "loss": 46.0, - "step": 26842 - }, - { - "epoch": 4.322879342968719, - "grad_norm": 0.012058478780090809, - "learning_rate": 0.0001999907858620941, - "loss": 46.0, - "step": 26843 - }, - { - "epoch": 4.323040380047506, - "grad_norm": 0.0019039458129554987, - "learning_rate": 0.0001999907851753153, - "loss": 46.0, - "step": 26844 - }, - { - "epoch": 4.323201417126294, - "grad_norm": 0.0019165630219504237, - "learning_rate": 0.00019999078448851084, - "loss": 46.0, - "step": 26845 - }, - { - "epoch": 4.323362454205081, - "grad_norm": 0.0020978557877242565, - "learning_rate": 0.00019999078380168083, - "loss": 46.0, - "step": 26846 - }, - { - "epoch": 4.3235234912838685, - "grad_norm": 0.006783506833016872, - "learning_rate": 0.00019999078311482525, - "loss": 46.0, - "step": 26847 - }, - { - "epoch": 4.323684528362655, - "grad_norm": 0.009774120524525642, - "learning_rate": 0.00019999078242794406, - "loss": 46.0, - "step": 26848 - }, - { - "epoch": 4.3238455654414425, - "grad_norm": 0.0018163928762078285, - "learning_rate": 0.00019999078174103726, - "loss": 46.0, - "step": 26849 - }, - { - "epoch": 4.32400660252023, - "grad_norm": 0.0021257009357213974, - "learning_rate": 0.0001999907810541049, - "loss": 46.0, - "step": 26850 - }, - { - "epoch": 4.324167639599017, - "grad_norm": 0.004128101281821728, - "learning_rate": 0.00019999078036714695, - "loss": 46.0, - "step": 26851 - }, - { - "epoch": 4.324328676677805, - "grad_norm": 0.004893376957625151, - "learning_rate": 0.00019999077968016336, - "loss": 46.0, - "step": 26852 - }, - { - "epoch": 4.324489713756592, - "grad_norm": 0.0020878843497484922, - "learning_rate": 0.0001999907789931542, - "loss": 46.0, - "step": 26853 - }, - { - "epoch": 4.32465075083538, - "grad_norm": 0.004224977921694517, - "learning_rate": 0.00019999077830611946, - "loss": 46.0, - "step": 26854 - }, - { - "epoch": 4.324811787914167, - "grad_norm": 0.0030052324291318655, - "learning_rate": 0.0001999907776190591, - "loss": 46.0, - "step": 26855 - }, - { - "epoch": 4.324972824992955, - "grad_norm": 0.015664605423808098, - "learning_rate": 0.0001999907769319732, - "loss": 46.0, - "step": 26856 - }, - { - "epoch": 4.325133862071742, - "grad_norm": 0.020176611840724945, - "learning_rate": 0.00019999077624486167, - "loss": 46.0, - "step": 26857 - }, - { - "epoch": 4.32529489915053, - "grad_norm": 0.009067884646356106, - "learning_rate": 0.00019999077555772453, - "loss": 46.0, - "step": 26858 - }, - { - "epoch": 4.325455936229317, - "grad_norm": 0.011108560487627983, - "learning_rate": 0.00019999077487056183, - "loss": 46.0, - "step": 26859 - }, - { - "epoch": 4.3256169733081045, - "grad_norm": 0.002188031328842044, - "learning_rate": 0.00019999077418337354, - "loss": 46.0, - "step": 26860 - }, - { - "epoch": 4.325778010386892, - "grad_norm": 0.007436261046677828, - "learning_rate": 0.00019999077349615963, - "loss": 46.0, - "step": 26861 - }, - { - "epoch": 4.325939047465679, - "grad_norm": 0.0025805607438087463, - "learning_rate": 0.00019999077280892014, - "loss": 46.0, - "step": 26862 - }, - { - "epoch": 4.326100084544466, - "grad_norm": 0.003826677333563566, - "learning_rate": 0.00019999077212165504, - "loss": 46.0, - "step": 26863 - }, - { - "epoch": 4.326261121623253, - "grad_norm": 0.004517124034464359, - "learning_rate": 0.00019999077143436437, - "loss": 46.0, - "step": 26864 - }, - { - "epoch": 4.326422158702041, - "grad_norm": 0.002376899356022477, - "learning_rate": 0.00019999077074704812, - "loss": 46.0, - "step": 26865 - }, - { - "epoch": 4.326583195780828, - "grad_norm": 0.004488472826778889, - "learning_rate": 0.00019999077005970626, - "loss": 46.0, - "step": 26866 - }, - { - "epoch": 4.326744232859616, - "grad_norm": 0.003915915731340647, - "learning_rate": 0.0001999907693723388, - "loss": 46.0, - "step": 26867 - }, - { - "epoch": 4.326905269938403, - "grad_norm": 0.0010685756569728255, - "learning_rate": 0.00019999076868494576, - "loss": 46.0, - "step": 26868 - }, - { - "epoch": 4.327066307017191, - "grad_norm": 0.0020829103887081146, - "learning_rate": 0.0001999907679975271, - "loss": 46.0, - "step": 26869 - }, - { - "epoch": 4.327227344095978, - "grad_norm": 0.0040535940788686275, - "learning_rate": 0.0001999907673100829, - "loss": 46.0, - "step": 26870 - }, - { - "epoch": 4.3273883811747655, - "grad_norm": 0.010978455655276775, - "learning_rate": 0.00019999076662261306, - "loss": 46.0, - "step": 26871 - }, - { - "epoch": 4.327549418253553, - "grad_norm": 0.0008613588288426399, - "learning_rate": 0.00019999076593511767, - "loss": 46.0, - "step": 26872 - }, - { - "epoch": 4.3277104553323404, - "grad_norm": 0.003091295948252082, - "learning_rate": 0.00019999076524759667, - "loss": 46.0, - "step": 26873 - }, - { - "epoch": 4.327871492411128, - "grad_norm": 0.004010102245956659, - "learning_rate": 0.00019999076456005005, - "loss": 46.0, - "step": 26874 - }, - { - "epoch": 4.328032529489915, - "grad_norm": 0.0014021600363776088, - "learning_rate": 0.00019999076387247787, - "loss": 46.0, - "step": 26875 - }, - { - "epoch": 4.328193566568703, - "grad_norm": 0.0030932254157960415, - "learning_rate": 0.00019999076318488008, - "loss": 46.0, - "step": 26876 - }, - { - "epoch": 4.32835460364749, - "grad_norm": 0.0017301643965765834, - "learning_rate": 0.0001999907624972567, - "loss": 46.0, - "step": 26877 - }, - { - "epoch": 4.328515640726277, - "grad_norm": 0.006087945774197578, - "learning_rate": 0.00019999076180960773, - "loss": 46.0, - "step": 26878 - }, - { - "epoch": 4.328676677805064, - "grad_norm": 0.003597947070375085, - "learning_rate": 0.00019999076112193318, - "loss": 46.0, - "step": 26879 - }, - { - "epoch": 4.328837714883852, - "grad_norm": 0.03220827132463455, - "learning_rate": 0.00019999076043423304, - "loss": 46.0, - "step": 26880 - }, - { - "epoch": 4.328998751962639, - "grad_norm": 0.005902218632400036, - "learning_rate": 0.00019999075974650728, - "loss": 46.0, - "step": 26881 - }, - { - "epoch": 4.329159789041427, - "grad_norm": 0.008851482532918453, - "learning_rate": 0.00019999075905875591, - "loss": 46.0, - "step": 26882 - }, - { - "epoch": 4.329320826120214, - "grad_norm": 0.006018993444740772, - "learning_rate": 0.000199990758370979, - "loss": 46.0, - "step": 26883 - }, - { - "epoch": 4.3294818631990015, - "grad_norm": 0.0024594860151410103, - "learning_rate": 0.00019999075768317647, - "loss": 46.0, - "step": 26884 - }, - { - "epoch": 4.329642900277789, - "grad_norm": 0.003404195187613368, - "learning_rate": 0.00019999075699534836, - "loss": 46.0, - "step": 26885 - }, - { - "epoch": 4.329803937356576, - "grad_norm": 0.003302328521385789, - "learning_rate": 0.00019999075630749467, - "loss": 46.0, - "step": 26886 - }, - { - "epoch": 4.329964974435364, - "grad_norm": 0.0013579169753938913, - "learning_rate": 0.00019999075561961537, - "loss": 46.0, - "step": 26887 - }, - { - "epoch": 4.330126011514151, - "grad_norm": 0.0020914459601044655, - "learning_rate": 0.00019999075493171048, - "loss": 46.0, - "step": 26888 - }, - { - "epoch": 4.330287048592939, - "grad_norm": 0.005633731372654438, - "learning_rate": 0.00019999075424377997, - "loss": 46.0, - "step": 26889 - }, - { - "epoch": 4.330448085671726, - "grad_norm": 0.00464225048199296, - "learning_rate": 0.0001999907535558239, - "loss": 46.0, - "step": 26890 - }, - { - "epoch": 4.330609122750514, - "grad_norm": 0.0026460890658199787, - "learning_rate": 0.00019999075286784225, - "loss": 46.0, - "step": 26891 - }, - { - "epoch": 4.330770159829301, - "grad_norm": 0.004468322731554508, - "learning_rate": 0.00019999075217983495, - "loss": 46.0, - "step": 26892 - }, - { - "epoch": 4.330931196908088, - "grad_norm": 0.002827275777235627, - "learning_rate": 0.0001999907514918021, - "loss": 46.0, - "step": 26893 - }, - { - "epoch": 4.331092233986875, - "grad_norm": 0.007451838813722134, - "learning_rate": 0.00019999075080374365, - "loss": 46.0, - "step": 26894 - }, - { - "epoch": 4.331253271065663, - "grad_norm": 0.012124565429985523, - "learning_rate": 0.00019999075011565962, - "loss": 46.0, - "step": 26895 - }, - { - "epoch": 4.33141430814445, - "grad_norm": 0.002243731403723359, - "learning_rate": 0.00019999074942754998, - "loss": 46.0, - "step": 26896 - }, - { - "epoch": 4.3315753452232375, - "grad_norm": 0.0029568923637270927, - "learning_rate": 0.00019999074873941478, - "loss": 46.0, - "step": 26897 - }, - { - "epoch": 4.331736382302025, - "grad_norm": 0.008408868685364723, - "learning_rate": 0.00019999074805125396, - "loss": 46.0, - "step": 26898 - }, - { - "epoch": 4.331897419380812, - "grad_norm": 0.021713390946388245, - "learning_rate": 0.00019999074736306752, - "loss": 46.0, - "step": 26899 - }, - { - "epoch": 4.3320584564596, - "grad_norm": 0.0030565140768885612, - "learning_rate": 0.00019999074667485553, - "loss": 46.0, - "step": 26900 - }, - { - "epoch": 4.332219493538387, - "grad_norm": 0.010622644796967506, - "learning_rate": 0.00019999074598661792, - "loss": 46.0, - "step": 26901 - }, - { - "epoch": 4.332380530617175, - "grad_norm": 0.0027988175861537457, - "learning_rate": 0.00019999074529835476, - "loss": 46.0, - "step": 26902 - }, - { - "epoch": 4.332541567695962, - "grad_norm": 0.0014003480318933725, - "learning_rate": 0.00019999074461006595, - "loss": 46.0, - "step": 26903 - }, - { - "epoch": 4.33270260477475, - "grad_norm": 0.002058149315416813, - "learning_rate": 0.00019999074392175158, - "loss": 46.0, - "step": 26904 - }, - { - "epoch": 4.332863641853537, - "grad_norm": 0.002338825026527047, - "learning_rate": 0.0001999907432334116, - "loss": 46.0, - "step": 26905 - }, - { - "epoch": 4.333024678932325, - "grad_norm": 0.0016602956457063556, - "learning_rate": 0.00019999074254504605, - "loss": 46.0, - "step": 26906 - }, - { - "epoch": 4.333185716011112, - "grad_norm": 0.011720615439116955, - "learning_rate": 0.0001999907418566549, - "loss": 46.0, - "step": 26907 - }, - { - "epoch": 4.333346753089899, - "grad_norm": 0.0005688276141881943, - "learning_rate": 0.00019999074116823815, - "loss": 46.0, - "step": 26908 - }, - { - "epoch": 4.333507790168686, - "grad_norm": 0.002005399437621236, - "learning_rate": 0.00019999074047979582, - "loss": 46.0, - "step": 26909 - }, - { - "epoch": 4.3336688272474735, - "grad_norm": 0.0006501065799966455, - "learning_rate": 0.0001999907397913279, - "loss": 46.0, - "step": 26910 - }, - { - "epoch": 4.333829864326261, - "grad_norm": 0.006243292707949877, - "learning_rate": 0.0001999907391028344, - "loss": 46.0, - "step": 26911 - }, - { - "epoch": 4.333990901405048, - "grad_norm": 0.0007953814929351211, - "learning_rate": 0.00019999073841431525, - "loss": 46.0, - "step": 26912 - }, - { - "epoch": 4.334151938483836, - "grad_norm": 0.0014161694562062621, - "learning_rate": 0.00019999073772577054, - "loss": 46.0, - "step": 26913 - }, - { - "epoch": 4.334312975562623, - "grad_norm": 0.0015716011403128505, - "learning_rate": 0.00019999073703720025, - "loss": 46.0, - "step": 26914 - }, - { - "epoch": 4.334474012641411, - "grad_norm": 0.007059035357087851, - "learning_rate": 0.00019999073634860437, - "loss": 46.0, - "step": 26915 - }, - { - "epoch": 4.334635049720198, - "grad_norm": 0.00320522696711123, - "learning_rate": 0.00019999073565998287, - "loss": 46.0, - "step": 26916 - }, - { - "epoch": 4.334796086798986, - "grad_norm": 0.0024273826275020838, - "learning_rate": 0.00019999073497133581, - "loss": 46.0, - "step": 26917 - }, - { - "epoch": 4.334957123877773, - "grad_norm": 0.006734349764883518, - "learning_rate": 0.00019999073428266314, - "loss": 46.0, - "step": 26918 - }, - { - "epoch": 4.3351181609565606, - "grad_norm": 0.004628733266144991, - "learning_rate": 0.00019999073359396489, - "loss": 46.0, - "step": 26919 - }, - { - "epoch": 4.335279198035348, - "grad_norm": 0.00761287659406662, - "learning_rate": 0.00019999073290524101, - "loss": 46.0, - "step": 26920 - }, - { - "epoch": 4.335440235114135, - "grad_norm": 0.011748096905648708, - "learning_rate": 0.00019999073221649158, - "loss": 46.0, - "step": 26921 - }, - { - "epoch": 4.335601272192922, - "grad_norm": 0.011996623128652573, - "learning_rate": 0.00019999073152771654, - "loss": 46.0, - "step": 26922 - }, - { - "epoch": 4.3357623092717095, - "grad_norm": 0.005394014064222574, - "learning_rate": 0.0001999907308389159, - "loss": 46.0, - "step": 26923 - }, - { - "epoch": 4.335923346350497, - "grad_norm": 0.003779176389798522, - "learning_rate": 0.00019999073015008968, - "loss": 46.0, - "step": 26924 - }, - { - "epoch": 4.336084383429284, - "grad_norm": 0.0032469138968735933, - "learning_rate": 0.00019999072946123788, - "loss": 46.0, - "step": 26925 - }, - { - "epoch": 4.336245420508072, - "grad_norm": 0.002185650635510683, - "learning_rate": 0.00019999072877236045, - "loss": 46.0, - "step": 26926 - }, - { - "epoch": 4.336406457586859, - "grad_norm": 0.001808663597330451, - "learning_rate": 0.00019999072808345747, - "loss": 46.0, - "step": 26927 - }, - { - "epoch": 4.336567494665647, - "grad_norm": 0.0010025043739005923, - "learning_rate": 0.00019999072739452887, - "loss": 46.0, - "step": 26928 - }, - { - "epoch": 4.336728531744434, - "grad_norm": 0.0033047625329345465, - "learning_rate": 0.0001999907267055747, - "loss": 46.0, - "step": 26929 - }, - { - "epoch": 4.336889568823222, - "grad_norm": 0.013785278424620628, - "learning_rate": 0.0001999907260165949, - "loss": 46.0, - "step": 26930 - }, - { - "epoch": 4.337050605902009, - "grad_norm": 0.005489861126989126, - "learning_rate": 0.00019999072532758954, - "loss": 46.0, - "step": 26931 - }, - { - "epoch": 4.3372116429807965, - "grad_norm": 0.004444201942533255, - "learning_rate": 0.00019999072463855856, - "loss": 46.0, - "step": 26932 - }, - { - "epoch": 4.337372680059584, - "grad_norm": 0.0026683283504098654, - "learning_rate": 0.00019999072394950203, - "loss": 46.0, - "step": 26933 - }, - { - "epoch": 4.337533717138371, - "grad_norm": 0.0037998268380761147, - "learning_rate": 0.00019999072326041986, - "loss": 46.0, - "step": 26934 - }, - { - "epoch": 4.337694754217159, - "grad_norm": 0.009480730630457401, - "learning_rate": 0.0001999907225713121, - "loss": 46.0, - "step": 26935 - }, - { - "epoch": 4.3378557912959455, - "grad_norm": 0.015174102038145065, - "learning_rate": 0.00019999072188217878, - "loss": 46.0, - "step": 26936 - }, - { - "epoch": 4.338016828374733, - "grad_norm": 0.0023719877935945988, - "learning_rate": 0.00019999072119301987, - "loss": 46.0, - "step": 26937 - }, - { - "epoch": 4.33817786545352, - "grad_norm": 0.008152016438543797, - "learning_rate": 0.00019999072050383535, - "loss": 46.0, - "step": 26938 - }, - { - "epoch": 4.338338902532308, - "grad_norm": 0.0032736333087086678, - "learning_rate": 0.00019999071981462524, - "loss": 46.0, - "step": 26939 - }, - { - "epoch": 4.338499939611095, - "grad_norm": 0.008397176861763, - "learning_rate": 0.00019999071912538954, - "loss": 46.0, - "step": 26940 - }, - { - "epoch": 4.338660976689883, - "grad_norm": 0.000996448565274477, - "learning_rate": 0.00019999071843612823, - "loss": 46.0, - "step": 26941 - }, - { - "epoch": 4.33882201376867, - "grad_norm": 0.007727055344730616, - "learning_rate": 0.00019999071774684133, - "loss": 46.0, - "step": 26942 - }, - { - "epoch": 4.338983050847458, - "grad_norm": 0.004088437184691429, - "learning_rate": 0.00019999071705752885, - "loss": 46.0, - "step": 26943 - }, - { - "epoch": 4.339144087926245, - "grad_norm": 0.0048957462422549725, - "learning_rate": 0.0001999907163681908, - "loss": 46.0, - "step": 26944 - }, - { - "epoch": 4.3393051250050325, - "grad_norm": 0.00665948074311018, - "learning_rate": 0.00019999071567882712, - "loss": 46.0, - "step": 26945 - }, - { - "epoch": 4.33946616208382, - "grad_norm": 0.010340073145925999, - "learning_rate": 0.00019999071498943787, - "loss": 46.0, - "step": 26946 - }, - { - "epoch": 4.339627199162607, - "grad_norm": 0.00218792911618948, - "learning_rate": 0.000199990714300023, - "loss": 46.0, - "step": 26947 - }, - { - "epoch": 4.339788236241395, - "grad_norm": 0.009167652577161789, - "learning_rate": 0.00019999071361058256, - "loss": 46.0, - "step": 26948 - }, - { - "epoch": 4.339949273320182, - "grad_norm": 0.003372750710695982, - "learning_rate": 0.00019999071292111652, - "loss": 46.0, - "step": 26949 - }, - { - "epoch": 4.34011031039897, - "grad_norm": 0.005344822537153959, - "learning_rate": 0.0001999907122316249, - "loss": 46.0, - "step": 26950 - }, - { - "epoch": 4.340271347477756, - "grad_norm": 0.00749730970710516, - "learning_rate": 0.0001999907115421077, - "loss": 46.0, - "step": 26951 - }, - { - "epoch": 4.340432384556544, - "grad_norm": 0.0029990843031555414, - "learning_rate": 0.00019999071085256487, - "loss": 46.0, - "step": 26952 - }, - { - "epoch": 4.340593421635331, - "grad_norm": 0.0047389790415763855, - "learning_rate": 0.00019999071016299646, - "loss": 46.0, - "step": 26953 - }, - { - "epoch": 4.340754458714119, - "grad_norm": 0.005430468823760748, - "learning_rate": 0.00019999070947340246, - "loss": 46.0, - "step": 26954 - }, - { - "epoch": 4.340915495792906, - "grad_norm": 0.0030369332525879145, - "learning_rate": 0.00019999070878378287, - "loss": 46.0, - "step": 26955 - }, - { - "epoch": 4.341076532871694, - "grad_norm": 0.002362895989790559, - "learning_rate": 0.0001999907080941377, - "loss": 46.0, - "step": 26956 - }, - { - "epoch": 4.341237569950481, - "grad_norm": 0.0035293116234242916, - "learning_rate": 0.00019999070740446691, - "loss": 46.0, - "step": 26957 - }, - { - "epoch": 4.3413986070292685, - "grad_norm": 0.0018974484410136938, - "learning_rate": 0.00019999070671477057, - "loss": 46.0, - "step": 26958 - }, - { - "epoch": 4.341559644108056, - "grad_norm": 0.005015982314944267, - "learning_rate": 0.00019999070602504858, - "loss": 46.0, - "step": 26959 - }, - { - "epoch": 4.341720681186843, - "grad_norm": 0.001991160213947296, - "learning_rate": 0.00019999070533530103, - "loss": 46.0, - "step": 26960 - }, - { - "epoch": 4.341881718265631, - "grad_norm": 0.004959786776453257, - "learning_rate": 0.00019999070464552787, - "loss": 46.0, - "step": 26961 - }, - { - "epoch": 4.342042755344418, - "grad_norm": 0.002750984625890851, - "learning_rate": 0.00019999070395572917, - "loss": 46.0, - "step": 26962 - }, - { - "epoch": 4.342203792423206, - "grad_norm": 0.010296100750565529, - "learning_rate": 0.00019999070326590484, - "loss": 46.0, - "step": 26963 - }, - { - "epoch": 4.342364829501993, - "grad_norm": 0.00532003166154027, - "learning_rate": 0.00019999070257605488, - "loss": 46.0, - "step": 26964 - }, - { - "epoch": 4.342525866580781, - "grad_norm": 0.009289945475757122, - "learning_rate": 0.0001999907018861794, - "loss": 46.0, - "step": 26965 - }, - { - "epoch": 4.342686903659567, - "grad_norm": 0.010900177992880344, - "learning_rate": 0.00019999070119627827, - "loss": 46.0, - "step": 26966 - }, - { - "epoch": 4.342847940738355, - "grad_norm": 0.0026615154929459095, - "learning_rate": 0.0001999907005063516, - "loss": 46.0, - "step": 26967 - }, - { - "epoch": 4.343008977817142, - "grad_norm": 0.006060689687728882, - "learning_rate": 0.00019999069981639926, - "loss": 46.0, - "step": 26968 - }, - { - "epoch": 4.34317001489593, - "grad_norm": 0.004727237857878208, - "learning_rate": 0.00019999069912642137, - "loss": 46.0, - "step": 26969 - }, - { - "epoch": 4.343331051974717, - "grad_norm": 0.0015523300971835852, - "learning_rate": 0.0001999906984364179, - "loss": 46.0, - "step": 26970 - }, - { - "epoch": 4.3434920890535045, - "grad_norm": 0.010069606825709343, - "learning_rate": 0.00019999069774638884, - "loss": 46.0, - "step": 26971 - }, - { - "epoch": 4.343653126132292, - "grad_norm": 0.0022206956055015326, - "learning_rate": 0.00019999069705633416, - "loss": 46.0, - "step": 26972 - }, - { - "epoch": 4.343814163211079, - "grad_norm": 0.004480462521314621, - "learning_rate": 0.00019999069636625393, - "loss": 46.0, - "step": 26973 - }, - { - "epoch": 4.343975200289867, - "grad_norm": 0.005475439131259918, - "learning_rate": 0.00019999069567614808, - "loss": 46.0, - "step": 26974 - }, - { - "epoch": 4.344136237368654, - "grad_norm": 0.0070614987052977085, - "learning_rate": 0.00019999069498601664, - "loss": 46.0, - "step": 26975 - }, - { - "epoch": 4.344297274447442, - "grad_norm": 0.002243722788989544, - "learning_rate": 0.0001999906942958596, - "loss": 46.0, - "step": 26976 - }, - { - "epoch": 4.344458311526229, - "grad_norm": 0.006641100160777569, - "learning_rate": 0.00019999069360567698, - "loss": 46.0, - "step": 26977 - }, - { - "epoch": 4.344619348605017, - "grad_norm": 0.0038843858055770397, - "learning_rate": 0.00019999069291546878, - "loss": 46.0, - "step": 26978 - }, - { - "epoch": 4.344780385683804, - "grad_norm": 0.003903406672179699, - "learning_rate": 0.00019999069222523494, - "loss": 46.0, - "step": 26979 - }, - { - "epoch": 4.3449414227625915, - "grad_norm": 0.006455189548432827, - "learning_rate": 0.00019999069153497554, - "loss": 46.0, - "step": 26980 - }, - { - "epoch": 4.345102459841378, - "grad_norm": 0.006002070847898722, - "learning_rate": 0.00019999069084469055, - "loss": 46.0, - "step": 26981 - }, - { - "epoch": 4.345263496920166, - "grad_norm": 0.0072707985527813435, - "learning_rate": 0.00019999069015437995, - "loss": 46.0, - "step": 26982 - }, - { - "epoch": 4.345424533998953, - "grad_norm": 0.004200181923806667, - "learning_rate": 0.0001999906894640438, - "loss": 46.0, - "step": 26983 - }, - { - "epoch": 4.3455855710777405, - "grad_norm": 0.015590599738061428, - "learning_rate": 0.00019999068877368198, - "loss": 46.0, - "step": 26984 - }, - { - "epoch": 4.345746608156528, - "grad_norm": 0.007098034955561161, - "learning_rate": 0.00019999068808329465, - "loss": 46.0, - "step": 26985 - }, - { - "epoch": 4.345907645235315, - "grad_norm": 0.014213014394044876, - "learning_rate": 0.00019999068739288167, - "loss": 46.0, - "step": 26986 - }, - { - "epoch": 4.346068682314103, - "grad_norm": 0.007169815246015787, - "learning_rate": 0.0001999906867024431, - "loss": 46.0, - "step": 26987 - }, - { - "epoch": 4.34622971939289, - "grad_norm": 0.0040504042990505695, - "learning_rate": 0.00019999068601197898, - "loss": 46.0, - "step": 26988 - }, - { - "epoch": 4.346390756471678, - "grad_norm": 0.004480308853089809, - "learning_rate": 0.00019999068532148924, - "loss": 46.0, - "step": 26989 - }, - { - "epoch": 4.346551793550465, - "grad_norm": 0.009324566461145878, - "learning_rate": 0.00019999068463097392, - "loss": 46.0, - "step": 26990 - }, - { - "epoch": 4.346712830629253, - "grad_norm": 0.000650244124699384, - "learning_rate": 0.00019999068394043298, - "loss": 46.0, - "step": 26991 - }, - { - "epoch": 4.34687386770804, - "grad_norm": 0.0036535440012812614, - "learning_rate": 0.0001999906832498665, - "loss": 46.0, - "step": 26992 - }, - { - "epoch": 4.3470349047868275, - "grad_norm": 0.007548793684691191, - "learning_rate": 0.0001999906825592744, - "loss": 46.0, - "step": 26993 - }, - { - "epoch": 4.347195941865614, - "grad_norm": 0.0009921301389113069, - "learning_rate": 0.00019999068186865669, - "loss": 46.0, - "step": 26994 - }, - { - "epoch": 4.3473569789444015, - "grad_norm": 0.007178795523941517, - "learning_rate": 0.0001999906811780134, - "loss": 46.0, - "step": 26995 - }, - { - "epoch": 4.347518016023189, - "grad_norm": 0.030864425003528595, - "learning_rate": 0.0001999906804873445, - "loss": 46.0, - "step": 26996 - }, - { - "epoch": 4.347679053101976, - "grad_norm": 0.00576575193554163, - "learning_rate": 0.00019999067979665003, - "loss": 46.0, - "step": 26997 - }, - { - "epoch": 4.347840090180764, - "grad_norm": 0.0037891629617661238, - "learning_rate": 0.00019999067910592998, - "loss": 46.0, - "step": 26998 - }, - { - "epoch": 4.348001127259551, - "grad_norm": 0.0030787428840994835, - "learning_rate": 0.00019999067841518431, - "loss": 46.0, - "step": 26999 - }, - { - "epoch": 4.348162164338339, - "grad_norm": 0.002335413359105587, - "learning_rate": 0.00019999067772441306, - "loss": 46.0, - "step": 27000 - }, - { - "epoch": 4.348323201417126, - "grad_norm": 0.0076245530508458614, - "learning_rate": 0.00019999067703361622, - "loss": 46.0, - "step": 27001 - }, - { - "epoch": 4.348484238495914, - "grad_norm": 0.007962411269545555, - "learning_rate": 0.00019999067634279377, - "loss": 46.0, - "step": 27002 - }, - { - "epoch": 4.348645275574701, - "grad_norm": 0.02506794035434723, - "learning_rate": 0.00019999067565194576, - "loss": 46.0, - "step": 27003 - }, - { - "epoch": 4.348806312653489, - "grad_norm": 0.0025293612852692604, - "learning_rate": 0.00019999067496107213, - "loss": 46.0, - "step": 27004 - }, - { - "epoch": 4.348967349732276, - "grad_norm": 0.0010694734519347548, - "learning_rate": 0.00019999067427017291, - "loss": 46.0, - "step": 27005 - }, - { - "epoch": 4.3491283868110635, - "grad_norm": 0.002649099100381136, - "learning_rate": 0.0001999906735792481, - "loss": 46.0, - "step": 27006 - }, - { - "epoch": 4.349289423889851, - "grad_norm": 0.010377351194620132, - "learning_rate": 0.0001999906728882977, - "loss": 46.0, - "step": 27007 - }, - { - "epoch": 4.349450460968638, - "grad_norm": 0.0047653838992118835, - "learning_rate": 0.0001999906721973217, - "loss": 46.0, - "step": 27008 - }, - { - "epoch": 4.349611498047425, - "grad_norm": 0.009352420456707478, - "learning_rate": 0.00019999067150632013, - "loss": 46.0, - "step": 27009 - }, - { - "epoch": 4.349772535126212, - "grad_norm": 0.006724266801029444, - "learning_rate": 0.00019999067081529295, - "loss": 46.0, - "step": 27010 - }, - { - "epoch": 4.349933572205, - "grad_norm": 0.00318281352519989, - "learning_rate": 0.00019999067012424018, - "loss": 46.0, - "step": 27011 - }, - { - "epoch": 4.350094609283787, - "grad_norm": 0.002402494428679347, - "learning_rate": 0.0001999906694331618, - "loss": 46.0, - "step": 27012 - }, - { - "epoch": 4.350255646362575, - "grad_norm": 0.0007097293855622411, - "learning_rate": 0.00019999066874205784, - "loss": 46.0, - "step": 27013 - }, - { - "epoch": 4.350416683441362, - "grad_norm": 0.008624077774584293, - "learning_rate": 0.00019999066805092828, - "loss": 46.0, - "step": 27014 - }, - { - "epoch": 4.35057772052015, - "grad_norm": 0.005293683148920536, - "learning_rate": 0.00019999066735977317, - "loss": 46.0, - "step": 27015 - }, - { - "epoch": 4.350738757598937, - "grad_norm": 0.0036781809758394957, - "learning_rate": 0.00019999066666859244, - "loss": 46.0, - "step": 27016 - }, - { - "epoch": 4.350899794677725, - "grad_norm": 0.0006104833446443081, - "learning_rate": 0.00019999066597738613, - "loss": 46.0, - "step": 27017 - }, - { - "epoch": 4.351060831756512, - "grad_norm": 0.008044463582336903, - "learning_rate": 0.0001999906652861542, - "loss": 46.0, - "step": 27018 - }, - { - "epoch": 4.3512218688352995, - "grad_norm": 0.0030127991922199726, - "learning_rate": 0.00019999066459489668, - "loss": 46.0, - "step": 27019 - }, - { - "epoch": 4.351382905914087, - "grad_norm": 0.002214134205132723, - "learning_rate": 0.00019999066390361358, - "loss": 46.0, - "step": 27020 - }, - { - "epoch": 4.351543942992874, - "grad_norm": 0.0007719038985669613, - "learning_rate": 0.00019999066321230489, - "loss": 46.0, - "step": 27021 - }, - { - "epoch": 4.351704980071662, - "grad_norm": 0.004441957920789719, - "learning_rate": 0.0001999906625209706, - "loss": 46.0, - "step": 27022 - }, - { - "epoch": 4.351866017150449, - "grad_norm": 0.00208304007537663, - "learning_rate": 0.00019999066182961072, - "loss": 46.0, - "step": 27023 - }, - { - "epoch": 4.352027054229236, - "grad_norm": 0.0017706903163343668, - "learning_rate": 0.00019999066113822524, - "loss": 46.0, - "step": 27024 - }, - { - "epoch": 4.352188091308023, - "grad_norm": 0.005134752951562405, - "learning_rate": 0.00019999066044681417, - "loss": 46.0, - "step": 27025 - }, - { - "epoch": 4.352349128386811, - "grad_norm": 0.0017050745664164424, - "learning_rate": 0.00019999065975537751, - "loss": 46.0, - "step": 27026 - }, - { - "epoch": 4.352510165465598, - "grad_norm": 0.0014627579366788268, - "learning_rate": 0.00019999065906391527, - "loss": 46.0, - "step": 27027 - }, - { - "epoch": 4.352671202544386, - "grad_norm": 0.0031340501736849546, - "learning_rate": 0.00019999065837242742, - "loss": 46.0, - "step": 27028 - }, - { - "epoch": 4.352832239623173, - "grad_norm": 0.01404839288443327, - "learning_rate": 0.00019999065768091398, - "loss": 46.0, - "step": 27029 - }, - { - "epoch": 4.352993276701961, - "grad_norm": 0.008472476154565811, - "learning_rate": 0.00019999065698937497, - "loss": 46.0, - "step": 27030 - }, - { - "epoch": 4.353154313780748, - "grad_norm": 0.002201506169512868, - "learning_rate": 0.00019999065629781036, - "loss": 46.0, - "step": 27031 - }, - { - "epoch": 4.3533153508595355, - "grad_norm": 0.003494028467684984, - "learning_rate": 0.00019999065560622015, - "loss": 46.0, - "step": 27032 - }, - { - "epoch": 4.353476387938323, - "grad_norm": 0.003972860053181648, - "learning_rate": 0.0001999906549146043, - "loss": 46.0, - "step": 27033 - }, - { - "epoch": 4.35363742501711, - "grad_norm": 0.0031147426925599575, - "learning_rate": 0.00019999065422296295, - "loss": 46.0, - "step": 27034 - }, - { - "epoch": 4.353798462095898, - "grad_norm": 0.0014362650690600276, - "learning_rate": 0.00019999065353129596, - "loss": 46.0, - "step": 27035 - }, - { - "epoch": 4.353959499174685, - "grad_norm": 0.00355161027982831, - "learning_rate": 0.00019999065283960336, - "loss": 46.0, - "step": 27036 - }, - { - "epoch": 4.354120536253473, - "grad_norm": 0.006990562193095684, - "learning_rate": 0.0001999906521478852, - "loss": 46.0, - "step": 27037 - }, - { - "epoch": 4.35428157333226, - "grad_norm": 0.006435507442802191, - "learning_rate": 0.00019999065145614143, - "loss": 46.0, - "step": 27038 - }, - { - "epoch": 4.354442610411047, - "grad_norm": 0.002615630626678467, - "learning_rate": 0.00019999065076437207, - "loss": 46.0, - "step": 27039 - }, - { - "epoch": 4.354603647489834, - "grad_norm": 0.0010898062027990818, - "learning_rate": 0.0001999906500725771, - "loss": 46.0, - "step": 27040 - }, - { - "epoch": 4.354764684568622, - "grad_norm": 0.0026471030432730913, - "learning_rate": 0.00019999064938075657, - "loss": 46.0, - "step": 27041 - }, - { - "epoch": 4.354925721647409, - "grad_norm": 0.006431697867810726, - "learning_rate": 0.0001999906486889104, - "loss": 46.0, - "step": 27042 - }, - { - "epoch": 4.3550867587261965, - "grad_norm": 0.0005420433008112013, - "learning_rate": 0.0001999906479970387, - "loss": 46.0, - "step": 27043 - }, - { - "epoch": 4.355247795804984, - "grad_norm": 0.013869138434529305, - "learning_rate": 0.0001999906473051414, - "loss": 46.0, - "step": 27044 - }, - { - "epoch": 4.3554088328837715, - "grad_norm": 0.014153920114040375, - "learning_rate": 0.00019999064661321847, - "loss": 46.0, - "step": 27045 - }, - { - "epoch": 4.355569869962559, - "grad_norm": 0.0006360609550029039, - "learning_rate": 0.00019999064592126994, - "loss": 46.0, - "step": 27046 - }, - { - "epoch": 4.355730907041346, - "grad_norm": 0.0047181532718241215, - "learning_rate": 0.00019999064522929584, - "loss": 46.0, - "step": 27047 - }, - { - "epoch": 4.355891944120134, - "grad_norm": 0.00938387680798769, - "learning_rate": 0.00019999064453729614, - "loss": 46.0, - "step": 27048 - }, - { - "epoch": 4.356052981198921, - "grad_norm": 0.0032553032506257296, - "learning_rate": 0.00019999064384527087, - "loss": 46.0, - "step": 27049 - }, - { - "epoch": 4.356214018277709, - "grad_norm": 0.003998341970145702, - "learning_rate": 0.00019999064315322001, - "loss": 46.0, - "step": 27050 - }, - { - "epoch": 4.356375055356496, - "grad_norm": 0.0007415534928441048, - "learning_rate": 0.00019999064246114352, - "loss": 46.0, - "step": 27051 - }, - { - "epoch": 4.356536092435284, - "grad_norm": 0.009251092560589314, - "learning_rate": 0.00019999064176904146, - "loss": 46.0, - "step": 27052 - }, - { - "epoch": 4.356697129514071, - "grad_norm": 0.0022210204042494297, - "learning_rate": 0.00019999064107691382, - "loss": 46.0, - "step": 27053 - }, - { - "epoch": 4.356858166592858, - "grad_norm": 0.0012636297615244985, - "learning_rate": 0.00019999064038476056, - "loss": 46.0, - "step": 27054 - }, - { - "epoch": 4.357019203671645, - "grad_norm": 0.0026149656623601913, - "learning_rate": 0.00019999063969258174, - "loss": 46.0, - "step": 27055 - }, - { - "epoch": 4.3571802407504325, - "grad_norm": 0.002562863053753972, - "learning_rate": 0.0001999906390003773, - "loss": 46.0, - "step": 27056 - }, - { - "epoch": 4.35734127782922, - "grad_norm": 0.005267334636300802, - "learning_rate": 0.00019999063830814727, - "loss": 46.0, - "step": 27057 - }, - { - "epoch": 4.357502314908007, - "grad_norm": 0.0026908330619335175, - "learning_rate": 0.00019999063761589166, - "loss": 46.0, - "step": 27058 - }, - { - "epoch": 4.357663351986795, - "grad_norm": 0.002155652502551675, - "learning_rate": 0.00019999063692361047, - "loss": 46.0, - "step": 27059 - }, - { - "epoch": 4.357824389065582, - "grad_norm": 0.003818254452198744, - "learning_rate": 0.00019999063623130363, - "loss": 46.0, - "step": 27060 - }, - { - "epoch": 4.35798542614437, - "grad_norm": 0.0036588225048035383, - "learning_rate": 0.00019999063553897126, - "loss": 46.0, - "step": 27061 - }, - { - "epoch": 4.358146463223157, - "grad_norm": 0.0072400267235934734, - "learning_rate": 0.00019999063484661325, - "loss": 46.0, - "step": 27062 - }, - { - "epoch": 4.358307500301945, - "grad_norm": 0.0027110599912703037, - "learning_rate": 0.00019999063415422969, - "loss": 46.0, - "step": 27063 - }, - { - "epoch": 4.358468537380732, - "grad_norm": 0.014049648307263851, - "learning_rate": 0.00019999063346182053, - "loss": 46.0, - "step": 27064 - }, - { - "epoch": 4.35862957445952, - "grad_norm": 0.0014710122486576438, - "learning_rate": 0.00019999063276938576, - "loss": 46.0, - "step": 27065 - }, - { - "epoch": 4.358790611538307, - "grad_norm": 0.001651704660616815, - "learning_rate": 0.0001999906320769254, - "loss": 46.0, - "step": 27066 - }, - { - "epoch": 4.3589516486170945, - "grad_norm": 0.008998613804578781, - "learning_rate": 0.00019999063138443943, - "loss": 46.0, - "step": 27067 - }, - { - "epoch": 4.359112685695881, - "grad_norm": 0.003404980758205056, - "learning_rate": 0.00019999063069192792, - "loss": 46.0, - "step": 27068 - }, - { - "epoch": 4.3592737227746685, - "grad_norm": 0.009843757376074791, - "learning_rate": 0.00019999062999939078, - "loss": 46.0, - "step": 27069 - }, - { - "epoch": 4.359434759853456, - "grad_norm": 0.00931534357368946, - "learning_rate": 0.00019999062930682807, - "loss": 46.0, - "step": 27070 - }, - { - "epoch": 4.359595796932243, - "grad_norm": 0.0031456982251256704, - "learning_rate": 0.00019999062861423972, - "loss": 46.0, - "step": 27071 - }, - { - "epoch": 4.359756834011031, - "grad_norm": 0.008781283162534237, - "learning_rate": 0.00019999062792162582, - "loss": 46.0, - "step": 27072 - }, - { - "epoch": 4.359917871089818, - "grad_norm": 0.0014256745344027877, - "learning_rate": 0.00019999062722898632, - "loss": 46.0, - "step": 27073 - }, - { - "epoch": 4.360078908168606, - "grad_norm": 0.002624319400638342, - "learning_rate": 0.0001999906265363212, - "loss": 46.0, - "step": 27074 - }, - { - "epoch": 4.360239945247393, - "grad_norm": 0.0021495830733329058, - "learning_rate": 0.00019999062584363054, - "loss": 46.0, - "step": 27075 - }, - { - "epoch": 4.360400982326181, - "grad_norm": 0.002483322052285075, - "learning_rate": 0.00019999062515091426, - "loss": 46.0, - "step": 27076 - }, - { - "epoch": 4.360562019404968, - "grad_norm": 0.00913215521723032, - "learning_rate": 0.00019999062445817236, - "loss": 46.0, - "step": 27077 - }, - { - "epoch": 4.360723056483756, - "grad_norm": 0.00351537112146616, - "learning_rate": 0.00019999062376540493, - "loss": 46.0, - "step": 27078 - }, - { - "epoch": 4.360884093562543, - "grad_norm": 0.0006506695644930005, - "learning_rate": 0.00019999062307261186, - "loss": 46.0, - "step": 27079 - }, - { - "epoch": 4.3610451306413305, - "grad_norm": 0.009654135443270206, - "learning_rate": 0.0001999906223797932, - "loss": 46.0, - "step": 27080 - }, - { - "epoch": 4.361206167720118, - "grad_norm": 0.0036119548603892326, - "learning_rate": 0.00019999062168694895, - "loss": 46.0, - "step": 27081 - }, - { - "epoch": 4.3613672047989045, - "grad_norm": 0.010684236884117126, - "learning_rate": 0.00019999062099407912, - "loss": 46.0, - "step": 27082 - }, - { - "epoch": 4.361528241877692, - "grad_norm": 0.0027164663188159466, - "learning_rate": 0.0001999906203011837, - "loss": 46.0, - "step": 27083 - }, - { - "epoch": 4.361689278956479, - "grad_norm": 0.0009681042865850031, - "learning_rate": 0.00019999061960826266, - "loss": 46.0, - "step": 27084 - }, - { - "epoch": 4.361850316035267, - "grad_norm": 0.004821220878511667, - "learning_rate": 0.00019999061891531604, - "loss": 46.0, - "step": 27085 - }, - { - "epoch": 4.362011353114054, - "grad_norm": 0.002124957274645567, - "learning_rate": 0.00019999061822234383, - "loss": 46.0, - "step": 27086 - }, - { - "epoch": 4.362172390192842, - "grad_norm": 0.005472272168844938, - "learning_rate": 0.00019999061752934606, - "loss": 46.0, - "step": 27087 - }, - { - "epoch": 4.362333427271629, - "grad_norm": 0.0031505899969488382, - "learning_rate": 0.00019999061683632268, - "loss": 46.0, - "step": 27088 - }, - { - "epoch": 4.362494464350417, - "grad_norm": 0.0014355042949318886, - "learning_rate": 0.00019999061614327368, - "loss": 46.0, - "step": 27089 - }, - { - "epoch": 4.362655501429204, - "grad_norm": 0.005559311248362064, - "learning_rate": 0.0001999906154501991, - "loss": 46.0, - "step": 27090 - }, - { - "epoch": 4.362816538507992, - "grad_norm": 0.004858406260609627, - "learning_rate": 0.00019999061475709895, - "loss": 46.0, - "step": 27091 - }, - { - "epoch": 4.362977575586779, - "grad_norm": 0.002242660615593195, - "learning_rate": 0.00019999061406397316, - "loss": 46.0, - "step": 27092 - }, - { - "epoch": 4.3631386126655665, - "grad_norm": 0.00249241734854877, - "learning_rate": 0.0001999906133708218, - "loss": 46.0, - "step": 27093 - }, - { - "epoch": 4.363299649744354, - "grad_norm": 0.008938068524003029, - "learning_rate": 0.00019999061267764488, - "loss": 46.0, - "step": 27094 - }, - { - "epoch": 4.363460686823141, - "grad_norm": 0.0026307005900889635, - "learning_rate": 0.00019999061198444233, - "loss": 46.0, - "step": 27095 - }, - { - "epoch": 4.363621723901929, - "grad_norm": 0.004502871073782444, - "learning_rate": 0.00019999061129121422, - "loss": 46.0, - "step": 27096 - }, - { - "epoch": 4.363782760980715, - "grad_norm": 0.013487651012837887, - "learning_rate": 0.0001999906105979605, - "loss": 46.0, - "step": 27097 - }, - { - "epoch": 4.363943798059503, - "grad_norm": 0.006255973130464554, - "learning_rate": 0.00019999060990468116, - "loss": 46.0, - "step": 27098 - }, - { - "epoch": 4.36410483513829, - "grad_norm": 0.004796335007995367, - "learning_rate": 0.00019999060921137627, - "loss": 46.0, - "step": 27099 - }, - { - "epoch": 4.364265872217078, - "grad_norm": 0.004446865059435368, - "learning_rate": 0.00019999060851804575, - "loss": 46.0, - "step": 27100 - }, - { - "epoch": 4.364426909295865, - "grad_norm": 0.007209441624581814, - "learning_rate": 0.00019999060782468968, - "loss": 46.0, - "step": 27101 - }, - { - "epoch": 4.364587946374653, - "grad_norm": 0.000757641508243978, - "learning_rate": 0.00019999060713130797, - "loss": 46.0, - "step": 27102 - }, - { - "epoch": 4.36474898345344, - "grad_norm": 0.0013446417870000005, - "learning_rate": 0.0001999906064379007, - "loss": 46.0, - "step": 27103 - }, - { - "epoch": 4.3649100205322275, - "grad_norm": 0.019902419298887253, - "learning_rate": 0.0001999906057444678, - "loss": 46.0, - "step": 27104 - }, - { - "epoch": 4.365071057611015, - "grad_norm": 0.0032546038273721933, - "learning_rate": 0.00019999060505100936, - "loss": 46.0, - "step": 27105 - }, - { - "epoch": 4.3652320946898024, - "grad_norm": 0.0037054584827274084, - "learning_rate": 0.0001999906043575253, - "loss": 46.0, - "step": 27106 - }, - { - "epoch": 4.36539313176859, - "grad_norm": 0.0005276603042148054, - "learning_rate": 0.00019999060366401565, - "loss": 46.0, - "step": 27107 - }, - { - "epoch": 4.365554168847377, - "grad_norm": 0.0007191240438260138, - "learning_rate": 0.00019999060297048042, - "loss": 46.0, - "step": 27108 - }, - { - "epoch": 4.365715205926165, - "grad_norm": 0.005270296707749367, - "learning_rate": 0.0001999906022769196, - "loss": 46.0, - "step": 27109 - }, - { - "epoch": 4.365876243004952, - "grad_norm": 0.008191713131964207, - "learning_rate": 0.00019999060158333318, - "loss": 46.0, - "step": 27110 - }, - { - "epoch": 4.36603728008374, - "grad_norm": 0.0016720653511583805, - "learning_rate": 0.00019999060088972116, - "loss": 46.0, - "step": 27111 - }, - { - "epoch": 4.366198317162526, - "grad_norm": 0.0019134688191115856, - "learning_rate": 0.00019999060019608352, - "loss": 46.0, - "step": 27112 - }, - { - "epoch": 4.366359354241314, - "grad_norm": 0.007254447788000107, - "learning_rate": 0.00019999059950242035, - "loss": 46.0, - "step": 27113 - }, - { - "epoch": 4.366520391320101, - "grad_norm": 0.0008195106638595462, - "learning_rate": 0.00019999059880873154, - "loss": 46.0, - "step": 27114 - }, - { - "epoch": 4.366681428398889, - "grad_norm": 0.006284576375037432, - "learning_rate": 0.00019999059811501714, - "loss": 46.0, - "step": 27115 - }, - { - "epoch": 4.366842465477676, - "grad_norm": 0.008425384759902954, - "learning_rate": 0.0001999905974212772, - "loss": 46.0, - "step": 27116 - }, - { - "epoch": 4.3670035025564635, - "grad_norm": 0.0022102666553109884, - "learning_rate": 0.0001999905967275116, - "loss": 46.0, - "step": 27117 - }, - { - "epoch": 4.367164539635251, - "grad_norm": 0.004138268996030092, - "learning_rate": 0.00019999059603372047, - "loss": 46.0, - "step": 27118 - }, - { - "epoch": 4.367325576714038, - "grad_norm": 0.00965751800686121, - "learning_rate": 0.00019999059533990366, - "loss": 46.0, - "step": 27119 - }, - { - "epoch": 4.367486613792826, - "grad_norm": 0.004430947359651327, - "learning_rate": 0.00019999059464606133, - "loss": 46.0, - "step": 27120 - }, - { - "epoch": 4.367647650871613, - "grad_norm": 0.0010899221524596214, - "learning_rate": 0.0001999905939521934, - "loss": 46.0, - "step": 27121 - }, - { - "epoch": 4.367808687950401, - "grad_norm": 0.006405069027096033, - "learning_rate": 0.00019999059325829984, - "loss": 46.0, - "step": 27122 - }, - { - "epoch": 4.367969725029188, - "grad_norm": 0.013673920184373856, - "learning_rate": 0.00019999059256438072, - "loss": 46.0, - "step": 27123 - }, - { - "epoch": 4.368130762107976, - "grad_norm": 0.006496686954051256, - "learning_rate": 0.00019999059187043603, - "loss": 46.0, - "step": 27124 - }, - { - "epoch": 4.368291799186763, - "grad_norm": 0.007036920636892319, - "learning_rate": 0.0001999905911764657, - "loss": 46.0, - "step": 27125 - }, - { - "epoch": 4.368452836265551, - "grad_norm": 0.0066603501327335835, - "learning_rate": 0.0001999905904824698, - "loss": 46.0, - "step": 27126 - }, - { - "epoch": 4.368613873344337, - "grad_norm": 0.0025863044429570436, - "learning_rate": 0.0001999905897884483, - "loss": 46.0, - "step": 27127 - }, - { - "epoch": 4.368774910423125, - "grad_norm": 0.0008355535101145506, - "learning_rate": 0.0001999905890944012, - "loss": 46.0, - "step": 27128 - }, - { - "epoch": 4.368935947501912, - "grad_norm": 0.0017207521013915539, - "learning_rate": 0.00019999058840032853, - "loss": 46.0, - "step": 27129 - }, - { - "epoch": 4.3690969845806995, - "grad_norm": 0.0031994956079870462, - "learning_rate": 0.00019999058770623024, - "loss": 46.0, - "step": 27130 - }, - { - "epoch": 4.369258021659487, - "grad_norm": 0.004539301618933678, - "learning_rate": 0.0001999905870121064, - "loss": 46.0, - "step": 27131 - }, - { - "epoch": 4.369419058738274, - "grad_norm": 0.018510285764932632, - "learning_rate": 0.00019999058631795693, - "loss": 46.0, - "step": 27132 - }, - { - "epoch": 4.369580095817062, - "grad_norm": 0.0016801398014649749, - "learning_rate": 0.00019999058562378188, - "loss": 46.0, - "step": 27133 - }, - { - "epoch": 4.369741132895849, - "grad_norm": 0.0031483913771808147, - "learning_rate": 0.00019999058492958124, - "loss": 46.0, - "step": 27134 - }, - { - "epoch": 4.369902169974637, - "grad_norm": 0.01651756837964058, - "learning_rate": 0.000199990584235355, - "loss": 46.0, - "step": 27135 - }, - { - "epoch": 4.370063207053424, - "grad_norm": 0.0009231962030753493, - "learning_rate": 0.00019999058354110315, - "loss": 46.0, - "step": 27136 - }, - { - "epoch": 4.370224244132212, - "grad_norm": 0.0010743794264271855, - "learning_rate": 0.00019999058284682575, - "loss": 46.0, - "step": 27137 - }, - { - "epoch": 4.370385281210999, - "grad_norm": 0.0020308492239564657, - "learning_rate": 0.00019999058215252274, - "loss": 46.0, - "step": 27138 - }, - { - "epoch": 4.370546318289787, - "grad_norm": 0.0009223786764778197, - "learning_rate": 0.0001999905814581941, - "loss": 46.0, - "step": 27139 - }, - { - "epoch": 4.370707355368574, - "grad_norm": 0.003088445169851184, - "learning_rate": 0.00019999058076383995, - "loss": 46.0, - "step": 27140 - }, - { - "epoch": 4.3708683924473615, - "grad_norm": 0.0049479054287076, - "learning_rate": 0.00019999058006946015, - "loss": 46.0, - "step": 27141 - }, - { - "epoch": 4.371029429526148, - "grad_norm": 0.017845382913947105, - "learning_rate": 0.00019999057937505476, - "loss": 46.0, - "step": 27142 - }, - { - "epoch": 4.3711904666049355, - "grad_norm": 0.009426828473806381, - "learning_rate": 0.00019999057868062375, - "loss": 46.0, - "step": 27143 - }, - { - "epoch": 4.371351503683723, - "grad_norm": 0.004196139983832836, - "learning_rate": 0.00019999057798616722, - "loss": 46.0, - "step": 27144 - }, - { - "epoch": 4.37151254076251, - "grad_norm": 0.0027773615438491106, - "learning_rate": 0.00019999057729168504, - "loss": 46.0, - "step": 27145 - }, - { - "epoch": 4.371673577841298, - "grad_norm": 0.005441885907202959, - "learning_rate": 0.00019999057659717727, - "loss": 46.0, - "step": 27146 - }, - { - "epoch": 4.371834614920085, - "grad_norm": 0.0028727292083203793, - "learning_rate": 0.00019999057590264397, - "loss": 46.0, - "step": 27147 - }, - { - "epoch": 4.371995651998873, - "grad_norm": 0.001167637063190341, - "learning_rate": 0.000199990575208085, - "loss": 46.0, - "step": 27148 - }, - { - "epoch": 4.37215668907766, - "grad_norm": 0.005727018229663372, - "learning_rate": 0.00019999057451350048, - "loss": 46.0, - "step": 27149 - }, - { - "epoch": 4.372317726156448, - "grad_norm": 0.006780306342989206, - "learning_rate": 0.00019999057381889034, - "loss": 46.0, - "step": 27150 - }, - { - "epoch": 4.372478763235235, - "grad_norm": 0.0014827157137915492, - "learning_rate": 0.00019999057312425464, - "loss": 46.0, - "step": 27151 - }, - { - "epoch": 4.3726398003140226, - "grad_norm": 0.004794629756361246, - "learning_rate": 0.00019999057242959332, - "loss": 46.0, - "step": 27152 - }, - { - "epoch": 4.37280083739281, - "grad_norm": 0.003373529762029648, - "learning_rate": 0.00019999057173490642, - "loss": 46.0, - "step": 27153 - }, - { - "epoch": 4.3729618744715975, - "grad_norm": 0.0029637019615620375, - "learning_rate": 0.00019999057104019393, - "loss": 46.0, - "step": 27154 - }, - { - "epoch": 4.373122911550384, - "grad_norm": 0.0037493992131203413, - "learning_rate": 0.00019999057034545586, - "loss": 46.0, - "step": 27155 - }, - { - "epoch": 4.3732839486291715, - "grad_norm": 0.006962976418435574, - "learning_rate": 0.00019999056965069216, - "loss": 46.0, - "step": 27156 - }, - { - "epoch": 4.373444985707959, - "grad_norm": 0.0015373227652162313, - "learning_rate": 0.0001999905689559029, - "loss": 46.0, - "step": 27157 - }, - { - "epoch": 4.373606022786746, - "grad_norm": 0.0017439405201002955, - "learning_rate": 0.00019999056826108805, - "loss": 46.0, - "step": 27158 - }, - { - "epoch": 4.373767059865534, - "grad_norm": 0.005726260133087635, - "learning_rate": 0.0001999905675662476, - "loss": 46.0, - "step": 27159 - }, - { - "epoch": 4.373928096944321, - "grad_norm": 0.0012014490785077214, - "learning_rate": 0.00019999056687138153, - "loss": 46.0, - "step": 27160 - }, - { - "epoch": 4.374089134023109, - "grad_norm": 0.005440492648631334, - "learning_rate": 0.0001999905661764899, - "loss": 46.0, - "step": 27161 - }, - { - "epoch": 4.374250171101896, - "grad_norm": 0.01880985125899315, - "learning_rate": 0.00019999056548157266, - "loss": 46.0, - "step": 27162 - }, - { - "epoch": 4.374411208180684, - "grad_norm": 0.010553323663771152, - "learning_rate": 0.00019999056478662986, - "loss": 46.0, - "step": 27163 - }, - { - "epoch": 4.374572245259471, - "grad_norm": 0.0013744422467425466, - "learning_rate": 0.00019999056409166142, - "loss": 46.0, - "step": 27164 - }, - { - "epoch": 4.3747332823382585, - "grad_norm": 0.01420077495276928, - "learning_rate": 0.0001999905633966674, - "loss": 46.0, - "step": 27165 - }, - { - "epoch": 4.374894319417046, - "grad_norm": 0.002734562149271369, - "learning_rate": 0.0001999905627016478, - "loss": 46.0, - "step": 27166 - }, - { - "epoch": 4.375055356495833, - "grad_norm": 0.013370763510465622, - "learning_rate": 0.00019999056200660262, - "loss": 46.0, - "step": 27167 - }, - { - "epoch": 4.375216393574621, - "grad_norm": 0.0037429702933877707, - "learning_rate": 0.00019999056131153183, - "loss": 46.0, - "step": 27168 - }, - { - "epoch": 4.375377430653408, - "grad_norm": 0.017090575769543648, - "learning_rate": 0.00019999056061643545, - "loss": 46.0, - "step": 27169 - }, - { - "epoch": 4.375538467732195, - "grad_norm": 0.007569979876279831, - "learning_rate": 0.00019999055992131348, - "loss": 46.0, - "step": 27170 - }, - { - "epoch": 4.375699504810982, - "grad_norm": 0.02025243639945984, - "learning_rate": 0.0001999905592261659, - "loss": 46.0, - "step": 27171 - }, - { - "epoch": 4.37586054188977, - "grad_norm": 0.0019129186403006315, - "learning_rate": 0.00019999055853099274, - "loss": 46.0, - "step": 27172 - }, - { - "epoch": 4.376021578968557, - "grad_norm": 0.011236516758799553, - "learning_rate": 0.00019999055783579398, - "loss": 46.0, - "step": 27173 - }, - { - "epoch": 4.376182616047345, - "grad_norm": 0.004096119664609432, - "learning_rate": 0.00019999055714056967, - "loss": 46.0, - "step": 27174 - }, - { - "epoch": 4.376343653126132, - "grad_norm": 0.011911129578948021, - "learning_rate": 0.00019999055644531971, - "loss": 46.0, - "step": 27175 - }, - { - "epoch": 4.37650469020492, - "grad_norm": 0.0014494559727609158, - "learning_rate": 0.0001999905557500442, - "loss": 46.0, - "step": 27176 - }, - { - "epoch": 4.376665727283707, - "grad_norm": 0.01516756508499384, - "learning_rate": 0.0001999905550547431, - "loss": 46.0, - "step": 27177 - }, - { - "epoch": 4.3768267643624945, - "grad_norm": 0.007629004307091236, - "learning_rate": 0.00019999055435941635, - "loss": 46.0, - "step": 27178 - }, - { - "epoch": 4.376987801441282, - "grad_norm": 0.004359136335551739, - "learning_rate": 0.00019999055366406405, - "loss": 46.0, - "step": 27179 - }, - { - "epoch": 4.377148838520069, - "grad_norm": 0.0033742839004844427, - "learning_rate": 0.00019999055296868615, - "loss": 46.0, - "step": 27180 - }, - { - "epoch": 4.377309875598857, - "grad_norm": 0.007984524592757225, - "learning_rate": 0.00019999055227328268, - "loss": 46.0, - "step": 27181 - }, - { - "epoch": 4.377470912677644, - "grad_norm": 0.0045737153850495815, - "learning_rate": 0.0001999905515778536, - "loss": 46.0, - "step": 27182 - }, - { - "epoch": 4.377631949756432, - "grad_norm": 0.015872295945882797, - "learning_rate": 0.00019999055088239893, - "loss": 46.0, - "step": 27183 - }, - { - "epoch": 4.377792986835219, - "grad_norm": 0.0030408913735300303, - "learning_rate": 0.00019999055018691864, - "loss": 46.0, - "step": 27184 - }, - { - "epoch": 4.377954023914006, - "grad_norm": 0.005618977826088667, - "learning_rate": 0.0001999905494914128, - "loss": 46.0, - "step": 27185 - }, - { - "epoch": 4.378115060992793, - "grad_norm": 0.0023326631635427475, - "learning_rate": 0.00019999054879588134, - "loss": 46.0, - "step": 27186 - }, - { - "epoch": 4.378276098071581, - "grad_norm": 0.001446245238184929, - "learning_rate": 0.00019999054810032428, - "loss": 46.0, - "step": 27187 - }, - { - "epoch": 4.378437135150368, - "grad_norm": 0.023635298013687134, - "learning_rate": 0.00019999054740474167, - "loss": 46.0, - "step": 27188 - }, - { - "epoch": 4.378598172229156, - "grad_norm": 0.001972927013412118, - "learning_rate": 0.0001999905467091334, - "loss": 46.0, - "step": 27189 - }, - { - "epoch": 4.378759209307943, - "grad_norm": 0.0035482419189065695, - "learning_rate": 0.0001999905460134996, - "loss": 46.0, - "step": 27190 - }, - { - "epoch": 4.3789202463867305, - "grad_norm": 0.003834902308881283, - "learning_rate": 0.00019999054531784021, - "loss": 46.0, - "step": 27191 - }, - { - "epoch": 4.379081283465518, - "grad_norm": 0.003968066535890102, - "learning_rate": 0.0001999905446221552, - "loss": 46.0, - "step": 27192 - }, - { - "epoch": 4.379242320544305, - "grad_norm": 0.0021446545142680407, - "learning_rate": 0.0001999905439264446, - "loss": 46.0, - "step": 27193 - }, - { - "epoch": 4.379403357623093, - "grad_norm": 0.002743483753874898, - "learning_rate": 0.0001999905432307084, - "loss": 46.0, - "step": 27194 - }, - { - "epoch": 4.37956439470188, - "grad_norm": 0.0024175182916224003, - "learning_rate": 0.0001999905425349466, - "loss": 46.0, - "step": 27195 - }, - { - "epoch": 4.379725431780668, - "grad_norm": 0.003960182424634695, - "learning_rate": 0.00019999054183915925, - "loss": 46.0, - "step": 27196 - }, - { - "epoch": 4.379886468859455, - "grad_norm": 0.005331596825271845, - "learning_rate": 0.00019999054114334627, - "loss": 46.0, - "step": 27197 - }, - { - "epoch": 4.380047505938243, - "grad_norm": 0.004465213045477867, - "learning_rate": 0.00019999054044750772, - "loss": 46.0, - "step": 27198 - }, - { - "epoch": 4.38020854301703, - "grad_norm": 0.005457528866827488, - "learning_rate": 0.00019999053975164354, - "loss": 46.0, - "step": 27199 - }, - { - "epoch": 4.380369580095817, - "grad_norm": 0.0036655315198004246, - "learning_rate": 0.0001999905390557538, - "loss": 46.0, - "step": 27200 - }, - { - "epoch": 4.380530617174604, - "grad_norm": 0.0014014887856319547, - "learning_rate": 0.00019999053835983847, - "loss": 46.0, - "step": 27201 - }, - { - "epoch": 4.380691654253392, - "grad_norm": 0.007068261504173279, - "learning_rate": 0.00019999053766389752, - "loss": 46.0, - "step": 27202 - }, - { - "epoch": 4.380852691332179, - "grad_norm": 0.001446820329874754, - "learning_rate": 0.00019999053696793102, - "loss": 46.0, - "step": 27203 - }, - { - "epoch": 4.3810137284109665, - "grad_norm": 0.005662822164595127, - "learning_rate": 0.0001999905362719389, - "loss": 46.0, - "step": 27204 - }, - { - "epoch": 4.381174765489754, - "grad_norm": 0.008554995059967041, - "learning_rate": 0.0001999905355759212, - "loss": 46.0, - "step": 27205 - }, - { - "epoch": 4.381335802568541, - "grad_norm": 0.008206910453736782, - "learning_rate": 0.0001999905348798779, - "loss": 46.0, - "step": 27206 - }, - { - "epoch": 4.381496839647329, - "grad_norm": 0.001227443921379745, - "learning_rate": 0.000199990534183809, - "loss": 46.0, - "step": 27207 - }, - { - "epoch": 4.381657876726116, - "grad_norm": 0.007651353720575571, - "learning_rate": 0.00019999053348771452, - "loss": 46.0, - "step": 27208 - }, - { - "epoch": 4.381818913804904, - "grad_norm": 0.0031927514355629683, - "learning_rate": 0.00019999053279159444, - "loss": 46.0, - "step": 27209 - }, - { - "epoch": 4.381979950883691, - "grad_norm": 0.005342401098459959, - "learning_rate": 0.00019999053209544874, - "loss": 46.0, - "step": 27210 - }, - { - "epoch": 4.382140987962479, - "grad_norm": 0.0032986472360789776, - "learning_rate": 0.00019999053139927751, - "loss": 46.0, - "step": 27211 - }, - { - "epoch": 4.382302025041266, - "grad_norm": 0.004678192548453808, - "learning_rate": 0.00019999053070308064, - "loss": 46.0, - "step": 27212 - }, - { - "epoch": 4.3824630621200535, - "grad_norm": 0.0019402520265430212, - "learning_rate": 0.0001999905300068582, - "loss": 46.0, - "step": 27213 - }, - { - "epoch": 4.382624099198841, - "grad_norm": 0.0028347631450742483, - "learning_rate": 0.00019999052931061014, - "loss": 46.0, - "step": 27214 - }, - { - "epoch": 4.382785136277628, - "grad_norm": 0.0008329771808348596, - "learning_rate": 0.0001999905286143365, - "loss": 46.0, - "step": 27215 - }, - { - "epoch": 4.382946173356415, - "grad_norm": 0.0035379512701183558, - "learning_rate": 0.0001999905279180373, - "loss": 46.0, - "step": 27216 - }, - { - "epoch": 4.3831072104352025, - "grad_norm": 0.0015824425499886274, - "learning_rate": 0.0001999905272217125, - "loss": 46.0, - "step": 27217 - }, - { - "epoch": 4.38326824751399, - "grad_norm": 0.00535028288140893, - "learning_rate": 0.0001999905265253621, - "loss": 46.0, - "step": 27218 - }, - { - "epoch": 4.383429284592777, - "grad_norm": 0.007845137268304825, - "learning_rate": 0.00019999052582898608, - "loss": 46.0, - "step": 27219 - }, - { - "epoch": 4.383590321671565, - "grad_norm": 0.006032073870301247, - "learning_rate": 0.00019999052513258446, - "loss": 46.0, - "step": 27220 - }, - { - "epoch": 4.383751358750352, - "grad_norm": 0.002569495001807809, - "learning_rate": 0.00019999052443615728, - "loss": 46.0, - "step": 27221 - }, - { - "epoch": 4.38391239582914, - "grad_norm": 0.0013232954079285264, - "learning_rate": 0.0001999905237397045, - "loss": 46.0, - "step": 27222 - }, - { - "epoch": 4.384073432907927, - "grad_norm": 0.001953263534232974, - "learning_rate": 0.00019999052304322615, - "loss": 46.0, - "step": 27223 - }, - { - "epoch": 4.384234469986715, - "grad_norm": 0.009116681292653084, - "learning_rate": 0.00019999052234672218, - "loss": 46.0, - "step": 27224 - }, - { - "epoch": 4.384395507065502, - "grad_norm": 0.00343763898126781, - "learning_rate": 0.00019999052165019262, - "loss": 46.0, - "step": 27225 - }, - { - "epoch": 4.3845565441442895, - "grad_norm": 0.016197996214032173, - "learning_rate": 0.00019999052095363745, - "loss": 46.0, - "step": 27226 - }, - { - "epoch": 4.384717581223077, - "grad_norm": 0.0060784099623560905, - "learning_rate": 0.00019999052025705672, - "loss": 46.0, - "step": 27227 - }, - { - "epoch": 4.3848786183018635, - "grad_norm": 0.011557098478078842, - "learning_rate": 0.0001999905195604504, - "loss": 46.0, - "step": 27228 - }, - { - "epoch": 4.385039655380651, - "grad_norm": 0.009646390564739704, - "learning_rate": 0.0001999905188638185, - "loss": 46.0, - "step": 27229 - }, - { - "epoch": 4.385200692459438, - "grad_norm": 0.012289199978113174, - "learning_rate": 0.00019999051816716094, - "loss": 46.0, - "step": 27230 - }, - { - "epoch": 4.385361729538226, - "grad_norm": 0.002778079593554139, - "learning_rate": 0.00019999051747047783, - "loss": 46.0, - "step": 27231 - }, - { - "epoch": 4.385522766617013, - "grad_norm": 0.004068195354193449, - "learning_rate": 0.0001999905167737691, - "loss": 46.0, - "step": 27232 - }, - { - "epoch": 4.385683803695801, - "grad_norm": 0.004521645605564117, - "learning_rate": 0.00019999051607703483, - "loss": 46.0, - "step": 27233 - }, - { - "epoch": 4.385844840774588, - "grad_norm": 0.0013227129820734262, - "learning_rate": 0.00019999051538027493, - "loss": 46.0, - "step": 27234 - }, - { - "epoch": 4.386005877853376, - "grad_norm": 0.011316705495119095, - "learning_rate": 0.00019999051468348947, - "loss": 46.0, - "step": 27235 - }, - { - "epoch": 4.386166914932163, - "grad_norm": 0.0023729936219751835, - "learning_rate": 0.00019999051398667837, - "loss": 46.0, - "step": 27236 - }, - { - "epoch": 4.386327952010951, - "grad_norm": 0.0026012288872152567, - "learning_rate": 0.00019999051328984174, - "loss": 46.0, - "step": 27237 - }, - { - "epoch": 4.386488989089738, - "grad_norm": 0.0019430381944403052, - "learning_rate": 0.00019999051259297947, - "loss": 46.0, - "step": 27238 - }, - { - "epoch": 4.3866500261685255, - "grad_norm": 0.003454585326835513, - "learning_rate": 0.00019999051189609158, - "loss": 46.0, - "step": 27239 - }, - { - "epoch": 4.386811063247313, - "grad_norm": 0.00687748659402132, - "learning_rate": 0.00019999051119917816, - "loss": 46.0, - "step": 27240 - }, - { - "epoch": 4.3869721003261, - "grad_norm": 0.0034899902530014515, - "learning_rate": 0.00019999051050223913, - "loss": 46.0, - "step": 27241 - }, - { - "epoch": 4.387133137404888, - "grad_norm": 0.00203665136359632, - "learning_rate": 0.0001999905098052745, - "loss": 46.0, - "step": 27242 - }, - { - "epoch": 4.387294174483674, - "grad_norm": 0.002735869726166129, - "learning_rate": 0.00019999050910828427, - "loss": 46.0, - "step": 27243 - }, - { - "epoch": 4.387455211562462, - "grad_norm": 0.002177219605073333, - "learning_rate": 0.00019999050841126845, - "loss": 46.0, - "step": 27244 - }, - { - "epoch": 4.387616248641249, - "grad_norm": 0.0011689657112583518, - "learning_rate": 0.00019999050771422704, - "loss": 46.0, - "step": 27245 - }, - { - "epoch": 4.387777285720037, - "grad_norm": 0.006479209288954735, - "learning_rate": 0.00019999050701716007, - "loss": 46.0, - "step": 27246 - }, - { - "epoch": 4.387938322798824, - "grad_norm": 0.008870921097695827, - "learning_rate": 0.00019999050632006746, - "loss": 46.0, - "step": 27247 - }, - { - "epoch": 4.388099359877612, - "grad_norm": 0.007648714818060398, - "learning_rate": 0.00019999050562294929, - "loss": 46.0, - "step": 27248 - }, - { - "epoch": 4.388260396956399, - "grad_norm": 0.0023880142252892256, - "learning_rate": 0.0001999905049258055, - "loss": 46.0, - "step": 27249 - }, - { - "epoch": 4.388421434035187, - "grad_norm": 0.008994984440505505, - "learning_rate": 0.00019999050422863615, - "loss": 46.0, - "step": 27250 - }, - { - "epoch": 4.388582471113974, - "grad_norm": 0.003872970351949334, - "learning_rate": 0.0001999905035314412, - "loss": 46.0, - "step": 27251 - }, - { - "epoch": 4.3887435081927615, - "grad_norm": 0.00975795928388834, - "learning_rate": 0.00019999050283422062, - "loss": 46.0, - "step": 27252 - }, - { - "epoch": 4.388904545271549, - "grad_norm": 0.00978830549865961, - "learning_rate": 0.00019999050213697449, - "loss": 46.0, - "step": 27253 - }, - { - "epoch": 4.389065582350336, - "grad_norm": 0.011856836266815662, - "learning_rate": 0.00019999050143970274, - "loss": 46.0, - "step": 27254 - }, - { - "epoch": 4.389226619429124, - "grad_norm": 0.0014544740552082658, - "learning_rate": 0.00019999050074240543, - "loss": 46.0, - "step": 27255 - }, - { - "epoch": 4.389387656507911, - "grad_norm": 0.00330316717736423, - "learning_rate": 0.00019999050004508248, - "loss": 46.0, - "step": 27256 - }, - { - "epoch": 4.389548693586699, - "grad_norm": 0.0014017503708600998, - "learning_rate": 0.000199990499347734, - "loss": 46.0, - "step": 27257 - }, - { - "epoch": 4.389709730665485, - "grad_norm": 0.0007029243861325085, - "learning_rate": 0.00019999049865035987, - "loss": 46.0, - "step": 27258 - }, - { - "epoch": 4.389870767744273, - "grad_norm": 0.004955565556883812, - "learning_rate": 0.00019999049795296016, - "loss": 46.0, - "step": 27259 - }, - { - "epoch": 4.39003180482306, - "grad_norm": 0.0024757403880357742, - "learning_rate": 0.00019999049725553486, - "loss": 46.0, - "step": 27260 - }, - { - "epoch": 4.390192841901848, - "grad_norm": 0.0020914538763463497, - "learning_rate": 0.000199990496558084, - "loss": 46.0, - "step": 27261 - }, - { - "epoch": 4.390353878980635, - "grad_norm": 0.002094556577503681, - "learning_rate": 0.00019999049586060753, - "loss": 46.0, - "step": 27262 - }, - { - "epoch": 4.390514916059423, - "grad_norm": 0.00696517201140523, - "learning_rate": 0.00019999049516310544, - "loss": 46.0, - "step": 27263 - }, - { - "epoch": 4.39067595313821, - "grad_norm": 0.0029635962564498186, - "learning_rate": 0.0001999904944655778, - "loss": 46.0, - "step": 27264 - }, - { - "epoch": 4.3908369902169975, - "grad_norm": 0.0019418272422626615, - "learning_rate": 0.00019999049376802456, - "loss": 46.0, - "step": 27265 - }, - { - "epoch": 4.390998027295785, - "grad_norm": 0.0016454599099233747, - "learning_rate": 0.00019999049307044568, - "loss": 46.0, - "step": 27266 - }, - { - "epoch": 4.391159064374572, - "grad_norm": 0.00554841011762619, - "learning_rate": 0.00019999049237284124, - "loss": 46.0, - "step": 27267 - }, - { - "epoch": 4.39132010145336, - "grad_norm": 0.003708989592269063, - "learning_rate": 0.0001999904916752112, - "loss": 46.0, - "step": 27268 - }, - { - "epoch": 4.391481138532147, - "grad_norm": 0.003114469349384308, - "learning_rate": 0.00019999049097755558, - "loss": 46.0, - "step": 27269 - }, - { - "epoch": 4.391642175610935, - "grad_norm": 0.008489621803164482, - "learning_rate": 0.00019999049027987439, - "loss": 46.0, - "step": 27270 - }, - { - "epoch": 4.391803212689722, - "grad_norm": 0.017003241926431656, - "learning_rate": 0.00019999048958216755, - "loss": 46.0, - "step": 27271 - }, - { - "epoch": 4.39196424976851, - "grad_norm": 0.006560994312167168, - "learning_rate": 0.00019999048888443515, - "loss": 46.0, - "step": 27272 - }, - { - "epoch": 4.392125286847296, - "grad_norm": 0.007608387619256973, - "learning_rate": 0.00019999048818667716, - "loss": 46.0, - "step": 27273 - }, - { - "epoch": 4.392286323926084, - "grad_norm": 0.0036803975235670805, - "learning_rate": 0.0001999904874888936, - "loss": 46.0, - "step": 27274 - }, - { - "epoch": 4.392447361004871, - "grad_norm": 0.0007890161359682679, - "learning_rate": 0.0001999904867910844, - "loss": 46.0, - "step": 27275 - }, - { - "epoch": 4.3926083980836585, - "grad_norm": 0.009090281091630459, - "learning_rate": 0.00019999048609324965, - "loss": 46.0, - "step": 27276 - }, - { - "epoch": 4.392769435162446, - "grad_norm": 0.0024434817023575306, - "learning_rate": 0.00019999048539538926, - "loss": 46.0, - "step": 27277 - }, - { - "epoch": 4.3929304722412335, - "grad_norm": 0.005545934662222862, - "learning_rate": 0.0001999904846975033, - "loss": 46.0, - "step": 27278 - }, - { - "epoch": 4.393091509320021, - "grad_norm": 0.01016025710850954, - "learning_rate": 0.00019999048399959178, - "loss": 46.0, - "step": 27279 - }, - { - "epoch": 4.393252546398808, - "grad_norm": 0.01716170646250248, - "learning_rate": 0.00019999048330165463, - "loss": 46.0, - "step": 27280 - }, - { - "epoch": 4.393413583477596, - "grad_norm": 0.0040521337650716305, - "learning_rate": 0.0001999904826036919, - "loss": 46.0, - "step": 27281 - }, - { - "epoch": 4.393574620556383, - "grad_norm": 0.0064969779923558235, - "learning_rate": 0.00019999048190570356, - "loss": 46.0, - "step": 27282 - }, - { - "epoch": 4.393735657635171, - "grad_norm": 0.0027617421001195908, - "learning_rate": 0.00019999048120768965, - "loss": 46.0, - "step": 27283 - }, - { - "epoch": 4.393896694713958, - "grad_norm": 0.0012256621848791838, - "learning_rate": 0.00019999048050965012, - "loss": 46.0, - "step": 27284 - }, - { - "epoch": 4.394057731792746, - "grad_norm": 0.003906638827174902, - "learning_rate": 0.00019999047981158504, - "loss": 46.0, - "step": 27285 - }, - { - "epoch": 4.394218768871533, - "grad_norm": 0.004821930546313524, - "learning_rate": 0.00019999047911349434, - "loss": 46.0, - "step": 27286 - }, - { - "epoch": 4.3943798059503205, - "grad_norm": 0.00246910797432065, - "learning_rate": 0.00019999047841537805, - "loss": 46.0, - "step": 27287 - }, - { - "epoch": 4.394540843029107, - "grad_norm": 0.002774229273200035, - "learning_rate": 0.00019999047771723615, - "loss": 46.0, - "step": 27288 - }, - { - "epoch": 4.3947018801078945, - "grad_norm": 0.0012742452090606093, - "learning_rate": 0.00019999047701906868, - "loss": 46.0, - "step": 27289 - }, - { - "epoch": 4.394862917186682, - "grad_norm": 0.010323584079742432, - "learning_rate": 0.0001999904763208756, - "loss": 46.0, - "step": 27290 - }, - { - "epoch": 4.395023954265469, - "grad_norm": 0.0025238331872969866, - "learning_rate": 0.00019999047562265697, - "loss": 46.0, - "step": 27291 - }, - { - "epoch": 4.395184991344257, - "grad_norm": 0.005917206406593323, - "learning_rate": 0.00019999047492441272, - "loss": 46.0, - "step": 27292 - }, - { - "epoch": 4.395346028423044, - "grad_norm": 0.0033823102712631226, - "learning_rate": 0.00019999047422614288, - "loss": 46.0, - "step": 27293 - }, - { - "epoch": 4.395507065501832, - "grad_norm": 0.01712118834257126, - "learning_rate": 0.00019999047352784743, - "loss": 46.0, - "step": 27294 - }, - { - "epoch": 4.395668102580619, - "grad_norm": 0.006925694178789854, - "learning_rate": 0.00019999047282952642, - "loss": 46.0, - "step": 27295 - }, - { - "epoch": 4.395829139659407, - "grad_norm": 0.003581579774618149, - "learning_rate": 0.0001999904721311798, - "loss": 46.0, - "step": 27296 - }, - { - "epoch": 4.395990176738194, - "grad_norm": 0.015719318762421608, - "learning_rate": 0.00019999047143280757, - "loss": 46.0, - "step": 27297 - }, - { - "epoch": 4.396151213816982, - "grad_norm": 0.002585549606010318, - "learning_rate": 0.00019999047073440977, - "loss": 46.0, - "step": 27298 - }, - { - "epoch": 4.396312250895769, - "grad_norm": 0.004612908698618412, - "learning_rate": 0.00019999047003598638, - "loss": 46.0, - "step": 27299 - }, - { - "epoch": 4.3964732879745565, - "grad_norm": 0.004402322694659233, - "learning_rate": 0.00019999046933753738, - "loss": 46.0, - "step": 27300 - }, - { - "epoch": 4.396634325053343, - "grad_norm": 0.012610073201358318, - "learning_rate": 0.0001999904686390628, - "loss": 46.0, - "step": 27301 - }, - { - "epoch": 4.3967953621321305, - "grad_norm": 0.0031418197322636843, - "learning_rate": 0.00019999046794056262, - "loss": 46.0, - "step": 27302 - }, - { - "epoch": 4.396956399210918, - "grad_norm": 0.006623541936278343, - "learning_rate": 0.00019999046724203688, - "loss": 46.0, - "step": 27303 - }, - { - "epoch": 4.397117436289705, - "grad_norm": 0.005509393289685249, - "learning_rate": 0.0001999904665434855, - "loss": 46.0, - "step": 27304 - }, - { - "epoch": 4.397278473368493, - "grad_norm": 0.0046736011281609535, - "learning_rate": 0.00019999046584490853, - "loss": 46.0, - "step": 27305 - }, - { - "epoch": 4.39743951044728, - "grad_norm": 0.01256459392607212, - "learning_rate": 0.00019999046514630598, - "loss": 46.0, - "step": 27306 - }, - { - "epoch": 4.397600547526068, - "grad_norm": 0.0038980746176093817, - "learning_rate": 0.00019999046444767787, - "loss": 46.0, - "step": 27307 - }, - { - "epoch": 4.397761584604855, - "grad_norm": 0.0073218620382249355, - "learning_rate": 0.00019999046374902414, - "loss": 46.0, - "step": 27308 - }, - { - "epoch": 4.397922621683643, - "grad_norm": 0.002032952383160591, - "learning_rate": 0.0001999904630503448, - "loss": 46.0, - "step": 27309 - }, - { - "epoch": 4.39808365876243, - "grad_norm": 0.012131624855101109, - "learning_rate": 0.0001999904623516399, - "loss": 46.0, - "step": 27310 - }, - { - "epoch": 4.398244695841218, - "grad_norm": 0.008933956734836102, - "learning_rate": 0.0001999904616529094, - "loss": 46.0, - "step": 27311 - }, - { - "epoch": 4.398405732920005, - "grad_norm": 0.0033500180579721928, - "learning_rate": 0.00019999046095415328, - "loss": 46.0, - "step": 27312 - }, - { - "epoch": 4.3985667699987925, - "grad_norm": 0.0023320051841437817, - "learning_rate": 0.0001999904602553716, - "loss": 46.0, - "step": 27313 - }, - { - "epoch": 4.39872780707758, - "grad_norm": 0.002560779917985201, - "learning_rate": 0.0001999904595565643, - "loss": 46.0, - "step": 27314 - }, - { - "epoch": 4.398888844156367, - "grad_norm": 0.012891615740954876, - "learning_rate": 0.00019999045885773145, - "loss": 46.0, - "step": 27315 - }, - { - "epoch": 4.399049881235154, - "grad_norm": 0.008207086473703384, - "learning_rate": 0.00019999045815887297, - "loss": 46.0, - "step": 27316 - }, - { - "epoch": 4.399210918313941, - "grad_norm": 0.0012559652095660567, - "learning_rate": 0.0001999904574599889, - "loss": 46.0, - "step": 27317 - }, - { - "epoch": 4.399371955392729, - "grad_norm": 0.001945055671967566, - "learning_rate": 0.00019999045676107925, - "loss": 46.0, - "step": 27318 - }, - { - "epoch": 4.399532992471516, - "grad_norm": 0.0026621054857969284, - "learning_rate": 0.000199990456062144, - "loss": 46.0, - "step": 27319 - }, - { - "epoch": 4.399694029550304, - "grad_norm": 0.002348612993955612, - "learning_rate": 0.00019999045536318318, - "loss": 46.0, - "step": 27320 - }, - { - "epoch": 4.399855066629091, - "grad_norm": 0.01567247323691845, - "learning_rate": 0.00019999045466419674, - "loss": 46.0, - "step": 27321 - }, - { - "epoch": 4.400016103707879, - "grad_norm": 0.004743401426821947, - "learning_rate": 0.0001999904539651847, - "loss": 46.0, - "step": 27322 - }, - { - "epoch": 4.400177140786666, - "grad_norm": 0.011594566516578197, - "learning_rate": 0.0001999904532661471, - "loss": 46.0, - "step": 27323 - }, - { - "epoch": 4.400338177865454, - "grad_norm": 0.004417700227349997, - "learning_rate": 0.0001999904525670839, - "loss": 46.0, - "step": 27324 - }, - { - "epoch": 4.400499214944241, - "grad_norm": 0.0013746990589424968, - "learning_rate": 0.0001999904518679951, - "loss": 46.0, - "step": 27325 - }, - { - "epoch": 4.4006602520230285, - "grad_norm": 0.0018068519420921803, - "learning_rate": 0.0001999904511688807, - "loss": 46.0, - "step": 27326 - }, - { - "epoch": 4.400821289101816, - "grad_norm": 0.0023612671066075563, - "learning_rate": 0.0001999904504697407, - "loss": 46.0, - "step": 27327 - }, - { - "epoch": 4.400982326180603, - "grad_norm": 0.0010737653356045485, - "learning_rate": 0.00019999044977057513, - "loss": 46.0, - "step": 27328 - }, - { - "epoch": 4.401143363259391, - "grad_norm": 0.007300083991140127, - "learning_rate": 0.000199990449071384, - "loss": 46.0, - "step": 27329 - }, - { - "epoch": 4.401304400338178, - "grad_norm": 0.004863124340772629, - "learning_rate": 0.0001999904483721672, - "loss": 46.0, - "step": 27330 - }, - { - "epoch": 4.401465437416965, - "grad_norm": 0.0029079876840114594, - "learning_rate": 0.00019999044767292484, - "loss": 46.0, - "step": 27331 - }, - { - "epoch": 4.401626474495752, - "grad_norm": 0.002203600015491247, - "learning_rate": 0.0001999904469736569, - "loss": 46.0, - "step": 27332 - }, - { - "epoch": 4.40178751157454, - "grad_norm": 0.0017512129852548242, - "learning_rate": 0.00019999044627436337, - "loss": 46.0, - "step": 27333 - }, - { - "epoch": 4.401948548653327, - "grad_norm": 0.004532170481979847, - "learning_rate": 0.00019999044557504422, - "loss": 46.0, - "step": 27334 - }, - { - "epoch": 4.402109585732115, - "grad_norm": 0.012387637980282307, - "learning_rate": 0.0001999904448756995, - "loss": 46.0, - "step": 27335 - }, - { - "epoch": 4.402270622810902, - "grad_norm": 0.0029165635351091623, - "learning_rate": 0.0001999904441763292, - "loss": 46.0, - "step": 27336 - }, - { - "epoch": 4.4024316598896895, - "grad_norm": 0.003268257947638631, - "learning_rate": 0.0001999904434769333, - "loss": 46.0, - "step": 27337 - }, - { - "epoch": 4.402592696968477, - "grad_norm": 0.028318816795945168, - "learning_rate": 0.00019999044277751177, - "loss": 46.0, - "step": 27338 - }, - { - "epoch": 4.4027537340472644, - "grad_norm": 0.0035808617249131203, - "learning_rate": 0.00019999044207806468, - "loss": 46.0, - "step": 27339 - }, - { - "epoch": 4.402914771126052, - "grad_norm": 0.00875371228903532, - "learning_rate": 0.000199990441378592, - "loss": 46.0, - "step": 27340 - }, - { - "epoch": 4.403075808204839, - "grad_norm": 0.0037388845812529325, - "learning_rate": 0.0001999904406790937, - "loss": 46.0, - "step": 27341 - }, - { - "epoch": 4.403236845283627, - "grad_norm": 0.00576428696513176, - "learning_rate": 0.00019999043997956983, - "loss": 46.0, - "step": 27342 - }, - { - "epoch": 4.403397882362414, - "grad_norm": 0.0019498420879244804, - "learning_rate": 0.0001999904392800204, - "loss": 46.0, - "step": 27343 - }, - { - "epoch": 4.403558919441202, - "grad_norm": 0.0016144951805472374, - "learning_rate": 0.00019999043858044533, - "loss": 46.0, - "step": 27344 - }, - { - "epoch": 4.403719956519989, - "grad_norm": 0.005327403545379639, - "learning_rate": 0.0001999904378808447, - "loss": 46.0, - "step": 27345 - }, - { - "epoch": 4.403880993598776, - "grad_norm": 0.0029097970109432936, - "learning_rate": 0.00019999043718121843, - "loss": 46.0, - "step": 27346 - }, - { - "epoch": 4.404042030677563, - "grad_norm": 0.005063321907073259, - "learning_rate": 0.00019999043648156662, - "loss": 46.0, - "step": 27347 - }, - { - "epoch": 4.404203067756351, - "grad_norm": 0.0027337747160345316, - "learning_rate": 0.00019999043578188919, - "loss": 46.0, - "step": 27348 - }, - { - "epoch": 4.404364104835138, - "grad_norm": 0.00688413018360734, - "learning_rate": 0.00019999043508218617, - "loss": 46.0, - "step": 27349 - }, - { - "epoch": 4.4045251419139255, - "grad_norm": 0.0009647997212596238, - "learning_rate": 0.00019999043438245754, - "loss": 46.0, - "step": 27350 - }, - { - "epoch": 4.404686178992713, - "grad_norm": 0.0017396489856764674, - "learning_rate": 0.00019999043368270334, - "loss": 46.0, - "step": 27351 - }, - { - "epoch": 4.4048472160715, - "grad_norm": 0.0025180771481245756, - "learning_rate": 0.00019999043298292354, - "loss": 46.0, - "step": 27352 - }, - { - "epoch": 4.405008253150288, - "grad_norm": 0.0048584109172225, - "learning_rate": 0.00019999043228311814, - "loss": 46.0, - "step": 27353 - }, - { - "epoch": 4.405169290229075, - "grad_norm": 0.001719545922242105, - "learning_rate": 0.0001999904315832872, - "loss": 46.0, - "step": 27354 - }, - { - "epoch": 4.405330327307863, - "grad_norm": 0.0025414868723601103, - "learning_rate": 0.00019999043088343062, - "loss": 46.0, - "step": 27355 - }, - { - "epoch": 4.40549136438665, - "grad_norm": 0.011448708362877369, - "learning_rate": 0.00019999043018354844, - "loss": 46.0, - "step": 27356 - }, - { - "epoch": 4.405652401465438, - "grad_norm": 0.005573517642915249, - "learning_rate": 0.0001999904294836407, - "loss": 46.0, - "step": 27357 - }, - { - "epoch": 4.405813438544225, - "grad_norm": 0.0020006783306598663, - "learning_rate": 0.00019999042878370734, - "loss": 46.0, - "step": 27358 - }, - { - "epoch": 4.405974475623013, - "grad_norm": 0.01607229746878147, - "learning_rate": 0.0001999904280837484, - "loss": 46.0, - "step": 27359 - }, - { - "epoch": 4.4061355127018, - "grad_norm": 0.0024870475754141808, - "learning_rate": 0.00019999042738376386, - "loss": 46.0, - "step": 27360 - }, - { - "epoch": 4.406296549780587, - "grad_norm": 0.011495199985802174, - "learning_rate": 0.00019999042668375375, - "loss": 46.0, - "step": 27361 - }, - { - "epoch": 4.406457586859374, - "grad_norm": 0.0029110966715961695, - "learning_rate": 0.000199990425983718, - "loss": 46.0, - "step": 27362 - }, - { - "epoch": 4.4066186239381615, - "grad_norm": 0.006751770153641701, - "learning_rate": 0.00019999042528365667, - "loss": 46.0, - "step": 27363 - }, - { - "epoch": 4.406779661016949, - "grad_norm": 0.010595368221402168, - "learning_rate": 0.00019999042458356981, - "loss": 46.0, - "step": 27364 - }, - { - "epoch": 4.406940698095736, - "grad_norm": 0.0059717013500630856, - "learning_rate": 0.0001999904238834573, - "loss": 46.0, - "step": 27365 - }, - { - "epoch": 4.407101735174524, - "grad_norm": 0.002517386106774211, - "learning_rate": 0.00019999042318331919, - "loss": 46.0, - "step": 27366 - }, - { - "epoch": 4.407262772253311, - "grad_norm": 0.0035003568045794964, - "learning_rate": 0.00019999042248315552, - "loss": 46.0, - "step": 27367 - }, - { - "epoch": 4.407423809332099, - "grad_norm": 0.006473209243267775, - "learning_rate": 0.00019999042178296623, - "loss": 46.0, - "step": 27368 - }, - { - "epoch": 4.407584846410886, - "grad_norm": 0.011653761379420757, - "learning_rate": 0.00019999042108275136, - "loss": 46.0, - "step": 27369 - }, - { - "epoch": 4.407745883489674, - "grad_norm": 0.007314703427255154, - "learning_rate": 0.00019999042038251093, - "loss": 46.0, - "step": 27370 - }, - { - "epoch": 4.407906920568461, - "grad_norm": 0.002933237934485078, - "learning_rate": 0.00019999041968224486, - "loss": 46.0, - "step": 27371 - }, - { - "epoch": 4.408067957647249, - "grad_norm": 0.003286631777882576, - "learning_rate": 0.0001999904189819532, - "loss": 46.0, - "step": 27372 - }, - { - "epoch": 4.408228994726036, - "grad_norm": 0.0019488940015435219, - "learning_rate": 0.00019999041828163599, - "loss": 46.0, - "step": 27373 - }, - { - "epoch": 4.4083900318048235, - "grad_norm": 0.002086965600028634, - "learning_rate": 0.00019999041758129315, - "loss": 46.0, - "step": 27374 - }, - { - "epoch": 4.40855106888361, - "grad_norm": 0.0017261735629290342, - "learning_rate": 0.00019999041688092473, - "loss": 46.0, - "step": 27375 - }, - { - "epoch": 4.4087121059623975, - "grad_norm": 0.006090558134019375, - "learning_rate": 0.00019999041618053073, - "loss": 46.0, - "step": 27376 - }, - { - "epoch": 4.408873143041185, - "grad_norm": 0.0021092109382152557, - "learning_rate": 0.0001999904154801111, - "loss": 46.0, - "step": 27377 - }, - { - "epoch": 4.409034180119972, - "grad_norm": 0.006767811253666878, - "learning_rate": 0.00019999041477966592, - "loss": 46.0, - "step": 27378 - }, - { - "epoch": 4.40919521719876, - "grad_norm": 0.002752867992967367, - "learning_rate": 0.00019999041407919513, - "loss": 46.0, - "step": 27379 - }, - { - "epoch": 4.409356254277547, - "grad_norm": 0.004678305238485336, - "learning_rate": 0.00019999041337869872, - "loss": 46.0, - "step": 27380 - }, - { - "epoch": 4.409517291356335, - "grad_norm": 0.0012461270671337843, - "learning_rate": 0.00019999041267817677, - "loss": 46.0, - "step": 27381 - }, - { - "epoch": 4.409678328435122, - "grad_norm": 0.0024450754281133413, - "learning_rate": 0.0001999904119776292, - "loss": 46.0, - "step": 27382 - }, - { - "epoch": 4.40983936551391, - "grad_norm": 0.006418305449187756, - "learning_rate": 0.00019999041127705605, - "loss": 46.0, - "step": 27383 - }, - { - "epoch": 4.410000402592697, - "grad_norm": 0.012624701485037804, - "learning_rate": 0.00019999041057645726, - "loss": 46.0, - "step": 27384 - }, - { - "epoch": 4.4101614396714846, - "grad_norm": 0.0018664766103029251, - "learning_rate": 0.00019999040987583294, - "loss": 46.0, - "step": 27385 - }, - { - "epoch": 4.410322476750272, - "grad_norm": 0.003344783326610923, - "learning_rate": 0.000199990409175183, - "loss": 46.0, - "step": 27386 - }, - { - "epoch": 4.4104835138290595, - "grad_norm": 0.003047558479011059, - "learning_rate": 0.00019999040847450746, - "loss": 46.0, - "step": 27387 - }, - { - "epoch": 4.410644550907847, - "grad_norm": 0.006820410490036011, - "learning_rate": 0.00019999040777380635, - "loss": 46.0, - "step": 27388 - }, - { - "epoch": 4.4108055879866335, - "grad_norm": 0.008419794030487537, - "learning_rate": 0.00019999040707307963, - "loss": 46.0, - "step": 27389 - }, - { - "epoch": 4.410966625065421, - "grad_norm": 0.006269561592489481, - "learning_rate": 0.00019999040637232732, - "loss": 46.0, - "step": 27390 - }, - { - "epoch": 4.411127662144208, - "grad_norm": 0.0024414879735559225, - "learning_rate": 0.0001999904056715494, - "loss": 46.0, - "step": 27391 - }, - { - "epoch": 4.411288699222996, - "grad_norm": 0.0022647599689662457, - "learning_rate": 0.00019999040497074592, - "loss": 46.0, - "step": 27392 - }, - { - "epoch": 4.411449736301783, - "grad_norm": 0.0038857299368828535, - "learning_rate": 0.00019999040426991685, - "loss": 46.0, - "step": 27393 - }, - { - "epoch": 4.411610773380571, - "grad_norm": 0.0017655508127063513, - "learning_rate": 0.00019999040356906216, - "loss": 46.0, - "step": 27394 - }, - { - "epoch": 4.411771810459358, - "grad_norm": 0.0027361814863979816, - "learning_rate": 0.0001999904028681819, - "loss": 46.0, - "step": 27395 - }, - { - "epoch": 4.411932847538146, - "grad_norm": 0.001051982631906867, - "learning_rate": 0.00019999040216727606, - "loss": 46.0, - "step": 27396 - }, - { - "epoch": 4.412093884616933, - "grad_norm": 0.0018975939601659775, - "learning_rate": 0.00019999040146634459, - "loss": 46.0, - "step": 27397 - }, - { - "epoch": 4.4122549216957205, - "grad_norm": 0.0038895714096724987, - "learning_rate": 0.00019999040076538752, - "loss": 46.0, - "step": 27398 - }, - { - "epoch": 4.412415958774508, - "grad_norm": 0.0020249593071639538, - "learning_rate": 0.00019999040006440488, - "loss": 46.0, - "step": 27399 - }, - { - "epoch": 4.412576995853295, - "grad_norm": 0.0033499368000775576, - "learning_rate": 0.00019999039936339664, - "loss": 46.0, - "step": 27400 - }, - { - "epoch": 4.412738032932083, - "grad_norm": 0.0009048962383531034, - "learning_rate": 0.00019999039866236285, - "loss": 46.0, - "step": 27401 - }, - { - "epoch": 4.41289907001087, - "grad_norm": 0.002805883763357997, - "learning_rate": 0.0001999903979613034, - "loss": 46.0, - "step": 27402 - }, - { - "epoch": 4.413060107089658, - "grad_norm": 0.004113157745450735, - "learning_rate": 0.0001999903972602184, - "loss": 46.0, - "step": 27403 - }, - { - "epoch": 4.413221144168444, - "grad_norm": 0.005982518196105957, - "learning_rate": 0.0001999903965591078, - "loss": 46.0, - "step": 27404 - }, - { - "epoch": 4.413382181247232, - "grad_norm": 0.001797487959265709, - "learning_rate": 0.0001999903958579716, - "loss": 46.0, - "step": 27405 - }, - { - "epoch": 4.413543218326019, - "grad_norm": 0.003818618832156062, - "learning_rate": 0.00019999039515680985, - "loss": 46.0, - "step": 27406 - }, - { - "epoch": 4.413704255404807, - "grad_norm": 0.004484761506319046, - "learning_rate": 0.00019999039445562248, - "loss": 46.0, - "step": 27407 - }, - { - "epoch": 4.413865292483594, - "grad_norm": 0.027530241757631302, - "learning_rate": 0.00019999039375440946, - "loss": 46.0, - "step": 27408 - }, - { - "epoch": 4.414026329562382, - "grad_norm": 0.010995124466717243, - "learning_rate": 0.00019999039305317092, - "loss": 46.0, - "step": 27409 - }, - { - "epoch": 4.414187366641169, - "grad_norm": 0.004521520342677832, - "learning_rate": 0.00019999039235190676, - "loss": 46.0, - "step": 27410 - }, - { - "epoch": 4.4143484037199565, - "grad_norm": 0.0023264845367521048, - "learning_rate": 0.00019999039165061698, - "loss": 46.0, - "step": 27411 - }, - { - "epoch": 4.414509440798744, - "grad_norm": 0.002906639128923416, - "learning_rate": 0.00019999039094930167, - "loss": 46.0, - "step": 27412 - }, - { - "epoch": 4.414670477877531, - "grad_norm": 0.002049673581495881, - "learning_rate": 0.00019999039024796072, - "loss": 46.0, - "step": 27413 - }, - { - "epoch": 4.414831514956319, - "grad_norm": 0.007087324280291796, - "learning_rate": 0.00019999038954659422, - "loss": 46.0, - "step": 27414 - }, - { - "epoch": 4.414992552035106, - "grad_norm": 0.0018439792329445481, - "learning_rate": 0.0001999903888452021, - "loss": 46.0, - "step": 27415 - }, - { - "epoch": 4.415153589113894, - "grad_norm": 0.0035840305499732494, - "learning_rate": 0.00019999038814378438, - "loss": 46.0, - "step": 27416 - }, - { - "epoch": 4.415314626192681, - "grad_norm": 0.012445604428648949, - "learning_rate": 0.00019999038744234108, - "loss": 46.0, - "step": 27417 - }, - { - "epoch": 4.415475663271469, - "grad_norm": 0.013269487768411636, - "learning_rate": 0.00019999038674087217, - "loss": 46.0, - "step": 27418 - }, - { - "epoch": 4.415636700350255, - "grad_norm": 0.00528492359444499, - "learning_rate": 0.0001999903860393777, - "loss": 46.0, - "step": 27419 - }, - { - "epoch": 4.415797737429043, - "grad_norm": 0.0030354666523635387, - "learning_rate": 0.0001999903853378576, - "loss": 46.0, - "step": 27420 - }, - { - "epoch": 4.41595877450783, - "grad_norm": 0.00844779796898365, - "learning_rate": 0.00019999038463631194, - "loss": 46.0, - "step": 27421 - }, - { - "epoch": 4.416119811586618, - "grad_norm": 0.0012517308350652456, - "learning_rate": 0.00019999038393474068, - "loss": 46.0, - "step": 27422 - }, - { - "epoch": 4.416280848665405, - "grad_norm": 0.007626294158399105, - "learning_rate": 0.00019999038323314383, - "loss": 46.0, - "step": 27423 - }, - { - "epoch": 4.4164418857441925, - "grad_norm": 0.005166039802134037, - "learning_rate": 0.0001999903825315214, - "loss": 46.0, - "step": 27424 - }, - { - "epoch": 4.41660292282298, - "grad_norm": 0.003854632843285799, - "learning_rate": 0.00019999038182987332, - "loss": 46.0, - "step": 27425 - }, - { - "epoch": 4.416763959901767, - "grad_norm": 0.00282242801040411, - "learning_rate": 0.0001999903811281997, - "loss": 46.0, - "step": 27426 - }, - { - "epoch": 4.416924996980555, - "grad_norm": 0.009118017740547657, - "learning_rate": 0.00019999038042650046, - "loss": 46.0, - "step": 27427 - }, - { - "epoch": 4.417086034059342, - "grad_norm": 0.005772007163614035, - "learning_rate": 0.00019999037972477565, - "loss": 46.0, - "step": 27428 - }, - { - "epoch": 4.41724707113813, - "grad_norm": 0.0037051483523100615, - "learning_rate": 0.00019999037902302522, - "loss": 46.0, - "step": 27429 - }, - { - "epoch": 4.417408108216917, - "grad_norm": 0.0013796199345961213, - "learning_rate": 0.00019999037832124923, - "loss": 46.0, - "step": 27430 - }, - { - "epoch": 4.417569145295705, - "grad_norm": 0.008439082652330399, - "learning_rate": 0.00019999037761944763, - "loss": 46.0, - "step": 27431 - }, - { - "epoch": 4.417730182374492, - "grad_norm": 0.009900303557515144, - "learning_rate": 0.00019999037691762045, - "loss": 46.0, - "step": 27432 - }, - { - "epoch": 4.41789121945328, - "grad_norm": 0.001965191913768649, - "learning_rate": 0.00019999037621576767, - "loss": 46.0, - "step": 27433 - }, - { - "epoch": 4.418052256532066, - "grad_norm": 0.00998140498995781, - "learning_rate": 0.0001999903755138893, - "loss": 46.0, - "step": 27434 - }, - { - "epoch": 4.418213293610854, - "grad_norm": 0.0008698838646523654, - "learning_rate": 0.00019999037481198534, - "loss": 46.0, - "step": 27435 - }, - { - "epoch": 4.418374330689641, - "grad_norm": 0.007133046630769968, - "learning_rate": 0.00019999037411005577, - "loss": 46.0, - "step": 27436 - }, - { - "epoch": 4.4185353677684285, - "grad_norm": 0.0007299024146050215, - "learning_rate": 0.0001999903734081006, - "loss": 46.0, - "step": 27437 - }, - { - "epoch": 4.418696404847216, - "grad_norm": 0.003916271962225437, - "learning_rate": 0.00019999037270611986, - "loss": 46.0, - "step": 27438 - }, - { - "epoch": 4.418857441926003, - "grad_norm": 0.001871993183158338, - "learning_rate": 0.00019999037200411354, - "loss": 46.0, - "step": 27439 - }, - { - "epoch": 4.419018479004791, - "grad_norm": 0.004256522282958031, - "learning_rate": 0.0001999903713020816, - "loss": 46.0, - "step": 27440 - }, - { - "epoch": 4.419179516083578, - "grad_norm": 0.0019722296856343746, - "learning_rate": 0.00019999037060002407, - "loss": 46.0, - "step": 27441 - }, - { - "epoch": 4.419340553162366, - "grad_norm": 0.004146620165556669, - "learning_rate": 0.000199990369897941, - "loss": 46.0, - "step": 27442 - }, - { - "epoch": 4.419501590241153, - "grad_norm": 0.010430843569338322, - "learning_rate": 0.00019999036919583226, - "loss": 46.0, - "step": 27443 - }, - { - "epoch": 4.419662627319941, - "grad_norm": 0.001501597696915269, - "learning_rate": 0.00019999036849369797, - "loss": 46.0, - "step": 27444 - }, - { - "epoch": 4.419823664398728, - "grad_norm": 0.013357128947973251, - "learning_rate": 0.00019999036779153807, - "loss": 46.0, - "step": 27445 - }, - { - "epoch": 4.4199847014775155, - "grad_norm": 0.005096310283988714, - "learning_rate": 0.00019999036708935258, - "loss": 46.0, - "step": 27446 - }, - { - "epoch": 4.420145738556303, - "grad_norm": 0.008011817000806332, - "learning_rate": 0.00019999036638714153, - "loss": 46.0, - "step": 27447 - }, - { - "epoch": 4.4203067756350904, - "grad_norm": 0.004170471336692572, - "learning_rate": 0.00019999036568490484, - "loss": 46.0, - "step": 27448 - }, - { - "epoch": 4.420467812713877, - "grad_norm": 0.0007771399104967713, - "learning_rate": 0.0001999903649826426, - "loss": 46.0, - "step": 27449 - }, - { - "epoch": 4.4206288497926645, - "grad_norm": 0.005197890568524599, - "learning_rate": 0.00019999036428035476, - "loss": 46.0, - "step": 27450 - }, - { - "epoch": 4.420789886871452, - "grad_norm": 0.0008401761297136545, - "learning_rate": 0.0001999903635780413, - "loss": 46.0, - "step": 27451 - }, - { - "epoch": 4.420950923950239, - "grad_norm": 0.003124451031908393, - "learning_rate": 0.00019999036287570227, - "loss": 46.0, - "step": 27452 - }, - { - "epoch": 4.421111961029027, - "grad_norm": 0.006259698886424303, - "learning_rate": 0.0001999903621733376, - "loss": 46.0, - "step": 27453 - }, - { - "epoch": 4.421272998107814, - "grad_norm": 0.004135970026254654, - "learning_rate": 0.0001999903614709474, - "loss": 46.0, - "step": 27454 - }, - { - "epoch": 4.421434035186602, - "grad_norm": 0.008526977151632309, - "learning_rate": 0.0001999903607685316, - "loss": 46.0, - "step": 27455 - }, - { - "epoch": 4.421595072265389, - "grad_norm": 0.003540364094078541, - "learning_rate": 0.00019999036006609019, - "loss": 46.0, - "step": 27456 - }, - { - "epoch": 4.421756109344177, - "grad_norm": 0.00501538161188364, - "learning_rate": 0.0001999903593636232, - "loss": 46.0, - "step": 27457 - }, - { - "epoch": 4.421917146422964, - "grad_norm": 0.004424090497195721, - "learning_rate": 0.0001999903586611306, - "loss": 46.0, - "step": 27458 - }, - { - "epoch": 4.4220781835017515, - "grad_norm": 0.005353490822017193, - "learning_rate": 0.00019999035795861242, - "loss": 46.0, - "step": 27459 - }, - { - "epoch": 4.422239220580539, - "grad_norm": 0.007835939526557922, - "learning_rate": 0.00019999035725606863, - "loss": 46.0, - "step": 27460 - }, - { - "epoch": 4.422400257659326, - "grad_norm": 0.004030084237456322, - "learning_rate": 0.00019999035655349928, - "loss": 46.0, - "step": 27461 - }, - { - "epoch": 4.422561294738113, - "grad_norm": 0.012688993476331234, - "learning_rate": 0.00019999035585090431, - "loss": 46.0, - "step": 27462 - }, - { - "epoch": 4.4227223318169, - "grad_norm": 0.0035829246044158936, - "learning_rate": 0.00019999035514828373, - "loss": 46.0, - "step": 27463 - }, - { - "epoch": 4.422883368895688, - "grad_norm": 0.0062058595940470695, - "learning_rate": 0.0001999903544456376, - "loss": 46.0, - "step": 27464 - }, - { - "epoch": 4.423044405974475, - "grad_norm": 0.011897902935743332, - "learning_rate": 0.0001999903537429659, - "loss": 46.0, - "step": 27465 - }, - { - "epoch": 4.423205443053263, - "grad_norm": 0.0048384033143520355, - "learning_rate": 0.00019999035304026853, - "loss": 46.0, - "step": 27466 - }, - { - "epoch": 4.42336648013205, - "grad_norm": 0.0071312733925879, - "learning_rate": 0.00019999035233754563, - "loss": 46.0, - "step": 27467 - }, - { - "epoch": 4.423527517210838, - "grad_norm": 0.005477431230247021, - "learning_rate": 0.0001999903516347971, - "loss": 46.0, - "step": 27468 - }, - { - "epoch": 4.423688554289625, - "grad_norm": 0.013101095333695412, - "learning_rate": 0.00019999035093202298, - "loss": 46.0, - "step": 27469 - }, - { - "epoch": 4.423849591368413, - "grad_norm": 0.006468312349170446, - "learning_rate": 0.0001999903502292233, - "loss": 46.0, - "step": 27470 - }, - { - "epoch": 4.4240106284472, - "grad_norm": 0.010020007379353046, - "learning_rate": 0.000199990349526398, - "loss": 46.0, - "step": 27471 - }, - { - "epoch": 4.4241716655259875, - "grad_norm": 0.005001259967684746, - "learning_rate": 0.0001999903488235471, - "loss": 46.0, - "step": 27472 - }, - { - "epoch": 4.424332702604775, - "grad_norm": 0.0012007616460323334, - "learning_rate": 0.00019999034812067062, - "loss": 46.0, - "step": 27473 - }, - { - "epoch": 4.424493739683562, - "grad_norm": 0.00495382584631443, - "learning_rate": 0.00019999034741776856, - "loss": 46.0, - "step": 27474 - }, - { - "epoch": 4.42465477676235, - "grad_norm": 0.011137055233120918, - "learning_rate": 0.0001999903467148409, - "loss": 46.0, - "step": 27475 - }, - { - "epoch": 4.424815813841137, - "grad_norm": 0.0017682421021163464, - "learning_rate": 0.00019999034601188767, - "loss": 46.0, - "step": 27476 - }, - { - "epoch": 4.424976850919924, - "grad_norm": 0.005838059354573488, - "learning_rate": 0.0001999903453089088, - "loss": 46.0, - "step": 27477 - }, - { - "epoch": 4.425137887998711, - "grad_norm": 0.0027915502432733774, - "learning_rate": 0.00019999034460590437, - "loss": 46.0, - "step": 27478 - }, - { - "epoch": 4.425298925077499, - "grad_norm": 0.003008077386766672, - "learning_rate": 0.00019999034390287435, - "loss": 46.0, - "step": 27479 - }, - { - "epoch": 4.425459962156286, - "grad_norm": 0.00393042154610157, - "learning_rate": 0.0001999903431998187, - "loss": 46.0, - "step": 27480 - }, - { - "epoch": 4.425620999235074, - "grad_norm": 0.004699901212006807, - "learning_rate": 0.0001999903424967375, - "loss": 46.0, - "step": 27481 - }, - { - "epoch": 4.425782036313861, - "grad_norm": 0.004551334772258997, - "learning_rate": 0.00019999034179363069, - "loss": 46.0, - "step": 27482 - }, - { - "epoch": 4.425943073392649, - "grad_norm": 0.008647710084915161, - "learning_rate": 0.00019999034109049828, - "loss": 46.0, - "step": 27483 - }, - { - "epoch": 4.426104110471436, - "grad_norm": 0.00466693053022027, - "learning_rate": 0.0001999903403873403, - "loss": 46.0, - "step": 27484 - }, - { - "epoch": 4.4262651475502235, - "grad_norm": 0.009514503180980682, - "learning_rate": 0.0001999903396841567, - "loss": 46.0, - "step": 27485 - }, - { - "epoch": 4.426426184629011, - "grad_norm": 0.010345939546823502, - "learning_rate": 0.00019999033898094752, - "loss": 46.0, - "step": 27486 - }, - { - "epoch": 4.426587221707798, - "grad_norm": 0.0046316394582390785, - "learning_rate": 0.00019999033827771277, - "loss": 46.0, - "step": 27487 - }, - { - "epoch": 4.426748258786586, - "grad_norm": 0.015254509635269642, - "learning_rate": 0.00019999033757445237, - "loss": 46.0, - "step": 27488 - }, - { - "epoch": 4.426909295865373, - "grad_norm": 0.0034906507935374975, - "learning_rate": 0.00019999033687116642, - "loss": 46.0, - "step": 27489 - }, - { - "epoch": 4.427070332944161, - "grad_norm": 0.0015474860556423664, - "learning_rate": 0.00019999033616785488, - "loss": 46.0, - "step": 27490 - }, - { - "epoch": 4.427231370022948, - "grad_norm": 0.0026442024391144514, - "learning_rate": 0.00019999033546451775, - "loss": 46.0, - "step": 27491 - }, - { - "epoch": 4.427392407101735, - "grad_norm": 0.013181975111365318, - "learning_rate": 0.000199990334761155, - "loss": 46.0, - "step": 27492 - }, - { - "epoch": 4.427553444180522, - "grad_norm": 0.003948105964809656, - "learning_rate": 0.00019999033405776668, - "loss": 46.0, - "step": 27493 - }, - { - "epoch": 4.42771448125931, - "grad_norm": 0.013080221600830555, - "learning_rate": 0.0001999903333543528, - "loss": 46.0, - "step": 27494 - }, - { - "epoch": 4.427875518338097, - "grad_norm": 0.002306859940290451, - "learning_rate": 0.00019999033265091326, - "loss": 46.0, - "step": 27495 - }, - { - "epoch": 4.428036555416885, - "grad_norm": 0.004247264936566353, - "learning_rate": 0.00019999033194744814, - "loss": 46.0, - "step": 27496 - }, - { - "epoch": 4.428197592495672, - "grad_norm": 0.014462347142398357, - "learning_rate": 0.00019999033124395746, - "loss": 46.0, - "step": 27497 - }, - { - "epoch": 4.4283586295744595, - "grad_norm": 0.0006957217119634151, - "learning_rate": 0.0001999903305404412, - "loss": 46.0, - "step": 27498 - }, - { - "epoch": 4.428519666653247, - "grad_norm": 0.0009768378222361207, - "learning_rate": 0.0001999903298368993, - "loss": 46.0, - "step": 27499 - }, - { - "epoch": 4.428680703732034, - "grad_norm": 0.00218068715184927, - "learning_rate": 0.00019999032913333182, - "loss": 46.0, - "step": 27500 - }, - { - "epoch": 4.428841740810822, - "grad_norm": 0.005291748326271772, - "learning_rate": 0.00019999032842973874, - "loss": 46.0, - "step": 27501 - }, - { - "epoch": 4.429002777889609, - "grad_norm": 0.0007746998453512788, - "learning_rate": 0.0001999903277261201, - "loss": 46.0, - "step": 27502 - }, - { - "epoch": 4.429163814968397, - "grad_norm": 0.006851615384221077, - "learning_rate": 0.00019999032702247585, - "loss": 46.0, - "step": 27503 - }, - { - "epoch": 4.429324852047184, - "grad_norm": 0.0024214282166212797, - "learning_rate": 0.000199990326318806, - "loss": 46.0, - "step": 27504 - }, - { - "epoch": 4.429485889125972, - "grad_norm": 0.0007645718287676573, - "learning_rate": 0.00019999032561511057, - "loss": 46.0, - "step": 27505 - }, - { - "epoch": 4.429646926204759, - "grad_norm": 0.009464041329920292, - "learning_rate": 0.00019999032491138956, - "loss": 46.0, - "step": 27506 - }, - { - "epoch": 4.429807963283546, - "grad_norm": 0.008248715661466122, - "learning_rate": 0.00019999032420764295, - "loss": 46.0, - "step": 27507 - }, - { - "epoch": 4.429969000362333, - "grad_norm": 0.0009513927507214248, - "learning_rate": 0.0001999903235038707, - "loss": 46.0, - "step": 27508 - }, - { - "epoch": 4.4301300374411205, - "grad_norm": 0.0029127642046660185, - "learning_rate": 0.00019999032280007293, - "loss": 46.0, - "step": 27509 - }, - { - "epoch": 4.430291074519908, - "grad_norm": 0.015009592287242413, - "learning_rate": 0.00019999032209624953, - "loss": 46.0, - "step": 27510 - }, - { - "epoch": 4.4304521115986955, - "grad_norm": 0.003507836489006877, - "learning_rate": 0.00019999032139240053, - "loss": 46.0, - "step": 27511 - }, - { - "epoch": 4.430613148677483, - "grad_norm": 0.009676394052803516, - "learning_rate": 0.00019999032068852593, - "loss": 46.0, - "step": 27512 - }, - { - "epoch": 4.43077418575627, - "grad_norm": 0.0035322660114616156, - "learning_rate": 0.00019999031998462575, - "loss": 46.0, - "step": 27513 - }, - { - "epoch": 4.430935222835058, - "grad_norm": 0.005746645852923393, - "learning_rate": 0.0001999903192807, - "loss": 46.0, - "step": 27514 - }, - { - "epoch": 4.431096259913845, - "grad_norm": 0.0014641284942626953, - "learning_rate": 0.00019999031857674865, - "loss": 46.0, - "step": 27515 - }, - { - "epoch": 4.431257296992633, - "grad_norm": 0.003112539416179061, - "learning_rate": 0.00019999031787277168, - "loss": 46.0, - "step": 27516 - }, - { - "epoch": 4.43141833407142, - "grad_norm": 0.005320766009390354, - "learning_rate": 0.00019999031716876915, - "loss": 46.0, - "step": 27517 - }, - { - "epoch": 4.431579371150208, - "grad_norm": 0.0018871987704187632, - "learning_rate": 0.000199990316464741, - "loss": 46.0, - "step": 27518 - }, - { - "epoch": 4.431740408228995, - "grad_norm": 0.013681559823453426, - "learning_rate": 0.0001999903157606873, - "loss": 46.0, - "step": 27519 - }, - { - "epoch": 4.4319014453077825, - "grad_norm": 0.0063905734568834305, - "learning_rate": 0.00019999031505660796, - "loss": 46.0, - "step": 27520 - }, - { - "epoch": 4.43206248238657, - "grad_norm": 0.003211074275895953, - "learning_rate": 0.00019999031435250305, - "loss": 46.0, - "step": 27521 - }, - { - "epoch": 4.4322235194653565, - "grad_norm": 0.0175580233335495, - "learning_rate": 0.00019999031364837256, - "loss": 46.0, - "step": 27522 - }, - { - "epoch": 4.432384556544144, - "grad_norm": 0.001359413145110011, - "learning_rate": 0.00019999031294421645, - "loss": 46.0, - "step": 27523 - }, - { - "epoch": 4.432545593622931, - "grad_norm": 0.0013794455444440246, - "learning_rate": 0.00019999031224003476, - "loss": 46.0, - "step": 27524 - }, - { - "epoch": 4.432706630701719, - "grad_norm": 0.00516211474314332, - "learning_rate": 0.00019999031153582748, - "loss": 46.0, - "step": 27525 - }, - { - "epoch": 4.432867667780506, - "grad_norm": 0.004926779307425022, - "learning_rate": 0.0001999903108315946, - "loss": 46.0, - "step": 27526 - }, - { - "epoch": 4.433028704859294, - "grad_norm": 0.003878003219142556, - "learning_rate": 0.00019999031012733615, - "loss": 46.0, - "step": 27527 - }, - { - "epoch": 4.433189741938081, - "grad_norm": 0.004260852932929993, - "learning_rate": 0.0001999903094230521, - "loss": 46.0, - "step": 27528 - }, - { - "epoch": 4.433350779016869, - "grad_norm": 0.001436437712982297, - "learning_rate": 0.00019999030871874243, - "loss": 46.0, - "step": 27529 - }, - { - "epoch": 4.433511816095656, - "grad_norm": 0.005477797240018845, - "learning_rate": 0.00019999030801440718, - "loss": 46.0, - "step": 27530 - }, - { - "epoch": 4.433672853174444, - "grad_norm": 0.006780264433473349, - "learning_rate": 0.00019999030731004635, - "loss": 46.0, - "step": 27531 - }, - { - "epoch": 4.433833890253231, - "grad_norm": 0.0016470797127112746, - "learning_rate": 0.00019999030660565993, - "loss": 46.0, - "step": 27532 - }, - { - "epoch": 4.4339949273320185, - "grad_norm": 0.0032933943439275026, - "learning_rate": 0.00019999030590124792, - "loss": 46.0, - "step": 27533 - }, - { - "epoch": 4.434155964410806, - "grad_norm": 0.008356256410479546, - "learning_rate": 0.0001999903051968103, - "loss": 46.0, - "step": 27534 - }, - { - "epoch": 4.4343170014895925, - "grad_norm": 0.0032204226590692997, - "learning_rate": 0.00019999030449234707, - "loss": 46.0, - "step": 27535 - }, - { - "epoch": 4.43447803856838, - "grad_norm": 0.007282535079866648, - "learning_rate": 0.00019999030378785827, - "loss": 46.0, - "step": 27536 - }, - { - "epoch": 4.434639075647167, - "grad_norm": 0.002672413596883416, - "learning_rate": 0.0001999903030833439, - "loss": 46.0, - "step": 27537 - }, - { - "epoch": 4.434800112725955, - "grad_norm": 0.008341788314282894, - "learning_rate": 0.00019999030237880392, - "loss": 46.0, - "step": 27538 - }, - { - "epoch": 4.434961149804742, - "grad_norm": 0.005341968033462763, - "learning_rate": 0.00019999030167423834, - "loss": 46.0, - "step": 27539 - }, - { - "epoch": 4.43512218688353, - "grad_norm": 0.002406828571110964, - "learning_rate": 0.00019999030096964717, - "loss": 46.0, - "step": 27540 - }, - { - "epoch": 4.435283223962317, - "grad_norm": 0.004608490038663149, - "learning_rate": 0.0001999903002650304, - "loss": 46.0, - "step": 27541 - }, - { - "epoch": 4.435444261041105, - "grad_norm": 0.003845945931971073, - "learning_rate": 0.00019999029956038807, - "loss": 46.0, - "step": 27542 - }, - { - "epoch": 4.435605298119892, - "grad_norm": 0.011631255969405174, - "learning_rate": 0.0001999902988557201, - "loss": 46.0, - "step": 27543 - }, - { - "epoch": 4.43576633519868, - "grad_norm": 0.018674982711672783, - "learning_rate": 0.00019999029815102656, - "loss": 46.0, - "step": 27544 - }, - { - "epoch": 4.435927372277467, - "grad_norm": 0.0025312798097729683, - "learning_rate": 0.00019999029744630743, - "loss": 46.0, - "step": 27545 - }, - { - "epoch": 4.4360884093562545, - "grad_norm": 0.003297422779724002, - "learning_rate": 0.00019999029674156273, - "loss": 46.0, - "step": 27546 - }, - { - "epoch": 4.436249446435042, - "grad_norm": 0.015062783844769001, - "learning_rate": 0.0001999902960367924, - "loss": 46.0, - "step": 27547 - }, - { - "epoch": 4.436410483513829, - "grad_norm": 0.004186402540653944, - "learning_rate": 0.0001999902953319965, - "loss": 46.0, - "step": 27548 - }, - { - "epoch": 4.436571520592617, - "grad_norm": 0.005380000453442335, - "learning_rate": 0.000199990294627175, - "loss": 46.0, - "step": 27549 - }, - { - "epoch": 4.436732557671403, - "grad_norm": 0.0017875123303383589, - "learning_rate": 0.0001999902939223279, - "loss": 46.0, - "step": 27550 - }, - { - "epoch": 4.436893594750191, - "grad_norm": 0.002819065237417817, - "learning_rate": 0.00019999029321745524, - "loss": 46.0, - "step": 27551 - }, - { - "epoch": 4.437054631828978, - "grad_norm": 0.004958902485668659, - "learning_rate": 0.00019999029251255697, - "loss": 46.0, - "step": 27552 - }, - { - "epoch": 4.437215668907766, - "grad_norm": 0.008189218118786812, - "learning_rate": 0.00019999029180763306, - "loss": 46.0, - "step": 27553 - }, - { - "epoch": 4.437376705986553, - "grad_norm": 0.003609184641391039, - "learning_rate": 0.00019999029110268364, - "loss": 46.0, - "step": 27554 - }, - { - "epoch": 4.437537743065341, - "grad_norm": 0.0032892327290028334, - "learning_rate": 0.00019999029039770858, - "loss": 46.0, - "step": 27555 - }, - { - "epoch": 4.437698780144128, - "grad_norm": 0.0021846629679203033, - "learning_rate": 0.00019999028969270793, - "loss": 46.0, - "step": 27556 - }, - { - "epoch": 4.437859817222916, - "grad_norm": 0.0025701646227389574, - "learning_rate": 0.00019999028898768167, - "loss": 46.0, - "step": 27557 - }, - { - "epoch": 4.438020854301703, - "grad_norm": 0.0029579834081232548, - "learning_rate": 0.00019999028828262985, - "loss": 46.0, - "step": 27558 - }, - { - "epoch": 4.4381818913804905, - "grad_norm": 0.0034569271374493837, - "learning_rate": 0.00019999028757755242, - "loss": 46.0, - "step": 27559 - }, - { - "epoch": 4.438342928459278, - "grad_norm": 0.00441031064838171, - "learning_rate": 0.00019999028687244942, - "loss": 46.0, - "step": 27560 - }, - { - "epoch": 4.438503965538065, - "grad_norm": 0.003183339722454548, - "learning_rate": 0.0001999902861673208, - "loss": 46.0, - "step": 27561 - }, - { - "epoch": 4.438665002616853, - "grad_norm": 0.015945790335536003, - "learning_rate": 0.00019999028546216661, - "loss": 46.0, - "step": 27562 - }, - { - "epoch": 4.43882603969564, - "grad_norm": 0.0013115203473716974, - "learning_rate": 0.00019999028475698683, - "loss": 46.0, - "step": 27563 - }, - { - "epoch": 4.438987076774428, - "grad_norm": 0.0027518514543771744, - "learning_rate": 0.0001999902840517814, - "loss": 46.0, - "step": 27564 - }, - { - "epoch": 4.439148113853214, - "grad_norm": 0.010220200754702091, - "learning_rate": 0.00019999028334655042, - "loss": 46.0, - "step": 27565 - }, - { - "epoch": 4.439309150932002, - "grad_norm": 0.0070998347364366055, - "learning_rate": 0.00019999028264129387, - "loss": 46.0, - "step": 27566 - }, - { - "epoch": 4.439470188010789, - "grad_norm": 0.0011235951678827405, - "learning_rate": 0.00019999028193601172, - "loss": 46.0, - "step": 27567 - }, - { - "epoch": 4.439631225089577, - "grad_norm": 0.009345768950879574, - "learning_rate": 0.00019999028123070397, - "loss": 46.0, - "step": 27568 - }, - { - "epoch": 4.439792262168364, - "grad_norm": 0.004411500412970781, - "learning_rate": 0.0001999902805253706, - "loss": 46.0, - "step": 27569 - }, - { - "epoch": 4.4399532992471515, - "grad_norm": 0.009427009150385857, - "learning_rate": 0.00019999027982001169, - "loss": 46.0, - "step": 27570 - }, - { - "epoch": 4.440114336325939, - "grad_norm": 0.006120585836470127, - "learning_rate": 0.00019999027911462712, - "loss": 46.0, - "step": 27571 - }, - { - "epoch": 4.4402753734047264, - "grad_norm": 0.002826493699103594, - "learning_rate": 0.00019999027840921703, - "loss": 46.0, - "step": 27572 - }, - { - "epoch": 4.440436410483514, - "grad_norm": 0.005567345768213272, - "learning_rate": 0.0001999902777037813, - "loss": 46.0, - "step": 27573 - }, - { - "epoch": 4.440597447562301, - "grad_norm": 0.004630262032151222, - "learning_rate": 0.00019999027699832, - "loss": 46.0, - "step": 27574 - }, - { - "epoch": 4.440758484641089, - "grad_norm": 0.00944850966334343, - "learning_rate": 0.00019999027629283308, - "loss": 46.0, - "step": 27575 - }, - { - "epoch": 4.440919521719876, - "grad_norm": 0.0021597445011138916, - "learning_rate": 0.00019999027558732058, - "loss": 46.0, - "step": 27576 - }, - { - "epoch": 4.441080558798664, - "grad_norm": 0.0020268631633371115, - "learning_rate": 0.00019999027488178253, - "loss": 46.0, - "step": 27577 - }, - { - "epoch": 4.441241595877451, - "grad_norm": 0.0027508866041898727, - "learning_rate": 0.00019999027417621883, - "loss": 46.0, - "step": 27578 - }, - { - "epoch": 4.441402632956239, - "grad_norm": 0.0073135183192789555, - "learning_rate": 0.00019999027347062957, - "loss": 46.0, - "step": 27579 - }, - { - "epoch": 4.441563670035025, - "grad_norm": 0.008463002741336823, - "learning_rate": 0.0001999902727650147, - "loss": 46.0, - "step": 27580 - }, - { - "epoch": 4.441724707113813, - "grad_norm": 0.004070807248353958, - "learning_rate": 0.00019999027205937426, - "loss": 46.0, - "step": 27581 - }, - { - "epoch": 4.4418857441926, - "grad_norm": 0.0016556144692003727, - "learning_rate": 0.0001999902713537082, - "loss": 46.0, - "step": 27582 - }, - { - "epoch": 4.4420467812713875, - "grad_norm": 0.003913614433258772, - "learning_rate": 0.00019999027064801657, - "loss": 46.0, - "step": 27583 - }, - { - "epoch": 4.442207818350175, - "grad_norm": 0.0021010919008404016, - "learning_rate": 0.00019999026994229932, - "loss": 46.0, - "step": 27584 - }, - { - "epoch": 4.442368855428962, - "grad_norm": 0.0022471530828624964, - "learning_rate": 0.0001999902692365565, - "loss": 46.0, - "step": 27585 - }, - { - "epoch": 4.44252989250775, - "grad_norm": 0.0034856132697314024, - "learning_rate": 0.0001999902685307881, - "loss": 46.0, - "step": 27586 - }, - { - "epoch": 4.442690929586537, - "grad_norm": 0.015602142550051212, - "learning_rate": 0.00019999026782499408, - "loss": 46.0, - "step": 27587 - }, - { - "epoch": 4.442851966665325, - "grad_norm": 0.006289406679570675, - "learning_rate": 0.00019999026711917445, - "loss": 46.0, - "step": 27588 - }, - { - "epoch": 4.443013003744112, - "grad_norm": 0.013078049756586552, - "learning_rate": 0.00019999026641332926, - "loss": 46.0, - "step": 27589 - }, - { - "epoch": 4.4431740408229, - "grad_norm": 0.0013088486157357693, - "learning_rate": 0.0001999902657074585, - "loss": 46.0, - "step": 27590 - }, - { - "epoch": 4.443335077901687, - "grad_norm": 0.003772629890590906, - "learning_rate": 0.0001999902650015621, - "loss": 46.0, - "step": 27591 - }, - { - "epoch": 4.443496114980475, - "grad_norm": 0.02117474004626274, - "learning_rate": 0.00019999026429564013, - "loss": 46.0, - "step": 27592 - }, - { - "epoch": 4.443657152059262, - "grad_norm": 0.004630803130567074, - "learning_rate": 0.00019999026358969257, - "loss": 46.0, - "step": 27593 - }, - { - "epoch": 4.4438181891380495, - "grad_norm": 0.006949576549232006, - "learning_rate": 0.0001999902628837194, - "loss": 46.0, - "step": 27594 - }, - { - "epoch": 4.443979226216836, - "grad_norm": 0.008330770768225193, - "learning_rate": 0.00019999026217772068, - "loss": 46.0, - "step": 27595 - }, - { - "epoch": 4.4441402632956235, - "grad_norm": 0.015360256657004356, - "learning_rate": 0.00019999026147169633, - "loss": 46.0, - "step": 27596 - }, - { - "epoch": 4.444301300374411, - "grad_norm": 0.00623265840113163, - "learning_rate": 0.0001999902607656464, - "loss": 46.0, - "step": 27597 - }, - { - "epoch": 4.444462337453198, - "grad_norm": 0.005557015538215637, - "learning_rate": 0.00019999026005957087, - "loss": 46.0, - "step": 27598 - }, - { - "epoch": 4.444623374531986, - "grad_norm": 0.0055144005455076694, - "learning_rate": 0.00019999025935346976, - "loss": 46.0, - "step": 27599 - }, - { - "epoch": 4.444784411610773, - "grad_norm": 0.00569901941344142, - "learning_rate": 0.00019999025864734303, - "loss": 46.0, - "step": 27600 - }, - { - "epoch": 4.444945448689561, - "grad_norm": 0.00342980376444757, - "learning_rate": 0.00019999025794119075, - "loss": 46.0, - "step": 27601 - }, - { - "epoch": 4.445106485768348, - "grad_norm": 0.000592138385400176, - "learning_rate": 0.00019999025723501285, - "loss": 46.0, - "step": 27602 - }, - { - "epoch": 4.445267522847136, - "grad_norm": 0.004448787309229374, - "learning_rate": 0.00019999025652880933, - "loss": 46.0, - "step": 27603 - }, - { - "epoch": 4.445428559925923, - "grad_norm": 0.004147749859839678, - "learning_rate": 0.00019999025582258026, - "loss": 46.0, - "step": 27604 - }, - { - "epoch": 4.445589597004711, - "grad_norm": 0.004227131139487028, - "learning_rate": 0.00019999025511632562, - "loss": 46.0, - "step": 27605 - }, - { - "epoch": 4.445750634083498, - "grad_norm": 0.006812307517975569, - "learning_rate": 0.00019999025441004532, - "loss": 46.0, - "step": 27606 - }, - { - "epoch": 4.4459116711622855, - "grad_norm": 0.005776179488748312, - "learning_rate": 0.00019999025370373948, - "loss": 46.0, - "step": 27607 - }, - { - "epoch": 4.446072708241072, - "grad_norm": 0.012053025886416435, - "learning_rate": 0.00019999025299740803, - "loss": 46.0, - "step": 27608 - }, - { - "epoch": 4.4462337453198595, - "grad_norm": 0.0008147385669872165, - "learning_rate": 0.00019999025229105097, - "loss": 46.0, - "step": 27609 - }, - { - "epoch": 4.446394782398647, - "grad_norm": 0.008392365649342537, - "learning_rate": 0.00019999025158466834, - "loss": 46.0, - "step": 27610 - }, - { - "epoch": 4.446555819477434, - "grad_norm": 0.001987253315746784, - "learning_rate": 0.0001999902508782601, - "loss": 46.0, - "step": 27611 - }, - { - "epoch": 4.446716856556222, - "grad_norm": 0.006051927804946899, - "learning_rate": 0.0001999902501718263, - "loss": 46.0, - "step": 27612 - }, - { - "epoch": 4.446877893635009, - "grad_norm": 0.004000207409262657, - "learning_rate": 0.0001999902494653669, - "loss": 46.0, - "step": 27613 - }, - { - "epoch": 4.447038930713797, - "grad_norm": 0.008097521029412746, - "learning_rate": 0.0001999902487588819, - "loss": 46.0, - "step": 27614 - }, - { - "epoch": 4.447199967792584, - "grad_norm": 0.00424214918166399, - "learning_rate": 0.0001999902480523713, - "loss": 46.0, - "step": 27615 - }, - { - "epoch": 4.447361004871372, - "grad_norm": 0.006864896509796381, - "learning_rate": 0.0001999902473458351, - "loss": 46.0, - "step": 27616 - }, - { - "epoch": 4.447522041950159, - "grad_norm": 0.0033597571309655905, - "learning_rate": 0.00019999024663927334, - "loss": 46.0, - "step": 27617 - }, - { - "epoch": 4.4476830790289466, - "grad_norm": 0.0025569158606231213, - "learning_rate": 0.00019999024593268593, - "loss": 46.0, - "step": 27618 - }, - { - "epoch": 4.447844116107734, - "grad_norm": 0.002775526838377118, - "learning_rate": 0.000199990245226073, - "loss": 46.0, - "step": 27619 - }, - { - "epoch": 4.4480051531865215, - "grad_norm": 0.0027869076002389193, - "learning_rate": 0.00019999024451943442, - "loss": 46.0, - "step": 27620 - }, - { - "epoch": 4.448166190265309, - "grad_norm": 0.0014000433729961514, - "learning_rate": 0.00019999024381277028, - "loss": 46.0, - "step": 27621 - }, - { - "epoch": 4.448327227344096, - "grad_norm": 0.004964140709489584, - "learning_rate": 0.00019999024310608056, - "loss": 46.0, - "step": 27622 - }, - { - "epoch": 4.448488264422883, - "grad_norm": 0.001712570432573557, - "learning_rate": 0.0001999902423993652, - "loss": 46.0, - "step": 27623 - }, - { - "epoch": 4.44864930150167, - "grad_norm": 0.0012439304264262319, - "learning_rate": 0.00019999024169262426, - "loss": 46.0, - "step": 27624 - }, - { - "epoch": 4.448810338580458, - "grad_norm": 0.002051472896710038, - "learning_rate": 0.00019999024098585775, - "loss": 46.0, - "step": 27625 - }, - { - "epoch": 4.448971375659245, - "grad_norm": 0.004451302345842123, - "learning_rate": 0.00019999024027906565, - "loss": 46.0, - "step": 27626 - }, - { - "epoch": 4.449132412738033, - "grad_norm": 0.005217827390879393, - "learning_rate": 0.00019999023957224793, - "loss": 46.0, - "step": 27627 - }, - { - "epoch": 4.44929344981682, - "grad_norm": 0.002307931426912546, - "learning_rate": 0.00019999023886540466, - "loss": 46.0, - "step": 27628 - }, - { - "epoch": 4.449454486895608, - "grad_norm": 0.008777051232755184, - "learning_rate": 0.00019999023815853574, - "loss": 46.0, - "step": 27629 - }, - { - "epoch": 4.449615523974395, - "grad_norm": 0.009207247756421566, - "learning_rate": 0.00019999023745164126, - "loss": 46.0, - "step": 27630 - }, - { - "epoch": 4.4497765610531825, - "grad_norm": 0.01339022722095251, - "learning_rate": 0.0001999902367447212, - "loss": 46.0, - "step": 27631 - }, - { - "epoch": 4.44993759813197, - "grad_norm": 0.007821612060070038, - "learning_rate": 0.00019999023603777555, - "loss": 46.0, - "step": 27632 - }, - { - "epoch": 4.450098635210757, - "grad_norm": 0.005127355922013521, - "learning_rate": 0.00019999023533080426, - "loss": 46.0, - "step": 27633 - }, - { - "epoch": 4.450259672289545, - "grad_norm": 0.003538827644661069, - "learning_rate": 0.0001999902346238074, - "loss": 46.0, - "step": 27634 - }, - { - "epoch": 4.450420709368332, - "grad_norm": 0.0018289581639692187, - "learning_rate": 0.00019999023391678496, - "loss": 46.0, - "step": 27635 - }, - { - "epoch": 4.45058174644712, - "grad_norm": 0.0012734842021018267, - "learning_rate": 0.00019999023320973694, - "loss": 46.0, - "step": 27636 - }, - { - "epoch": 4.450742783525907, - "grad_norm": 0.004097527824342251, - "learning_rate": 0.00019999023250266332, - "loss": 46.0, - "step": 27637 - }, - { - "epoch": 4.450903820604694, - "grad_norm": 0.0016025390941649675, - "learning_rate": 0.0001999902317955641, - "loss": 46.0, - "step": 27638 - }, - { - "epoch": 4.451064857683481, - "grad_norm": 0.00741965277120471, - "learning_rate": 0.00019999023108843925, - "loss": 46.0, - "step": 27639 - }, - { - "epoch": 4.451225894762269, - "grad_norm": 0.0016680487897247076, - "learning_rate": 0.00019999023038128888, - "loss": 46.0, - "step": 27640 - }, - { - "epoch": 4.451386931841056, - "grad_norm": 0.0036669380497187376, - "learning_rate": 0.00019999022967411286, - "loss": 46.0, - "step": 27641 - }, - { - "epoch": 4.451547968919844, - "grad_norm": 0.0036178594455122948, - "learning_rate": 0.00019999022896691128, - "loss": 46.0, - "step": 27642 - }, - { - "epoch": 4.451709005998631, - "grad_norm": 0.0038552344776690006, - "learning_rate": 0.0001999902282596841, - "loss": 46.0, - "step": 27643 - }, - { - "epoch": 4.4518700430774185, - "grad_norm": 0.004150839056819677, - "learning_rate": 0.00019999022755243134, - "loss": 46.0, - "step": 27644 - }, - { - "epoch": 4.452031080156206, - "grad_norm": 0.009294205345213413, - "learning_rate": 0.00019999022684515297, - "loss": 46.0, - "step": 27645 - }, - { - "epoch": 4.452192117234993, - "grad_norm": 0.010999877005815506, - "learning_rate": 0.000199990226137849, - "loss": 46.0, - "step": 27646 - }, - { - "epoch": 4.452353154313781, - "grad_norm": 0.0011699118185788393, - "learning_rate": 0.00019999022543051943, - "loss": 46.0, - "step": 27647 - }, - { - "epoch": 4.452514191392568, - "grad_norm": 0.004467427730560303, - "learning_rate": 0.0001999902247231643, - "loss": 46.0, - "step": 27648 - }, - { - "epoch": 4.452675228471356, - "grad_norm": 0.004868346732109785, - "learning_rate": 0.00019999022401578356, - "loss": 46.0, - "step": 27649 - }, - { - "epoch": 4.452836265550143, - "grad_norm": 0.0024809425231069326, - "learning_rate": 0.00019999022330837726, - "loss": 46.0, - "step": 27650 - }, - { - "epoch": 4.452997302628931, - "grad_norm": 0.005295577459037304, - "learning_rate": 0.00019999022260094531, - "loss": 46.0, - "step": 27651 - }, - { - "epoch": 4.453158339707718, - "grad_norm": 0.004362618084996939, - "learning_rate": 0.00019999022189348778, - "loss": 46.0, - "step": 27652 - }, - { - "epoch": 4.453319376786505, - "grad_norm": 0.0030936384573578835, - "learning_rate": 0.0001999902211860047, - "loss": 46.0, - "step": 27653 - }, - { - "epoch": 4.453480413865292, - "grad_norm": 0.0027114993426948786, - "learning_rate": 0.000199990220478496, - "loss": 46.0, - "step": 27654 - }, - { - "epoch": 4.45364145094408, - "grad_norm": 0.020321976393461227, - "learning_rate": 0.0001999902197709617, - "loss": 46.0, - "step": 27655 - }, - { - "epoch": 4.453802488022867, - "grad_norm": 0.004300059285014868, - "learning_rate": 0.00019999021906340182, - "loss": 46.0, - "step": 27656 - }, - { - "epoch": 4.4539635251016545, - "grad_norm": 0.007416091859340668, - "learning_rate": 0.00019999021835581635, - "loss": 46.0, - "step": 27657 - }, - { - "epoch": 4.454124562180442, - "grad_norm": 0.012945530004799366, - "learning_rate": 0.00019999021764820527, - "loss": 46.0, - "step": 27658 - }, - { - "epoch": 4.454285599259229, - "grad_norm": 0.004318093881011009, - "learning_rate": 0.0001999902169405686, - "loss": 46.0, - "step": 27659 - }, - { - "epoch": 4.454446636338017, - "grad_norm": 0.003392336890101433, - "learning_rate": 0.00019999021623290638, - "loss": 46.0, - "step": 27660 - }, - { - "epoch": 4.454607673416804, - "grad_norm": 0.0029355748556554317, - "learning_rate": 0.0001999902155252185, - "loss": 46.0, - "step": 27661 - }, - { - "epoch": 4.454768710495592, - "grad_norm": 0.004893424920737743, - "learning_rate": 0.00019999021481750505, - "loss": 46.0, - "step": 27662 - }, - { - "epoch": 4.454929747574379, - "grad_norm": 0.014779835939407349, - "learning_rate": 0.00019999021410976604, - "loss": 46.0, - "step": 27663 - }, - { - "epoch": 4.455090784653167, - "grad_norm": 0.004990631714463234, - "learning_rate": 0.00019999021340200143, - "loss": 46.0, - "step": 27664 - }, - { - "epoch": 4.455251821731954, - "grad_norm": 0.0011120865819975734, - "learning_rate": 0.00019999021269421122, - "loss": 46.0, - "step": 27665 - }, - { - "epoch": 4.455412858810742, - "grad_norm": 0.0024424034636467695, - "learning_rate": 0.0001999902119863954, - "loss": 46.0, - "step": 27666 - }, - { - "epoch": 4.455573895889529, - "grad_norm": 0.005659935064613819, - "learning_rate": 0.00019999021127855402, - "loss": 46.0, - "step": 27667 - }, - { - "epoch": 4.455734932968316, - "grad_norm": 0.004742661956697702, - "learning_rate": 0.00019999021057068701, - "loss": 46.0, - "step": 27668 - }, - { - "epoch": 4.455895970047103, - "grad_norm": 0.001482888008467853, - "learning_rate": 0.00019999020986279442, - "loss": 46.0, - "step": 27669 - }, - { - "epoch": 4.4560570071258905, - "grad_norm": 0.0017236744752153754, - "learning_rate": 0.00019999020915487624, - "loss": 46.0, - "step": 27670 - }, - { - "epoch": 4.456218044204678, - "grad_norm": 0.004353113938122988, - "learning_rate": 0.00019999020844693247, - "loss": 46.0, - "step": 27671 - }, - { - "epoch": 4.456379081283465, - "grad_norm": 0.005677455570548773, - "learning_rate": 0.00019999020773896312, - "loss": 46.0, - "step": 27672 - }, - { - "epoch": 4.456540118362253, - "grad_norm": 0.0056228190660476685, - "learning_rate": 0.00019999020703096818, - "loss": 46.0, - "step": 27673 - }, - { - "epoch": 4.45670115544104, - "grad_norm": 0.00876994151622057, - "learning_rate": 0.0001999902063229476, - "loss": 46.0, - "step": 27674 - }, - { - "epoch": 4.456862192519828, - "grad_norm": 0.0034961539786309004, - "learning_rate": 0.0001999902056149015, - "loss": 46.0, - "step": 27675 - }, - { - "epoch": 4.457023229598615, - "grad_norm": 0.005490646697580814, - "learning_rate": 0.00019999020490682975, - "loss": 46.0, - "step": 27676 - }, - { - "epoch": 4.457184266677403, - "grad_norm": 0.015552288852632046, - "learning_rate": 0.00019999020419873243, - "loss": 46.0, - "step": 27677 - }, - { - "epoch": 4.45734530375619, - "grad_norm": 0.004894928075373173, - "learning_rate": 0.0001999902034906095, - "loss": 46.0, - "step": 27678 - }, - { - "epoch": 4.4575063408349775, - "grad_norm": 0.002772238804027438, - "learning_rate": 0.00019999020278246098, - "loss": 46.0, - "step": 27679 - }, - { - "epoch": 4.457667377913765, - "grad_norm": 0.003138558939099312, - "learning_rate": 0.0001999902020742869, - "loss": 46.0, - "step": 27680 - }, - { - "epoch": 4.4578284149925524, - "grad_norm": 0.008120818994939327, - "learning_rate": 0.0001999902013660872, - "loss": 46.0, - "step": 27681 - }, - { - "epoch": 4.45798945207134, - "grad_norm": 0.002648937748745084, - "learning_rate": 0.00019999020065786192, - "loss": 46.0, - "step": 27682 - }, - { - "epoch": 4.4581504891501265, - "grad_norm": 0.004655967932194471, - "learning_rate": 0.00019999019994961103, - "loss": 46.0, - "step": 27683 - }, - { - "epoch": 4.458311526228914, - "grad_norm": 0.008802182972431183, - "learning_rate": 0.00019999019924133457, - "loss": 46.0, - "step": 27684 - }, - { - "epoch": 4.458472563307701, - "grad_norm": 0.009249554947018623, - "learning_rate": 0.0001999901985330325, - "loss": 46.0, - "step": 27685 - }, - { - "epoch": 4.458633600386489, - "grad_norm": 0.003689124481752515, - "learning_rate": 0.00019999019782470487, - "loss": 46.0, - "step": 27686 - }, - { - "epoch": 4.458794637465276, - "grad_norm": 0.002156637143343687, - "learning_rate": 0.00019999019711635163, - "loss": 46.0, - "step": 27687 - }, - { - "epoch": 4.458955674544064, - "grad_norm": 0.0007555747870355844, - "learning_rate": 0.00019999019640797277, - "loss": 46.0, - "step": 27688 - }, - { - "epoch": 4.459116711622851, - "grad_norm": 0.0037337748799473047, - "learning_rate": 0.00019999019569956835, - "loss": 46.0, - "step": 27689 - }, - { - "epoch": 4.459277748701639, - "grad_norm": 0.008690381422638893, - "learning_rate": 0.00019999019499113832, - "loss": 46.0, - "step": 27690 - }, - { - "epoch": 4.459438785780426, - "grad_norm": 0.002384126652032137, - "learning_rate": 0.0001999901942826827, - "loss": 46.0, - "step": 27691 - }, - { - "epoch": 4.4595998228592135, - "grad_norm": 0.0035090213641524315, - "learning_rate": 0.0001999901935742015, - "loss": 46.0, - "step": 27692 - }, - { - "epoch": 4.459760859938001, - "grad_norm": 0.011557115241885185, - "learning_rate": 0.0001999901928656947, - "loss": 46.0, - "step": 27693 - }, - { - "epoch": 4.459921897016788, - "grad_norm": 0.0022580409422516823, - "learning_rate": 0.0001999901921571623, - "loss": 46.0, - "step": 27694 - }, - { - "epoch": 4.460082934095576, - "grad_norm": 0.003985777031630278, - "learning_rate": 0.00019999019144860433, - "loss": 46.0, - "step": 27695 - }, - { - "epoch": 4.460243971174362, - "grad_norm": 0.01426723413169384, - "learning_rate": 0.00019999019074002074, - "loss": 46.0, - "step": 27696 - }, - { - "epoch": 4.46040500825315, - "grad_norm": 0.010837400332093239, - "learning_rate": 0.00019999019003141157, - "loss": 46.0, - "step": 27697 - }, - { - "epoch": 4.460566045331937, - "grad_norm": 0.025637198239564896, - "learning_rate": 0.00019999018932277682, - "loss": 46.0, - "step": 27698 - }, - { - "epoch": 4.460727082410725, - "grad_norm": 0.0017911669565364718, - "learning_rate": 0.00019999018861411645, - "loss": 46.0, - "step": 27699 - }, - { - "epoch": 4.460888119489512, - "grad_norm": 0.003228267887607217, - "learning_rate": 0.0001999901879054305, - "loss": 46.0, - "step": 27700 - }, - { - "epoch": 4.4610491565683, - "grad_norm": 0.008008738048374653, - "learning_rate": 0.00019999018719671897, - "loss": 46.0, - "step": 27701 - }, - { - "epoch": 4.461210193647087, - "grad_norm": 0.006623091176152229, - "learning_rate": 0.00019999018648798184, - "loss": 46.0, - "step": 27702 - }, - { - "epoch": 4.461371230725875, - "grad_norm": 0.00327286752872169, - "learning_rate": 0.00019999018577921912, - "loss": 46.0, - "step": 27703 - }, - { - "epoch": 4.461532267804662, - "grad_norm": 0.010388505645096302, - "learning_rate": 0.00019999018507043078, - "loss": 46.0, - "step": 27704 - }, - { - "epoch": 4.4616933048834495, - "grad_norm": 0.006412656512111425, - "learning_rate": 0.0001999901843616169, - "loss": 46.0, - "step": 27705 - }, - { - "epoch": 4.461854341962237, - "grad_norm": 0.004264703020453453, - "learning_rate": 0.00019999018365277738, - "loss": 46.0, - "step": 27706 - }, - { - "epoch": 4.462015379041024, - "grad_norm": 0.005853310227394104, - "learning_rate": 0.00019999018294391229, - "loss": 46.0, - "step": 27707 - }, - { - "epoch": 4.462176416119812, - "grad_norm": 0.0006503654294647276, - "learning_rate": 0.0001999901822350216, - "loss": 46.0, - "step": 27708 - }, - { - "epoch": 4.462337453198599, - "grad_norm": 0.0021483106538653374, - "learning_rate": 0.00019999018152610533, - "loss": 46.0, - "step": 27709 - }, - { - "epoch": 4.462498490277387, - "grad_norm": 0.008021563291549683, - "learning_rate": 0.00019999018081716345, - "loss": 46.0, - "step": 27710 - }, - { - "epoch": 4.462659527356173, - "grad_norm": 0.013105835765600204, - "learning_rate": 0.00019999018010819598, - "loss": 46.0, - "step": 27711 - }, - { - "epoch": 4.462820564434961, - "grad_norm": 0.0016825452912598848, - "learning_rate": 0.00019999017939920295, - "loss": 46.0, - "step": 27712 - }, - { - "epoch": 4.462981601513748, - "grad_norm": 0.004741394426673651, - "learning_rate": 0.00019999017869018427, - "loss": 46.0, - "step": 27713 - }, - { - "epoch": 4.463142638592536, - "grad_norm": 0.001727042836137116, - "learning_rate": 0.00019999017798114004, - "loss": 46.0, - "step": 27714 - }, - { - "epoch": 4.463303675671323, - "grad_norm": 0.0026482846587896347, - "learning_rate": 0.00019999017727207022, - "loss": 46.0, - "step": 27715 - }, - { - "epoch": 4.463464712750111, - "grad_norm": 0.010358884930610657, - "learning_rate": 0.00019999017656297479, - "loss": 46.0, - "step": 27716 - }, - { - "epoch": 4.463625749828898, - "grad_norm": 0.002391768153756857, - "learning_rate": 0.0001999901758538538, - "loss": 46.0, - "step": 27717 - }, - { - "epoch": 4.4637867869076855, - "grad_norm": 0.0013181371614336967, - "learning_rate": 0.00019999017514470718, - "loss": 46.0, - "step": 27718 - }, - { - "epoch": 4.463947823986473, - "grad_norm": 0.0069139860570430756, - "learning_rate": 0.00019999017443553496, - "loss": 46.0, - "step": 27719 - }, - { - "epoch": 4.46410886106526, - "grad_norm": 0.004040156025439501, - "learning_rate": 0.00019999017372633718, - "loss": 46.0, - "step": 27720 - }, - { - "epoch": 4.464269898144048, - "grad_norm": 0.005994816310703754, - "learning_rate": 0.00019999017301711378, - "loss": 46.0, - "step": 27721 - }, - { - "epoch": 4.464430935222835, - "grad_norm": 0.010004755109548569, - "learning_rate": 0.00019999017230786482, - "loss": 46.0, - "step": 27722 - }, - { - "epoch": 4.464591972301623, - "grad_norm": 0.002008899115025997, - "learning_rate": 0.00019999017159859022, - "loss": 46.0, - "step": 27723 - }, - { - "epoch": 4.46475300938041, - "grad_norm": 0.0023983328137546778, - "learning_rate": 0.00019999017088929007, - "loss": 46.0, - "step": 27724 - }, - { - "epoch": 4.464914046459198, - "grad_norm": 0.0013118039350956678, - "learning_rate": 0.00019999017017996432, - "loss": 46.0, - "step": 27725 - }, - { - "epoch": 4.465075083537984, - "grad_norm": 0.0072244820185005665, - "learning_rate": 0.00019999016947061296, - "loss": 46.0, - "step": 27726 - }, - { - "epoch": 4.465236120616772, - "grad_norm": 0.001990034943446517, - "learning_rate": 0.000199990168761236, - "loss": 46.0, - "step": 27727 - }, - { - "epoch": 4.465397157695559, - "grad_norm": 0.0032743357587605715, - "learning_rate": 0.00019999016805183348, - "loss": 46.0, - "step": 27728 - }, - { - "epoch": 4.465558194774347, - "grad_norm": 0.0027179557364434004, - "learning_rate": 0.00019999016734240536, - "loss": 46.0, - "step": 27729 - }, - { - "epoch": 4.465719231853134, - "grad_norm": 0.0036493882071226835, - "learning_rate": 0.00019999016663295165, - "loss": 46.0, - "step": 27730 - }, - { - "epoch": 4.4658802689319215, - "grad_norm": 0.021345365792512894, - "learning_rate": 0.00019999016592347232, - "loss": 46.0, - "step": 27731 - }, - { - "epoch": 4.466041306010709, - "grad_norm": 0.0017449143342673779, - "learning_rate": 0.00019999016521396744, - "loss": 46.0, - "step": 27732 - }, - { - "epoch": 4.466202343089496, - "grad_norm": 0.0019290444906800985, - "learning_rate": 0.00019999016450443692, - "loss": 46.0, - "step": 27733 - }, - { - "epoch": 4.466363380168284, - "grad_norm": 0.008033310994505882, - "learning_rate": 0.00019999016379488086, - "loss": 46.0, - "step": 27734 - }, - { - "epoch": 4.466524417247071, - "grad_norm": 0.008996175602078438, - "learning_rate": 0.00019999016308529916, - "loss": 46.0, - "step": 27735 - }, - { - "epoch": 4.466685454325859, - "grad_norm": 0.0116839949041605, - "learning_rate": 0.0001999901623756919, - "loss": 46.0, - "step": 27736 - }, - { - "epoch": 4.466846491404646, - "grad_norm": 0.0013488969998434186, - "learning_rate": 0.00019999016166605903, - "loss": 46.0, - "step": 27737 - }, - { - "epoch": 4.467007528483434, - "grad_norm": 0.005387578159570694, - "learning_rate": 0.00019999016095640057, - "loss": 46.0, - "step": 27738 - }, - { - "epoch": 4.467168565562221, - "grad_norm": 0.01853710040450096, - "learning_rate": 0.00019999016024671652, - "loss": 46.0, - "step": 27739 - }, - { - "epoch": 4.4673296026410085, - "grad_norm": 0.0019915702287107706, - "learning_rate": 0.00019999015953700688, - "loss": 46.0, - "step": 27740 - }, - { - "epoch": 4.467490639719795, - "grad_norm": 0.006848345045000315, - "learning_rate": 0.00019999015882727163, - "loss": 46.0, - "step": 27741 - }, - { - "epoch": 4.4676516767985825, - "grad_norm": 0.005931831430643797, - "learning_rate": 0.0001999901581175108, - "loss": 46.0, - "step": 27742 - }, - { - "epoch": 4.46781271387737, - "grad_norm": 0.013120879419147968, - "learning_rate": 0.0001999901574077244, - "loss": 46.0, - "step": 27743 - }, - { - "epoch": 4.4679737509561575, - "grad_norm": 0.011298086494207382, - "learning_rate": 0.0001999901566979124, - "loss": 46.0, - "step": 27744 - }, - { - "epoch": 4.468134788034945, - "grad_norm": 0.004889341536909342, - "learning_rate": 0.0001999901559880748, - "loss": 46.0, - "step": 27745 - }, - { - "epoch": 4.468295825113732, - "grad_norm": 0.0023594731464982033, - "learning_rate": 0.00019999015527821158, - "loss": 46.0, - "step": 27746 - }, - { - "epoch": 4.46845686219252, - "grad_norm": 0.0008554062806069851, - "learning_rate": 0.0001999901545683228, - "loss": 46.0, - "step": 27747 - }, - { - "epoch": 4.468617899271307, - "grad_norm": 0.009103850461542606, - "learning_rate": 0.00019999015385840842, - "loss": 46.0, - "step": 27748 - }, - { - "epoch": 4.468778936350095, - "grad_norm": 0.010141601786017418, - "learning_rate": 0.00019999015314846845, - "loss": 46.0, - "step": 27749 - }, - { - "epoch": 4.468939973428882, - "grad_norm": 0.009308332577347755, - "learning_rate": 0.0001999901524385029, - "loss": 46.0, - "step": 27750 - }, - { - "epoch": 4.46910101050767, - "grad_norm": 0.0033651439007371664, - "learning_rate": 0.00019999015172851174, - "loss": 46.0, - "step": 27751 - }, - { - "epoch": 4.469262047586457, - "grad_norm": 0.002752259373664856, - "learning_rate": 0.00019999015101849498, - "loss": 46.0, - "step": 27752 - }, - { - "epoch": 4.4694230846652445, - "grad_norm": 0.0032868387643247843, - "learning_rate": 0.00019999015030845266, - "loss": 46.0, - "step": 27753 - }, - { - "epoch": 4.469584121744032, - "grad_norm": 0.0008634725236333907, - "learning_rate": 0.0001999901495983847, - "loss": 46.0, - "step": 27754 - }, - { - "epoch": 4.469745158822819, - "grad_norm": 0.00149381288792938, - "learning_rate": 0.00019999014888829117, - "loss": 46.0, - "step": 27755 - }, - { - "epoch": 4.469906195901606, - "grad_norm": 0.00942729227244854, - "learning_rate": 0.00019999014817817206, - "loss": 46.0, - "step": 27756 - }, - { - "epoch": 4.470067232980393, - "grad_norm": 0.003926991950720549, - "learning_rate": 0.00019999014746802733, - "loss": 46.0, - "step": 27757 - }, - { - "epoch": 4.470228270059181, - "grad_norm": 0.0026579948607832193, - "learning_rate": 0.00019999014675785705, - "loss": 46.0, - "step": 27758 - }, - { - "epoch": 4.470389307137968, - "grad_norm": 0.010880478657782078, - "learning_rate": 0.00019999014604766115, - "loss": 46.0, - "step": 27759 - }, - { - "epoch": 4.470550344216756, - "grad_norm": 0.010115723125636578, - "learning_rate": 0.00019999014533743966, - "loss": 46.0, - "step": 27760 - }, - { - "epoch": 4.470711381295543, - "grad_norm": 0.006723347585648298, - "learning_rate": 0.00019999014462719256, - "loss": 46.0, - "step": 27761 - }, - { - "epoch": 4.470872418374331, - "grad_norm": 0.0032588550820946693, - "learning_rate": 0.0001999901439169199, - "loss": 46.0, - "step": 27762 - }, - { - "epoch": 4.471033455453118, - "grad_norm": 0.006792832165956497, - "learning_rate": 0.00019999014320662165, - "loss": 46.0, - "step": 27763 - }, - { - "epoch": 4.471194492531906, - "grad_norm": 0.005623354576528072, - "learning_rate": 0.00019999014249629776, - "loss": 46.0, - "step": 27764 - }, - { - "epoch": 4.471355529610693, - "grad_norm": 0.0070923203602433205, - "learning_rate": 0.0001999901417859483, - "loss": 46.0, - "step": 27765 - }, - { - "epoch": 4.4715165666894805, - "grad_norm": 0.00419378699734807, - "learning_rate": 0.0001999901410755733, - "loss": 46.0, - "step": 27766 - }, - { - "epoch": 4.471677603768268, - "grad_norm": 0.0019027515081688762, - "learning_rate": 0.00019999014036517265, - "loss": 46.0, - "step": 27767 - }, - { - "epoch": 4.471838640847055, - "grad_norm": 0.0009966065408661962, - "learning_rate": 0.00019999013965474641, - "loss": 46.0, - "step": 27768 - }, - { - "epoch": 4.471999677925842, - "grad_norm": 0.006412910763174295, - "learning_rate": 0.0001999901389442946, - "loss": 46.0, - "step": 27769 - }, - { - "epoch": 4.472160715004629, - "grad_norm": 0.013252461329102516, - "learning_rate": 0.00019999013823381718, - "loss": 46.0, - "step": 27770 - }, - { - "epoch": 4.472321752083417, - "grad_norm": 0.002783434931188822, - "learning_rate": 0.0001999901375233142, - "loss": 46.0, - "step": 27771 - }, - { - "epoch": 4.472482789162204, - "grad_norm": 0.001420151675119996, - "learning_rate": 0.0001999901368127856, - "loss": 46.0, - "step": 27772 - }, - { - "epoch": 4.472643826240992, - "grad_norm": 0.002665820997208357, - "learning_rate": 0.0001999901361022314, - "loss": 46.0, - "step": 27773 - }, - { - "epoch": 4.472804863319779, - "grad_norm": 0.008886474184691906, - "learning_rate": 0.0001999901353916516, - "loss": 46.0, - "step": 27774 - }, - { - "epoch": 4.472965900398567, - "grad_norm": 0.00343678193166852, - "learning_rate": 0.00019999013468104625, - "loss": 46.0, - "step": 27775 - }, - { - "epoch": 4.473126937477354, - "grad_norm": 0.004481021780520678, - "learning_rate": 0.0001999901339704153, - "loss": 46.0, - "step": 27776 - }, - { - "epoch": 4.473287974556142, - "grad_norm": 0.009889495559036732, - "learning_rate": 0.00019999013325975874, - "loss": 46.0, - "step": 27777 - }, - { - "epoch": 4.473449011634929, - "grad_norm": 0.007639762479811907, - "learning_rate": 0.00019999013254907657, - "loss": 46.0, - "step": 27778 - }, - { - "epoch": 4.4736100487137165, - "grad_norm": 0.0052644130773842335, - "learning_rate": 0.00019999013183836882, - "loss": 46.0, - "step": 27779 - }, - { - "epoch": 4.473771085792504, - "grad_norm": 0.015168738551437855, - "learning_rate": 0.00019999013112763548, - "loss": 46.0, - "step": 27780 - }, - { - "epoch": 4.473932122871291, - "grad_norm": 0.001831917092204094, - "learning_rate": 0.00019999013041687659, - "loss": 46.0, - "step": 27781 - }, - { - "epoch": 4.474093159950079, - "grad_norm": 0.0044548953883349895, - "learning_rate": 0.00019999012970609205, - "loss": 46.0, - "step": 27782 - }, - { - "epoch": 4.474254197028866, - "grad_norm": 0.0151712941005826, - "learning_rate": 0.00019999012899528195, - "loss": 46.0, - "step": 27783 - }, - { - "epoch": 4.474415234107653, - "grad_norm": 0.005104460753500462, - "learning_rate": 0.00019999012828444623, - "loss": 46.0, - "step": 27784 - }, - { - "epoch": 4.47457627118644, - "grad_norm": 0.0052878339774906635, - "learning_rate": 0.00019999012757358493, - "loss": 46.0, - "step": 27785 - }, - { - "epoch": 4.474737308265228, - "grad_norm": 0.004600665532052517, - "learning_rate": 0.00019999012686269804, - "loss": 46.0, - "step": 27786 - }, - { - "epoch": 4.474898345344015, - "grad_norm": 0.0005238695885054767, - "learning_rate": 0.00019999012615178557, - "loss": 46.0, - "step": 27787 - }, - { - "epoch": 4.475059382422803, - "grad_norm": 0.0009664374520070851, - "learning_rate": 0.00019999012544084748, - "loss": 46.0, - "step": 27788 - }, - { - "epoch": 4.47522041950159, - "grad_norm": 0.012209751643240452, - "learning_rate": 0.00019999012472988383, - "loss": 46.0, - "step": 27789 - }, - { - "epoch": 4.475381456580378, - "grad_norm": 0.015519886277616024, - "learning_rate": 0.00019999012401889456, - "loss": 46.0, - "step": 27790 - }, - { - "epoch": 4.475542493659165, - "grad_norm": 0.008853236213326454, - "learning_rate": 0.0001999901233078797, - "loss": 46.0, - "step": 27791 - }, - { - "epoch": 4.4757035307379525, - "grad_norm": 0.00215332699008286, - "learning_rate": 0.00019999012259683927, - "loss": 46.0, - "step": 27792 - }, - { - "epoch": 4.47586456781674, - "grad_norm": 0.0018610305851325393, - "learning_rate": 0.00019999012188577324, - "loss": 46.0, - "step": 27793 - }, - { - "epoch": 4.476025604895527, - "grad_norm": 0.006760372314602137, - "learning_rate": 0.0001999901211746816, - "loss": 46.0, - "step": 27794 - }, - { - "epoch": 4.476186641974315, - "grad_norm": 0.011449955403804779, - "learning_rate": 0.00019999012046356438, - "loss": 46.0, - "step": 27795 - }, - { - "epoch": 4.476347679053102, - "grad_norm": 0.0010572896571829915, - "learning_rate": 0.00019999011975242156, - "loss": 46.0, - "step": 27796 - }, - { - "epoch": 4.47650871613189, - "grad_norm": 0.0009545032517053187, - "learning_rate": 0.00019999011904125316, - "loss": 46.0, - "step": 27797 - }, - { - "epoch": 4.476669753210677, - "grad_norm": 0.005871484987437725, - "learning_rate": 0.00019999011833005917, - "loss": 46.0, - "step": 27798 - }, - { - "epoch": 4.476830790289464, - "grad_norm": 0.0022825472988188267, - "learning_rate": 0.0001999901176188396, - "loss": 46.0, - "step": 27799 - }, - { - "epoch": 4.476991827368251, - "grad_norm": 0.002660314552485943, - "learning_rate": 0.0001999901169075944, - "loss": 46.0, - "step": 27800 - }, - { - "epoch": 4.477152864447039, - "grad_norm": 0.0111022237688303, - "learning_rate": 0.00019999011619632363, - "loss": 46.0, - "step": 27801 - }, - { - "epoch": 4.477313901525826, - "grad_norm": 0.0045442297123372555, - "learning_rate": 0.00019999011548502726, - "loss": 46.0, - "step": 27802 - }, - { - "epoch": 4.4774749386046135, - "grad_norm": 0.001113284844905138, - "learning_rate": 0.00019999011477370528, - "loss": 46.0, - "step": 27803 - }, - { - "epoch": 4.477635975683401, - "grad_norm": 0.005483392626047134, - "learning_rate": 0.00019999011406235772, - "loss": 46.0, - "step": 27804 - }, - { - "epoch": 4.4777970127621884, - "grad_norm": 0.0027010401245206594, - "learning_rate": 0.0001999901133509846, - "loss": 46.0, - "step": 27805 - }, - { - "epoch": 4.477958049840976, - "grad_norm": 0.0022945499513298273, - "learning_rate": 0.00019999011263958588, - "loss": 46.0, - "step": 27806 - }, - { - "epoch": 4.478119086919763, - "grad_norm": 0.0059724366292357445, - "learning_rate": 0.00019999011192816152, - "loss": 46.0, - "step": 27807 - }, - { - "epoch": 4.478280123998551, - "grad_norm": 0.0036939126439392567, - "learning_rate": 0.0001999901112167116, - "loss": 46.0, - "step": 27808 - }, - { - "epoch": 4.478441161077338, - "grad_norm": 0.002406270941719413, - "learning_rate": 0.0001999901105052361, - "loss": 46.0, - "step": 27809 - }, - { - "epoch": 4.478602198156126, - "grad_norm": 0.017762577161192894, - "learning_rate": 0.000199990109793735, - "loss": 46.0, - "step": 27810 - }, - { - "epoch": 4.478763235234913, - "grad_norm": 0.0014806096442043781, - "learning_rate": 0.0001999901090822083, - "loss": 46.0, - "step": 27811 - }, - { - "epoch": 4.478924272313701, - "grad_norm": 0.003524838015437126, - "learning_rate": 0.000199990108370656, - "loss": 46.0, - "step": 27812 - }, - { - "epoch": 4.479085309392488, - "grad_norm": 0.0012625409290194511, - "learning_rate": 0.00019999010765907812, - "loss": 46.0, - "step": 27813 - }, - { - "epoch": 4.479246346471275, - "grad_norm": 0.003366118762642145, - "learning_rate": 0.00019999010694747465, - "loss": 46.0, - "step": 27814 - }, - { - "epoch": 4.479407383550062, - "grad_norm": 0.005326335318386555, - "learning_rate": 0.00019999010623584557, - "loss": 46.0, - "step": 27815 - }, - { - "epoch": 4.4795684206288495, - "grad_norm": 0.017563287168741226, - "learning_rate": 0.00019999010552419094, - "loss": 46.0, - "step": 27816 - }, - { - "epoch": 4.479729457707637, - "grad_norm": 0.004145849496126175, - "learning_rate": 0.00019999010481251065, - "loss": 46.0, - "step": 27817 - }, - { - "epoch": 4.479890494786424, - "grad_norm": 0.003242343896999955, - "learning_rate": 0.00019999010410080481, - "loss": 46.0, - "step": 27818 - }, - { - "epoch": 4.480051531865212, - "grad_norm": 0.008767087012529373, - "learning_rate": 0.00019999010338907339, - "loss": 46.0, - "step": 27819 - }, - { - "epoch": 4.480212568943999, - "grad_norm": 0.0042501818388700485, - "learning_rate": 0.00019999010267731637, - "loss": 46.0, - "step": 27820 - }, - { - "epoch": 4.480373606022787, - "grad_norm": 0.0023322668857872486, - "learning_rate": 0.00019999010196553374, - "loss": 46.0, - "step": 27821 - }, - { - "epoch": 4.480534643101574, - "grad_norm": 0.0037910579703748226, - "learning_rate": 0.00019999010125372553, - "loss": 46.0, - "step": 27822 - }, - { - "epoch": 4.480695680180362, - "grad_norm": 0.003508639056235552, - "learning_rate": 0.00019999010054189172, - "loss": 46.0, - "step": 27823 - }, - { - "epoch": 4.480856717259149, - "grad_norm": 0.0035073955077677965, - "learning_rate": 0.00019999009983003233, - "loss": 46.0, - "step": 27824 - }, - { - "epoch": 4.481017754337937, - "grad_norm": 0.0024367840960621834, - "learning_rate": 0.00019999009911814733, - "loss": 46.0, - "step": 27825 - }, - { - "epoch": 4.481178791416724, - "grad_norm": 0.003021844429895282, - "learning_rate": 0.00019999009840623676, - "loss": 46.0, - "step": 27826 - }, - { - "epoch": 4.4813398284955115, - "grad_norm": 0.0015001606661826372, - "learning_rate": 0.00019999009769430058, - "loss": 46.0, - "step": 27827 - }, - { - "epoch": 4.481500865574299, - "grad_norm": 0.0023999549448490143, - "learning_rate": 0.00019999009698233881, - "loss": 46.0, - "step": 27828 - }, - { - "epoch": 4.4816619026530855, - "grad_norm": 0.0007765580667182803, - "learning_rate": 0.00019999009627035146, - "loss": 46.0, - "step": 27829 - }, - { - "epoch": 4.481822939731873, - "grad_norm": 0.005309755448251963, - "learning_rate": 0.00019999009555833846, - "loss": 46.0, - "step": 27830 - }, - { - "epoch": 4.48198397681066, - "grad_norm": 0.0027437207754701376, - "learning_rate": 0.00019999009484629996, - "loss": 46.0, - "step": 27831 - }, - { - "epoch": 4.482145013889448, - "grad_norm": 0.002044303109869361, - "learning_rate": 0.00019999009413423582, - "loss": 46.0, - "step": 27832 - }, - { - "epoch": 4.482306050968235, - "grad_norm": 0.001303886529058218, - "learning_rate": 0.00019999009342214606, - "loss": 46.0, - "step": 27833 - }, - { - "epoch": 4.482467088047023, - "grad_norm": 0.002399574965238571, - "learning_rate": 0.00019999009271003075, - "loss": 46.0, - "step": 27834 - }, - { - "epoch": 4.48262812512581, - "grad_norm": 0.0034279238898307085, - "learning_rate": 0.00019999009199788982, - "loss": 46.0, - "step": 27835 - }, - { - "epoch": 4.482789162204598, - "grad_norm": 0.005639585666358471, - "learning_rate": 0.00019999009128572333, - "loss": 46.0, - "step": 27836 - }, - { - "epoch": 4.482950199283385, - "grad_norm": 0.018510637804865837, - "learning_rate": 0.00019999009057353125, - "loss": 46.0, - "step": 27837 - }, - { - "epoch": 4.483111236362173, - "grad_norm": 0.004043771885335445, - "learning_rate": 0.00019999008986131353, - "loss": 46.0, - "step": 27838 - }, - { - "epoch": 4.48327227344096, - "grad_norm": 0.0028538291808217764, - "learning_rate": 0.00019999008914907025, - "loss": 46.0, - "step": 27839 - }, - { - "epoch": 4.4834333105197475, - "grad_norm": 0.0019156353082507849, - "learning_rate": 0.00019999008843680138, - "loss": 46.0, - "step": 27840 - }, - { - "epoch": 4.483594347598535, - "grad_norm": 0.0016860762843862176, - "learning_rate": 0.00019999008772450693, - "loss": 46.0, - "step": 27841 - }, - { - "epoch": 4.4837553846773215, - "grad_norm": 0.0018055159598588943, - "learning_rate": 0.00019999008701218686, - "loss": 46.0, - "step": 27842 - }, - { - "epoch": 4.483916421756109, - "grad_norm": 0.004454894922673702, - "learning_rate": 0.00019999008629984123, - "loss": 46.0, - "step": 27843 - }, - { - "epoch": 4.484077458834896, - "grad_norm": 0.001974615501239896, - "learning_rate": 0.00019999008558746996, - "loss": 46.0, - "step": 27844 - }, - { - "epoch": 4.484238495913684, - "grad_norm": 0.0023097912780940533, - "learning_rate": 0.00019999008487507313, - "loss": 46.0, - "step": 27845 - }, - { - "epoch": 4.484399532992471, - "grad_norm": 0.015262618660926819, - "learning_rate": 0.0001999900841626507, - "loss": 46.0, - "step": 27846 - }, - { - "epoch": 4.484560570071259, - "grad_norm": 0.012720214203000069, - "learning_rate": 0.00019999008345020268, - "loss": 46.0, - "step": 27847 - }, - { - "epoch": 4.484721607150046, - "grad_norm": 0.004694274626672268, - "learning_rate": 0.00019999008273772906, - "loss": 46.0, - "step": 27848 - }, - { - "epoch": 4.484882644228834, - "grad_norm": 0.009466389194130898, - "learning_rate": 0.00019999008202522983, - "loss": 46.0, - "step": 27849 - }, - { - "epoch": 4.485043681307621, - "grad_norm": 0.0029717348515987396, - "learning_rate": 0.00019999008131270504, - "loss": 46.0, - "step": 27850 - }, - { - "epoch": 4.4852047183864086, - "grad_norm": 0.0018321602838113904, - "learning_rate": 0.00019999008060015468, - "loss": 46.0, - "step": 27851 - }, - { - "epoch": 4.485365755465196, - "grad_norm": 0.0048034158535301685, - "learning_rate": 0.00019999007988757866, - "loss": 46.0, - "step": 27852 - }, - { - "epoch": 4.4855267925439835, - "grad_norm": 0.003441430628299713, - "learning_rate": 0.0001999900791749771, - "loss": 46.0, - "step": 27853 - }, - { - "epoch": 4.485687829622771, - "grad_norm": 0.0032759748864918947, - "learning_rate": 0.00019999007846234994, - "loss": 46.0, - "step": 27854 - }, - { - "epoch": 4.485848866701558, - "grad_norm": 0.002269813558086753, - "learning_rate": 0.00019999007774969716, - "loss": 46.0, - "step": 27855 - }, - { - "epoch": 4.486009903780346, - "grad_norm": 0.0019601918756961823, - "learning_rate": 0.00019999007703701881, - "loss": 46.0, - "step": 27856 - }, - { - "epoch": 4.486170940859132, - "grad_norm": 0.0030982508324086666, - "learning_rate": 0.00019999007632431488, - "loss": 46.0, - "step": 27857 - }, - { - "epoch": 4.48633197793792, - "grad_norm": 0.010010729543864727, - "learning_rate": 0.00019999007561158534, - "loss": 46.0, - "step": 27858 - }, - { - "epoch": 4.486493015016707, - "grad_norm": 0.007289108820259571, - "learning_rate": 0.00019999007489883018, - "loss": 46.0, - "step": 27859 - }, - { - "epoch": 4.486654052095495, - "grad_norm": 0.016130438074469566, - "learning_rate": 0.00019999007418604946, - "loss": 46.0, - "step": 27860 - }, - { - "epoch": 4.486815089174282, - "grad_norm": 0.002619587816298008, - "learning_rate": 0.00019999007347324316, - "loss": 46.0, - "step": 27861 - }, - { - "epoch": 4.48697612625307, - "grad_norm": 0.010935296304523945, - "learning_rate": 0.00019999007276041126, - "loss": 46.0, - "step": 27862 - }, - { - "epoch": 4.487137163331857, - "grad_norm": 0.002434429246932268, - "learning_rate": 0.00019999007204755378, - "loss": 46.0, - "step": 27863 - }, - { - "epoch": 4.4872982004106445, - "grad_norm": 0.005705788731575012, - "learning_rate": 0.00019999007133467066, - "loss": 46.0, - "step": 27864 - }, - { - "epoch": 4.487459237489432, - "grad_norm": 0.007801066618412733, - "learning_rate": 0.00019999007062176198, - "loss": 46.0, - "step": 27865 - }, - { - "epoch": 4.487620274568219, - "grad_norm": 0.012204373255372047, - "learning_rate": 0.0001999900699088277, - "loss": 46.0, - "step": 27866 - }, - { - "epoch": 4.487781311647007, - "grad_norm": 0.0011020316742360592, - "learning_rate": 0.00019999006919586785, - "loss": 46.0, - "step": 27867 - }, - { - "epoch": 4.487942348725794, - "grad_norm": 0.005237429868429899, - "learning_rate": 0.00019999006848288238, - "loss": 46.0, - "step": 27868 - }, - { - "epoch": 4.488103385804582, - "grad_norm": 0.002027320908382535, - "learning_rate": 0.00019999006776987133, - "loss": 46.0, - "step": 27869 - }, - { - "epoch": 4.488264422883369, - "grad_norm": 0.01505147386342287, - "learning_rate": 0.0001999900670568347, - "loss": 46.0, - "step": 27870 - }, - { - "epoch": 4.488425459962157, - "grad_norm": 0.0028392779640853405, - "learning_rate": 0.00019999006634377245, - "loss": 46.0, - "step": 27871 - }, - { - "epoch": 4.488586497040943, - "grad_norm": 0.0022702605929225683, - "learning_rate": 0.0001999900656306846, - "loss": 46.0, - "step": 27872 - }, - { - "epoch": 4.488747534119731, - "grad_norm": 0.0028342497535049915, - "learning_rate": 0.0001999900649175712, - "loss": 46.0, - "step": 27873 - }, - { - "epoch": 4.488908571198518, - "grad_norm": 0.0015059520956128836, - "learning_rate": 0.00019999006420443218, - "loss": 46.0, - "step": 27874 - }, - { - "epoch": 4.489069608277306, - "grad_norm": 0.0023169571068137884, - "learning_rate": 0.00019999006349126757, - "loss": 46.0, - "step": 27875 - }, - { - "epoch": 4.489230645356093, - "grad_norm": 0.0031651421450078487, - "learning_rate": 0.00019999006277807737, - "loss": 46.0, - "step": 27876 - }, - { - "epoch": 4.4893916824348805, - "grad_norm": 0.0031909889075905085, - "learning_rate": 0.0001999900620648616, - "loss": 46.0, - "step": 27877 - }, - { - "epoch": 4.489552719513668, - "grad_norm": 0.007121788803488016, - "learning_rate": 0.0001999900613516202, - "loss": 46.0, - "step": 27878 - }, - { - "epoch": 4.489713756592455, - "grad_norm": 0.0050599356181919575, - "learning_rate": 0.0001999900606383532, - "loss": 46.0, - "step": 27879 - }, - { - "epoch": 4.489874793671243, - "grad_norm": 0.011040827259421349, - "learning_rate": 0.00019999005992506064, - "loss": 46.0, - "step": 27880 - }, - { - "epoch": 4.49003583075003, - "grad_norm": 0.0010725490283221006, - "learning_rate": 0.00019999005921174248, - "loss": 46.0, - "step": 27881 - }, - { - "epoch": 4.490196867828818, - "grad_norm": 0.0033081036526709795, - "learning_rate": 0.00019999005849839874, - "loss": 46.0, - "step": 27882 - }, - { - "epoch": 4.490357904907605, - "grad_norm": 0.006857835687696934, - "learning_rate": 0.0001999900577850294, - "loss": 46.0, - "step": 27883 - }, - { - "epoch": 4.490518941986393, - "grad_norm": 0.0021876965183764696, - "learning_rate": 0.00019999005707163446, - "loss": 46.0, - "step": 27884 - }, - { - "epoch": 4.49067997906518, - "grad_norm": 0.007280195597559214, - "learning_rate": 0.0001999900563582139, - "loss": 46.0, - "step": 27885 - }, - { - "epoch": 4.490841016143968, - "grad_norm": 0.002429440850391984, - "learning_rate": 0.0001999900556447678, - "loss": 46.0, - "step": 27886 - }, - { - "epoch": 4.491002053222754, - "grad_norm": 0.0017278622835874557, - "learning_rate": 0.00019999005493129607, - "loss": 46.0, - "step": 27887 - }, - { - "epoch": 4.491163090301542, - "grad_norm": 0.005135042127221823, - "learning_rate": 0.00019999005421779878, - "loss": 46.0, - "step": 27888 - }, - { - "epoch": 4.491324127380329, - "grad_norm": 0.02064802125096321, - "learning_rate": 0.0001999900535042759, - "loss": 46.0, - "step": 27889 - }, - { - "epoch": 4.4914851644591165, - "grad_norm": 0.002363914856687188, - "learning_rate": 0.00019999005279072737, - "loss": 46.0, - "step": 27890 - }, - { - "epoch": 4.491646201537904, - "grad_norm": 0.0016027031233534217, - "learning_rate": 0.0001999900520771533, - "loss": 46.0, - "step": 27891 - }, - { - "epoch": 4.491807238616691, - "grad_norm": 0.002568686380982399, - "learning_rate": 0.00019999005136355362, - "loss": 46.0, - "step": 27892 - }, - { - "epoch": 4.491968275695479, - "grad_norm": 0.00444758078083396, - "learning_rate": 0.00019999005064992836, - "loss": 46.0, - "step": 27893 - }, - { - "epoch": 4.492129312774266, - "grad_norm": 0.009934254921972752, - "learning_rate": 0.0001999900499362775, - "loss": 46.0, - "step": 27894 - }, - { - "epoch": 4.492290349853054, - "grad_norm": 0.004778178408741951, - "learning_rate": 0.00019999004922260105, - "loss": 46.0, - "step": 27895 - }, - { - "epoch": 4.492451386931841, - "grad_norm": 0.0018919466529041529, - "learning_rate": 0.000199990048508899, - "loss": 46.0, - "step": 27896 - }, - { - "epoch": 4.492612424010629, - "grad_norm": 0.010147398337721825, - "learning_rate": 0.00019999004779517137, - "loss": 46.0, - "step": 27897 - }, - { - "epoch": 4.492773461089416, - "grad_norm": 0.002310806652531028, - "learning_rate": 0.00019999004708141812, - "loss": 46.0, - "step": 27898 - }, - { - "epoch": 4.492934498168204, - "grad_norm": 0.011375637724995613, - "learning_rate": 0.00019999004636763932, - "loss": 46.0, - "step": 27899 - }, - { - "epoch": 4.493095535246991, - "grad_norm": 0.0022303175646811724, - "learning_rate": 0.00019999004565383487, - "loss": 46.0, - "step": 27900 - }, - { - "epoch": 4.4932565723257785, - "grad_norm": 0.005507065914571285, - "learning_rate": 0.00019999004494000486, - "loss": 46.0, - "step": 27901 - }, - { - "epoch": 4.493417609404565, - "grad_norm": 0.0024187129456549883, - "learning_rate": 0.00019999004422614926, - "loss": 46.0, - "step": 27902 - }, - { - "epoch": 4.4935786464833525, - "grad_norm": 0.00918637402355671, - "learning_rate": 0.00019999004351226808, - "loss": 46.0, - "step": 27903 - }, - { - "epoch": 4.49373968356214, - "grad_norm": 0.0023364771623164415, - "learning_rate": 0.00019999004279836128, - "loss": 46.0, - "step": 27904 - }, - { - "epoch": 4.493900720640927, - "grad_norm": 0.0009917255956679583, - "learning_rate": 0.00019999004208442892, - "loss": 46.0, - "step": 27905 - }, - { - "epoch": 4.494061757719715, - "grad_norm": 0.0013017618330195546, - "learning_rate": 0.00019999004137047095, - "loss": 46.0, - "step": 27906 - }, - { - "epoch": 4.494222794798502, - "grad_norm": 0.0037942298222333193, - "learning_rate": 0.00019999004065648741, - "loss": 46.0, - "step": 27907 - }, - { - "epoch": 4.49438383187729, - "grad_norm": 0.00743611017242074, - "learning_rate": 0.00019999003994247824, - "loss": 46.0, - "step": 27908 - }, - { - "epoch": 4.494544868956077, - "grad_norm": 0.004279667977243662, - "learning_rate": 0.0001999900392284435, - "loss": 46.0, - "step": 27909 - }, - { - "epoch": 4.494705906034865, - "grad_norm": 0.0021074344404041767, - "learning_rate": 0.00019999003851438316, - "loss": 46.0, - "step": 27910 - }, - { - "epoch": 4.494866943113652, - "grad_norm": 0.0032866366673260927, - "learning_rate": 0.00019999003780029725, - "loss": 46.0, - "step": 27911 - }, - { - "epoch": 4.4950279801924395, - "grad_norm": 0.001152546377852559, - "learning_rate": 0.0001999900370861857, - "loss": 46.0, - "step": 27912 - }, - { - "epoch": 4.495189017271227, - "grad_norm": 0.002169529441744089, - "learning_rate": 0.0001999900363720486, - "loss": 46.0, - "step": 27913 - }, - { - "epoch": 4.4953500543500144, - "grad_norm": 0.004141353536397219, - "learning_rate": 0.00019999003565788587, - "loss": 46.0, - "step": 27914 - }, - { - "epoch": 4.495511091428802, - "grad_norm": 0.008625070564448833, - "learning_rate": 0.00019999003494369758, - "loss": 46.0, - "step": 27915 - }, - { - "epoch": 4.4956721285075885, - "grad_norm": 0.024159444496035576, - "learning_rate": 0.00019999003422948368, - "loss": 46.0, - "step": 27916 - }, - { - "epoch": 4.495833165586376, - "grad_norm": 0.0017476622015237808, - "learning_rate": 0.00019999003351524422, - "loss": 46.0, - "step": 27917 - }, - { - "epoch": 4.495994202665163, - "grad_norm": 0.0041218833066523075, - "learning_rate": 0.00019999003280097912, - "loss": 46.0, - "step": 27918 - }, - { - "epoch": 4.496155239743951, - "grad_norm": 0.030301189050078392, - "learning_rate": 0.00019999003208668844, - "loss": 46.0, - "step": 27919 - }, - { - "epoch": 4.496316276822738, - "grad_norm": 0.003254748648032546, - "learning_rate": 0.0001999900313723722, - "loss": 46.0, - "step": 27920 - }, - { - "epoch": 4.496477313901526, - "grad_norm": 0.0028789632488042116, - "learning_rate": 0.00019999003065803033, - "loss": 46.0, - "step": 27921 - }, - { - "epoch": 4.496638350980313, - "grad_norm": 0.0015815565129742026, - "learning_rate": 0.00019999002994366288, - "loss": 46.0, - "step": 27922 - }, - { - "epoch": 4.496799388059101, - "grad_norm": 0.00667013693600893, - "learning_rate": 0.00019999002922926984, - "loss": 46.0, - "step": 27923 - }, - { - "epoch": 4.496960425137888, - "grad_norm": 0.00903643574565649, - "learning_rate": 0.00019999002851485122, - "loss": 46.0, - "step": 27924 - }, - { - "epoch": 4.4971214622166755, - "grad_norm": 0.011258893646299839, - "learning_rate": 0.00019999002780040698, - "loss": 46.0, - "step": 27925 - }, - { - "epoch": 4.497282499295463, - "grad_norm": 0.007935581728816032, - "learning_rate": 0.00019999002708593716, - "loss": 46.0, - "step": 27926 - }, - { - "epoch": 4.49744353637425, - "grad_norm": 0.0037196953780949116, - "learning_rate": 0.00019999002637144177, - "loss": 46.0, - "step": 27927 - }, - { - "epoch": 4.497604573453038, - "grad_norm": 0.0012101439060643315, - "learning_rate": 0.00019999002565692078, - "loss": 46.0, - "step": 27928 - }, - { - "epoch": 4.497765610531825, - "grad_norm": 0.018383676186203957, - "learning_rate": 0.0001999900249423742, - "loss": 46.0, - "step": 27929 - }, - { - "epoch": 4.497926647610612, - "grad_norm": 0.0027241890784353018, - "learning_rate": 0.000199990024227802, - "loss": 46.0, - "step": 27930 - }, - { - "epoch": 4.498087684689399, - "grad_norm": 0.0075929309241473675, - "learning_rate": 0.00019999002351320423, - "loss": 46.0, - "step": 27931 - }, - { - "epoch": 4.498248721768187, - "grad_norm": 0.003081547562032938, - "learning_rate": 0.00019999002279858085, - "loss": 46.0, - "step": 27932 - }, - { - "epoch": 4.498409758846974, - "grad_norm": 0.007031906396150589, - "learning_rate": 0.0001999900220839319, - "loss": 46.0, - "step": 27933 - }, - { - "epoch": 4.498570795925762, - "grad_norm": 0.0015045617474243045, - "learning_rate": 0.00019999002136925732, - "loss": 46.0, - "step": 27934 - }, - { - "epoch": 4.498731833004549, - "grad_norm": 0.002424549777060747, - "learning_rate": 0.00019999002065455718, - "loss": 46.0, - "step": 27935 - }, - { - "epoch": 4.498892870083337, - "grad_norm": 0.00467715784907341, - "learning_rate": 0.00019999001993983146, - "loss": 46.0, - "step": 27936 - }, - { - "epoch": 4.499053907162124, - "grad_norm": 0.002440407406538725, - "learning_rate": 0.00019999001922508012, - "loss": 46.0, - "step": 27937 - }, - { - "epoch": 4.4992149442409115, - "grad_norm": 0.00235581211745739, - "learning_rate": 0.0001999900185103032, - "loss": 46.0, - "step": 27938 - }, - { - "epoch": 4.499375981319699, - "grad_norm": 0.004303944297134876, - "learning_rate": 0.00019999001779550068, - "loss": 46.0, - "step": 27939 - }, - { - "epoch": 4.499537018398486, - "grad_norm": 0.002360617509111762, - "learning_rate": 0.00019999001708067258, - "loss": 46.0, - "step": 27940 - }, - { - "epoch": 4.499698055477274, - "grad_norm": 0.0028185902629047632, - "learning_rate": 0.00019999001636581887, - "loss": 46.0, - "step": 27941 - }, - { - "epoch": 4.499859092556061, - "grad_norm": 0.002025514841079712, - "learning_rate": 0.00019999001565093957, - "loss": 46.0, - "step": 27942 - }, - { - "epoch": 4.500020129634849, - "grad_norm": 0.004514962900429964, - "learning_rate": 0.0001999900149360347, - "loss": 46.0, - "step": 27943 - }, - { - "epoch": 4.500181166713636, - "grad_norm": 0.0014514346839860082, - "learning_rate": 0.0001999900142211042, - "loss": 46.0, - "step": 27944 - }, - { - "epoch": 4.500342203792423, - "grad_norm": 0.015978841111063957, - "learning_rate": 0.00019999001350614815, - "loss": 46.0, - "step": 27945 - }, - { - "epoch": 4.50050324087121, - "grad_norm": 0.016491329297423363, - "learning_rate": 0.00019999001279116647, - "loss": 46.0, - "step": 27946 - }, - { - "epoch": 4.500664277949998, - "grad_norm": 0.002122324425727129, - "learning_rate": 0.0001999900120761592, - "loss": 46.0, - "step": 27947 - }, - { - "epoch": 4.500825315028785, - "grad_norm": 0.0022590658627450466, - "learning_rate": 0.00019999001136112638, - "loss": 46.0, - "step": 27948 - }, - { - "epoch": 4.500986352107573, - "grad_norm": 0.002661405596882105, - "learning_rate": 0.00019999001064606794, - "loss": 46.0, - "step": 27949 - }, - { - "epoch": 4.50114738918636, - "grad_norm": 0.0037559238262474537, - "learning_rate": 0.00019999000993098392, - "loss": 46.0, - "step": 27950 - }, - { - "epoch": 4.5013084262651475, - "grad_norm": 0.0017206493066623807, - "learning_rate": 0.0001999900092158743, - "loss": 46.0, - "step": 27951 - }, - { - "epoch": 4.501469463343935, - "grad_norm": 0.006683338899165392, - "learning_rate": 0.00019999000850073908, - "loss": 46.0, - "step": 27952 - }, - { - "epoch": 4.501630500422722, - "grad_norm": 0.00048455732758156955, - "learning_rate": 0.00019999000778557827, - "loss": 46.0, - "step": 27953 - }, - { - "epoch": 4.50179153750151, - "grad_norm": 0.006423015613108873, - "learning_rate": 0.00019999000707039187, - "loss": 46.0, - "step": 27954 - }, - { - "epoch": 4.501952574580297, - "grad_norm": 0.002031286945566535, - "learning_rate": 0.00019999000635517988, - "loss": 46.0, - "step": 27955 - }, - { - "epoch": 4.502113611659085, - "grad_norm": 0.01293118018656969, - "learning_rate": 0.00019999000563994228, - "loss": 46.0, - "step": 27956 - }, - { - "epoch": 4.502274648737872, - "grad_norm": 0.0038702180609107018, - "learning_rate": 0.0001999900049246791, - "loss": 46.0, - "step": 27957 - }, - { - "epoch": 4.50243568581666, - "grad_norm": 0.00194447988178581, - "learning_rate": 0.00019999000420939034, - "loss": 46.0, - "step": 27958 - }, - { - "epoch": 4.502596722895447, - "grad_norm": 0.002438580384477973, - "learning_rate": 0.000199990003494076, - "loss": 46.0, - "step": 27959 - }, - { - "epoch": 4.502757759974234, - "grad_norm": 0.005687889643013477, - "learning_rate": 0.00019999000277873602, - "loss": 46.0, - "step": 27960 - }, - { - "epoch": 4.502918797053021, - "grad_norm": 0.006321834400296211, - "learning_rate": 0.00019999000206337049, - "loss": 46.0, - "step": 27961 - }, - { - "epoch": 4.503079834131809, - "grad_norm": 0.005697234533727169, - "learning_rate": 0.00019999000134797933, - "loss": 46.0, - "step": 27962 - }, - { - "epoch": 4.503240871210596, - "grad_norm": 0.011142939329147339, - "learning_rate": 0.00019999000063256262, - "loss": 46.0, - "step": 27963 - }, - { - "epoch": 4.5034019082893835, - "grad_norm": 0.01310784462839365, - "learning_rate": 0.0001999899999171203, - "loss": 46.0, - "step": 27964 - }, - { - "epoch": 4.503562945368171, - "grad_norm": 0.011725926771759987, - "learning_rate": 0.00019998999920165238, - "loss": 46.0, - "step": 27965 - }, - { - "epoch": 4.503723982446958, - "grad_norm": 0.00788667518645525, - "learning_rate": 0.00019998999848615885, - "loss": 46.0, - "step": 27966 - }, - { - "epoch": 4.503885019525746, - "grad_norm": 0.0017495183274149895, - "learning_rate": 0.00019998999777063976, - "loss": 46.0, - "step": 27967 - }, - { - "epoch": 4.504046056604533, - "grad_norm": 0.0035278829745948315, - "learning_rate": 0.00019998999705509506, - "loss": 46.0, - "step": 27968 - }, - { - "epoch": 4.504207093683321, - "grad_norm": 0.009284217841923237, - "learning_rate": 0.00019998999633952477, - "loss": 46.0, - "step": 27969 - }, - { - "epoch": 4.504368130762108, - "grad_norm": 0.0018602310447022319, - "learning_rate": 0.0001999899956239289, - "loss": 46.0, - "step": 27970 - }, - { - "epoch": 4.504529167840896, - "grad_norm": 0.0038876133039593697, - "learning_rate": 0.00019998999490830746, - "loss": 46.0, - "step": 27971 - }, - { - "epoch": 4.504690204919683, - "grad_norm": 0.00523751974105835, - "learning_rate": 0.00019998999419266038, - "loss": 46.0, - "step": 27972 - }, - { - "epoch": 4.5048512419984705, - "grad_norm": 0.015857869759202003, - "learning_rate": 0.00019998999347698771, - "loss": 46.0, - "step": 27973 - }, - { - "epoch": 4.505012279077258, - "grad_norm": 0.0032813113648444414, - "learning_rate": 0.00019998999276128946, - "loss": 46.0, - "step": 27974 - }, - { - "epoch": 4.5051733161560445, - "grad_norm": 0.0012366772862151265, - "learning_rate": 0.00019998999204556562, - "loss": 46.0, - "step": 27975 - }, - { - "epoch": 4.505334353234832, - "grad_norm": 0.0009771501645445824, - "learning_rate": 0.00019998999132981622, - "loss": 46.0, - "step": 27976 - }, - { - "epoch": 4.5054953903136195, - "grad_norm": 0.001074718777090311, - "learning_rate": 0.00019998999061404118, - "loss": 46.0, - "step": 27977 - }, - { - "epoch": 4.505656427392407, - "grad_norm": 0.014997282065451145, - "learning_rate": 0.00019998998989824055, - "loss": 46.0, - "step": 27978 - }, - { - "epoch": 4.505817464471194, - "grad_norm": 0.0018334087217226624, - "learning_rate": 0.00019998998918241436, - "loss": 46.0, - "step": 27979 - }, - { - "epoch": 4.505978501549982, - "grad_norm": 0.003739659208804369, - "learning_rate": 0.00019998998846656256, - "loss": 46.0, - "step": 27980 - }, - { - "epoch": 4.506139538628769, - "grad_norm": 0.0026020260993391275, - "learning_rate": 0.00019998998775068517, - "loss": 46.0, - "step": 27981 - }, - { - "epoch": 4.506300575707557, - "grad_norm": 0.003148492192849517, - "learning_rate": 0.00019998998703478217, - "loss": 46.0, - "step": 27982 - }, - { - "epoch": 4.506461612786344, - "grad_norm": 0.003607948310673237, - "learning_rate": 0.0001999899863188536, - "loss": 46.0, - "step": 27983 - }, - { - "epoch": 4.506622649865132, - "grad_norm": 0.004364179912954569, - "learning_rate": 0.00019998998560289942, - "loss": 46.0, - "step": 27984 - }, - { - "epoch": 4.506783686943919, - "grad_norm": 0.0030395081266760826, - "learning_rate": 0.00019998998488691966, - "loss": 46.0, - "step": 27985 - }, - { - "epoch": 4.5069447240227065, - "grad_norm": 0.00841621682047844, - "learning_rate": 0.0001999899841709143, - "loss": 46.0, - "step": 27986 - }, - { - "epoch": 4.507105761101494, - "grad_norm": 0.00864341575652361, - "learning_rate": 0.00019998998345488337, - "loss": 46.0, - "step": 27987 - }, - { - "epoch": 4.5072667981802805, - "grad_norm": 0.0041961087845265865, - "learning_rate": 0.00019998998273882684, - "loss": 46.0, - "step": 27988 - }, - { - "epoch": 4.507427835259069, - "grad_norm": 0.0029167085886001587, - "learning_rate": 0.0001999899820227447, - "loss": 46.0, - "step": 27989 - }, - { - "epoch": 4.507588872337855, - "grad_norm": 0.002331846859306097, - "learning_rate": 0.00019998998130663697, - "loss": 46.0, - "step": 27990 - }, - { - "epoch": 4.507749909416643, - "grad_norm": 0.0008793384185992181, - "learning_rate": 0.00019998998059050368, - "loss": 46.0, - "step": 27991 - }, - { - "epoch": 4.50791094649543, - "grad_norm": 0.004005909897387028, - "learning_rate": 0.00019998997987434475, - "loss": 46.0, - "step": 27992 - }, - { - "epoch": 4.508071983574218, - "grad_norm": 0.0031180153600871563, - "learning_rate": 0.00019998997915816026, - "loss": 46.0, - "step": 27993 - }, - { - "epoch": 4.508233020653005, - "grad_norm": 0.004932167008519173, - "learning_rate": 0.00019998997844195015, - "loss": 46.0, - "step": 27994 - }, - { - "epoch": 4.508394057731793, - "grad_norm": 0.0017057282384485006, - "learning_rate": 0.00019998997772571446, - "loss": 46.0, - "step": 27995 - }, - { - "epoch": 4.50855509481058, - "grad_norm": 0.007572742644697428, - "learning_rate": 0.00019998997700945318, - "loss": 46.0, - "step": 27996 - }, - { - "epoch": 4.508716131889368, - "grad_norm": 0.004785297904163599, - "learning_rate": 0.00019998997629316635, - "loss": 46.0, - "step": 27997 - }, - { - "epoch": 4.508877168968155, - "grad_norm": 0.0014636771520599723, - "learning_rate": 0.00019998997557685386, - "loss": 46.0, - "step": 27998 - }, - { - "epoch": 4.5090382060469425, - "grad_norm": 0.0012036453699693084, - "learning_rate": 0.0001999899748605158, - "loss": 46.0, - "step": 27999 - }, - { - "epoch": 4.50919924312573, - "grad_norm": 0.001644710311666131, - "learning_rate": 0.00019998997414415217, - "loss": 46.0, - "step": 28000 - }, - { - "epoch": 4.509360280204517, - "grad_norm": 0.003134592669084668, - "learning_rate": 0.0001999899734277629, - "loss": 46.0, - "step": 28001 - }, - { - "epoch": 4.509521317283305, - "grad_norm": 0.0018005961319431663, - "learning_rate": 0.0001999899727113481, - "loss": 46.0, - "step": 28002 - }, - { - "epoch": 4.509682354362091, - "grad_norm": 0.008437184616923332, - "learning_rate": 0.00019998997199490768, - "loss": 46.0, - "step": 28003 - }, - { - "epoch": 4.50984339144088, - "grad_norm": 0.003125728340819478, - "learning_rate": 0.00019998997127844165, - "loss": 46.0, - "step": 28004 - }, - { - "epoch": 4.510004428519666, - "grad_norm": 0.0017466152785345912, - "learning_rate": 0.00019998997056195004, - "loss": 46.0, - "step": 28005 - }, - { - "epoch": 4.510165465598454, - "grad_norm": 0.0020630578510463238, - "learning_rate": 0.00019998996984543283, - "loss": 46.0, - "step": 28006 - }, - { - "epoch": 4.510326502677241, - "grad_norm": 0.003983749076724052, - "learning_rate": 0.00019998996912889007, - "loss": 46.0, - "step": 28007 - }, - { - "epoch": 4.510487539756029, - "grad_norm": 0.009786240756511688, - "learning_rate": 0.00019998996841232166, - "loss": 46.0, - "step": 28008 - }, - { - "epoch": 4.510648576834816, - "grad_norm": 0.00785856880247593, - "learning_rate": 0.00019998996769572772, - "loss": 46.0, - "step": 28009 - }, - { - "epoch": 4.510809613913604, - "grad_norm": 0.004561574198305607, - "learning_rate": 0.0001999899669791081, - "loss": 46.0, - "step": 28010 - }, - { - "epoch": 4.510970650992391, - "grad_norm": 0.00855952873826027, - "learning_rate": 0.00019998996626246295, - "loss": 46.0, - "step": 28011 - }, - { - "epoch": 4.5111316880711785, - "grad_norm": 0.002436211798340082, - "learning_rate": 0.00019998996554579222, - "loss": 46.0, - "step": 28012 - }, - { - "epoch": 4.511292725149966, - "grad_norm": 0.0026596460957080126, - "learning_rate": 0.00019998996482909588, - "loss": 46.0, - "step": 28013 - }, - { - "epoch": 4.511453762228753, - "grad_norm": 0.001462169224396348, - "learning_rate": 0.00019998996411237392, - "loss": 46.0, - "step": 28014 - }, - { - "epoch": 4.511614799307541, - "grad_norm": 0.0017911311006173491, - "learning_rate": 0.0001999899633956264, - "loss": 46.0, - "step": 28015 - }, - { - "epoch": 4.511775836386328, - "grad_norm": 0.026311425492167473, - "learning_rate": 0.00019998996267885327, - "loss": 46.0, - "step": 28016 - }, - { - "epoch": 4.511936873465116, - "grad_norm": 0.0023388275876641273, - "learning_rate": 0.00019998996196205455, - "loss": 46.0, - "step": 28017 - }, - { - "epoch": 4.512097910543902, - "grad_norm": 0.0042266868986189365, - "learning_rate": 0.00019998996124523025, - "loss": 46.0, - "step": 28018 - }, - { - "epoch": 4.51225894762269, - "grad_norm": 0.005381749011576176, - "learning_rate": 0.00019998996052838036, - "loss": 46.0, - "step": 28019 - }, - { - "epoch": 4.512419984701477, - "grad_norm": 0.006414171773940325, - "learning_rate": 0.00019998995981150485, - "loss": 46.0, - "step": 28020 - }, - { - "epoch": 4.512581021780265, - "grad_norm": 0.00540187768638134, - "learning_rate": 0.00019998995909460376, - "loss": 46.0, - "step": 28021 - }, - { - "epoch": 4.512742058859052, - "grad_norm": 0.004719736985862255, - "learning_rate": 0.00019998995837767708, - "loss": 46.0, - "step": 28022 - }, - { - "epoch": 4.51290309593784, - "grad_norm": 0.0017974013462662697, - "learning_rate": 0.00019998995766072484, - "loss": 46.0, - "step": 28023 - }, - { - "epoch": 4.513064133016627, - "grad_norm": 0.002993357367813587, - "learning_rate": 0.00019998995694374695, - "loss": 46.0, - "step": 28024 - }, - { - "epoch": 4.5132251700954145, - "grad_norm": 0.005367578938603401, - "learning_rate": 0.0001999899562267435, - "loss": 46.0, - "step": 28025 - }, - { - "epoch": 4.513386207174202, - "grad_norm": 0.007388588506728411, - "learning_rate": 0.00019998995550971445, - "loss": 46.0, - "step": 28026 - }, - { - "epoch": 4.513547244252989, - "grad_norm": 0.011032975278794765, - "learning_rate": 0.0001999899547926598, - "loss": 46.0, - "step": 28027 - }, - { - "epoch": 4.513708281331777, - "grad_norm": 0.00642789714038372, - "learning_rate": 0.00019998995407557958, - "loss": 46.0, - "step": 28028 - }, - { - "epoch": 4.513869318410564, - "grad_norm": 0.007025600876659155, - "learning_rate": 0.00019998995335847376, - "loss": 46.0, - "step": 28029 - }, - { - "epoch": 4.514030355489352, - "grad_norm": 0.0006311052129603922, - "learning_rate": 0.00019998995264134236, - "loss": 46.0, - "step": 28030 - }, - { - "epoch": 4.514191392568139, - "grad_norm": 0.009003935381770134, - "learning_rate": 0.00019998995192418534, - "loss": 46.0, - "step": 28031 - }, - { - "epoch": 4.514352429646927, - "grad_norm": 0.01081156637519598, - "learning_rate": 0.00019998995120700276, - "loss": 46.0, - "step": 28032 - }, - { - "epoch": 4.514513466725713, - "grad_norm": 0.005827121902257204, - "learning_rate": 0.00019998995048979454, - "loss": 46.0, - "step": 28033 - }, - { - "epoch": 4.514674503804501, - "grad_norm": 0.0017113096546381712, - "learning_rate": 0.00019998994977256076, - "loss": 46.0, - "step": 28034 - }, - { - "epoch": 4.514835540883288, - "grad_norm": 0.003421470522880554, - "learning_rate": 0.0001999899490553014, - "loss": 46.0, - "step": 28035 - }, - { - "epoch": 4.5149965779620755, - "grad_norm": 0.0013876743614673615, - "learning_rate": 0.0001999899483380164, - "loss": 46.0, - "step": 28036 - }, - { - "epoch": 4.515157615040863, - "grad_norm": 0.0020678043365478516, - "learning_rate": 0.00019998994762070583, - "loss": 46.0, - "step": 28037 - }, - { - "epoch": 4.5153186521196504, - "grad_norm": 0.0021930222865194082, - "learning_rate": 0.0001999899469033697, - "loss": 46.0, - "step": 28038 - }, - { - "epoch": 4.515479689198438, - "grad_norm": 0.002599215367808938, - "learning_rate": 0.00019998994618600796, - "loss": 46.0, - "step": 28039 - }, - { - "epoch": 4.515640726277225, - "grad_norm": 0.01092553324997425, - "learning_rate": 0.00019998994546862063, - "loss": 46.0, - "step": 28040 - }, - { - "epoch": 4.515801763356013, - "grad_norm": 0.005444251466542482, - "learning_rate": 0.00019998994475120768, - "loss": 46.0, - "step": 28041 - }, - { - "epoch": 4.5159628004348, - "grad_norm": 0.007895600982010365, - "learning_rate": 0.00019998994403376915, - "loss": 46.0, - "step": 28042 - }, - { - "epoch": 4.516123837513588, - "grad_norm": 0.0010517495684325695, - "learning_rate": 0.00019998994331630506, - "loss": 46.0, - "step": 28043 - }, - { - "epoch": 4.516284874592375, - "grad_norm": 0.011224798858165741, - "learning_rate": 0.00019998994259881535, - "loss": 46.0, - "step": 28044 - }, - { - "epoch": 4.516445911671163, - "grad_norm": 0.007543615065515041, - "learning_rate": 0.00019998994188130006, - "loss": 46.0, - "step": 28045 - }, - { - "epoch": 4.51660694874995, - "grad_norm": 0.003957655280828476, - "learning_rate": 0.00019998994116375915, - "loss": 46.0, - "step": 28046 - }, - { - "epoch": 4.5167679858287375, - "grad_norm": 0.0025699941907078028, - "learning_rate": 0.00019998994044619268, - "loss": 46.0, - "step": 28047 - }, - { - "epoch": 4.516929022907524, - "grad_norm": 0.009581594727933407, - "learning_rate": 0.0001999899397286006, - "loss": 46.0, - "step": 28048 - }, - { - "epoch": 4.5170900599863115, - "grad_norm": 0.0027826293371617794, - "learning_rate": 0.0001999899390109829, - "loss": 46.0, - "step": 28049 - }, - { - "epoch": 4.517251097065099, - "grad_norm": 0.0009113684063777328, - "learning_rate": 0.00019998993829333967, - "loss": 46.0, - "step": 28050 - }, - { - "epoch": 4.517412134143886, - "grad_norm": 0.0033036170061677694, - "learning_rate": 0.00019998993757567083, - "loss": 46.0, - "step": 28051 - }, - { - "epoch": 4.517573171222674, - "grad_norm": 0.002945908345282078, - "learning_rate": 0.00019998993685797637, - "loss": 46.0, - "step": 28052 - }, - { - "epoch": 4.517734208301461, - "grad_norm": 0.006020206492394209, - "learning_rate": 0.00019998993614025632, - "loss": 46.0, - "step": 28053 - }, - { - "epoch": 4.517895245380249, - "grad_norm": 0.0065395161509513855, - "learning_rate": 0.0001999899354225107, - "loss": 46.0, - "step": 28054 - }, - { - "epoch": 4.518056282459036, - "grad_norm": 0.0010534306056797504, - "learning_rate": 0.00019998993470473947, - "loss": 46.0, - "step": 28055 - }, - { - "epoch": 4.518217319537824, - "grad_norm": 0.003093145089223981, - "learning_rate": 0.00019998993398694266, - "loss": 46.0, - "step": 28056 - }, - { - "epoch": 4.518378356616611, - "grad_norm": 0.0070256819017231464, - "learning_rate": 0.00019998993326912024, - "loss": 46.0, - "step": 28057 - }, - { - "epoch": 4.518539393695399, - "grad_norm": 0.0031234880443662405, - "learning_rate": 0.00019998993255127223, - "loss": 46.0, - "step": 28058 - }, - { - "epoch": 4.518700430774186, - "grad_norm": 0.004970559850335121, - "learning_rate": 0.00019998993183339864, - "loss": 46.0, - "step": 28059 - }, - { - "epoch": 4.5188614678529735, - "grad_norm": 0.0023421715013682842, - "learning_rate": 0.00019998993111549948, - "loss": 46.0, - "step": 28060 - }, - { - "epoch": 4.51902250493176, - "grad_norm": 0.003251435933634639, - "learning_rate": 0.0001999899303975747, - "loss": 46.0, - "step": 28061 - }, - { - "epoch": 4.519183542010548, - "grad_norm": 0.005180378910154104, - "learning_rate": 0.00019998992967962433, - "loss": 46.0, - "step": 28062 - }, - { - "epoch": 4.519344579089335, - "grad_norm": 0.002574390731751919, - "learning_rate": 0.00019998992896164835, - "loss": 46.0, - "step": 28063 - }, - { - "epoch": 4.519505616168122, - "grad_norm": 0.008656046353280544, - "learning_rate": 0.00019998992824364682, - "loss": 46.0, - "step": 28064 - }, - { - "epoch": 4.51966665324691, - "grad_norm": 0.00193415442481637, - "learning_rate": 0.00019998992752561968, - "loss": 46.0, - "step": 28065 - }, - { - "epoch": 4.519827690325697, - "grad_norm": 0.014596354216337204, - "learning_rate": 0.00019998992680756694, - "loss": 46.0, - "step": 28066 - }, - { - "epoch": 4.519988727404485, - "grad_norm": 0.005065843928605318, - "learning_rate": 0.0001999899260894886, - "loss": 46.0, - "step": 28067 - }, - { - "epoch": 4.520149764483272, - "grad_norm": 0.0014835710171610117, - "learning_rate": 0.00019998992537138466, - "loss": 46.0, - "step": 28068 - }, - { - "epoch": 4.52031080156206, - "grad_norm": 0.0024256010074168444, - "learning_rate": 0.00019998992465325514, - "loss": 46.0, - "step": 28069 - }, - { - "epoch": 4.520471838640847, - "grad_norm": 0.003363888943567872, - "learning_rate": 0.00019998992393510006, - "loss": 46.0, - "step": 28070 - }, - { - "epoch": 4.520632875719635, - "grad_norm": 0.01180997584015131, - "learning_rate": 0.00019998992321691933, - "loss": 46.0, - "step": 28071 - }, - { - "epoch": 4.520793912798422, - "grad_norm": 0.004813733045011759, - "learning_rate": 0.00019998992249871305, - "loss": 46.0, - "step": 28072 - }, - { - "epoch": 4.5209549498772095, - "grad_norm": 0.01116902381181717, - "learning_rate": 0.00019998992178048118, - "loss": 46.0, - "step": 28073 - }, - { - "epoch": 4.521115986955997, - "grad_norm": 0.0022108505945652723, - "learning_rate": 0.0001999899210622237, - "loss": 46.0, - "step": 28074 - }, - { - "epoch": 4.521277024034784, - "grad_norm": 0.0007912889122962952, - "learning_rate": 0.00019998992034394065, - "loss": 46.0, - "step": 28075 - }, - { - "epoch": 4.521438061113571, - "grad_norm": 0.006706543732434511, - "learning_rate": 0.000199989919625632, - "loss": 46.0, - "step": 28076 - }, - { - "epoch": 4.521599098192359, - "grad_norm": 0.003491014242172241, - "learning_rate": 0.0001999899189072977, - "loss": 46.0, - "step": 28077 - }, - { - "epoch": 4.521760135271146, - "grad_norm": 0.0022573666647076607, - "learning_rate": 0.00019998991818893786, - "loss": 46.0, - "step": 28078 - }, - { - "epoch": 4.521921172349933, - "grad_norm": 0.004572810139507055, - "learning_rate": 0.00019998991747055244, - "loss": 46.0, - "step": 28079 - }, - { - "epoch": 4.522082209428721, - "grad_norm": 0.0068563357926905155, - "learning_rate": 0.0001999899167521414, - "loss": 46.0, - "step": 28080 - }, - { - "epoch": 4.522243246507508, - "grad_norm": 0.0007289955392479897, - "learning_rate": 0.00019998991603370478, - "loss": 46.0, - "step": 28081 - }, - { - "epoch": 4.522404283586296, - "grad_norm": 0.0036731078289449215, - "learning_rate": 0.00019998991531524257, - "loss": 46.0, - "step": 28082 - }, - { - "epoch": 4.522565320665083, - "grad_norm": 0.006915264762938023, - "learning_rate": 0.00019998991459675475, - "loss": 46.0, - "step": 28083 - }, - { - "epoch": 4.5227263577438706, - "grad_norm": 0.01765129156410694, - "learning_rate": 0.00019998991387824136, - "loss": 46.0, - "step": 28084 - }, - { - "epoch": 4.522887394822658, - "grad_norm": 0.004520501010119915, - "learning_rate": 0.00019998991315970237, - "loss": 46.0, - "step": 28085 - }, - { - "epoch": 4.5230484319014455, - "grad_norm": 0.002556120976805687, - "learning_rate": 0.00019998991244113778, - "loss": 46.0, - "step": 28086 - }, - { - "epoch": 4.523209468980233, - "grad_norm": 0.0046248831786215305, - "learning_rate": 0.0001999899117225476, - "loss": 46.0, - "step": 28087 - }, - { - "epoch": 4.52337050605902, - "grad_norm": 0.005944658536463976, - "learning_rate": 0.00019998991100393182, - "loss": 46.0, - "step": 28088 - }, - { - "epoch": 4.523531543137808, - "grad_norm": 0.0027721503283828497, - "learning_rate": 0.00019998991028529045, - "loss": 46.0, - "step": 28089 - }, - { - "epoch": 4.523692580216595, - "grad_norm": 0.0020460374653339386, - "learning_rate": 0.00019998990956662352, - "loss": 46.0, - "step": 28090 - }, - { - "epoch": 4.523853617295382, - "grad_norm": 0.0018385767471045256, - "learning_rate": 0.000199989908847931, - "loss": 46.0, - "step": 28091 - }, - { - "epoch": 4.524014654374169, - "grad_norm": 0.0014394954778254032, - "learning_rate": 0.00019998990812921283, - "loss": 46.0, - "step": 28092 - }, - { - "epoch": 4.524175691452957, - "grad_norm": 0.007078861817717552, - "learning_rate": 0.0001999899074104691, - "loss": 46.0, - "step": 28093 - }, - { - "epoch": 4.524336728531744, - "grad_norm": 0.005528643261641264, - "learning_rate": 0.0001999899066916998, - "loss": 46.0, - "step": 28094 - }, - { - "epoch": 4.524497765610532, - "grad_norm": 0.0033406035508960485, - "learning_rate": 0.00019998990597290488, - "loss": 46.0, - "step": 28095 - }, - { - "epoch": 4.524658802689319, - "grad_norm": 0.0012600012123584747, - "learning_rate": 0.00019998990525408437, - "loss": 46.0, - "step": 28096 - }, - { - "epoch": 4.5248198397681065, - "grad_norm": 0.0047652460634708405, - "learning_rate": 0.00019998990453523827, - "loss": 46.0, - "step": 28097 - }, - { - "epoch": 4.524980876846894, - "grad_norm": 0.0027372546028345823, - "learning_rate": 0.00019998990381636658, - "loss": 46.0, - "step": 28098 - }, - { - "epoch": 4.525141913925681, - "grad_norm": 0.001851135166361928, - "learning_rate": 0.00019998990309746928, - "loss": 46.0, - "step": 28099 - }, - { - "epoch": 4.525302951004469, - "grad_norm": 0.0025847184006124735, - "learning_rate": 0.0001999899023785464, - "loss": 46.0, - "step": 28100 - }, - { - "epoch": 4.525463988083256, - "grad_norm": 0.01216368842869997, - "learning_rate": 0.00019998990165959795, - "loss": 46.0, - "step": 28101 - }, - { - "epoch": 4.525625025162044, - "grad_norm": 0.006062064785510302, - "learning_rate": 0.0001999899009406239, - "loss": 46.0, - "step": 28102 - }, - { - "epoch": 4.525786062240831, - "grad_norm": 0.011608520522713661, - "learning_rate": 0.00019998990022162424, - "loss": 46.0, - "step": 28103 - }, - { - "epoch": 4.525947099319619, - "grad_norm": 0.0017695939168334007, - "learning_rate": 0.00019998989950259898, - "loss": 46.0, - "step": 28104 - }, - { - "epoch": 4.526108136398406, - "grad_norm": 0.003787803230807185, - "learning_rate": 0.00019998989878354816, - "loss": 46.0, - "step": 28105 - }, - { - "epoch": 4.526269173477193, - "grad_norm": 0.003946825861930847, - "learning_rate": 0.00019998989806447172, - "loss": 46.0, - "step": 28106 - }, - { - "epoch": 4.52643021055598, - "grad_norm": 0.0008840218069963157, - "learning_rate": 0.0001999898973453697, - "loss": 46.0, - "step": 28107 - }, - { - "epoch": 4.526591247634768, - "grad_norm": 0.004806368611752987, - "learning_rate": 0.00019998989662624208, - "loss": 46.0, - "step": 28108 - }, - { - "epoch": 4.526752284713555, - "grad_norm": 0.010537315160036087, - "learning_rate": 0.00019998989590708888, - "loss": 46.0, - "step": 28109 - }, - { - "epoch": 4.5269133217923425, - "grad_norm": 0.005134051665663719, - "learning_rate": 0.0001999898951879101, - "loss": 46.0, - "step": 28110 - }, - { - "epoch": 4.52707435887113, - "grad_norm": 0.018456293269991875, - "learning_rate": 0.0001999898944687057, - "loss": 46.0, - "step": 28111 - }, - { - "epoch": 4.527235395949917, - "grad_norm": 0.007777049671858549, - "learning_rate": 0.00019998989374947569, - "loss": 46.0, - "step": 28112 - }, - { - "epoch": 4.527396433028705, - "grad_norm": 0.002604363253340125, - "learning_rate": 0.00019998989303022014, - "loss": 46.0, - "step": 28113 - }, - { - "epoch": 4.527557470107492, - "grad_norm": 0.001909418380819261, - "learning_rate": 0.00019998989231093898, - "loss": 46.0, - "step": 28114 - }, - { - "epoch": 4.52771850718628, - "grad_norm": 0.011187337338924408, - "learning_rate": 0.0001999898915916322, - "loss": 46.0, - "step": 28115 - }, - { - "epoch": 4.527879544265067, - "grad_norm": 0.0013596827629953623, - "learning_rate": 0.00019998989087229987, - "loss": 46.0, - "step": 28116 - }, - { - "epoch": 4.528040581343855, - "grad_norm": 0.012069943360984325, - "learning_rate": 0.00019998989015294192, - "loss": 46.0, - "step": 28117 - }, - { - "epoch": 4.528201618422642, - "grad_norm": 0.02140405774116516, - "learning_rate": 0.00019998988943355838, - "loss": 46.0, - "step": 28118 - }, - { - "epoch": 4.52836265550143, - "grad_norm": 0.0013159465743228793, - "learning_rate": 0.00019998988871414923, - "loss": 46.0, - "step": 28119 - }, - { - "epoch": 4.528523692580217, - "grad_norm": 0.002588863717392087, - "learning_rate": 0.00019998988799471452, - "loss": 46.0, - "step": 28120 - }, - { - "epoch": 4.528684729659004, - "grad_norm": 0.005136772524565458, - "learning_rate": 0.00019998988727525422, - "loss": 46.0, - "step": 28121 - }, - { - "epoch": 4.528845766737791, - "grad_norm": 0.017035972326993942, - "learning_rate": 0.0001999898865557683, - "loss": 46.0, - "step": 28122 - }, - { - "epoch": 4.5290068038165785, - "grad_norm": 0.0044188774190843105, - "learning_rate": 0.00019998988583625683, - "loss": 46.0, - "step": 28123 - }, - { - "epoch": 4.529167840895366, - "grad_norm": 0.002522155875340104, - "learning_rate": 0.00019998988511671972, - "loss": 46.0, - "step": 28124 - }, - { - "epoch": 4.529328877974153, - "grad_norm": 0.006839301902800798, - "learning_rate": 0.00019998988439715702, - "loss": 46.0, - "step": 28125 - }, - { - "epoch": 4.529489915052941, - "grad_norm": 0.004980405792593956, - "learning_rate": 0.00019998988367756875, - "loss": 46.0, - "step": 28126 - }, - { - "epoch": 4.529650952131728, - "grad_norm": 0.004197351634502411, - "learning_rate": 0.00019998988295795488, - "loss": 46.0, - "step": 28127 - }, - { - "epoch": 4.529811989210516, - "grad_norm": 0.017260542139410973, - "learning_rate": 0.00019998988223831544, - "loss": 46.0, - "step": 28128 - }, - { - "epoch": 4.529973026289303, - "grad_norm": 0.0015277242055162787, - "learning_rate": 0.00019998988151865036, - "loss": 46.0, - "step": 28129 - }, - { - "epoch": 4.530134063368091, - "grad_norm": 0.00429127924144268, - "learning_rate": 0.00019998988079895973, - "loss": 46.0, - "step": 28130 - }, - { - "epoch": 4.530295100446878, - "grad_norm": 0.005581240635365248, - "learning_rate": 0.0001999898800792435, - "loss": 46.0, - "step": 28131 - }, - { - "epoch": 4.530456137525666, - "grad_norm": 0.0016650476027280092, - "learning_rate": 0.00019998987935950166, - "loss": 46.0, - "step": 28132 - }, - { - "epoch": 4.530617174604453, - "grad_norm": 0.011678727343678474, - "learning_rate": 0.00019998987863973424, - "loss": 46.0, - "step": 28133 - }, - { - "epoch": 4.53077821168324, - "grad_norm": 0.001121566048823297, - "learning_rate": 0.00019998987791994122, - "loss": 46.0, - "step": 28134 - }, - { - "epoch": 4.530939248762028, - "grad_norm": 0.0030708073172718287, - "learning_rate": 0.0001999898772001226, - "loss": 46.0, - "step": 28135 - }, - { - "epoch": 4.5311002858408145, - "grad_norm": 0.0036146959755569696, - "learning_rate": 0.00019998987648027844, - "loss": 46.0, - "step": 28136 - }, - { - "epoch": 4.531261322919602, - "grad_norm": 0.01375151239335537, - "learning_rate": 0.00019998987576040864, - "loss": 46.0, - "step": 28137 - }, - { - "epoch": 4.531422359998389, - "grad_norm": 0.008395235054194927, - "learning_rate": 0.00019998987504051325, - "loss": 46.0, - "step": 28138 - }, - { - "epoch": 4.531583397077177, - "grad_norm": 0.0024923905730247498, - "learning_rate": 0.00019998987432059227, - "loss": 46.0, - "step": 28139 - }, - { - "epoch": 4.531744434155964, - "grad_norm": 0.008777826093137264, - "learning_rate": 0.0001999898736006457, - "loss": 46.0, - "step": 28140 - }, - { - "epoch": 4.531905471234752, - "grad_norm": 0.0028207488358020782, - "learning_rate": 0.00019998987288067356, - "loss": 46.0, - "step": 28141 - }, - { - "epoch": 4.532066508313539, - "grad_norm": 0.005800921469926834, - "learning_rate": 0.0001999898721606758, - "loss": 46.0, - "step": 28142 - }, - { - "epoch": 4.532227545392327, - "grad_norm": 0.0027123247273266315, - "learning_rate": 0.00019998987144065247, - "loss": 46.0, - "step": 28143 - }, - { - "epoch": 4.532388582471114, - "grad_norm": 0.0012545640347525477, - "learning_rate": 0.0001999898707206035, - "loss": 46.0, - "step": 28144 - }, - { - "epoch": 4.5325496195499015, - "grad_norm": 0.002751567168161273, - "learning_rate": 0.00019998987000052897, - "loss": 46.0, - "step": 28145 - }, - { - "epoch": 4.532710656628689, - "grad_norm": 0.00078404980013147, - "learning_rate": 0.00019998986928042886, - "loss": 46.0, - "step": 28146 - }, - { - "epoch": 4.5328716937074764, - "grad_norm": 0.005834897048771381, - "learning_rate": 0.00019998986856030316, - "loss": 46.0, - "step": 28147 - }, - { - "epoch": 4.533032730786264, - "grad_norm": 0.004490547347813845, - "learning_rate": 0.00019998986784015185, - "loss": 46.0, - "step": 28148 - }, - { - "epoch": 4.5331937678650505, - "grad_norm": 0.004180487710982561, - "learning_rate": 0.00019998986711997497, - "loss": 46.0, - "step": 28149 - }, - { - "epoch": 4.533354804943839, - "grad_norm": 0.008809946477413177, - "learning_rate": 0.00019998986639977245, - "loss": 46.0, - "step": 28150 - }, - { - "epoch": 4.533515842022625, - "grad_norm": 0.012196926400065422, - "learning_rate": 0.00019998986567954438, - "loss": 46.0, - "step": 28151 - }, - { - "epoch": 4.533676879101413, - "grad_norm": 0.0025184727273881435, - "learning_rate": 0.00019998986495929071, - "loss": 46.0, - "step": 28152 - }, - { - "epoch": 4.5338379161802, - "grad_norm": 0.0071161356754601, - "learning_rate": 0.00019998986423901144, - "loss": 46.0, - "step": 28153 - }, - { - "epoch": 4.533998953258988, - "grad_norm": 0.0047623696736991405, - "learning_rate": 0.00019998986351870657, - "loss": 46.0, - "step": 28154 - }, - { - "epoch": 4.534159990337775, - "grad_norm": 0.007362185511738062, - "learning_rate": 0.00019998986279837612, - "loss": 46.0, - "step": 28155 - }, - { - "epoch": 4.534321027416563, - "grad_norm": 0.004340854473412037, - "learning_rate": 0.00019998986207802008, - "loss": 46.0, - "step": 28156 - }, - { - "epoch": 4.53448206449535, - "grad_norm": 0.0020779420156031847, - "learning_rate": 0.00019998986135763845, - "loss": 46.0, - "step": 28157 - }, - { - "epoch": 4.5346431015741375, - "grad_norm": 0.00365415564738214, - "learning_rate": 0.0001999898606372312, - "loss": 46.0, - "step": 28158 - }, - { - "epoch": 4.534804138652925, - "grad_norm": 0.004089410416781902, - "learning_rate": 0.0001999898599167984, - "loss": 46.0, - "step": 28159 - }, - { - "epoch": 4.534965175731712, - "grad_norm": 0.006561561953276396, - "learning_rate": 0.00019998985919634, - "loss": 46.0, - "step": 28160 - }, - { - "epoch": 4.5351262128105, - "grad_norm": 0.0030093041714280844, - "learning_rate": 0.000199989858475856, - "loss": 46.0, - "step": 28161 - }, - { - "epoch": 4.535287249889287, - "grad_norm": 0.007026029285043478, - "learning_rate": 0.00019998985775534638, - "loss": 46.0, - "step": 28162 - }, - { - "epoch": 4.535448286968075, - "grad_norm": 0.0024453147780150175, - "learning_rate": 0.0001999898570348112, - "loss": 46.0, - "step": 28163 - }, - { - "epoch": 4.535609324046861, - "grad_norm": 0.0025690484326332808, - "learning_rate": 0.0001999898563142504, - "loss": 46.0, - "step": 28164 - }, - { - "epoch": 4.535770361125649, - "grad_norm": 0.002705293009057641, - "learning_rate": 0.00019998985559366403, - "loss": 46.0, - "step": 28165 - }, - { - "epoch": 4.535931398204436, - "grad_norm": 0.0009811506606638432, - "learning_rate": 0.00019998985487305207, - "loss": 46.0, - "step": 28166 - }, - { - "epoch": 4.536092435283224, - "grad_norm": 0.0018293160246685147, - "learning_rate": 0.00019998985415241451, - "loss": 46.0, - "step": 28167 - }, - { - "epoch": 4.536253472362011, - "grad_norm": 0.003863105084747076, - "learning_rate": 0.00019998985343175137, - "loss": 46.0, - "step": 28168 - }, - { - "epoch": 4.536414509440799, - "grad_norm": 0.0053612226620316505, - "learning_rate": 0.00019998985271106262, - "loss": 46.0, - "step": 28169 - }, - { - "epoch": 4.536575546519586, - "grad_norm": 0.01269327849149704, - "learning_rate": 0.0001999898519903483, - "loss": 46.0, - "step": 28170 - }, - { - "epoch": 4.5367365835983735, - "grad_norm": 0.002893842291086912, - "learning_rate": 0.00019998985126960835, - "loss": 46.0, - "step": 28171 - }, - { - "epoch": 4.536897620677161, - "grad_norm": 0.0032377797178924084, - "learning_rate": 0.00019998985054884286, - "loss": 46.0, - "step": 28172 - }, - { - "epoch": 4.537058657755948, - "grad_norm": 0.0026010910514742136, - "learning_rate": 0.00019998984982805173, - "loss": 46.0, - "step": 28173 - }, - { - "epoch": 4.537219694834736, - "grad_norm": 0.005723660346120596, - "learning_rate": 0.00019998984910723504, - "loss": 46.0, - "step": 28174 - }, - { - "epoch": 4.537380731913523, - "grad_norm": 0.0017634622054174542, - "learning_rate": 0.0001999898483863927, - "loss": 46.0, - "step": 28175 - }, - { - "epoch": 4.537541768992311, - "grad_norm": 0.002203563926741481, - "learning_rate": 0.00019998984766552485, - "loss": 46.0, - "step": 28176 - }, - { - "epoch": 4.537702806071098, - "grad_norm": 0.005214695353060961, - "learning_rate": 0.00019998984694463134, - "loss": 46.0, - "step": 28177 - }, - { - "epoch": 4.537863843149886, - "grad_norm": 0.0027587739750742912, - "learning_rate": 0.00019998984622371228, - "loss": 46.0, - "step": 28178 - }, - { - "epoch": 4.538024880228672, - "grad_norm": 0.006451367400586605, - "learning_rate": 0.0001999898455027676, - "loss": 46.0, - "step": 28179 - }, - { - "epoch": 4.53818591730746, - "grad_norm": 0.00657782144844532, - "learning_rate": 0.00019998984478179736, - "loss": 46.0, - "step": 28180 - }, - { - "epoch": 4.538346954386247, - "grad_norm": 0.023852072656154633, - "learning_rate": 0.0001999898440608015, - "loss": 46.0, - "step": 28181 - }, - { - "epoch": 4.538507991465035, - "grad_norm": 0.002822832204401493, - "learning_rate": 0.0001999898433397801, - "loss": 46.0, - "step": 28182 - }, - { - "epoch": 4.538669028543822, - "grad_norm": 0.007189903873950243, - "learning_rate": 0.00019998984261873303, - "loss": 46.0, - "step": 28183 - }, - { - "epoch": 4.5388300656226095, - "grad_norm": 0.0042359549552202225, - "learning_rate": 0.00019998984189766042, - "loss": 46.0, - "step": 28184 - }, - { - "epoch": 4.538991102701397, - "grad_norm": 0.0004599812673404813, - "learning_rate": 0.0001999898411765622, - "loss": 46.0, - "step": 28185 - }, - { - "epoch": 4.539152139780184, - "grad_norm": 0.003450341522693634, - "learning_rate": 0.00019998984045543837, - "loss": 46.0, - "step": 28186 - }, - { - "epoch": 4.539313176858972, - "grad_norm": 0.0021386267617344856, - "learning_rate": 0.00019998983973428897, - "loss": 46.0, - "step": 28187 - }, - { - "epoch": 4.539474213937759, - "grad_norm": 0.007107846904546022, - "learning_rate": 0.000199989839013114, - "loss": 46.0, - "step": 28188 - }, - { - "epoch": 4.539635251016547, - "grad_norm": 0.0049768113531172276, - "learning_rate": 0.00019998983829191337, - "loss": 46.0, - "step": 28189 - }, - { - "epoch": 4.539796288095334, - "grad_norm": 0.004822993651032448, - "learning_rate": 0.00019998983757068718, - "loss": 46.0, - "step": 28190 - }, - { - "epoch": 4.539957325174122, - "grad_norm": 0.00548260472714901, - "learning_rate": 0.00019998983684943542, - "loss": 46.0, - "step": 28191 - }, - { - "epoch": 4.540118362252909, - "grad_norm": 0.008749093860387802, - "learning_rate": 0.00019998983612815808, - "loss": 46.0, - "step": 28192 - }, - { - "epoch": 4.5402793993316966, - "grad_norm": 0.003197180340066552, - "learning_rate": 0.0001999898354068551, - "loss": 46.0, - "step": 28193 - }, - { - "epoch": 4.540440436410483, - "grad_norm": 0.001760354032739997, - "learning_rate": 0.00019998983468552653, - "loss": 46.0, - "step": 28194 - }, - { - "epoch": 4.540601473489271, - "grad_norm": 0.002885441994294524, - "learning_rate": 0.0001999898339641724, - "loss": 46.0, - "step": 28195 - }, - { - "epoch": 4.540762510568058, - "grad_norm": 0.0035731117241084576, - "learning_rate": 0.0001999898332427927, - "loss": 46.0, - "step": 28196 - }, - { - "epoch": 4.5409235476468455, - "grad_norm": 0.0033794636838138103, - "learning_rate": 0.00019998983252138736, - "loss": 46.0, - "step": 28197 - }, - { - "epoch": 4.541084584725633, - "grad_norm": 0.0027073032688349485, - "learning_rate": 0.00019998983179995644, - "loss": 46.0, - "step": 28198 - }, - { - "epoch": 4.54124562180442, - "grad_norm": 0.002348700538277626, - "learning_rate": 0.0001999898310784999, - "loss": 46.0, - "step": 28199 - }, - { - "epoch": 4.541406658883208, - "grad_norm": 0.004205639939755201, - "learning_rate": 0.00019998983035701784, - "loss": 46.0, - "step": 28200 - }, - { - "epoch": 4.541567695961995, - "grad_norm": 0.004746836610138416, - "learning_rate": 0.00019998982963551014, - "loss": 46.0, - "step": 28201 - }, - { - "epoch": 4.541728733040783, - "grad_norm": 0.002171910833567381, - "learning_rate": 0.00019998982891397685, - "loss": 46.0, - "step": 28202 - }, - { - "epoch": 4.54188977011957, - "grad_norm": 0.005923191085457802, - "learning_rate": 0.00019998982819241796, - "loss": 46.0, - "step": 28203 - }, - { - "epoch": 4.542050807198358, - "grad_norm": 0.0036464245058596134, - "learning_rate": 0.00019998982747083347, - "loss": 46.0, - "step": 28204 - }, - { - "epoch": 4.542211844277145, - "grad_norm": 0.0044078282080590725, - "learning_rate": 0.00019998982674922342, - "loss": 46.0, - "step": 28205 - }, - { - "epoch": 4.5423728813559325, - "grad_norm": 0.003571434412151575, - "learning_rate": 0.00019998982602758777, - "loss": 46.0, - "step": 28206 - }, - { - "epoch": 4.542533918434719, - "grad_norm": 0.005233215633779764, - "learning_rate": 0.00019998982530592652, - "loss": 46.0, - "step": 28207 - }, - { - "epoch": 4.542694955513507, - "grad_norm": 0.003917828667908907, - "learning_rate": 0.00019998982458423967, - "loss": 46.0, - "step": 28208 - }, - { - "epoch": 4.542855992592294, - "grad_norm": 0.013388173654675484, - "learning_rate": 0.00019998982386252724, - "loss": 46.0, - "step": 28209 - }, - { - "epoch": 4.5430170296710815, - "grad_norm": 0.0016912242863327265, - "learning_rate": 0.0001999898231407892, - "loss": 46.0, - "step": 28210 - }, - { - "epoch": 4.543178066749869, - "grad_norm": 0.007283136248588562, - "learning_rate": 0.0001999898224190256, - "loss": 46.0, - "step": 28211 - }, - { - "epoch": 4.543339103828656, - "grad_norm": 0.0037675367202609777, - "learning_rate": 0.0001999898216972364, - "loss": 46.0, - "step": 28212 - }, - { - "epoch": 4.543500140907444, - "grad_norm": 0.005025290884077549, - "learning_rate": 0.0001999898209754216, - "loss": 46.0, - "step": 28213 - }, - { - "epoch": 4.543661177986231, - "grad_norm": 0.001676195184700191, - "learning_rate": 0.00019998982025358118, - "loss": 46.0, - "step": 28214 - }, - { - "epoch": 4.543822215065019, - "grad_norm": 0.005545006599277258, - "learning_rate": 0.0001999898195317152, - "loss": 46.0, - "step": 28215 - }, - { - "epoch": 4.543983252143806, - "grad_norm": 0.004245630465447903, - "learning_rate": 0.00019998981880982363, - "loss": 46.0, - "step": 28216 - }, - { - "epoch": 4.544144289222594, - "grad_norm": 0.004485202021896839, - "learning_rate": 0.00019998981808790644, - "loss": 46.0, - "step": 28217 - }, - { - "epoch": 4.544305326301381, - "grad_norm": 0.0124098751693964, - "learning_rate": 0.0001999898173659637, - "loss": 46.0, - "step": 28218 - }, - { - "epoch": 4.5444663633801685, - "grad_norm": 0.0062004439532756805, - "learning_rate": 0.00019998981664399535, - "loss": 46.0, - "step": 28219 - }, - { - "epoch": 4.544627400458956, - "grad_norm": 0.0012701530940830708, - "learning_rate": 0.00019998981592200138, - "loss": 46.0, - "step": 28220 - }, - { - "epoch": 4.544788437537743, - "grad_norm": 0.0059409174136817455, - "learning_rate": 0.00019998981519998184, - "loss": 46.0, - "step": 28221 - }, - { - "epoch": 4.54494947461653, - "grad_norm": 0.001416877843439579, - "learning_rate": 0.00019998981447793673, - "loss": 46.0, - "step": 28222 - }, - { - "epoch": 4.545110511695318, - "grad_norm": 0.007426689378917217, - "learning_rate": 0.00019998981375586597, - "loss": 46.0, - "step": 28223 - }, - { - "epoch": 4.545271548774105, - "grad_norm": 0.003515574848279357, - "learning_rate": 0.00019998981303376968, - "loss": 46.0, - "step": 28224 - }, - { - "epoch": 4.545432585852892, - "grad_norm": 0.0019585704430937767, - "learning_rate": 0.00019998981231164774, - "loss": 46.0, - "step": 28225 - }, - { - "epoch": 4.54559362293168, - "grad_norm": 0.006768498569726944, - "learning_rate": 0.00019998981158950025, - "loss": 46.0, - "step": 28226 - }, - { - "epoch": 4.545754660010467, - "grad_norm": 0.0034896666184067726, - "learning_rate": 0.00019998981086732717, - "loss": 46.0, - "step": 28227 - }, - { - "epoch": 4.545915697089255, - "grad_norm": 0.006893771700561047, - "learning_rate": 0.00019998981014512847, - "loss": 46.0, - "step": 28228 - }, - { - "epoch": 4.546076734168042, - "grad_norm": 0.014092490077018738, - "learning_rate": 0.0001999898094229042, - "loss": 46.0, - "step": 28229 - }, - { - "epoch": 4.54623777124683, - "grad_norm": 0.0021846434101462364, - "learning_rate": 0.00019998980870065432, - "loss": 46.0, - "step": 28230 - }, - { - "epoch": 4.546398808325617, - "grad_norm": 0.002295448910444975, - "learning_rate": 0.00019998980797837886, - "loss": 46.0, - "step": 28231 - }, - { - "epoch": 4.5465598454044045, - "grad_norm": 0.012366644106805325, - "learning_rate": 0.0001999898072560778, - "loss": 46.0, - "step": 28232 - }, - { - "epoch": 4.546720882483192, - "grad_norm": 0.0015726208221167326, - "learning_rate": 0.00019998980653375113, - "loss": 46.0, - "step": 28233 - }, - { - "epoch": 4.546881919561979, - "grad_norm": 0.009916739538311958, - "learning_rate": 0.00019998980581139892, - "loss": 46.0, - "step": 28234 - }, - { - "epoch": 4.547042956640767, - "grad_norm": 0.0029857633635401726, - "learning_rate": 0.00019998980508902108, - "loss": 46.0, - "step": 28235 - }, - { - "epoch": 4.547203993719554, - "grad_norm": 0.004519087262451649, - "learning_rate": 0.00019998980436661764, - "loss": 46.0, - "step": 28236 - }, - { - "epoch": 4.547365030798341, - "grad_norm": 0.002939286408945918, - "learning_rate": 0.00019998980364418866, - "loss": 46.0, - "step": 28237 - }, - { - "epoch": 4.547526067877129, - "grad_norm": 0.0009906847262755036, - "learning_rate": 0.000199989802921734, - "loss": 46.0, - "step": 28238 - }, - { - "epoch": 4.547687104955916, - "grad_norm": 0.004644661210477352, - "learning_rate": 0.00019998980219925383, - "loss": 46.0, - "step": 28239 - }, - { - "epoch": 4.547848142034703, - "grad_norm": 0.006151839159429073, - "learning_rate": 0.000199989801476748, - "loss": 46.0, - "step": 28240 - }, - { - "epoch": 4.548009179113491, - "grad_norm": 0.010135608725249767, - "learning_rate": 0.00019998980075421665, - "loss": 46.0, - "step": 28241 - }, - { - "epoch": 4.548170216192278, - "grad_norm": 0.002157302340492606, - "learning_rate": 0.00019998980003165965, - "loss": 46.0, - "step": 28242 - }, - { - "epoch": 4.548331253271066, - "grad_norm": 0.004398688208311796, - "learning_rate": 0.0001999897993090771, - "loss": 46.0, - "step": 28243 - }, - { - "epoch": 4.548492290349853, - "grad_norm": 0.006782538257539272, - "learning_rate": 0.00019998979858646892, - "loss": 46.0, - "step": 28244 - }, - { - "epoch": 4.5486533274286405, - "grad_norm": 0.006548644043505192, - "learning_rate": 0.00019998979786383517, - "loss": 46.0, - "step": 28245 - }, - { - "epoch": 4.548814364507428, - "grad_norm": 0.0014312970452010632, - "learning_rate": 0.00019998979714117582, - "loss": 46.0, - "step": 28246 - }, - { - "epoch": 4.548975401586215, - "grad_norm": 0.001444373861886561, - "learning_rate": 0.0001999897964184909, - "loss": 46.0, - "step": 28247 - }, - { - "epoch": 4.549136438665003, - "grad_norm": 0.002922563813626766, - "learning_rate": 0.00019998979569578037, - "loss": 46.0, - "step": 28248 - }, - { - "epoch": 4.54929747574379, - "grad_norm": 0.0067072720266878605, - "learning_rate": 0.0001999897949730442, - "loss": 46.0, - "step": 28249 - }, - { - "epoch": 4.549458512822578, - "grad_norm": 0.02529129944741726, - "learning_rate": 0.0001999897942502825, - "loss": 46.0, - "step": 28250 - }, - { - "epoch": 4.549619549901365, - "grad_norm": 0.0025627086870372295, - "learning_rate": 0.0001999897935274952, - "loss": 46.0, - "step": 28251 - }, - { - "epoch": 4.549780586980152, - "grad_norm": 0.00597706763073802, - "learning_rate": 0.0001999897928046823, - "loss": 46.0, - "step": 28252 - }, - { - "epoch": 4.549941624058939, - "grad_norm": 0.0023587120231240988, - "learning_rate": 0.0001999897920818438, - "loss": 46.0, - "step": 28253 - }, - { - "epoch": 4.550102661137727, - "grad_norm": 0.0034493152052164078, - "learning_rate": 0.00019998979135897973, - "loss": 46.0, - "step": 28254 - }, - { - "epoch": 4.550263698216514, - "grad_norm": 0.006048236973583698, - "learning_rate": 0.00019998979063609004, - "loss": 46.0, - "step": 28255 - }, - { - "epoch": 4.550424735295302, - "grad_norm": 0.005193533841520548, - "learning_rate": 0.00019998978991317477, - "loss": 46.0, - "step": 28256 - }, - { - "epoch": 4.550585772374089, - "grad_norm": 0.011304478161036968, - "learning_rate": 0.00019998978919023391, - "loss": 46.0, - "step": 28257 - }, - { - "epoch": 4.5507468094528765, - "grad_norm": 0.009198413230478764, - "learning_rate": 0.00019998978846726747, - "loss": 46.0, - "step": 28258 - }, - { - "epoch": 4.550907846531664, - "grad_norm": 0.005907850340008736, - "learning_rate": 0.0001999897877442754, - "loss": 46.0, - "step": 28259 - }, - { - "epoch": 4.551068883610451, - "grad_norm": 0.005806928034871817, - "learning_rate": 0.0001999897870212578, - "loss": 46.0, - "step": 28260 - }, - { - "epoch": 4.551229920689239, - "grad_norm": 0.007388319820165634, - "learning_rate": 0.00019998978629821455, - "loss": 46.0, - "step": 28261 - }, - { - "epoch": 4.551390957768026, - "grad_norm": 0.002225305885076523, - "learning_rate": 0.0001999897855751457, - "loss": 46.0, - "step": 28262 - }, - { - "epoch": 4.551551994846814, - "grad_norm": 0.0031608748249709606, - "learning_rate": 0.00019998978485205132, - "loss": 46.0, - "step": 28263 - }, - { - "epoch": 4.551713031925601, - "grad_norm": 0.006002143025398254, - "learning_rate": 0.0001999897841289313, - "loss": 46.0, - "step": 28264 - }, - { - "epoch": 4.551874069004389, - "grad_norm": 0.0024387154262512922, - "learning_rate": 0.0001999897834057857, - "loss": 46.0, - "step": 28265 - }, - { - "epoch": 4.552035106083176, - "grad_norm": 0.008900858461856842, - "learning_rate": 0.0001999897826826145, - "loss": 46.0, - "step": 28266 - }, - { - "epoch": 4.552196143161963, - "grad_norm": 0.007654973771423101, - "learning_rate": 0.00019998978195941773, - "loss": 46.0, - "step": 28267 - }, - { - "epoch": 4.55235718024075, - "grad_norm": 0.0014580423012375832, - "learning_rate": 0.00019998978123619534, - "loss": 46.0, - "step": 28268 - }, - { - "epoch": 4.5525182173195375, - "grad_norm": 0.001803286257199943, - "learning_rate": 0.00019998978051294738, - "loss": 46.0, - "step": 28269 - }, - { - "epoch": 4.552679254398325, - "grad_norm": 0.0034424234181642532, - "learning_rate": 0.0001999897797896738, - "loss": 46.0, - "step": 28270 - }, - { - "epoch": 4.5528402914771124, - "grad_norm": 0.0038513776380568743, - "learning_rate": 0.00019998977906637467, - "loss": 46.0, - "step": 28271 - }, - { - "epoch": 4.5530013285559, - "grad_norm": 0.005881298333406448, - "learning_rate": 0.0001999897783430499, - "loss": 46.0, - "step": 28272 - }, - { - "epoch": 4.553162365634687, - "grad_norm": 0.002593496348708868, - "learning_rate": 0.00019998977761969956, - "loss": 46.0, - "step": 28273 - }, - { - "epoch": 4.553323402713475, - "grad_norm": 0.01781601831316948, - "learning_rate": 0.00019998977689632367, - "loss": 46.0, - "step": 28274 - }, - { - "epoch": 4.553484439792262, - "grad_norm": 0.0012850770726799965, - "learning_rate": 0.0001999897761729221, - "loss": 46.0, - "step": 28275 - }, - { - "epoch": 4.55364547687105, - "grad_norm": 0.005866596475243568, - "learning_rate": 0.000199989775449495, - "loss": 46.0, - "step": 28276 - }, - { - "epoch": 4.553806513949837, - "grad_norm": 0.006958988960832357, - "learning_rate": 0.0001999897747260423, - "loss": 46.0, - "step": 28277 - }, - { - "epoch": 4.553967551028625, - "grad_norm": 0.00456677470356226, - "learning_rate": 0.000199989774002564, - "loss": 46.0, - "step": 28278 - }, - { - "epoch": 4.554128588107412, - "grad_norm": 0.014959155581891537, - "learning_rate": 0.0001999897732790601, - "loss": 46.0, - "step": 28279 - }, - { - "epoch": 4.5542896251861995, - "grad_norm": 0.002239905996248126, - "learning_rate": 0.00019998977255553061, - "loss": 46.0, - "step": 28280 - }, - { - "epoch": 4.554450662264987, - "grad_norm": 0.011093229055404663, - "learning_rate": 0.00019998977183197556, - "loss": 46.0, - "step": 28281 - }, - { - "epoch": 4.5546116993437735, - "grad_norm": 0.006913878954946995, - "learning_rate": 0.00019998977110839488, - "loss": 46.0, - "step": 28282 - }, - { - "epoch": 4.554772736422561, - "grad_norm": 0.004666912369430065, - "learning_rate": 0.00019998977038478862, - "loss": 46.0, - "step": 28283 - }, - { - "epoch": 4.554933773501348, - "grad_norm": 0.0013718479312956333, - "learning_rate": 0.00019998976966115677, - "loss": 46.0, - "step": 28284 - }, - { - "epoch": 4.555094810580136, - "grad_norm": 0.0023330957628786564, - "learning_rate": 0.0001999897689374993, - "loss": 46.0, - "step": 28285 - }, - { - "epoch": 4.555255847658923, - "grad_norm": 0.017625343054533005, - "learning_rate": 0.00019998976821381626, - "loss": 46.0, - "step": 28286 - }, - { - "epoch": 4.555416884737711, - "grad_norm": 0.009019560180604458, - "learning_rate": 0.00019998976749010765, - "loss": 46.0, - "step": 28287 - }, - { - "epoch": 4.555577921816498, - "grad_norm": 0.007749797776341438, - "learning_rate": 0.00019998976676637343, - "loss": 46.0, - "step": 28288 - }, - { - "epoch": 4.555738958895286, - "grad_norm": 0.0019633814226835966, - "learning_rate": 0.00019998976604261362, - "loss": 46.0, - "step": 28289 - }, - { - "epoch": 4.555899995974073, - "grad_norm": 0.017374303191900253, - "learning_rate": 0.0001999897653188282, - "loss": 46.0, - "step": 28290 - }, - { - "epoch": 4.556061033052861, - "grad_norm": 0.002849637996405363, - "learning_rate": 0.00019998976459501718, - "loss": 46.0, - "step": 28291 - }, - { - "epoch": 4.556222070131648, - "grad_norm": 0.000549272692296654, - "learning_rate": 0.0001999897638711806, - "loss": 46.0, - "step": 28292 - }, - { - "epoch": 4.5563831072104355, - "grad_norm": 0.00421734107658267, - "learning_rate": 0.00019998976314731842, - "loss": 46.0, - "step": 28293 - }, - { - "epoch": 4.556544144289223, - "grad_norm": 0.003836626885458827, - "learning_rate": 0.00019998976242343065, - "loss": 46.0, - "step": 28294 - }, - { - "epoch": 4.5567051813680095, - "grad_norm": 0.002068571513518691, - "learning_rate": 0.0001999897616995173, - "loss": 46.0, - "step": 28295 - }, - { - "epoch": 4.556866218446798, - "grad_norm": 0.00625478383153677, - "learning_rate": 0.00019998976097557834, - "loss": 46.0, - "step": 28296 - }, - { - "epoch": 4.557027255525584, - "grad_norm": 0.006283685099333525, - "learning_rate": 0.00019998976025161378, - "loss": 46.0, - "step": 28297 - }, - { - "epoch": 4.557188292604372, - "grad_norm": 0.009408128447830677, - "learning_rate": 0.00019998975952762363, - "loss": 46.0, - "step": 28298 - }, - { - "epoch": 4.557349329683159, - "grad_norm": 0.0022420056629925966, - "learning_rate": 0.0001999897588036079, - "loss": 46.0, - "step": 28299 - }, - { - "epoch": 4.557510366761947, - "grad_norm": 0.0017376190517097712, - "learning_rate": 0.00019998975807956657, - "loss": 46.0, - "step": 28300 - }, - { - "epoch": 4.557671403840734, - "grad_norm": 0.006508963648229837, - "learning_rate": 0.00019998975735549963, - "loss": 46.0, - "step": 28301 - }, - { - "epoch": 4.557832440919522, - "grad_norm": 0.002009720541536808, - "learning_rate": 0.00019998975663140713, - "loss": 46.0, - "step": 28302 - }, - { - "epoch": 4.557993477998309, - "grad_norm": 0.008927403017878532, - "learning_rate": 0.00019998975590728902, - "loss": 46.0, - "step": 28303 - }, - { - "epoch": 4.558154515077097, - "grad_norm": 0.009754546917974949, - "learning_rate": 0.00019998975518314535, - "loss": 46.0, - "step": 28304 - }, - { - "epoch": 4.558315552155884, - "grad_norm": 0.0027473876252770424, - "learning_rate": 0.00019998975445897603, - "loss": 46.0, - "step": 28305 - }, - { - "epoch": 4.5584765892346715, - "grad_norm": 0.0027844624128192663, - "learning_rate": 0.00019998975373478116, - "loss": 46.0, - "step": 28306 - }, - { - "epoch": 4.558637626313459, - "grad_norm": 0.009226133115589619, - "learning_rate": 0.0001999897530105607, - "loss": 46.0, - "step": 28307 - }, - { - "epoch": 4.558798663392246, - "grad_norm": 0.0023362061474472284, - "learning_rate": 0.0001999897522863146, - "loss": 46.0, - "step": 28308 - }, - { - "epoch": 4.558959700471034, - "grad_norm": 0.01723659038543701, - "learning_rate": 0.00019998975156204293, - "loss": 46.0, - "step": 28309 - }, - { - "epoch": 4.55912073754982, - "grad_norm": 0.00814008992165327, - "learning_rate": 0.00019998975083774568, - "loss": 46.0, - "step": 28310 - }, - { - "epoch": 4.559281774628609, - "grad_norm": 0.0026714010164141655, - "learning_rate": 0.00019998975011342287, - "loss": 46.0, - "step": 28311 - }, - { - "epoch": 4.559442811707395, - "grad_norm": 0.003975267522037029, - "learning_rate": 0.00019998974938907442, - "loss": 46.0, - "step": 28312 - }, - { - "epoch": 4.559603848786183, - "grad_norm": 0.013119656592607498, - "learning_rate": 0.00019998974866470038, - "loss": 46.0, - "step": 28313 - }, - { - "epoch": 4.55976488586497, - "grad_norm": 0.0019531873986124992, - "learning_rate": 0.00019998974794030078, - "loss": 46.0, - "step": 28314 - }, - { - "epoch": 4.559925922943758, - "grad_norm": 0.0026776501908898354, - "learning_rate": 0.00019998974721587554, - "loss": 46.0, - "step": 28315 - }, - { - "epoch": 4.560086960022545, - "grad_norm": 0.0036813144106417894, - "learning_rate": 0.00019998974649142474, - "loss": 46.0, - "step": 28316 - }, - { - "epoch": 4.5602479971013326, - "grad_norm": 0.0014915541978552938, - "learning_rate": 0.00019998974576694836, - "loss": 46.0, - "step": 28317 - }, - { - "epoch": 4.56040903418012, - "grad_norm": 0.006268836557865143, - "learning_rate": 0.00019998974504244636, - "loss": 46.0, - "step": 28318 - }, - { - "epoch": 4.5605700712589075, - "grad_norm": 0.007194567006081343, - "learning_rate": 0.0001999897443179188, - "loss": 46.0, - "step": 28319 - }, - { - "epoch": 4.560731108337695, - "grad_norm": 0.0015810198383405805, - "learning_rate": 0.00019998974359336562, - "loss": 46.0, - "step": 28320 - }, - { - "epoch": 4.560892145416482, - "grad_norm": 0.006441446952521801, - "learning_rate": 0.00019998974286878686, - "loss": 46.0, - "step": 28321 - }, - { - "epoch": 4.56105318249527, - "grad_norm": 0.00707559147849679, - "learning_rate": 0.00019998974214418248, - "loss": 46.0, - "step": 28322 - }, - { - "epoch": 4.561214219574057, - "grad_norm": 0.006631402298808098, - "learning_rate": 0.00019998974141955254, - "loss": 46.0, - "step": 28323 - }, - { - "epoch": 4.561375256652845, - "grad_norm": 0.0061634317971765995, - "learning_rate": 0.000199989740694897, - "loss": 46.0, - "step": 28324 - }, - { - "epoch": 4.561536293731631, - "grad_norm": 0.008553068153560162, - "learning_rate": 0.00019998973997021585, - "loss": 46.0, - "step": 28325 - }, - { - "epoch": 4.561697330810419, - "grad_norm": 0.005489504896104336, - "learning_rate": 0.00019998973924550915, - "loss": 46.0, - "step": 28326 - }, - { - "epoch": 4.561858367889206, - "grad_norm": 0.005943399388343096, - "learning_rate": 0.0001999897385207768, - "loss": 46.0, - "step": 28327 - }, - { - "epoch": 4.562019404967994, - "grad_norm": 0.005850377958267927, - "learning_rate": 0.00019998973779601889, - "loss": 46.0, - "step": 28328 - }, - { - "epoch": 4.562180442046781, - "grad_norm": 0.0027680303901433945, - "learning_rate": 0.0001999897370712354, - "loss": 46.0, - "step": 28329 - }, - { - "epoch": 4.5623414791255685, - "grad_norm": 0.0008222645265050232, - "learning_rate": 0.0001999897363464263, - "loss": 46.0, - "step": 28330 - }, - { - "epoch": 4.562502516204356, - "grad_norm": 0.0037766112945973873, - "learning_rate": 0.00019998973562159163, - "loss": 46.0, - "step": 28331 - }, - { - "epoch": 4.562663553283143, - "grad_norm": 0.004440687131136656, - "learning_rate": 0.0001999897348967313, - "loss": 46.0, - "step": 28332 - }, - { - "epoch": 4.562824590361931, - "grad_norm": 0.009641860611736774, - "learning_rate": 0.00019998973417184547, - "loss": 46.0, - "step": 28333 - }, - { - "epoch": 4.562985627440718, - "grad_norm": 0.005281203892081976, - "learning_rate": 0.000199989733446934, - "loss": 46.0, - "step": 28334 - }, - { - "epoch": 4.563146664519506, - "grad_norm": 0.0026960819959640503, - "learning_rate": 0.00019998973272199693, - "loss": 46.0, - "step": 28335 - }, - { - "epoch": 4.563307701598293, - "grad_norm": 0.0054990132339298725, - "learning_rate": 0.00019998973199703427, - "loss": 46.0, - "step": 28336 - }, - { - "epoch": 4.563468738677081, - "grad_norm": 0.0021799386013299227, - "learning_rate": 0.00019998973127204606, - "loss": 46.0, - "step": 28337 - }, - { - "epoch": 4.563629775755868, - "grad_norm": 0.004018097184598446, - "learning_rate": 0.0001999897305470322, - "loss": 46.0, - "step": 28338 - }, - { - "epoch": 4.563790812834656, - "grad_norm": 0.0018440329004079103, - "learning_rate": 0.00019998972982199277, - "loss": 46.0, - "step": 28339 - }, - { - "epoch": 4.563951849913442, - "grad_norm": 0.0031687328591942787, - "learning_rate": 0.00019998972909692774, - "loss": 46.0, - "step": 28340 - }, - { - "epoch": 4.56411288699223, - "grad_norm": 0.0034760283306241035, - "learning_rate": 0.00019998972837183713, - "loss": 46.0, - "step": 28341 - }, - { - "epoch": 4.564273924071017, - "grad_norm": 0.009181971661746502, - "learning_rate": 0.00019998972764672095, - "loss": 46.0, - "step": 28342 - }, - { - "epoch": 4.5644349611498045, - "grad_norm": 0.006531584542244673, - "learning_rate": 0.00019998972692157914, - "loss": 46.0, - "step": 28343 - }, - { - "epoch": 4.564595998228592, - "grad_norm": 0.005438131280243397, - "learning_rate": 0.00019998972619641176, - "loss": 46.0, - "step": 28344 - }, - { - "epoch": 4.564757035307379, - "grad_norm": 0.013236350379884243, - "learning_rate": 0.00019998972547121877, - "loss": 46.0, - "step": 28345 - }, - { - "epoch": 4.564918072386167, - "grad_norm": 0.0014279453316703439, - "learning_rate": 0.00019998972474600022, - "loss": 46.0, - "step": 28346 - }, - { - "epoch": 4.565079109464954, - "grad_norm": 0.003309838240966201, - "learning_rate": 0.00019998972402075605, - "loss": 46.0, - "step": 28347 - }, - { - "epoch": 4.565240146543742, - "grad_norm": 0.0037735984660685062, - "learning_rate": 0.00019998972329548627, - "loss": 46.0, - "step": 28348 - }, - { - "epoch": 4.565401183622529, - "grad_norm": 0.005396547727286816, - "learning_rate": 0.00019998972257019096, - "loss": 46.0, - "step": 28349 - }, - { - "epoch": 4.565562220701317, - "grad_norm": 0.012511735782027245, - "learning_rate": 0.00019998972184486998, - "loss": 46.0, - "step": 28350 - }, - { - "epoch": 4.565723257780104, - "grad_norm": 0.0031030962709337473, - "learning_rate": 0.00019998972111952344, - "loss": 46.0, - "step": 28351 - }, - { - "epoch": 4.565884294858892, - "grad_norm": 0.005439350847154856, - "learning_rate": 0.00019998972039415134, - "loss": 46.0, - "step": 28352 - }, - { - "epoch": 4.566045331937679, - "grad_norm": 0.0023892424069344997, - "learning_rate": 0.00019998971966875363, - "loss": 46.0, - "step": 28353 - }, - { - "epoch": 4.5662063690164665, - "grad_norm": 0.002230866579338908, - "learning_rate": 0.00019998971894333032, - "loss": 46.0, - "step": 28354 - }, - { - "epoch": 4.566367406095253, - "grad_norm": 0.00886800978332758, - "learning_rate": 0.0001999897182178814, - "loss": 46.0, - "step": 28355 - }, - { - "epoch": 4.5665284431740405, - "grad_norm": 0.010969109833240509, - "learning_rate": 0.0001999897174924069, - "loss": 46.0, - "step": 28356 - }, - { - "epoch": 4.566689480252828, - "grad_norm": 0.0030753680039197206, - "learning_rate": 0.00019998971676690684, - "loss": 46.0, - "step": 28357 - }, - { - "epoch": 4.566850517331615, - "grad_norm": 0.005177609156817198, - "learning_rate": 0.00019998971604138113, - "loss": 46.0, - "step": 28358 - }, - { - "epoch": 4.567011554410403, - "grad_norm": 0.003990458324551582, - "learning_rate": 0.0001999897153158299, - "loss": 46.0, - "step": 28359 - }, - { - "epoch": 4.56717259148919, - "grad_norm": 0.005395010579377413, - "learning_rate": 0.000199989714590253, - "loss": 46.0, - "step": 28360 - }, - { - "epoch": 4.567333628567978, - "grad_norm": 0.006897305138409138, - "learning_rate": 0.00019998971386465055, - "loss": 46.0, - "step": 28361 - }, - { - "epoch": 4.567494665646765, - "grad_norm": 0.004040502477437258, - "learning_rate": 0.0001999897131390225, - "loss": 46.0, - "step": 28362 - }, - { - "epoch": 4.567655702725553, - "grad_norm": 0.003765776753425598, - "learning_rate": 0.00019998971241336886, - "loss": 46.0, - "step": 28363 - }, - { - "epoch": 4.56781673980434, - "grad_norm": 0.0033694072626531124, - "learning_rate": 0.00019998971168768966, - "loss": 46.0, - "step": 28364 - }, - { - "epoch": 4.567977776883128, - "grad_norm": 0.005281837657094002, - "learning_rate": 0.0001999897109619848, - "loss": 46.0, - "step": 28365 - }, - { - "epoch": 4.568138813961915, - "grad_norm": 0.003135291626676917, - "learning_rate": 0.00019998971023625438, - "loss": 46.0, - "step": 28366 - }, - { - "epoch": 4.5682998510407025, - "grad_norm": 0.001260228455066681, - "learning_rate": 0.00019998970951049837, - "loss": 46.0, - "step": 28367 - }, - { - "epoch": 4.568460888119489, - "grad_norm": 0.004873974714428186, - "learning_rate": 0.00019998970878471676, - "loss": 46.0, - "step": 28368 - }, - { - "epoch": 4.568621925198277, - "grad_norm": 0.0039630369283258915, - "learning_rate": 0.00019998970805890957, - "loss": 46.0, - "step": 28369 - }, - { - "epoch": 4.568782962277064, - "grad_norm": 0.005855859722942114, - "learning_rate": 0.0001999897073330768, - "loss": 46.0, - "step": 28370 - }, - { - "epoch": 4.568943999355851, - "grad_norm": 0.0012839700793847442, - "learning_rate": 0.00019998970660721843, - "loss": 46.0, - "step": 28371 - }, - { - "epoch": 4.569105036434639, - "grad_norm": 0.0016850670799613, - "learning_rate": 0.00019998970588133445, - "loss": 46.0, - "step": 28372 - }, - { - "epoch": 4.569266073513426, - "grad_norm": 0.0019511821446940303, - "learning_rate": 0.00019998970515542488, - "loss": 46.0, - "step": 28373 - }, - { - "epoch": 4.569427110592214, - "grad_norm": 0.003716961480677128, - "learning_rate": 0.00019998970442948975, - "loss": 46.0, - "step": 28374 - }, - { - "epoch": 4.569588147671001, - "grad_norm": 0.005065771751105785, - "learning_rate": 0.000199989703703529, - "loss": 46.0, - "step": 28375 - }, - { - "epoch": 4.569749184749789, - "grad_norm": 0.002809886122122407, - "learning_rate": 0.00019998970297754266, - "loss": 46.0, - "step": 28376 - }, - { - "epoch": 4.569910221828576, - "grad_norm": 0.011783783324062824, - "learning_rate": 0.00019998970225153074, - "loss": 46.0, - "step": 28377 - }, - { - "epoch": 4.5700712589073635, - "grad_norm": 0.006862163078039885, - "learning_rate": 0.00019998970152549319, - "loss": 46.0, - "step": 28378 - }, - { - "epoch": 4.570232295986151, - "grad_norm": 0.0027296182233840227, - "learning_rate": 0.00019998970079943007, - "loss": 46.0, - "step": 28379 - }, - { - "epoch": 4.5703933330649384, - "grad_norm": 0.004065383691340685, - "learning_rate": 0.00019998970007334136, - "loss": 46.0, - "step": 28380 - }, - { - "epoch": 4.570554370143726, - "grad_norm": 0.008041276596486568, - "learning_rate": 0.00019998969934722707, - "loss": 46.0, - "step": 28381 - }, - { - "epoch": 4.570715407222513, - "grad_norm": 0.005012942012399435, - "learning_rate": 0.0001999896986210872, - "loss": 46.0, - "step": 28382 - }, - { - "epoch": 4.5708764443013, - "grad_norm": 0.004440854303538799, - "learning_rate": 0.0001999896978949217, - "loss": 46.0, - "step": 28383 - }, - { - "epoch": 4.571037481380088, - "grad_norm": 0.0037269804161041975, - "learning_rate": 0.00019998969716873062, - "loss": 46.0, - "step": 28384 - }, - { - "epoch": 4.571198518458875, - "grad_norm": 0.0008085716981440783, - "learning_rate": 0.00019998969644251396, - "loss": 46.0, - "step": 28385 - }, - { - "epoch": 4.571359555537662, - "grad_norm": 0.004750309977680445, - "learning_rate": 0.00019998969571627167, - "loss": 46.0, - "step": 28386 - }, - { - "epoch": 4.57152059261645, - "grad_norm": 0.0015329812886193395, - "learning_rate": 0.00019998969499000383, - "loss": 46.0, - "step": 28387 - }, - { - "epoch": 4.571681629695237, - "grad_norm": 0.003719376865774393, - "learning_rate": 0.00019998969426371038, - "loss": 46.0, - "step": 28388 - }, - { - "epoch": 4.571842666774025, - "grad_norm": 0.009897838346660137, - "learning_rate": 0.00019998969353739133, - "loss": 46.0, - "step": 28389 - }, - { - "epoch": 4.572003703852812, - "grad_norm": 0.0012749492889270186, - "learning_rate": 0.0001999896928110467, - "loss": 46.0, - "step": 28390 - }, - { - "epoch": 4.5721647409315995, - "grad_norm": 0.005704416893422604, - "learning_rate": 0.0001999896920846765, - "loss": 46.0, - "step": 28391 - }, - { - "epoch": 4.572325778010387, - "grad_norm": 0.0023975849617272615, - "learning_rate": 0.00019998969135828068, - "loss": 46.0, - "step": 28392 - }, - { - "epoch": 4.572486815089174, - "grad_norm": 0.006974838208407164, - "learning_rate": 0.0001999896906318593, - "loss": 46.0, - "step": 28393 - }, - { - "epoch": 4.572647852167962, - "grad_norm": 0.004614113364368677, - "learning_rate": 0.00019998968990541226, - "loss": 46.0, - "step": 28394 - }, - { - "epoch": 4.572808889246749, - "grad_norm": 0.00568041205406189, - "learning_rate": 0.00019998968917893966, - "loss": 46.0, - "step": 28395 - }, - { - "epoch": 4.572969926325537, - "grad_norm": 0.008333171717822552, - "learning_rate": 0.00019998968845244148, - "loss": 46.0, - "step": 28396 - }, - { - "epoch": 4.573130963404324, - "grad_norm": 0.0035618366673588753, - "learning_rate": 0.00019998968772591772, - "loss": 46.0, - "step": 28397 - }, - { - "epoch": 4.573292000483111, - "grad_norm": 0.004362665116786957, - "learning_rate": 0.00019998968699936836, - "loss": 46.0, - "step": 28398 - }, - { - "epoch": 4.573453037561898, - "grad_norm": 0.00687659066170454, - "learning_rate": 0.00019998968627279337, - "loss": 46.0, - "step": 28399 - }, - { - "epoch": 4.573614074640686, - "grad_norm": 0.0024620285257697105, - "learning_rate": 0.00019998968554619284, - "loss": 46.0, - "step": 28400 - }, - { - "epoch": 4.573775111719473, - "grad_norm": 0.0033602954354137182, - "learning_rate": 0.00019998968481956667, - "loss": 46.0, - "step": 28401 - }, - { - "epoch": 4.573936148798261, - "grad_norm": 0.020862076431512833, - "learning_rate": 0.00019998968409291496, - "loss": 46.0, - "step": 28402 - }, - { - "epoch": 4.574097185877048, - "grad_norm": 0.005975116044282913, - "learning_rate": 0.00019998968336623762, - "loss": 46.0, - "step": 28403 - }, - { - "epoch": 4.5742582229558355, - "grad_norm": 0.0035918280482292175, - "learning_rate": 0.00019998968263953471, - "loss": 46.0, - "step": 28404 - }, - { - "epoch": 4.574419260034623, - "grad_norm": 0.011295353062450886, - "learning_rate": 0.00019998968191280617, - "loss": 46.0, - "step": 28405 - }, - { - "epoch": 4.57458029711341, - "grad_norm": 0.00641406886279583, - "learning_rate": 0.0001999896811860521, - "loss": 46.0, - "step": 28406 - }, - { - "epoch": 4.574741334192198, - "grad_norm": 0.003867147024720907, - "learning_rate": 0.00019998968045927237, - "loss": 46.0, - "step": 28407 - }, - { - "epoch": 4.574902371270985, - "grad_norm": 0.006757321301847696, - "learning_rate": 0.0001999896797324671, - "loss": 46.0, - "step": 28408 - }, - { - "epoch": 4.575063408349773, - "grad_norm": 0.004591462668031454, - "learning_rate": 0.00019998967900563622, - "loss": 46.0, - "step": 28409 - }, - { - "epoch": 4.57522444542856, - "grad_norm": 0.0016657090745866299, - "learning_rate": 0.00019998967827877974, - "loss": 46.0, - "step": 28410 - }, - { - "epoch": 4.575385482507348, - "grad_norm": 0.004417312331497669, - "learning_rate": 0.00019998967755189767, - "loss": 46.0, - "step": 28411 - }, - { - "epoch": 4.575546519586135, - "grad_norm": 0.01496258843690157, - "learning_rate": 0.00019998967682499, - "loss": 46.0, - "step": 28412 - }, - { - "epoch": 4.575707556664922, - "grad_norm": 0.004842329770326614, - "learning_rate": 0.00019998967609805677, - "loss": 46.0, - "step": 28413 - }, - { - "epoch": 4.575868593743709, - "grad_norm": 0.01410147175192833, - "learning_rate": 0.0001999896753710979, - "loss": 46.0, - "step": 28414 - }, - { - "epoch": 4.576029630822497, - "grad_norm": 0.001803920604288578, - "learning_rate": 0.0001999896746441135, - "loss": 46.0, - "step": 28415 - }, - { - "epoch": 4.576190667901284, - "grad_norm": 0.002926790388301015, - "learning_rate": 0.00019998967391710344, - "loss": 46.0, - "step": 28416 - }, - { - "epoch": 4.5763517049800715, - "grad_norm": 0.002284947084262967, - "learning_rate": 0.00019998967319006782, - "loss": 46.0, - "step": 28417 - }, - { - "epoch": 4.576512742058859, - "grad_norm": 0.002812547143548727, - "learning_rate": 0.00019998967246300658, - "loss": 46.0, - "step": 28418 - }, - { - "epoch": 4.576673779137646, - "grad_norm": 0.002657968085259199, - "learning_rate": 0.00019998967173591982, - "loss": 46.0, - "step": 28419 - }, - { - "epoch": 4.576834816216434, - "grad_norm": 0.0068285283632576466, - "learning_rate": 0.0001999896710088074, - "loss": 46.0, - "step": 28420 - }, - { - "epoch": 4.576995853295221, - "grad_norm": 0.007613273337483406, - "learning_rate": 0.00019998967028166941, - "loss": 46.0, - "step": 28421 - }, - { - "epoch": 4.577156890374009, - "grad_norm": 0.005652642343193293, - "learning_rate": 0.00019998966955450583, - "loss": 46.0, - "step": 28422 - }, - { - "epoch": 4.577317927452796, - "grad_norm": 0.0014848349383100867, - "learning_rate": 0.00019998966882731666, - "loss": 46.0, - "step": 28423 - }, - { - "epoch": 4.577478964531584, - "grad_norm": 0.0011207249481230974, - "learning_rate": 0.00019998966810010188, - "loss": 46.0, - "step": 28424 - }, - { - "epoch": 4.577640001610371, - "grad_norm": 0.019937967881560326, - "learning_rate": 0.00019998966737286154, - "loss": 46.0, - "step": 28425 - }, - { - "epoch": 4.5778010386891586, - "grad_norm": 0.0026541464030742645, - "learning_rate": 0.0001999896666455956, - "loss": 46.0, - "step": 28426 - }, - { - "epoch": 4.577962075767946, - "grad_norm": 0.002017723862081766, - "learning_rate": 0.00019998966591830403, - "loss": 46.0, - "step": 28427 - }, - { - "epoch": 4.578123112846733, - "grad_norm": 0.002365991473197937, - "learning_rate": 0.0001999896651909869, - "loss": 46.0, - "step": 28428 - }, - { - "epoch": 4.57828414992552, - "grad_norm": 0.0027964806649833918, - "learning_rate": 0.00019998966446364418, - "loss": 46.0, - "step": 28429 - }, - { - "epoch": 4.5784451870043075, - "grad_norm": 0.0025261621922254562, - "learning_rate": 0.00019998966373627585, - "loss": 46.0, - "step": 28430 - }, - { - "epoch": 4.578606224083095, - "grad_norm": 0.00337060634046793, - "learning_rate": 0.00019998966300888195, - "loss": 46.0, - "step": 28431 - }, - { - "epoch": 4.578767261161882, - "grad_norm": 0.003920095041394234, - "learning_rate": 0.00019998966228146244, - "loss": 46.0, - "step": 28432 - }, - { - "epoch": 4.57892829824067, - "grad_norm": 0.0017860674997791648, - "learning_rate": 0.00019998966155401735, - "loss": 46.0, - "step": 28433 - }, - { - "epoch": 4.579089335319457, - "grad_norm": 0.002171809785068035, - "learning_rate": 0.00019998966082654667, - "loss": 46.0, - "step": 28434 - }, - { - "epoch": 4.579250372398245, - "grad_norm": 0.009731261059641838, - "learning_rate": 0.00019998966009905037, - "loss": 46.0, - "step": 28435 - }, - { - "epoch": 4.579411409477032, - "grad_norm": 0.0015148160746321082, - "learning_rate": 0.00019998965937152851, - "loss": 46.0, - "step": 28436 - }, - { - "epoch": 4.57957244655582, - "grad_norm": 0.0018464995082467794, - "learning_rate": 0.00019998965864398102, - "loss": 46.0, - "step": 28437 - }, - { - "epoch": 4.579733483634607, - "grad_norm": 0.0063488297164440155, - "learning_rate": 0.00019998965791640798, - "loss": 46.0, - "step": 28438 - }, - { - "epoch": 4.5798945207133945, - "grad_norm": 0.01029153075069189, - "learning_rate": 0.00019998965718880934, - "loss": 46.0, - "step": 28439 - }, - { - "epoch": 4.580055557792182, - "grad_norm": 0.0009967988589778543, - "learning_rate": 0.00019998965646118508, - "loss": 46.0, - "step": 28440 - }, - { - "epoch": 4.5802165948709685, - "grad_norm": 0.003307321574538946, - "learning_rate": 0.00019998965573353526, - "loss": 46.0, - "step": 28441 - }, - { - "epoch": 4.580377631949757, - "grad_norm": 0.002107944805175066, - "learning_rate": 0.00019998965500585985, - "loss": 46.0, - "step": 28442 - }, - { - "epoch": 4.5805386690285435, - "grad_norm": 0.014695961959660053, - "learning_rate": 0.0001999896542781588, - "loss": 46.0, - "step": 28443 - }, - { - "epoch": 4.580699706107331, - "grad_norm": 0.002039693295955658, - "learning_rate": 0.0001999896535504322, - "loss": 46.0, - "step": 28444 - }, - { - "epoch": 4.580860743186118, - "grad_norm": 0.0021300979424268007, - "learning_rate": 0.00019998965282267997, - "loss": 46.0, - "step": 28445 - }, - { - "epoch": 4.581021780264906, - "grad_norm": 0.009845821186900139, - "learning_rate": 0.0001999896520949022, - "loss": 46.0, - "step": 28446 - }, - { - "epoch": 4.581182817343693, - "grad_norm": 0.006082086358219385, - "learning_rate": 0.00019998965136709882, - "loss": 46.0, - "step": 28447 - }, - { - "epoch": 4.581343854422481, - "grad_norm": 0.010245672427117825, - "learning_rate": 0.0001999896506392698, - "loss": 46.0, - "step": 28448 - }, - { - "epoch": 4.581504891501268, - "grad_norm": 0.005928464233875275, - "learning_rate": 0.00019998964991141527, - "loss": 46.0, - "step": 28449 - }, - { - "epoch": 4.581665928580056, - "grad_norm": 0.006840656977146864, - "learning_rate": 0.00019998964918353508, - "loss": 46.0, - "step": 28450 - }, - { - "epoch": 4.581826965658843, - "grad_norm": 0.0006742523983120918, - "learning_rate": 0.00019998964845562936, - "loss": 46.0, - "step": 28451 - }, - { - "epoch": 4.5819880027376305, - "grad_norm": 0.008705566637217999, - "learning_rate": 0.000199989647727698, - "loss": 46.0, - "step": 28452 - }, - { - "epoch": 4.582149039816418, - "grad_norm": 0.0007399450987577438, - "learning_rate": 0.00019998964699974105, - "loss": 46.0, - "step": 28453 - }, - { - "epoch": 4.582310076895205, - "grad_norm": 0.0040413024835288525, - "learning_rate": 0.00019998964627175855, - "loss": 46.0, - "step": 28454 - }, - { - "epoch": 4.582471113973993, - "grad_norm": 0.0055252546444535255, - "learning_rate": 0.0001999896455437504, - "loss": 46.0, - "step": 28455 - }, - { - "epoch": 4.582632151052779, - "grad_norm": 0.0025090461131185293, - "learning_rate": 0.00019998964481571666, - "loss": 46.0, - "step": 28456 - }, - { - "epoch": 4.582793188131568, - "grad_norm": 0.00297033553943038, - "learning_rate": 0.00019998964408765737, - "loss": 46.0, - "step": 28457 - }, - { - "epoch": 4.582954225210354, - "grad_norm": 0.004799159709364176, - "learning_rate": 0.00019998964335957246, - "loss": 46.0, - "step": 28458 - }, - { - "epoch": 4.583115262289142, - "grad_norm": 0.0039450605399906635, - "learning_rate": 0.000199989642631462, - "loss": 46.0, - "step": 28459 - }, - { - "epoch": 4.583276299367929, - "grad_norm": 0.006026839837431908, - "learning_rate": 0.0001999896419033259, - "loss": 46.0, - "step": 28460 - }, - { - "epoch": 4.583437336446717, - "grad_norm": 0.0046498412266373634, - "learning_rate": 0.0001999896411751642, - "loss": 46.0, - "step": 28461 - }, - { - "epoch": 4.583598373525504, - "grad_norm": 0.007260121870785952, - "learning_rate": 0.00019998964044697694, - "loss": 46.0, - "step": 28462 - }, - { - "epoch": 4.583759410604292, - "grad_norm": 0.0072744861245155334, - "learning_rate": 0.00019998963971876407, - "loss": 46.0, - "step": 28463 - }, - { - "epoch": 4.583920447683079, - "grad_norm": 0.004115426447242498, - "learning_rate": 0.00019998963899052564, - "loss": 46.0, - "step": 28464 - }, - { - "epoch": 4.5840814847618665, - "grad_norm": 0.006077341735363007, - "learning_rate": 0.00019998963826226156, - "loss": 46.0, - "step": 28465 - }, - { - "epoch": 4.584242521840654, - "grad_norm": 0.0024771972093731165, - "learning_rate": 0.00019998963753397193, - "loss": 46.0, - "step": 28466 - }, - { - "epoch": 4.584403558919441, - "grad_norm": 0.00158779660705477, - "learning_rate": 0.0001999896368056567, - "loss": 46.0, - "step": 28467 - }, - { - "epoch": 4.584564595998229, - "grad_norm": 0.0031605777330696583, - "learning_rate": 0.00019998963607731587, - "loss": 46.0, - "step": 28468 - }, - { - "epoch": 4.584725633077016, - "grad_norm": 0.005443513859063387, - "learning_rate": 0.00019998963534894945, - "loss": 46.0, - "step": 28469 - }, - { - "epoch": 4.584886670155804, - "grad_norm": 0.0021112828981131315, - "learning_rate": 0.00019998963462055747, - "loss": 46.0, - "step": 28470 - }, - { - "epoch": 4.58504770723459, - "grad_norm": 0.0065785483457148075, - "learning_rate": 0.00019998963389213984, - "loss": 46.0, - "step": 28471 - }, - { - "epoch": 4.585208744313378, - "grad_norm": 0.004789309576153755, - "learning_rate": 0.00019998963316369666, - "loss": 46.0, - "step": 28472 - }, - { - "epoch": 4.585369781392165, - "grad_norm": 0.0035931633319705725, - "learning_rate": 0.00019998963243522786, - "loss": 46.0, - "step": 28473 - }, - { - "epoch": 4.585530818470953, - "grad_norm": 0.005356601905077696, - "learning_rate": 0.00019998963170673347, - "loss": 46.0, - "step": 28474 - }, - { - "epoch": 4.58569185554974, - "grad_norm": 0.014345882460474968, - "learning_rate": 0.00019998963097821352, - "loss": 46.0, - "step": 28475 - }, - { - "epoch": 4.585852892628528, - "grad_norm": 0.003207128494977951, - "learning_rate": 0.00019998963024966794, - "loss": 46.0, - "step": 28476 - }, - { - "epoch": 4.586013929707315, - "grad_norm": 0.00799788348376751, - "learning_rate": 0.00019998962952109682, - "loss": 46.0, - "step": 28477 - }, - { - "epoch": 4.5861749667861025, - "grad_norm": 0.005665863864123821, - "learning_rate": 0.00019998962879250005, - "loss": 46.0, - "step": 28478 - }, - { - "epoch": 4.58633600386489, - "grad_norm": 0.0017914465861395001, - "learning_rate": 0.00019998962806387773, - "loss": 46.0, - "step": 28479 - }, - { - "epoch": 4.586497040943677, - "grad_norm": 0.000754940789192915, - "learning_rate": 0.0001999896273352298, - "loss": 46.0, - "step": 28480 - }, - { - "epoch": 4.586658078022465, - "grad_norm": 0.002521541900932789, - "learning_rate": 0.00019998962660655627, - "loss": 46.0, - "step": 28481 - }, - { - "epoch": 4.586819115101252, - "grad_norm": 0.008795598521828651, - "learning_rate": 0.00019998962587785716, - "loss": 46.0, - "step": 28482 - }, - { - "epoch": 4.58698015218004, - "grad_norm": 0.0020332983694970608, - "learning_rate": 0.00019998962514913244, - "loss": 46.0, - "step": 28483 - }, - { - "epoch": 4.587141189258827, - "grad_norm": 0.006329297088086605, - "learning_rate": 0.00019998962442038213, - "loss": 46.0, - "step": 28484 - }, - { - "epoch": 4.587302226337615, - "grad_norm": 0.0040099602192640305, - "learning_rate": 0.00019998962369160625, - "loss": 46.0, - "step": 28485 - }, - { - "epoch": 4.587463263416401, - "grad_norm": 0.003025706624612212, - "learning_rate": 0.00019998962296280477, - "loss": 46.0, - "step": 28486 - }, - { - "epoch": 4.587624300495189, - "grad_norm": 0.004924978595227003, - "learning_rate": 0.0001999896222339777, - "loss": 46.0, - "step": 28487 - }, - { - "epoch": 4.587785337573976, - "grad_norm": 0.0020735817961394787, - "learning_rate": 0.00019998962150512503, - "loss": 46.0, - "step": 28488 - }, - { - "epoch": 4.587946374652764, - "grad_norm": 0.0021971550304442644, - "learning_rate": 0.00019998962077624676, - "loss": 46.0, - "step": 28489 - }, - { - "epoch": 4.588107411731551, - "grad_norm": 0.0023650459479540586, - "learning_rate": 0.00019998962004734292, - "loss": 46.0, - "step": 28490 - }, - { - "epoch": 4.5882684488103385, - "grad_norm": 0.005954652093350887, - "learning_rate": 0.00019998961931841347, - "loss": 46.0, - "step": 28491 - }, - { - "epoch": 4.588429485889126, - "grad_norm": 0.002070229034870863, - "learning_rate": 0.00019998961858945844, - "loss": 46.0, - "step": 28492 - }, - { - "epoch": 4.588590522967913, - "grad_norm": 0.0014339193003252149, - "learning_rate": 0.00019998961786047779, - "loss": 46.0, - "step": 28493 - }, - { - "epoch": 4.588751560046701, - "grad_norm": 0.005268601235002279, - "learning_rate": 0.00019998961713147158, - "loss": 46.0, - "step": 28494 - }, - { - "epoch": 4.588912597125488, - "grad_norm": 0.006824306212365627, - "learning_rate": 0.00019998961640243975, - "loss": 46.0, - "step": 28495 - }, - { - "epoch": 4.589073634204276, - "grad_norm": 0.0018382209818810225, - "learning_rate": 0.00019998961567338234, - "loss": 46.0, - "step": 28496 - }, - { - "epoch": 4.589234671283063, - "grad_norm": 0.015351309441030025, - "learning_rate": 0.00019998961494429937, - "loss": 46.0, - "step": 28497 - }, - { - "epoch": 4.589395708361851, - "grad_norm": 0.0014979561092332006, - "learning_rate": 0.00019998961421519078, - "loss": 46.0, - "step": 28498 - }, - { - "epoch": 4.589556745440638, - "grad_norm": 0.0005707125528715551, - "learning_rate": 0.00019998961348605658, - "loss": 46.0, - "step": 28499 - }, - { - "epoch": 4.5897177825194255, - "grad_norm": 0.007236639969050884, - "learning_rate": 0.00019998961275689682, - "loss": 46.0, - "step": 28500 - }, - { - "epoch": 4.589878819598212, - "grad_norm": 0.0015959461452439427, - "learning_rate": 0.00019998961202771144, - "loss": 46.0, - "step": 28501 - }, - { - "epoch": 4.5900398566769995, - "grad_norm": 0.003295185277238488, - "learning_rate": 0.00019998961129850048, - "loss": 46.0, - "step": 28502 - }, - { - "epoch": 4.590200893755787, - "grad_norm": 0.004769337363541126, - "learning_rate": 0.00019998961056926393, - "loss": 46.0, - "step": 28503 - }, - { - "epoch": 4.5903619308345744, - "grad_norm": 0.0012110670795664191, - "learning_rate": 0.0001999896098400018, - "loss": 46.0, - "step": 28504 - }, - { - "epoch": 4.590522967913362, - "grad_norm": 0.014359165914356709, - "learning_rate": 0.00019998960911071404, - "loss": 46.0, - "step": 28505 - }, - { - "epoch": 4.590684004992149, - "grad_norm": 0.001631799153983593, - "learning_rate": 0.00019998960838140073, - "loss": 46.0, - "step": 28506 - }, - { - "epoch": 4.590845042070937, - "grad_norm": 0.005717022810131311, - "learning_rate": 0.0001999896076520618, - "loss": 46.0, - "step": 28507 - }, - { - "epoch": 4.591006079149724, - "grad_norm": 0.007634556386619806, - "learning_rate": 0.0001999896069226973, - "loss": 46.0, - "step": 28508 - }, - { - "epoch": 4.591167116228512, - "grad_norm": 0.004141835495829582, - "learning_rate": 0.00019998960619330717, - "loss": 46.0, - "step": 28509 - }, - { - "epoch": 4.591328153307299, - "grad_norm": 0.005560701712965965, - "learning_rate": 0.0001999896054638915, - "loss": 46.0, - "step": 28510 - }, - { - "epoch": 4.591489190386087, - "grad_norm": 0.002460506744682789, - "learning_rate": 0.00019998960473445018, - "loss": 46.0, - "step": 28511 - }, - { - "epoch": 4.591650227464874, - "grad_norm": 0.003538668854162097, - "learning_rate": 0.00019998960400498332, - "loss": 46.0, - "step": 28512 - }, - { - "epoch": 4.5918112645436615, - "grad_norm": 0.0013440271141007543, - "learning_rate": 0.00019998960327549084, - "loss": 46.0, - "step": 28513 - }, - { - "epoch": 4.591972301622448, - "grad_norm": 0.0025261377450078726, - "learning_rate": 0.00019998960254597278, - "loss": 46.0, - "step": 28514 - }, - { - "epoch": 4.592133338701236, - "grad_norm": 0.013304091058671474, - "learning_rate": 0.00019998960181642913, - "loss": 46.0, - "step": 28515 - }, - { - "epoch": 4.592294375780023, - "grad_norm": 0.003078119596466422, - "learning_rate": 0.00019998960108685986, - "loss": 46.0, - "step": 28516 - }, - { - "epoch": 4.59245541285881, - "grad_norm": 0.0027525704354047775, - "learning_rate": 0.00019998960035726504, - "loss": 46.0, - "step": 28517 - }, - { - "epoch": 4.592616449937598, - "grad_norm": 0.002791887614876032, - "learning_rate": 0.00019998959962764457, - "loss": 46.0, - "step": 28518 - }, - { - "epoch": 4.592777487016385, - "grad_norm": 0.0026647874619811773, - "learning_rate": 0.00019998959889799855, - "loss": 46.0, - "step": 28519 - }, - { - "epoch": 4.592938524095173, - "grad_norm": 0.001245353021658957, - "learning_rate": 0.00019998959816832694, - "loss": 46.0, - "step": 28520 - }, - { - "epoch": 4.59309956117396, - "grad_norm": 0.0008130603237077594, - "learning_rate": 0.00019998959743862976, - "loss": 46.0, - "step": 28521 - }, - { - "epoch": 4.593260598252748, - "grad_norm": 0.0005793844466097653, - "learning_rate": 0.00019998959670890692, - "loss": 46.0, - "step": 28522 - }, - { - "epoch": 4.593421635331535, - "grad_norm": 0.0012297973735257983, - "learning_rate": 0.00019998959597915855, - "loss": 46.0, - "step": 28523 - }, - { - "epoch": 4.593582672410323, - "grad_norm": 0.006347378715872765, - "learning_rate": 0.00019998959524938456, - "loss": 46.0, - "step": 28524 - }, - { - "epoch": 4.59374370948911, - "grad_norm": 0.003759901039302349, - "learning_rate": 0.00019998959451958496, - "loss": 46.0, - "step": 28525 - }, - { - "epoch": 4.5939047465678975, - "grad_norm": 0.006052514538168907, - "learning_rate": 0.0001999895937897598, - "loss": 46.0, - "step": 28526 - }, - { - "epoch": 4.594065783646685, - "grad_norm": 0.0020625251345336437, - "learning_rate": 0.00019998959305990904, - "loss": 46.0, - "step": 28527 - }, - { - "epoch": 4.594226820725472, - "grad_norm": 0.0017713834531605244, - "learning_rate": 0.00019998959233003268, - "loss": 46.0, - "step": 28528 - }, - { - "epoch": 4.594387857804259, - "grad_norm": 0.0014661067398265004, - "learning_rate": 0.00019998959160013073, - "loss": 46.0, - "step": 28529 - }, - { - "epoch": 4.594548894883047, - "grad_norm": 0.0017022364772856236, - "learning_rate": 0.0001999895908702032, - "loss": 46.0, - "step": 28530 - }, - { - "epoch": 4.594709931961834, - "grad_norm": 0.0057439361698925495, - "learning_rate": 0.00019998959014025006, - "loss": 46.0, - "step": 28531 - }, - { - "epoch": 4.594870969040621, - "grad_norm": 0.005177950952202082, - "learning_rate": 0.00019998958941027132, - "loss": 46.0, - "step": 28532 - }, - { - "epoch": 4.595032006119409, - "grad_norm": 0.006061226595193148, - "learning_rate": 0.000199989588680267, - "loss": 46.0, - "step": 28533 - }, - { - "epoch": 4.595193043198196, - "grad_norm": 0.006619125138968229, - "learning_rate": 0.0001999895879502371, - "loss": 46.0, - "step": 28534 - }, - { - "epoch": 4.595354080276984, - "grad_norm": 0.0022555554751306772, - "learning_rate": 0.0001999895872201816, - "loss": 46.0, - "step": 28535 - }, - { - "epoch": 4.595515117355771, - "grad_norm": 0.006830146536231041, - "learning_rate": 0.00019998958649010052, - "loss": 46.0, - "step": 28536 - }, - { - "epoch": 4.595676154434559, - "grad_norm": 0.007183676119893789, - "learning_rate": 0.0001999895857599938, - "loss": 46.0, - "step": 28537 - }, - { - "epoch": 4.595837191513346, - "grad_norm": 0.020978108048439026, - "learning_rate": 0.00019998958502986155, - "loss": 46.0, - "step": 28538 - }, - { - "epoch": 4.5959982285921335, - "grad_norm": 0.0036064700689166784, - "learning_rate": 0.00019998958429970367, - "loss": 46.0, - "step": 28539 - }, - { - "epoch": 4.596159265670921, - "grad_norm": 0.0031953167635947466, - "learning_rate": 0.0001999895835695202, - "loss": 46.0, - "step": 28540 - }, - { - "epoch": 4.596320302749708, - "grad_norm": 0.009189536795020103, - "learning_rate": 0.00019998958283931116, - "loss": 46.0, - "step": 28541 - }, - { - "epoch": 4.596481339828496, - "grad_norm": 0.0021295237820595503, - "learning_rate": 0.0001999895821090765, - "loss": 46.0, - "step": 28542 - }, - { - "epoch": 4.596642376907283, - "grad_norm": 0.004190010018646717, - "learning_rate": 0.00019998958137881626, - "loss": 46.0, - "step": 28543 - }, - { - "epoch": 4.59680341398607, - "grad_norm": 0.005775100085884333, - "learning_rate": 0.00019998958064853045, - "loss": 46.0, - "step": 28544 - }, - { - "epoch": 4.596964451064858, - "grad_norm": 0.004560623783618212, - "learning_rate": 0.000199989579918219, - "loss": 46.0, - "step": 28545 - }, - { - "epoch": 4.597125488143645, - "grad_norm": 0.010999364778399467, - "learning_rate": 0.00019998957918788198, - "loss": 46.0, - "step": 28546 - }, - { - "epoch": 4.597286525222432, - "grad_norm": 0.003572491928935051, - "learning_rate": 0.00019998957845751938, - "loss": 46.0, - "step": 28547 - }, - { - "epoch": 4.59744756230122, - "grad_norm": 0.0017069445457309484, - "learning_rate": 0.0001999895777271312, - "loss": 46.0, - "step": 28548 - }, - { - "epoch": 4.597608599380007, - "grad_norm": 0.004072308074682951, - "learning_rate": 0.00019998957699671739, - "loss": 46.0, - "step": 28549 - }, - { - "epoch": 4.5977696364587946, - "grad_norm": 0.005793790332973003, - "learning_rate": 0.00019998957626627802, - "loss": 46.0, - "step": 28550 - }, - { - "epoch": 4.597930673537582, - "grad_norm": 0.010596828535199165, - "learning_rate": 0.00019998957553581304, - "loss": 46.0, - "step": 28551 - }, - { - "epoch": 4.5980917106163695, - "grad_norm": 0.00041604763828217983, - "learning_rate": 0.00019998957480532245, - "loss": 46.0, - "step": 28552 - }, - { - "epoch": 4.598252747695157, - "grad_norm": 0.0032967927400022745, - "learning_rate": 0.0001999895740748063, - "loss": 46.0, - "step": 28553 - }, - { - "epoch": 4.598413784773944, - "grad_norm": 0.0014154138043522835, - "learning_rate": 0.00019998957334426453, - "loss": 46.0, - "step": 28554 - }, - { - "epoch": 4.598574821852732, - "grad_norm": 0.005878071766346693, - "learning_rate": 0.0001999895726136972, - "loss": 46.0, - "step": 28555 - }, - { - "epoch": 4.598735858931519, - "grad_norm": 0.0025634162593632936, - "learning_rate": 0.00019998957188310427, - "loss": 46.0, - "step": 28556 - }, - { - "epoch": 4.598896896010307, - "grad_norm": 0.004028363153338432, - "learning_rate": 0.00019998957115248574, - "loss": 46.0, - "step": 28557 - }, - { - "epoch": 4.599057933089094, - "grad_norm": 0.0024117566645145416, - "learning_rate": 0.00019998957042184162, - "loss": 46.0, - "step": 28558 - }, - { - "epoch": 4.599218970167881, - "grad_norm": 0.0055236779153347015, - "learning_rate": 0.0001999895696911719, - "loss": 46.0, - "step": 28559 - }, - { - "epoch": 4.599380007246668, - "grad_norm": 0.002532748971134424, - "learning_rate": 0.00019998956896047658, - "loss": 46.0, - "step": 28560 - }, - { - "epoch": 4.599541044325456, - "grad_norm": 0.001568284467794001, - "learning_rate": 0.00019998956822975567, - "loss": 46.0, - "step": 28561 - }, - { - "epoch": 4.599702081404243, - "grad_norm": 0.006054223515093327, - "learning_rate": 0.00019998956749900918, - "loss": 46.0, - "step": 28562 - }, - { - "epoch": 4.5998631184830305, - "grad_norm": 0.006243539042770863, - "learning_rate": 0.0001999895667682371, - "loss": 46.0, - "step": 28563 - }, - { - "epoch": 4.600024155561818, - "grad_norm": 0.015282531268894672, - "learning_rate": 0.00019998956603743944, - "loss": 46.0, - "step": 28564 - }, - { - "epoch": 4.600185192640605, - "grad_norm": 0.006074786651879549, - "learning_rate": 0.00019998956530661616, - "loss": 46.0, - "step": 28565 - }, - { - "epoch": 4.600346229719393, - "grad_norm": 0.00400999141857028, - "learning_rate": 0.0001999895645757673, - "loss": 46.0, - "step": 28566 - }, - { - "epoch": 4.60050726679818, - "grad_norm": 0.00989500805735588, - "learning_rate": 0.00019998956384489284, - "loss": 46.0, - "step": 28567 - }, - { - "epoch": 4.600668303876968, - "grad_norm": 0.004770105239003897, - "learning_rate": 0.0001999895631139928, - "loss": 46.0, - "step": 28568 - }, - { - "epoch": 4.600829340955755, - "grad_norm": 0.007653380744159222, - "learning_rate": 0.00019998956238306717, - "loss": 46.0, - "step": 28569 - }, - { - "epoch": 4.600990378034543, - "grad_norm": 0.005711417179554701, - "learning_rate": 0.00019998956165211593, - "loss": 46.0, - "step": 28570 - }, - { - "epoch": 4.60115141511333, - "grad_norm": 0.0066521973349153996, - "learning_rate": 0.00019998956092113913, - "loss": 46.0, - "step": 28571 - }, - { - "epoch": 4.601312452192118, - "grad_norm": 0.0035176360979676247, - "learning_rate": 0.0001999895601901367, - "loss": 46.0, - "step": 28572 - }, - { - "epoch": 4.601473489270905, - "grad_norm": 0.02367776818573475, - "learning_rate": 0.00019998955945910868, - "loss": 46.0, - "step": 28573 - }, - { - "epoch": 4.601634526349692, - "grad_norm": 0.0005685294163413346, - "learning_rate": 0.0001999895587280551, - "loss": 46.0, - "step": 28574 - }, - { - "epoch": 4.601795563428479, - "grad_norm": 0.0103748245164752, - "learning_rate": 0.0001999895579969759, - "loss": 46.0, - "step": 28575 - }, - { - "epoch": 4.6019566005072665, - "grad_norm": 0.006886561866849661, - "learning_rate": 0.00019998955726587112, - "loss": 46.0, - "step": 28576 - }, - { - "epoch": 4.602117637586054, - "grad_norm": 0.00479584326967597, - "learning_rate": 0.00019998955653474076, - "loss": 46.0, - "step": 28577 - }, - { - "epoch": 4.602278674664841, - "grad_norm": 0.006385931745171547, - "learning_rate": 0.0001999895558035848, - "loss": 46.0, - "step": 28578 - }, - { - "epoch": 4.602439711743629, - "grad_norm": 0.005352462641894817, - "learning_rate": 0.00019998955507240321, - "loss": 46.0, - "step": 28579 - }, - { - "epoch": 4.602600748822416, - "grad_norm": 0.0076520005241036415, - "learning_rate": 0.00019998955434119607, - "loss": 46.0, - "step": 28580 - }, - { - "epoch": 4.602761785901204, - "grad_norm": 0.0023984024301171303, - "learning_rate": 0.00019998955360996332, - "loss": 46.0, - "step": 28581 - }, - { - "epoch": 4.602922822979991, - "grad_norm": 0.007030025590211153, - "learning_rate": 0.00019998955287870498, - "loss": 46.0, - "step": 28582 - }, - { - "epoch": 4.603083860058779, - "grad_norm": 0.001687400508671999, - "learning_rate": 0.00019998955214742105, - "loss": 46.0, - "step": 28583 - }, - { - "epoch": 4.603244897137566, - "grad_norm": 0.008841497823596, - "learning_rate": 0.00019998955141611156, - "loss": 46.0, - "step": 28584 - }, - { - "epoch": 4.603405934216354, - "grad_norm": 0.001367873279377818, - "learning_rate": 0.00019998955068477642, - "loss": 46.0, - "step": 28585 - }, - { - "epoch": 4.603566971295141, - "grad_norm": 0.0015143114142119884, - "learning_rate": 0.00019998954995341573, - "loss": 46.0, - "step": 28586 - }, - { - "epoch": 4.6037280083739285, - "grad_norm": 0.0026926349382847548, - "learning_rate": 0.00019998954922202943, - "loss": 46.0, - "step": 28587 - }, - { - "epoch": 4.603889045452716, - "grad_norm": 0.00903333630412817, - "learning_rate": 0.00019998954849061753, - "loss": 46.0, - "step": 28588 - }, - { - "epoch": 4.6040500825315025, - "grad_norm": 0.0016941786743700504, - "learning_rate": 0.00019998954775918005, - "loss": 46.0, - "step": 28589 - }, - { - "epoch": 4.60421111961029, - "grad_norm": 0.01835503615438938, - "learning_rate": 0.00019998954702771696, - "loss": 46.0, - "step": 28590 - }, - { - "epoch": 4.604372156689077, - "grad_norm": 0.0007851842674426734, - "learning_rate": 0.00019998954629622833, - "loss": 46.0, - "step": 28591 - }, - { - "epoch": 4.604533193767865, - "grad_norm": 0.004995634313672781, - "learning_rate": 0.00019998954556471406, - "loss": 46.0, - "step": 28592 - }, - { - "epoch": 4.604694230846652, - "grad_norm": 0.007921691983938217, - "learning_rate": 0.0001999895448331742, - "loss": 46.0, - "step": 28593 - }, - { - "epoch": 4.60485526792544, - "grad_norm": 0.009077431634068489, - "learning_rate": 0.00019998954410160877, - "loss": 46.0, - "step": 28594 - }, - { - "epoch": 4.605016305004227, - "grad_norm": 0.009035086259245872, - "learning_rate": 0.00019998954337001773, - "loss": 46.0, - "step": 28595 - }, - { - "epoch": 4.605177342083015, - "grad_norm": 0.004378116223961115, - "learning_rate": 0.00019998954263840112, - "loss": 46.0, - "step": 28596 - }, - { - "epoch": 4.605338379161802, - "grad_norm": 0.006223103031516075, - "learning_rate": 0.00019998954190675889, - "loss": 46.0, - "step": 28597 - }, - { - "epoch": 4.60549941624059, - "grad_norm": 0.002344702137634158, - "learning_rate": 0.00019998954117509107, - "loss": 46.0, - "step": 28598 - }, - { - "epoch": 4.605660453319377, - "grad_norm": 0.005434425547719002, - "learning_rate": 0.0001999895404433977, - "loss": 46.0, - "step": 28599 - }, - { - "epoch": 4.6058214903981645, - "grad_norm": 0.005976187530905008, - "learning_rate": 0.0001999895397116787, - "loss": 46.0, - "step": 28600 - }, - { - "epoch": 4.605982527476952, - "grad_norm": 0.023467248305678368, - "learning_rate": 0.0001999895389799341, - "loss": 46.0, - "step": 28601 - }, - { - "epoch": 4.6061435645557385, - "grad_norm": 0.0029122892301529646, - "learning_rate": 0.00019998953824816392, - "loss": 46.0, - "step": 28602 - }, - { - "epoch": 4.606304601634527, - "grad_norm": 0.0015702040400356054, - "learning_rate": 0.00019998953751636817, - "loss": 46.0, - "step": 28603 - }, - { - "epoch": 4.606465638713313, - "grad_norm": 0.0009783548302948475, - "learning_rate": 0.0001999895367845468, - "loss": 46.0, - "step": 28604 - }, - { - "epoch": 4.606626675792101, - "grad_norm": 0.0019342172890901566, - "learning_rate": 0.00019998953605269981, - "loss": 46.0, - "step": 28605 - }, - { - "epoch": 4.606787712870888, - "grad_norm": 0.00224115327000618, - "learning_rate": 0.0001999895353208273, - "loss": 46.0, - "step": 28606 - }, - { - "epoch": 4.606948749949676, - "grad_norm": 0.005332854110747576, - "learning_rate": 0.00019998953458892914, - "loss": 46.0, - "step": 28607 - }, - { - "epoch": 4.607109787028463, - "grad_norm": 0.0061399005353450775, - "learning_rate": 0.0001999895338570054, - "loss": 46.0, - "step": 28608 - }, - { - "epoch": 4.607270824107251, - "grad_norm": 0.006406235974282026, - "learning_rate": 0.0001999895331250561, - "loss": 46.0, - "step": 28609 - }, - { - "epoch": 4.607431861186038, - "grad_norm": 0.012562812305986881, - "learning_rate": 0.00019998953239308117, - "loss": 46.0, - "step": 28610 - }, - { - "epoch": 4.6075928982648255, - "grad_norm": 0.004897661041468382, - "learning_rate": 0.00019998953166108067, - "loss": 46.0, - "step": 28611 - }, - { - "epoch": 4.607753935343613, - "grad_norm": 0.0029372184071689844, - "learning_rate": 0.00019998953092905457, - "loss": 46.0, - "step": 28612 - }, - { - "epoch": 4.6079149724224004, - "grad_norm": 0.0026479947846382856, - "learning_rate": 0.00019998953019700286, - "loss": 46.0, - "step": 28613 - }, - { - "epoch": 4.608076009501188, - "grad_norm": 0.005616776645183563, - "learning_rate": 0.00019998952946492557, - "loss": 46.0, - "step": 28614 - }, - { - "epoch": 4.608237046579975, - "grad_norm": 0.005181202199310064, - "learning_rate": 0.00019998952873282271, - "loss": 46.0, - "step": 28615 - }, - { - "epoch": 4.608398083658763, - "grad_norm": 0.0068702129647135735, - "learning_rate": 0.00019998952800069422, - "loss": 46.0, - "step": 28616 - }, - { - "epoch": 4.608559120737549, - "grad_norm": 0.001958029344677925, - "learning_rate": 0.00019998952726854016, - "loss": 46.0, - "step": 28617 - }, - { - "epoch": 4.608720157816338, - "grad_norm": 0.0015985690988600254, - "learning_rate": 0.0001999895265363605, - "loss": 46.0, - "step": 28618 - }, - { - "epoch": 4.608881194895124, - "grad_norm": 0.0019705630838871002, - "learning_rate": 0.00019998952580415526, - "loss": 46.0, - "step": 28619 - }, - { - "epoch": 4.609042231973912, - "grad_norm": 0.0012102543842047453, - "learning_rate": 0.00019998952507192442, - "loss": 46.0, - "step": 28620 - }, - { - "epoch": 4.609203269052699, - "grad_norm": 0.0038378890603780746, - "learning_rate": 0.00019998952433966798, - "loss": 46.0, - "step": 28621 - }, - { - "epoch": 4.609364306131487, - "grad_norm": 0.0026133048813790083, - "learning_rate": 0.00019998952360738596, - "loss": 46.0, - "step": 28622 - }, - { - "epoch": 4.609525343210274, - "grad_norm": 0.010411676950752735, - "learning_rate": 0.00019998952287507836, - "loss": 46.0, - "step": 28623 - }, - { - "epoch": 4.6096863802890615, - "grad_norm": 0.006977350451052189, - "learning_rate": 0.00019998952214274516, - "loss": 46.0, - "step": 28624 - }, - { - "epoch": 4.609847417367849, - "grad_norm": 0.0025255039799958467, - "learning_rate": 0.00019998952141038636, - "loss": 46.0, - "step": 28625 - }, - { - "epoch": 4.610008454446636, - "grad_norm": 0.006918781902641058, - "learning_rate": 0.00019998952067800193, - "loss": 46.0, - "step": 28626 - }, - { - "epoch": 4.610169491525424, - "grad_norm": 0.006830655969679356, - "learning_rate": 0.00019998951994559198, - "loss": 46.0, - "step": 28627 - }, - { - "epoch": 4.610330528604211, - "grad_norm": 0.00397122697904706, - "learning_rate": 0.0001999895192131564, - "loss": 46.0, - "step": 28628 - }, - { - "epoch": 4.610491565682999, - "grad_norm": 0.015576930716633797, - "learning_rate": 0.00019998951848069523, - "loss": 46.0, - "step": 28629 - }, - { - "epoch": 4.610652602761786, - "grad_norm": 0.002232639119029045, - "learning_rate": 0.00019998951774820845, - "loss": 46.0, - "step": 28630 - }, - { - "epoch": 4.610813639840574, - "grad_norm": 0.01757732778787613, - "learning_rate": 0.0001999895170156961, - "loss": 46.0, - "step": 28631 - }, - { - "epoch": 4.61097467691936, - "grad_norm": 0.008033502846956253, - "learning_rate": 0.00019998951628315815, - "loss": 46.0, - "step": 28632 - }, - { - "epoch": 4.611135713998148, - "grad_norm": 0.0028197630308568478, - "learning_rate": 0.00019998951555059465, - "loss": 46.0, - "step": 28633 - }, - { - "epoch": 4.611296751076935, - "grad_norm": 0.0018976012943312526, - "learning_rate": 0.0001999895148180055, - "loss": 46.0, - "step": 28634 - }, - { - "epoch": 4.611457788155723, - "grad_norm": 0.003416329389438033, - "learning_rate": 0.00019998951408539077, - "loss": 46.0, - "step": 28635 - }, - { - "epoch": 4.61161882523451, - "grad_norm": 0.008057817816734314, - "learning_rate": 0.00019998951335275047, - "loss": 46.0, - "step": 28636 - }, - { - "epoch": 4.6117798623132975, - "grad_norm": 0.009286249056458473, - "learning_rate": 0.00019998951262008456, - "loss": 46.0, - "step": 28637 - }, - { - "epoch": 4.611940899392085, - "grad_norm": 0.0068008312955498695, - "learning_rate": 0.00019998951188739307, - "loss": 46.0, - "step": 28638 - }, - { - "epoch": 4.612101936470872, - "grad_norm": 0.005206909030675888, - "learning_rate": 0.00019998951115467596, - "loss": 46.0, - "step": 28639 - }, - { - "epoch": 4.61226297354966, - "grad_norm": 0.005285155493766069, - "learning_rate": 0.0001999895104219333, - "loss": 46.0, - "step": 28640 - }, - { - "epoch": 4.612424010628447, - "grad_norm": 0.0038314827252179384, - "learning_rate": 0.000199989509689165, - "loss": 46.0, - "step": 28641 - }, - { - "epoch": 4.612585047707235, - "grad_norm": 0.0022522611543536186, - "learning_rate": 0.00019998950895637113, - "loss": 46.0, - "step": 28642 - }, - { - "epoch": 4.612746084786022, - "grad_norm": 0.002440350130200386, - "learning_rate": 0.00019998950822355168, - "loss": 46.0, - "step": 28643 - }, - { - "epoch": 4.61290712186481, - "grad_norm": 0.0048972065560519695, - "learning_rate": 0.00019998950749070663, - "loss": 46.0, - "step": 28644 - }, - { - "epoch": 4.613068158943597, - "grad_norm": 0.0019255060469731688, - "learning_rate": 0.000199989506757836, - "loss": 46.0, - "step": 28645 - }, - { - "epoch": 4.613229196022385, - "grad_norm": 0.003462818218395114, - "learning_rate": 0.00019998950602493975, - "loss": 46.0, - "step": 28646 - }, - { - "epoch": 4.613390233101171, - "grad_norm": 0.0044579338282346725, - "learning_rate": 0.00019998950529201794, - "loss": 46.0, - "step": 28647 - }, - { - "epoch": 4.613551270179959, - "grad_norm": 0.027491984888911247, - "learning_rate": 0.0001999895045590705, - "loss": 46.0, - "step": 28648 - }, - { - "epoch": 4.613712307258746, - "grad_norm": 0.017997391521930695, - "learning_rate": 0.00019998950382609749, - "loss": 46.0, - "step": 28649 - }, - { - "epoch": 4.6138733443375335, - "grad_norm": 0.005901267286390066, - "learning_rate": 0.0001999895030930989, - "loss": 46.0, - "step": 28650 - }, - { - "epoch": 4.614034381416321, - "grad_norm": 0.0022986619733273983, - "learning_rate": 0.0001999895023600747, - "loss": 46.0, - "step": 28651 - }, - { - "epoch": 4.614195418495108, - "grad_norm": 0.0024556294083595276, - "learning_rate": 0.0001999895016270249, - "loss": 46.0, - "step": 28652 - }, - { - "epoch": 4.614356455573896, - "grad_norm": 0.009372428059577942, - "learning_rate": 0.00019998950089394953, - "loss": 46.0, - "step": 28653 - }, - { - "epoch": 4.614517492652683, - "grad_norm": 0.004906050395220518, - "learning_rate": 0.00019998950016084855, - "loss": 46.0, - "step": 28654 - }, - { - "epoch": 4.614678529731471, - "grad_norm": 0.006026510149240494, - "learning_rate": 0.00019998949942772197, - "loss": 46.0, - "step": 28655 - }, - { - "epoch": 4.614839566810258, - "grad_norm": 0.00670495955273509, - "learning_rate": 0.00019998949869456985, - "loss": 46.0, - "step": 28656 - }, - { - "epoch": 4.615000603889046, - "grad_norm": 0.001298116403631866, - "learning_rate": 0.0001999894979613921, - "loss": 46.0, - "step": 28657 - }, - { - "epoch": 4.615161640967833, - "grad_norm": 0.01055904570966959, - "learning_rate": 0.00019998949722818874, - "loss": 46.0, - "step": 28658 - }, - { - "epoch": 4.6153226780466206, - "grad_norm": 0.0016208699671551585, - "learning_rate": 0.00019998949649495978, - "loss": 46.0, - "step": 28659 - }, - { - "epoch": 4.615483715125408, - "grad_norm": 0.008294446393847466, - "learning_rate": 0.00019998949576170526, - "loss": 46.0, - "step": 28660 - }, - { - "epoch": 4.6156447522041955, - "grad_norm": 0.004737715236842632, - "learning_rate": 0.00019998949502842518, - "loss": 46.0, - "step": 28661 - }, - { - "epoch": 4.615805789282982, - "grad_norm": 0.004632387775927782, - "learning_rate": 0.00019998949429511943, - "loss": 46.0, - "step": 28662 - }, - { - "epoch": 4.6159668263617695, - "grad_norm": 0.0025180247612297535, - "learning_rate": 0.00019998949356178815, - "loss": 46.0, - "step": 28663 - }, - { - "epoch": 4.616127863440557, - "grad_norm": 0.002578171668574214, - "learning_rate": 0.00019998949282843125, - "loss": 46.0, - "step": 28664 - }, - { - "epoch": 4.616288900519344, - "grad_norm": 0.011505522765219212, - "learning_rate": 0.00019998949209504876, - "loss": 46.0, - "step": 28665 - }, - { - "epoch": 4.616449937598132, - "grad_norm": 0.0042459312826395035, - "learning_rate": 0.0001999894913616407, - "loss": 46.0, - "step": 28666 - }, - { - "epoch": 4.616610974676919, - "grad_norm": 0.004854676313698292, - "learning_rate": 0.00019998949062820703, - "loss": 46.0, - "step": 28667 - }, - { - "epoch": 4.616772011755707, - "grad_norm": 0.009851894341409206, - "learning_rate": 0.00019998948989474776, - "loss": 46.0, - "step": 28668 - }, - { - "epoch": 4.616933048834494, - "grad_norm": 0.002170231193304062, - "learning_rate": 0.0001999894891612629, - "loss": 46.0, - "step": 28669 - }, - { - "epoch": 4.617094085913282, - "grad_norm": 0.0005476687219925225, - "learning_rate": 0.00019998948842775245, - "loss": 46.0, - "step": 28670 - }, - { - "epoch": 4.617255122992069, - "grad_norm": 0.004725928883999586, - "learning_rate": 0.00019998948769421642, - "loss": 46.0, - "step": 28671 - }, - { - "epoch": 4.6174161600708565, - "grad_norm": 0.0034042028710246086, - "learning_rate": 0.00019998948696065477, - "loss": 46.0, - "step": 28672 - }, - { - "epoch": 4.617577197149644, - "grad_norm": 0.004970040637999773, - "learning_rate": 0.00019998948622706756, - "loss": 46.0, - "step": 28673 - }, - { - "epoch": 4.617738234228431, - "grad_norm": 0.0006485159392468631, - "learning_rate": 0.00019998948549345474, - "loss": 46.0, - "step": 28674 - }, - { - "epoch": 4.617899271307218, - "grad_norm": 0.0031922643538564444, - "learning_rate": 0.00019998948475981633, - "loss": 46.0, - "step": 28675 - }, - { - "epoch": 4.618060308386006, - "grad_norm": 0.000634676543995738, - "learning_rate": 0.00019998948402615233, - "loss": 46.0, - "step": 28676 - }, - { - "epoch": 4.618221345464793, - "grad_norm": 0.004520370624959469, - "learning_rate": 0.00019998948329246274, - "loss": 46.0, - "step": 28677 - }, - { - "epoch": 4.61838238254358, - "grad_norm": 0.0052521973848342896, - "learning_rate": 0.00019998948255874754, - "loss": 46.0, - "step": 28678 - }, - { - "epoch": 4.618543419622368, - "grad_norm": 0.0035603551659733057, - "learning_rate": 0.00019998948182500676, - "loss": 46.0, - "step": 28679 - }, - { - "epoch": 4.618704456701155, - "grad_norm": 0.0029775267466902733, - "learning_rate": 0.00019998948109124038, - "loss": 46.0, - "step": 28680 - }, - { - "epoch": 4.618865493779943, - "grad_norm": 0.0051308441907167435, - "learning_rate": 0.00019998948035744842, - "loss": 46.0, - "step": 28681 - }, - { - "epoch": 4.61902653085873, - "grad_norm": 0.01388498768210411, - "learning_rate": 0.00019998947962363088, - "loss": 46.0, - "step": 28682 - }, - { - "epoch": 4.619187567937518, - "grad_norm": 0.0019024768844246864, - "learning_rate": 0.00019998947888978771, - "loss": 46.0, - "step": 28683 - }, - { - "epoch": 4.619348605016305, - "grad_norm": 0.00683908024802804, - "learning_rate": 0.000199989478155919, - "loss": 46.0, - "step": 28684 - }, - { - "epoch": 4.6195096420950925, - "grad_norm": 0.0029897105414420366, - "learning_rate": 0.00019998947742202466, - "loss": 46.0, - "step": 28685 - }, - { - "epoch": 4.61967067917388, - "grad_norm": 0.00455856928601861, - "learning_rate": 0.00019998947668810476, - "loss": 46.0, - "step": 28686 - }, - { - "epoch": 4.619831716252667, - "grad_norm": 0.0018517576390877366, - "learning_rate": 0.00019998947595415922, - "loss": 46.0, - "step": 28687 - }, - { - "epoch": 4.619992753331455, - "grad_norm": 0.004377380479127169, - "learning_rate": 0.00019998947522018812, - "loss": 46.0, - "step": 28688 - }, - { - "epoch": 4.620153790410242, - "grad_norm": 0.0011183862807229161, - "learning_rate": 0.00019998947448619144, - "loss": 46.0, - "step": 28689 - }, - { - "epoch": 4.620314827489029, - "grad_norm": 0.0066835167817771435, - "learning_rate": 0.0001999894737521691, - "loss": 46.0, - "step": 28690 - }, - { - "epoch": 4.620475864567817, - "grad_norm": 0.00665680505335331, - "learning_rate": 0.00019998947301812123, - "loss": 46.0, - "step": 28691 - }, - { - "epoch": 4.620636901646604, - "grad_norm": 0.0053182621486485004, - "learning_rate": 0.00019998947228404775, - "loss": 46.0, - "step": 28692 - }, - { - "epoch": 4.620797938725391, - "grad_norm": 0.0014270198298618197, - "learning_rate": 0.0001999894715499487, - "loss": 46.0, - "step": 28693 - }, - { - "epoch": 4.620958975804179, - "grad_norm": 0.001682116067968309, - "learning_rate": 0.00019998947081582404, - "loss": 46.0, - "step": 28694 - }, - { - "epoch": 4.621120012882966, - "grad_norm": 0.010620569810271263, - "learning_rate": 0.00019998947008167378, - "loss": 46.0, - "step": 28695 - }, - { - "epoch": 4.621281049961754, - "grad_norm": 0.002223008079454303, - "learning_rate": 0.00019998946934749793, - "loss": 46.0, - "step": 28696 - }, - { - "epoch": 4.621442087040541, - "grad_norm": 0.0020947102457284927, - "learning_rate": 0.0001999894686132965, - "loss": 46.0, - "step": 28697 - }, - { - "epoch": 4.6216031241193285, - "grad_norm": 0.008420648984611034, - "learning_rate": 0.00019998946787906947, - "loss": 46.0, - "step": 28698 - }, - { - "epoch": 4.621764161198116, - "grad_norm": 0.001620523165911436, - "learning_rate": 0.00019998946714481686, - "loss": 46.0, - "step": 28699 - }, - { - "epoch": 4.621925198276903, - "grad_norm": 0.004672982729971409, - "learning_rate": 0.00019998946641053864, - "loss": 46.0, - "step": 28700 - }, - { - "epoch": 4.622086235355691, - "grad_norm": 0.0009783884743228555, - "learning_rate": 0.00019998946567623485, - "loss": 46.0, - "step": 28701 - }, - { - "epoch": 4.622247272434478, - "grad_norm": 0.008539550937712193, - "learning_rate": 0.00019998946494190545, - "loss": 46.0, - "step": 28702 - }, - { - "epoch": 4.622408309513266, - "grad_norm": 0.004753672983497381, - "learning_rate": 0.00019998946420755044, - "loss": 46.0, - "step": 28703 - }, - { - "epoch": 4.622569346592053, - "grad_norm": 0.00302482140250504, - "learning_rate": 0.00019998946347316986, - "loss": 46.0, - "step": 28704 - }, - { - "epoch": 4.62273038367084, - "grad_norm": 0.0042535122483968735, - "learning_rate": 0.0001999894627387637, - "loss": 46.0, - "step": 28705 - }, - { - "epoch": 4.622891420749627, - "grad_norm": 0.019024042412638664, - "learning_rate": 0.00019998946200433193, - "loss": 46.0, - "step": 28706 - }, - { - "epoch": 4.623052457828415, - "grad_norm": 0.008529804646968842, - "learning_rate": 0.00019998946126987456, - "loss": 46.0, - "step": 28707 - }, - { - "epoch": 4.623213494907202, - "grad_norm": 0.007570905145257711, - "learning_rate": 0.00019998946053539161, - "loss": 46.0, - "step": 28708 - }, - { - "epoch": 4.62337453198599, - "grad_norm": 0.00918601918965578, - "learning_rate": 0.00019998945980088308, - "loss": 46.0, - "step": 28709 - }, - { - "epoch": 4.623535569064777, - "grad_norm": 0.0027174788992851973, - "learning_rate": 0.00019998945906634893, - "loss": 46.0, - "step": 28710 - }, - { - "epoch": 4.6236966061435645, - "grad_norm": 0.005719912238419056, - "learning_rate": 0.00019998945833178921, - "loss": 46.0, - "step": 28711 - }, - { - "epoch": 4.623857643222352, - "grad_norm": 0.005411564372479916, - "learning_rate": 0.0001999894575972039, - "loss": 46.0, - "step": 28712 - }, - { - "epoch": 4.624018680301139, - "grad_norm": 0.001239596982486546, - "learning_rate": 0.00019998945686259298, - "loss": 46.0, - "step": 28713 - }, - { - "epoch": 4.624179717379927, - "grad_norm": 0.0036991252563893795, - "learning_rate": 0.00019998945612795648, - "loss": 46.0, - "step": 28714 - }, - { - "epoch": 4.624340754458714, - "grad_norm": 0.003780174534767866, - "learning_rate": 0.0001999894553932944, - "loss": 46.0, - "step": 28715 - }, - { - "epoch": 4.624501791537502, - "grad_norm": 0.0045287394896149635, - "learning_rate": 0.0001999894546586067, - "loss": 46.0, - "step": 28716 - }, - { - "epoch": 4.624662828616289, - "grad_norm": 0.007606519851833582, - "learning_rate": 0.0001999894539238934, - "loss": 46.0, - "step": 28717 - }, - { - "epoch": 4.624823865695077, - "grad_norm": 0.002691676141694188, - "learning_rate": 0.00019998945318915455, - "loss": 46.0, - "step": 28718 - }, - { - "epoch": 4.624984902773864, - "grad_norm": 0.00417829304933548, - "learning_rate": 0.00019998945245439006, - "loss": 46.0, - "step": 28719 - }, - { - "epoch": 4.625145939852651, - "grad_norm": 0.005441527813673019, - "learning_rate": 0.0001999894517196, - "loss": 46.0, - "step": 28720 - }, - { - "epoch": 4.625306976931438, - "grad_norm": 0.0035826906096190214, - "learning_rate": 0.00019998945098478437, - "loss": 46.0, - "step": 28721 - }, - { - "epoch": 4.625468014010226, - "grad_norm": 0.00313868117518723, - "learning_rate": 0.00019998945024994312, - "loss": 46.0, - "step": 28722 - }, - { - "epoch": 4.625629051089013, - "grad_norm": 0.0020399545319378376, - "learning_rate": 0.0001999894495150763, - "loss": 46.0, - "step": 28723 - }, - { - "epoch": 4.6257900881678005, - "grad_norm": 0.002654819516465068, - "learning_rate": 0.00019998944878018389, - "loss": 46.0, - "step": 28724 - }, - { - "epoch": 4.625951125246588, - "grad_norm": 0.011123983189463615, - "learning_rate": 0.00019998944804526585, - "loss": 46.0, - "step": 28725 - }, - { - "epoch": 4.626112162325375, - "grad_norm": 0.0010415163123980165, - "learning_rate": 0.00019998944731032222, - "loss": 46.0, - "step": 28726 - }, - { - "epoch": 4.626273199404163, - "grad_norm": 0.0016799357254058123, - "learning_rate": 0.00019998944657535303, - "loss": 46.0, - "step": 28727 - }, - { - "epoch": 4.62643423648295, - "grad_norm": 0.001290600630454719, - "learning_rate": 0.00019998944584035826, - "loss": 46.0, - "step": 28728 - }, - { - "epoch": 4.626595273561738, - "grad_norm": 0.0064903805032372475, - "learning_rate": 0.00019998944510533787, - "loss": 46.0, - "step": 28729 - }, - { - "epoch": 4.626756310640525, - "grad_norm": 0.001428467221558094, - "learning_rate": 0.0001999894443702919, - "loss": 46.0, - "step": 28730 - }, - { - "epoch": 4.626917347719313, - "grad_norm": 0.0028896804433315992, - "learning_rate": 0.00019998944363522033, - "loss": 46.0, - "step": 28731 - }, - { - "epoch": 4.6270783847981, - "grad_norm": 0.0032285735942423344, - "learning_rate": 0.00019998944290012312, - "loss": 46.0, - "step": 28732 - }, - { - "epoch": 4.6272394218768875, - "grad_norm": 0.006240965332835913, - "learning_rate": 0.0001999894421650004, - "loss": 46.0, - "step": 28733 - }, - { - "epoch": 4.627400458955675, - "grad_norm": 0.003867104183882475, - "learning_rate": 0.00019998944142985203, - "loss": 46.0, - "step": 28734 - }, - { - "epoch": 4.6275614960344615, - "grad_norm": 0.0027933414094150066, - "learning_rate": 0.00019998944069467812, - "loss": 46.0, - "step": 28735 - }, - { - "epoch": 4.627722533113249, - "grad_norm": 0.004787487909197807, - "learning_rate": 0.00019998943995947857, - "loss": 46.0, - "step": 28736 - }, - { - "epoch": 4.6278835701920364, - "grad_norm": 0.004292582161724567, - "learning_rate": 0.00019998943922425343, - "loss": 46.0, - "step": 28737 - }, - { - "epoch": 4.628044607270824, - "grad_norm": 0.004687467589974403, - "learning_rate": 0.00019998943848900273, - "loss": 46.0, - "step": 28738 - }, - { - "epoch": 4.628205644349611, - "grad_norm": 0.005203989800065756, - "learning_rate": 0.00019998943775372641, - "loss": 46.0, - "step": 28739 - }, - { - "epoch": 4.628366681428399, - "grad_norm": 0.0018016740214079618, - "learning_rate": 0.00019998943701842454, - "loss": 46.0, - "step": 28740 - }, - { - "epoch": 4.628527718507186, - "grad_norm": 0.005282426718622446, - "learning_rate": 0.00019998943628309702, - "loss": 46.0, - "step": 28741 - }, - { - "epoch": 4.628688755585974, - "grad_norm": 0.001087265438400209, - "learning_rate": 0.00019998943554774395, - "loss": 46.0, - "step": 28742 - }, - { - "epoch": 4.628849792664761, - "grad_norm": 0.0044693793170154095, - "learning_rate": 0.00019998943481236528, - "loss": 46.0, - "step": 28743 - }, - { - "epoch": 4.629010829743549, - "grad_norm": 0.008755608461797237, - "learning_rate": 0.000199989434076961, - "loss": 46.0, - "step": 28744 - }, - { - "epoch": 4.629171866822336, - "grad_norm": 0.0053488025441765785, - "learning_rate": 0.00019998943334153114, - "loss": 46.0, - "step": 28745 - }, - { - "epoch": 4.6293329039011235, - "grad_norm": 0.006271216087043285, - "learning_rate": 0.0001999894326060757, - "loss": 46.0, - "step": 28746 - }, - { - "epoch": 4.629493940979911, - "grad_norm": 0.0045645213685929775, - "learning_rate": 0.00019998943187059465, - "loss": 46.0, - "step": 28747 - }, - { - "epoch": 4.6296549780586975, - "grad_norm": 0.0031551923602819443, - "learning_rate": 0.00019998943113508802, - "loss": 46.0, - "step": 28748 - }, - { - "epoch": 4.629816015137486, - "grad_norm": 0.0012918355641886592, - "learning_rate": 0.00019998943039955578, - "loss": 46.0, - "step": 28749 - }, - { - "epoch": 4.629977052216272, - "grad_norm": 0.0048337276093661785, - "learning_rate": 0.00019998942966399796, - "loss": 46.0, - "step": 28750 - }, - { - "epoch": 4.63013808929506, - "grad_norm": 0.004521054681390524, - "learning_rate": 0.00019998942892841454, - "loss": 46.0, - "step": 28751 - }, - { - "epoch": 4.630299126373847, - "grad_norm": 0.003795759519562125, - "learning_rate": 0.00019998942819280554, - "loss": 46.0, - "step": 28752 - }, - { - "epoch": 4.630460163452635, - "grad_norm": 0.004174003843218088, - "learning_rate": 0.00019998942745717092, - "loss": 46.0, - "step": 28753 - }, - { - "epoch": 4.630621200531422, - "grad_norm": 0.0024887763429433107, - "learning_rate": 0.00019998942672151075, - "loss": 46.0, - "step": 28754 - }, - { - "epoch": 4.63078223761021, - "grad_norm": 0.00228418642655015, - "learning_rate": 0.00019998942598582496, - "loss": 46.0, - "step": 28755 - }, - { - "epoch": 4.630943274688997, - "grad_norm": 0.0032761809416115284, - "learning_rate": 0.0001999894252501136, - "loss": 46.0, - "step": 28756 - }, - { - "epoch": 4.631104311767785, - "grad_norm": 0.00321602844633162, - "learning_rate": 0.0001999894245143766, - "loss": 46.0, - "step": 28757 - }, - { - "epoch": 4.631265348846572, - "grad_norm": 0.004598714876919985, - "learning_rate": 0.00019998942377861406, - "loss": 46.0, - "step": 28758 - }, - { - "epoch": 4.6314263859253595, - "grad_norm": 0.005074535496532917, - "learning_rate": 0.00019998942304282592, - "loss": 46.0, - "step": 28759 - }, - { - "epoch": 4.631587423004147, - "grad_norm": 0.024239646270871162, - "learning_rate": 0.0001999894223070122, - "loss": 46.0, - "step": 28760 - }, - { - "epoch": 4.631748460082934, - "grad_norm": 0.00471185939386487, - "learning_rate": 0.00019998942157117285, - "loss": 46.0, - "step": 28761 - }, - { - "epoch": 4.631909497161722, - "grad_norm": 0.011587814427912235, - "learning_rate": 0.00019998942083530793, - "loss": 46.0, - "step": 28762 - }, - { - "epoch": 4.632070534240508, - "grad_norm": 0.008011476136744022, - "learning_rate": 0.00019998942009941738, - "loss": 46.0, - "step": 28763 - }, - { - "epoch": 4.632231571319297, - "grad_norm": 0.0014787832042202353, - "learning_rate": 0.00019998941936350128, - "loss": 46.0, - "step": 28764 - }, - { - "epoch": 4.632392608398083, - "grad_norm": 0.011929119005799294, - "learning_rate": 0.00019998941862755956, - "loss": 46.0, - "step": 28765 - }, - { - "epoch": 4.632553645476871, - "grad_norm": 0.007643118966370821, - "learning_rate": 0.0001999894178915923, - "loss": 46.0, - "step": 28766 - }, - { - "epoch": 4.632714682555658, - "grad_norm": 0.002223399467766285, - "learning_rate": 0.00019998941715559937, - "loss": 46.0, - "step": 28767 - }, - { - "epoch": 4.632875719634446, - "grad_norm": 0.0020988464821130037, - "learning_rate": 0.00019998941641958092, - "loss": 46.0, - "step": 28768 - }, - { - "epoch": 4.633036756713233, - "grad_norm": 0.004369881935417652, - "learning_rate": 0.00019998941568353683, - "loss": 46.0, - "step": 28769 - }, - { - "epoch": 4.633197793792021, - "grad_norm": 0.0049811918288469315, - "learning_rate": 0.00019998941494746717, - "loss": 46.0, - "step": 28770 - }, - { - "epoch": 4.633358830870808, - "grad_norm": 0.004455159418284893, - "learning_rate": 0.0001999894142113719, - "loss": 46.0, - "step": 28771 - }, - { - "epoch": 4.6335198679495955, - "grad_norm": 0.002118463162332773, - "learning_rate": 0.00019998941347525105, - "loss": 46.0, - "step": 28772 - }, - { - "epoch": 4.633680905028383, - "grad_norm": 0.003406836884096265, - "learning_rate": 0.0001999894127391046, - "loss": 46.0, - "step": 28773 - }, - { - "epoch": 4.63384194210717, - "grad_norm": 0.006176305469125509, - "learning_rate": 0.00019998941200293258, - "loss": 46.0, - "step": 28774 - }, - { - "epoch": 4.634002979185958, - "grad_norm": 0.010172223672270775, - "learning_rate": 0.00019998941126673497, - "loss": 46.0, - "step": 28775 - }, - { - "epoch": 4.634164016264745, - "grad_norm": 0.00523246917873621, - "learning_rate": 0.00019998941053051174, - "loss": 46.0, - "step": 28776 - }, - { - "epoch": 4.634325053343533, - "grad_norm": 0.0025002325419336557, - "learning_rate": 0.00019998940979426295, - "loss": 46.0, - "step": 28777 - }, - { - "epoch": 4.634486090422319, - "grad_norm": 0.0034382734447717667, - "learning_rate": 0.00019998940905798852, - "loss": 46.0, - "step": 28778 - }, - { - "epoch": 4.634647127501107, - "grad_norm": 0.010849333368241787, - "learning_rate": 0.00019998940832168853, - "loss": 46.0, - "step": 28779 - }, - { - "epoch": 4.634808164579894, - "grad_norm": 0.001068843062967062, - "learning_rate": 0.00019998940758536295, - "loss": 46.0, - "step": 28780 - }, - { - "epoch": 4.634969201658682, - "grad_norm": 0.015925215557217598, - "learning_rate": 0.00019998940684901178, - "loss": 46.0, - "step": 28781 - }, - { - "epoch": 4.635130238737469, - "grad_norm": 0.011597731150686741, - "learning_rate": 0.000199989406112635, - "loss": 46.0, - "step": 28782 - }, - { - "epoch": 4.6352912758162566, - "grad_norm": 0.0019245428266003728, - "learning_rate": 0.00019998940537623266, - "loss": 46.0, - "step": 28783 - }, - { - "epoch": 4.635452312895044, - "grad_norm": 0.003491608891636133, - "learning_rate": 0.00019998940463980468, - "loss": 46.0, - "step": 28784 - }, - { - "epoch": 4.6356133499738315, - "grad_norm": 0.005695475731045008, - "learning_rate": 0.00019998940390335114, - "loss": 46.0, - "step": 28785 - }, - { - "epoch": 4.635774387052619, - "grad_norm": 0.003567585488781333, - "learning_rate": 0.000199989403166872, - "loss": 46.0, - "step": 28786 - }, - { - "epoch": 4.635935424131406, - "grad_norm": 0.0032246678601950407, - "learning_rate": 0.00019998940243036727, - "loss": 46.0, - "step": 28787 - }, - { - "epoch": 4.636096461210194, - "grad_norm": 0.0022226048167794943, - "learning_rate": 0.00019998940169383694, - "loss": 46.0, - "step": 28788 - }, - { - "epoch": 4.636257498288981, - "grad_norm": 0.002625696826726198, - "learning_rate": 0.00019998940095728102, - "loss": 46.0, - "step": 28789 - }, - { - "epoch": 4.636418535367769, - "grad_norm": 0.0031848677899688482, - "learning_rate": 0.00019998940022069952, - "loss": 46.0, - "step": 28790 - }, - { - "epoch": 4.636579572446556, - "grad_norm": 0.0010988399153575301, - "learning_rate": 0.0001999893994840924, - "loss": 46.0, - "step": 28791 - }, - { - "epoch": 4.636740609525344, - "grad_norm": 0.0026120019610971212, - "learning_rate": 0.00019998939874745972, - "loss": 46.0, - "step": 28792 - }, - { - "epoch": 4.63690164660413, - "grad_norm": 0.0035063589457422495, - "learning_rate": 0.00019998939801080145, - "loss": 46.0, - "step": 28793 - }, - { - "epoch": 4.637062683682918, - "grad_norm": 0.016091682016849518, - "learning_rate": 0.00019998939727411757, - "loss": 46.0, - "step": 28794 - }, - { - "epoch": 4.637223720761705, - "grad_norm": 0.010831695981323719, - "learning_rate": 0.00019998939653740808, - "loss": 46.0, - "step": 28795 - }, - { - "epoch": 4.6373847578404925, - "grad_norm": 0.002514464780688286, - "learning_rate": 0.00019998939580067302, - "loss": 46.0, - "step": 28796 - }, - { - "epoch": 4.63754579491928, - "grad_norm": 0.0066358535550534725, - "learning_rate": 0.00019998939506391236, - "loss": 46.0, - "step": 28797 - }, - { - "epoch": 4.637706831998067, - "grad_norm": 0.006156806834042072, - "learning_rate": 0.00019998939432712613, - "loss": 46.0, - "step": 28798 - }, - { - "epoch": 4.637867869076855, - "grad_norm": 0.002510531572625041, - "learning_rate": 0.0001999893935903143, - "loss": 46.0, - "step": 28799 - }, - { - "epoch": 4.638028906155642, - "grad_norm": 0.011003522202372551, - "learning_rate": 0.00019998939285347685, - "loss": 46.0, - "step": 28800 - }, - { - "epoch": 4.63818994323443, - "grad_norm": 0.021284379065036774, - "learning_rate": 0.00019998939211661386, - "loss": 46.0, - "step": 28801 - }, - { - "epoch": 4.638350980313217, - "grad_norm": 0.002376176882535219, - "learning_rate": 0.00019998939137972523, - "loss": 46.0, - "step": 28802 - }, - { - "epoch": 4.638512017392005, - "grad_norm": 0.006309289485216141, - "learning_rate": 0.000199989390642811, - "loss": 46.0, - "step": 28803 - }, - { - "epoch": 4.638673054470792, - "grad_norm": 0.011936895549297333, - "learning_rate": 0.0001999893899058712, - "loss": 46.0, - "step": 28804 - }, - { - "epoch": 4.63883409154958, - "grad_norm": 0.0042016212828457355, - "learning_rate": 0.00019998938916890584, - "loss": 46.0, - "step": 28805 - }, - { - "epoch": 4.638995128628367, - "grad_norm": 0.0020441431552171707, - "learning_rate": 0.00019998938843191483, - "loss": 46.0, - "step": 28806 - }, - { - "epoch": 4.6391561657071545, - "grad_norm": 0.002865507733076811, - "learning_rate": 0.00019998938769489827, - "loss": 46.0, - "step": 28807 - }, - { - "epoch": 4.639317202785941, - "grad_norm": 0.0020579732954502106, - "learning_rate": 0.00019998938695785608, - "loss": 46.0, - "step": 28808 - }, - { - "epoch": 4.6394782398647285, - "grad_norm": 0.0038868901319801807, - "learning_rate": 0.00019998938622078837, - "loss": 46.0, - "step": 28809 - }, - { - "epoch": 4.639639276943516, - "grad_norm": 0.004660432692617178, - "learning_rate": 0.00019998938548369499, - "loss": 46.0, - "step": 28810 - }, - { - "epoch": 4.639800314022303, - "grad_norm": 0.004483303986489773, - "learning_rate": 0.00019998938474657604, - "loss": 46.0, - "step": 28811 - }, - { - "epoch": 4.639961351101091, - "grad_norm": 0.007797194644808769, - "learning_rate": 0.0001999893840094315, - "loss": 46.0, - "step": 28812 - }, - { - "epoch": 4.640122388179878, - "grad_norm": 0.0037905557546764612, - "learning_rate": 0.0001999893832722614, - "loss": 46.0, - "step": 28813 - }, - { - "epoch": 4.640283425258666, - "grad_norm": 0.01063101552426815, - "learning_rate": 0.00019998938253506566, - "loss": 46.0, - "step": 28814 - }, - { - "epoch": 4.640444462337453, - "grad_norm": 0.000624926236923784, - "learning_rate": 0.00019998938179784437, - "loss": 46.0, - "step": 28815 - }, - { - "epoch": 4.640605499416241, - "grad_norm": 0.003992448095232248, - "learning_rate": 0.00019998938106059746, - "loss": 46.0, - "step": 28816 - }, - { - "epoch": 4.640766536495028, - "grad_norm": 0.004300958476960659, - "learning_rate": 0.00019998938032332494, - "loss": 46.0, - "step": 28817 - }, - { - "epoch": 4.640927573573816, - "grad_norm": 0.0032595705706626177, - "learning_rate": 0.00019998937958602686, - "loss": 46.0, - "step": 28818 - }, - { - "epoch": 4.641088610652603, - "grad_norm": 0.007325825281441212, - "learning_rate": 0.0001999893788487032, - "loss": 46.0, - "step": 28819 - }, - { - "epoch": 4.6412496477313905, - "grad_norm": 0.010010342113673687, - "learning_rate": 0.0001999893781113539, - "loss": 46.0, - "step": 28820 - }, - { - "epoch": 4.641410684810177, - "grad_norm": 0.0026441614609211683, - "learning_rate": 0.00019998937737397907, - "loss": 46.0, - "step": 28821 - }, - { - "epoch": 4.641571721888965, - "grad_norm": 0.0011363831581547856, - "learning_rate": 0.0001999893766365786, - "loss": 46.0, - "step": 28822 - }, - { - "epoch": 4.641732758967752, - "grad_norm": 0.002139223041012883, - "learning_rate": 0.00019998937589915254, - "loss": 46.0, - "step": 28823 - }, - { - "epoch": 4.641893796046539, - "grad_norm": 0.0007391894469037652, - "learning_rate": 0.0001999893751617009, - "loss": 46.0, - "step": 28824 - }, - { - "epoch": 4.642054833125327, - "grad_norm": 0.01802361011505127, - "learning_rate": 0.00019998937442422367, - "loss": 46.0, - "step": 28825 - }, - { - "epoch": 4.642215870204114, - "grad_norm": 0.001768721267580986, - "learning_rate": 0.00019998937368672083, - "loss": 46.0, - "step": 28826 - }, - { - "epoch": 4.642376907282902, - "grad_norm": 0.0010522359516471624, - "learning_rate": 0.00019998937294919244, - "loss": 46.0, - "step": 28827 - }, - { - "epoch": 4.642537944361689, - "grad_norm": 0.010329823940992355, - "learning_rate": 0.00019998937221163844, - "loss": 46.0, - "step": 28828 - }, - { - "epoch": 4.642698981440477, - "grad_norm": 0.010859774425625801, - "learning_rate": 0.00019998937147405884, - "loss": 46.0, - "step": 28829 - }, - { - "epoch": 4.642860018519264, - "grad_norm": 0.0082384143024683, - "learning_rate": 0.00019998937073645363, - "loss": 46.0, - "step": 28830 - }, - { - "epoch": 4.643021055598052, - "grad_norm": 0.017249522730708122, - "learning_rate": 0.00019998936999882284, - "loss": 46.0, - "step": 28831 - }, - { - "epoch": 4.643182092676839, - "grad_norm": 0.0026608631014823914, - "learning_rate": 0.00019998936926116648, - "loss": 46.0, - "step": 28832 - }, - { - "epoch": 4.6433431297556265, - "grad_norm": 0.009312928654253483, - "learning_rate": 0.0001999893685234845, - "loss": 46.0, - "step": 28833 - }, - { - "epoch": 4.643504166834414, - "grad_norm": 0.008810551837086678, - "learning_rate": 0.00019998936778577693, - "loss": 46.0, - "step": 28834 - }, - { - "epoch": 4.643665203913201, - "grad_norm": 0.002851797267794609, - "learning_rate": 0.00019998936704804376, - "loss": 46.0, - "step": 28835 - }, - { - "epoch": 4.643826240991988, - "grad_norm": 0.002766981953755021, - "learning_rate": 0.00019998936631028503, - "loss": 46.0, - "step": 28836 - }, - { - "epoch": 4.643987278070776, - "grad_norm": 0.00512302853167057, - "learning_rate": 0.0001999893655725007, - "loss": 46.0, - "step": 28837 - }, - { - "epoch": 4.644148315149563, - "grad_norm": 0.01607791893184185, - "learning_rate": 0.00019998936483469075, - "loss": 46.0, - "step": 28838 - }, - { - "epoch": 4.64430935222835, - "grad_norm": 0.005732540041208267, - "learning_rate": 0.00019998936409685523, - "loss": 46.0, - "step": 28839 - }, - { - "epoch": 4.644470389307138, - "grad_norm": 0.008913103491067886, - "learning_rate": 0.00019998936335899412, - "loss": 46.0, - "step": 28840 - }, - { - "epoch": 4.644631426385925, - "grad_norm": 0.01068598311394453, - "learning_rate": 0.0001999893626211074, - "loss": 46.0, - "step": 28841 - }, - { - "epoch": 4.644792463464713, - "grad_norm": 0.005982093047350645, - "learning_rate": 0.0001999893618831951, - "loss": 46.0, - "step": 28842 - }, - { - "epoch": 4.6449535005435, - "grad_norm": 0.0027696872130036354, - "learning_rate": 0.00019998936114525723, - "loss": 46.0, - "step": 28843 - }, - { - "epoch": 4.6451145376222875, - "grad_norm": 0.002058908576145768, - "learning_rate": 0.00019998936040729372, - "loss": 46.0, - "step": 28844 - }, - { - "epoch": 4.645275574701075, - "grad_norm": 0.0007687556208111346, - "learning_rate": 0.00019998935966930465, - "loss": 46.0, - "step": 28845 - }, - { - "epoch": 4.6454366117798624, - "grad_norm": 0.010753043927252293, - "learning_rate": 0.00019998935893129, - "loss": 46.0, - "step": 28846 - }, - { - "epoch": 4.64559764885865, - "grad_norm": 0.02487793378531933, - "learning_rate": 0.00019998935819324972, - "loss": 46.0, - "step": 28847 - }, - { - "epoch": 4.645758685937437, - "grad_norm": 0.004360293038189411, - "learning_rate": 0.0001999893574551839, - "loss": 46.0, - "step": 28848 - }, - { - "epoch": 4.645919723016225, - "grad_norm": 0.003180798841640353, - "learning_rate": 0.00019998935671709241, - "loss": 46.0, - "step": 28849 - }, - { - "epoch": 4.646080760095012, - "grad_norm": 0.006085132248699665, - "learning_rate": 0.00019998935597897538, - "loss": 46.0, - "step": 28850 - }, - { - "epoch": 4.646241797173799, - "grad_norm": 0.001889756415039301, - "learning_rate": 0.00019998935524083276, - "loss": 46.0, - "step": 28851 - }, - { - "epoch": 4.646402834252587, - "grad_norm": 0.009555756114423275, - "learning_rate": 0.00019998935450266453, - "loss": 46.0, - "step": 28852 - }, - { - "epoch": 4.646563871331374, - "grad_norm": 0.009193788282573223, - "learning_rate": 0.00019998935376447073, - "loss": 46.0, - "step": 28853 - }, - { - "epoch": 4.646724908410161, - "grad_norm": 0.0019975604955106974, - "learning_rate": 0.00019998935302625132, - "loss": 46.0, - "step": 28854 - }, - { - "epoch": 4.646885945488949, - "grad_norm": 0.01905670575797558, - "learning_rate": 0.00019998935228800633, - "loss": 46.0, - "step": 28855 - }, - { - "epoch": 4.647046982567736, - "grad_norm": 0.001570022781379521, - "learning_rate": 0.00019998935154973572, - "loss": 46.0, - "step": 28856 - }, - { - "epoch": 4.6472080196465235, - "grad_norm": 0.0019738497212529182, - "learning_rate": 0.00019998935081143955, - "loss": 46.0, - "step": 28857 - }, - { - "epoch": 4.647369056725311, - "grad_norm": 0.0029243184253573418, - "learning_rate": 0.00019998935007311776, - "loss": 46.0, - "step": 28858 - }, - { - "epoch": 4.647530093804098, - "grad_norm": 0.005150943994522095, - "learning_rate": 0.0001999893493347704, - "loss": 46.0, - "step": 28859 - }, - { - "epoch": 4.647691130882886, - "grad_norm": 0.012920072302222252, - "learning_rate": 0.00019998934859639746, - "loss": 46.0, - "step": 28860 - }, - { - "epoch": 4.647852167961673, - "grad_norm": 0.0070352330803871155, - "learning_rate": 0.0001999893478579989, - "loss": 46.0, - "step": 28861 - }, - { - "epoch": 4.648013205040461, - "grad_norm": 0.0014051893958821893, - "learning_rate": 0.00019998934711957476, - "loss": 46.0, - "step": 28862 - }, - { - "epoch": 4.648174242119248, - "grad_norm": 0.0026403588708490133, - "learning_rate": 0.000199989346381125, - "loss": 46.0, - "step": 28863 - }, - { - "epoch": 4.648335279198036, - "grad_norm": 0.001131717232055962, - "learning_rate": 0.00019998934564264967, - "loss": 46.0, - "step": 28864 - }, - { - "epoch": 4.648496316276823, - "grad_norm": 0.0032847800757735968, - "learning_rate": 0.00019998934490414878, - "loss": 46.0, - "step": 28865 - }, - { - "epoch": 4.64865735335561, - "grad_norm": 0.0187359731644392, - "learning_rate": 0.00019998934416562224, - "loss": 46.0, - "step": 28866 - }, - { - "epoch": 4.648818390434397, - "grad_norm": 0.0023506670258939266, - "learning_rate": 0.00019998934342707017, - "loss": 46.0, - "step": 28867 - }, - { - "epoch": 4.648979427513185, - "grad_norm": 0.0016623161500319839, - "learning_rate": 0.00019998934268849246, - "loss": 46.0, - "step": 28868 - }, - { - "epoch": 4.649140464591972, - "grad_norm": 0.01885918341577053, - "learning_rate": 0.0001999893419498892, - "loss": 46.0, - "step": 28869 - }, - { - "epoch": 4.6493015016707595, - "grad_norm": 0.0025795241817831993, - "learning_rate": 0.00019998934121126028, - "loss": 46.0, - "step": 28870 - }, - { - "epoch": 4.649462538749547, - "grad_norm": 0.005783949047327042, - "learning_rate": 0.00019998934047260584, - "loss": 46.0, - "step": 28871 - }, - { - "epoch": 4.649623575828334, - "grad_norm": 0.004076711367815733, - "learning_rate": 0.00019998933973392578, - "loss": 46.0, - "step": 28872 - }, - { - "epoch": 4.649784612907122, - "grad_norm": 0.002280758460983634, - "learning_rate": 0.0001999893389952201, - "loss": 46.0, - "step": 28873 - }, - { - "epoch": 4.649945649985909, - "grad_norm": 0.0033324796240776777, - "learning_rate": 0.00019998933825648884, - "loss": 46.0, - "step": 28874 - }, - { - "epoch": 4.650106687064697, - "grad_norm": 0.005482825450599194, - "learning_rate": 0.00019998933751773202, - "loss": 46.0, - "step": 28875 - }, - { - "epoch": 4.650267724143484, - "grad_norm": 0.0033118233550339937, - "learning_rate": 0.0001999893367789496, - "loss": 46.0, - "step": 28876 - }, - { - "epoch": 4.650428761222272, - "grad_norm": 0.0021592797711491585, - "learning_rate": 0.00019998933604014157, - "loss": 46.0, - "step": 28877 - }, - { - "epoch": 4.650589798301059, - "grad_norm": 0.001610046485438943, - "learning_rate": 0.00019998933530130796, - "loss": 46.0, - "step": 28878 - }, - { - "epoch": 4.650750835379847, - "grad_norm": 0.0034365851897746325, - "learning_rate": 0.00019998933456244873, - "loss": 46.0, - "step": 28879 - }, - { - "epoch": 4.650911872458634, - "grad_norm": 0.0019274357473477721, - "learning_rate": 0.00019998933382356392, - "loss": 46.0, - "step": 28880 - }, - { - "epoch": 4.651072909537421, - "grad_norm": 0.004763009026646614, - "learning_rate": 0.00019998933308465353, - "loss": 46.0, - "step": 28881 - }, - { - "epoch": 4.651233946616208, - "grad_norm": 0.006467279512435198, - "learning_rate": 0.00019998933234571754, - "loss": 46.0, - "step": 28882 - }, - { - "epoch": 4.6513949836949955, - "grad_norm": 0.011365518905222416, - "learning_rate": 0.00019998933160675597, - "loss": 46.0, - "step": 28883 - }, - { - "epoch": 4.651556020773783, - "grad_norm": 0.0031606238335371017, - "learning_rate": 0.0001999893308677688, - "loss": 46.0, - "step": 28884 - }, - { - "epoch": 4.65171705785257, - "grad_norm": 0.0038487704005092382, - "learning_rate": 0.00019998933012875604, - "loss": 46.0, - "step": 28885 - }, - { - "epoch": 4.651878094931358, - "grad_norm": 0.0037622677627950907, - "learning_rate": 0.00019998932938971768, - "loss": 46.0, - "step": 28886 - }, - { - "epoch": 4.652039132010145, - "grad_norm": 0.00617364514619112, - "learning_rate": 0.00019998932865065373, - "loss": 46.0, - "step": 28887 - }, - { - "epoch": 4.652200169088933, - "grad_norm": 0.0050621856935322285, - "learning_rate": 0.0001999893279115642, - "loss": 46.0, - "step": 28888 - }, - { - "epoch": 4.65236120616772, - "grad_norm": 0.0010590513702481985, - "learning_rate": 0.00019998932717244907, - "loss": 46.0, - "step": 28889 - }, - { - "epoch": 4.652522243246508, - "grad_norm": 0.00235868152230978, - "learning_rate": 0.00019998932643330833, - "loss": 46.0, - "step": 28890 - }, - { - "epoch": 4.652683280325295, - "grad_norm": 0.0073089152574539185, - "learning_rate": 0.00019998932569414204, - "loss": 46.0, - "step": 28891 - }, - { - "epoch": 4.6528443174040826, - "grad_norm": 0.006188882980495691, - "learning_rate": 0.00019998932495495013, - "loss": 46.0, - "step": 28892 - }, - { - "epoch": 4.65300535448287, - "grad_norm": 0.013392229564487934, - "learning_rate": 0.00019998932421573263, - "loss": 46.0, - "step": 28893 - }, - { - "epoch": 4.6531663915616575, - "grad_norm": 0.0023209438659250736, - "learning_rate": 0.00019998932347648954, - "loss": 46.0, - "step": 28894 - }, - { - "epoch": 4.653327428640445, - "grad_norm": 0.007212834898382425, - "learning_rate": 0.00019998932273722084, - "loss": 46.0, - "step": 28895 - }, - { - "epoch": 4.6534884657192315, - "grad_norm": 0.007052355445921421, - "learning_rate": 0.00019998932199792656, - "loss": 46.0, - "step": 28896 - }, - { - "epoch": 4.653649502798019, - "grad_norm": 0.0017822900554165244, - "learning_rate": 0.0001999893212586067, - "loss": 46.0, - "step": 28897 - }, - { - "epoch": 4.653810539876806, - "grad_norm": 0.0026739598251879215, - "learning_rate": 0.00019998932051926125, - "loss": 46.0, - "step": 28898 - }, - { - "epoch": 4.653971576955594, - "grad_norm": 0.001390158082358539, - "learning_rate": 0.0001999893197798902, - "loss": 46.0, - "step": 28899 - }, - { - "epoch": 4.654132614034381, - "grad_norm": 0.0026745223440229893, - "learning_rate": 0.00019998931904049357, - "loss": 46.0, - "step": 28900 - }, - { - "epoch": 4.654293651113169, - "grad_norm": 0.0022330102510750294, - "learning_rate": 0.00019998931830107132, - "loss": 46.0, - "step": 28901 - }, - { - "epoch": 4.654454688191956, - "grad_norm": 0.0026370638515800238, - "learning_rate": 0.00019998931756162348, - "loss": 46.0, - "step": 28902 - }, - { - "epoch": 4.654615725270744, - "grad_norm": 0.008907534182071686, - "learning_rate": 0.00019998931682215006, - "loss": 46.0, - "step": 28903 - }, - { - "epoch": 4.654776762349531, - "grad_norm": 0.0017651786329224706, - "learning_rate": 0.00019998931608265105, - "loss": 46.0, - "step": 28904 - }, - { - "epoch": 4.6549377994283185, - "grad_norm": 0.0031292897183448076, - "learning_rate": 0.00019998931534312645, - "loss": 46.0, - "step": 28905 - }, - { - "epoch": 4.655098836507106, - "grad_norm": 0.0037577941548079252, - "learning_rate": 0.00019998931460357624, - "loss": 46.0, - "step": 28906 - }, - { - "epoch": 4.655259873585893, - "grad_norm": 0.005130811594426632, - "learning_rate": 0.00019998931386400046, - "loss": 46.0, - "step": 28907 - }, - { - "epoch": 4.655420910664681, - "grad_norm": 0.007506816182285547, - "learning_rate": 0.00019998931312439908, - "loss": 46.0, - "step": 28908 - }, - { - "epoch": 4.6555819477434675, - "grad_norm": 0.0010403116466477513, - "learning_rate": 0.00019998931238477213, - "loss": 46.0, - "step": 28909 - }, - { - "epoch": 4.655742984822256, - "grad_norm": 0.003379139583557844, - "learning_rate": 0.00019998931164511954, - "loss": 46.0, - "step": 28910 - }, - { - "epoch": 4.655904021901042, - "grad_norm": 0.0038106029387563467, - "learning_rate": 0.00019998931090544136, - "loss": 46.0, - "step": 28911 - }, - { - "epoch": 4.65606505897983, - "grad_norm": 0.0013994182227179408, - "learning_rate": 0.00019998931016573763, - "loss": 46.0, - "step": 28912 - }, - { - "epoch": 4.656226096058617, - "grad_norm": 0.004259624518454075, - "learning_rate": 0.00019998930942600828, - "loss": 46.0, - "step": 28913 - }, - { - "epoch": 4.656387133137405, - "grad_norm": 0.003210079623386264, - "learning_rate": 0.00019998930868625337, - "loss": 46.0, - "step": 28914 - }, - { - "epoch": 4.656548170216192, - "grad_norm": 0.006245154421776533, - "learning_rate": 0.00019998930794647285, - "loss": 46.0, - "step": 28915 - }, - { - "epoch": 4.65670920729498, - "grad_norm": 0.01517784409224987, - "learning_rate": 0.0001999893072066667, - "loss": 46.0, - "step": 28916 - }, - { - "epoch": 4.656870244373767, - "grad_norm": 0.0025047510862350464, - "learning_rate": 0.000199989306466835, - "loss": 46.0, - "step": 28917 - }, - { - "epoch": 4.6570312814525545, - "grad_norm": 0.0025115013122558594, - "learning_rate": 0.00019998930572697772, - "loss": 46.0, - "step": 28918 - }, - { - "epoch": 4.657192318531342, - "grad_norm": 0.011892990209162235, - "learning_rate": 0.0001999893049870948, - "loss": 46.0, - "step": 28919 - }, - { - "epoch": 4.657353355610129, - "grad_norm": 0.0019797985441982746, - "learning_rate": 0.0001999893042471863, - "loss": 46.0, - "step": 28920 - }, - { - "epoch": 4.657514392688917, - "grad_norm": 0.005202241241931915, - "learning_rate": 0.00019998930350725223, - "loss": 46.0, - "step": 28921 - }, - { - "epoch": 4.657675429767704, - "grad_norm": 0.007219049613922834, - "learning_rate": 0.00019998930276729255, - "loss": 46.0, - "step": 28922 - }, - { - "epoch": 4.657836466846492, - "grad_norm": 0.005986146628856659, - "learning_rate": 0.0001999893020273073, - "loss": 46.0, - "step": 28923 - }, - { - "epoch": 4.657997503925278, - "grad_norm": 0.0044911266304552555, - "learning_rate": 0.00019998930128729646, - "loss": 46.0, - "step": 28924 - }, - { - "epoch": 4.658158541004067, - "grad_norm": 0.0028890916146337986, - "learning_rate": 0.00019998930054725998, - "loss": 46.0, - "step": 28925 - }, - { - "epoch": 4.658319578082853, - "grad_norm": 0.0012727015418931842, - "learning_rate": 0.00019998929980719797, - "loss": 46.0, - "step": 28926 - }, - { - "epoch": 4.658480615161641, - "grad_norm": 0.0037314246874302626, - "learning_rate": 0.00019998929906711035, - "loss": 46.0, - "step": 28927 - }, - { - "epoch": 4.658641652240428, - "grad_norm": 0.0020747019443660975, - "learning_rate": 0.0001999892983269971, - "loss": 46.0, - "step": 28928 - }, - { - "epoch": 4.658802689319216, - "grad_norm": 0.002910978626459837, - "learning_rate": 0.00019998929758685828, - "loss": 46.0, - "step": 28929 - }, - { - "epoch": 4.658963726398003, - "grad_norm": 0.007213153410702944, - "learning_rate": 0.00019998929684669387, - "loss": 46.0, - "step": 28930 - }, - { - "epoch": 4.6591247634767905, - "grad_norm": 0.0007391978288069367, - "learning_rate": 0.00019998929610650387, - "loss": 46.0, - "step": 28931 - }, - { - "epoch": 4.659285800555578, - "grad_norm": 0.00462718028575182, - "learning_rate": 0.0001999892953662883, - "loss": 46.0, - "step": 28932 - }, - { - "epoch": 4.659446837634365, - "grad_norm": 0.0071923150680959225, - "learning_rate": 0.0001999892946260471, - "loss": 46.0, - "step": 28933 - }, - { - "epoch": 4.659607874713153, - "grad_norm": 0.009800664149224758, - "learning_rate": 0.00019998929388578032, - "loss": 46.0, - "step": 28934 - }, - { - "epoch": 4.65976891179194, - "grad_norm": 0.0016293359221890569, - "learning_rate": 0.00019998929314548794, - "loss": 46.0, - "step": 28935 - }, - { - "epoch": 4.659929948870728, - "grad_norm": 0.013044043444097042, - "learning_rate": 0.00019998929240517, - "loss": 46.0, - "step": 28936 - }, - { - "epoch": 4.660090985949515, - "grad_norm": 0.0018472142983227968, - "learning_rate": 0.00019998929166482646, - "loss": 46.0, - "step": 28937 - }, - { - "epoch": 4.660252023028303, - "grad_norm": 0.0036964186001569033, - "learning_rate": 0.0001999892909244573, - "loss": 46.0, - "step": 28938 - }, - { - "epoch": 4.660413060107089, - "grad_norm": 0.007954008877277374, - "learning_rate": 0.00019998929018406257, - "loss": 46.0, - "step": 28939 - }, - { - "epoch": 4.660574097185877, - "grad_norm": 0.004352484364062548, - "learning_rate": 0.00019998928944364223, - "loss": 46.0, - "step": 28940 - }, - { - "epoch": 4.660735134264664, - "grad_norm": 0.004022127948701382, - "learning_rate": 0.0001999892887031963, - "loss": 46.0, - "step": 28941 - }, - { - "epoch": 4.660896171343452, - "grad_norm": 0.013982059434056282, - "learning_rate": 0.0001999892879627248, - "loss": 46.0, - "step": 28942 - }, - { - "epoch": 4.661057208422239, - "grad_norm": 0.006098475307226181, - "learning_rate": 0.0001999892872222277, - "loss": 46.0, - "step": 28943 - }, - { - "epoch": 4.6612182455010265, - "grad_norm": 0.0031336217653006315, - "learning_rate": 0.000199989286481705, - "loss": 46.0, - "step": 28944 - }, - { - "epoch": 4.661379282579814, - "grad_norm": 0.005195137578994036, - "learning_rate": 0.00019998928574115673, - "loss": 46.0, - "step": 28945 - }, - { - "epoch": 4.661540319658601, - "grad_norm": 0.003978671506047249, - "learning_rate": 0.00019998928500058284, - "loss": 46.0, - "step": 28946 - }, - { - "epoch": 4.661701356737389, - "grad_norm": 0.0018693781457841396, - "learning_rate": 0.00019998928425998336, - "loss": 46.0, - "step": 28947 - }, - { - "epoch": 4.661862393816176, - "grad_norm": 0.006021400447934866, - "learning_rate": 0.00019998928351935827, - "loss": 46.0, - "step": 28948 - }, - { - "epoch": 4.662023430894964, - "grad_norm": 0.00260544684715569, - "learning_rate": 0.00019998928277870762, - "loss": 46.0, - "step": 28949 - }, - { - "epoch": 4.662184467973751, - "grad_norm": 0.003724590176716447, - "learning_rate": 0.00019998928203803138, - "loss": 46.0, - "step": 28950 - }, - { - "epoch": 4.662345505052539, - "grad_norm": 0.0018055383116006851, - "learning_rate": 0.00019998928129732956, - "loss": 46.0, - "step": 28951 - }, - { - "epoch": 4.662506542131326, - "grad_norm": 0.002299807732924819, - "learning_rate": 0.00019998928055660212, - "loss": 46.0, - "step": 28952 - }, - { - "epoch": 4.6626675792101135, - "grad_norm": 0.0006217075861059129, - "learning_rate": 0.00019998927981584907, - "loss": 46.0, - "step": 28953 - }, - { - "epoch": 4.6628286162889, - "grad_norm": 0.006435351446270943, - "learning_rate": 0.00019998927907507045, - "loss": 46.0, - "step": 28954 - }, - { - "epoch": 4.662989653367688, - "grad_norm": 0.008741314522922039, - "learning_rate": 0.00019998927833426625, - "loss": 46.0, - "step": 28955 - }, - { - "epoch": 4.663150690446475, - "grad_norm": 0.003162316745147109, - "learning_rate": 0.00019998927759343644, - "loss": 46.0, - "step": 28956 - }, - { - "epoch": 4.6633117275252625, - "grad_norm": 0.0022366230841726065, - "learning_rate": 0.00019998927685258103, - "loss": 46.0, - "step": 28957 - }, - { - "epoch": 4.66347276460405, - "grad_norm": 0.0020759697072207928, - "learning_rate": 0.00019998927611170007, - "loss": 46.0, - "step": 28958 - }, - { - "epoch": 4.663633801682837, - "grad_norm": 0.012824960052967072, - "learning_rate": 0.0001999892753707935, - "loss": 46.0, - "step": 28959 - }, - { - "epoch": 4.663794838761625, - "grad_norm": 0.003920450806617737, - "learning_rate": 0.0001999892746298613, - "loss": 46.0, - "step": 28960 - }, - { - "epoch": 4.663955875840412, - "grad_norm": 0.002151580061763525, - "learning_rate": 0.00019998927388890353, - "loss": 46.0, - "step": 28961 - }, - { - "epoch": 4.6641169129192, - "grad_norm": 0.0014872851315885782, - "learning_rate": 0.0001999892731479202, - "loss": 46.0, - "step": 28962 - }, - { - "epoch": 4.664277949997987, - "grad_norm": 0.009645538404583931, - "learning_rate": 0.00019998927240691124, - "loss": 46.0, - "step": 28963 - }, - { - "epoch": 4.664438987076775, - "grad_norm": 0.002582182874903083, - "learning_rate": 0.0001999892716658767, - "loss": 46.0, - "step": 28964 - }, - { - "epoch": 4.664600024155562, - "grad_norm": 0.001219027559272945, - "learning_rate": 0.00019998927092481657, - "loss": 46.0, - "step": 28965 - }, - { - "epoch": 4.6647610612343495, - "grad_norm": 0.0031504600774496794, - "learning_rate": 0.00019998927018373083, - "loss": 46.0, - "step": 28966 - }, - { - "epoch": 4.664922098313137, - "grad_norm": 0.0031122055370360613, - "learning_rate": 0.00019998926944261953, - "loss": 46.0, - "step": 28967 - }, - { - "epoch": 4.665083135391924, - "grad_norm": 0.0015825171722099185, - "learning_rate": 0.0001999892687014826, - "loss": 46.0, - "step": 28968 - }, - { - "epoch": 4.665244172470711, - "grad_norm": 0.0006179422489367425, - "learning_rate": 0.00019998926796032008, - "loss": 46.0, - "step": 28969 - }, - { - "epoch": 4.6654052095494984, - "grad_norm": 0.002954722149297595, - "learning_rate": 0.000199989267219132, - "loss": 46.0, - "step": 28970 - }, - { - "epoch": 4.665566246628286, - "grad_norm": 0.0018114696722477674, - "learning_rate": 0.00019998926647791834, - "loss": 46.0, - "step": 28971 - }, - { - "epoch": 4.665727283707073, - "grad_norm": 0.002110788132995367, - "learning_rate": 0.00019998926573667905, - "loss": 46.0, - "step": 28972 - }, - { - "epoch": 4.665888320785861, - "grad_norm": 0.009134180843830109, - "learning_rate": 0.00019998926499541417, - "loss": 46.0, - "step": 28973 - }, - { - "epoch": 4.666049357864648, - "grad_norm": 0.01235402375459671, - "learning_rate": 0.0001999892642541237, - "loss": 46.0, - "step": 28974 - }, - { - "epoch": 4.666210394943436, - "grad_norm": 0.005200848914682865, - "learning_rate": 0.00019998926351280766, - "loss": 46.0, - "step": 28975 - }, - { - "epoch": 4.666371432022223, - "grad_norm": 0.0018218058394268155, - "learning_rate": 0.00019998926277146604, - "loss": 46.0, - "step": 28976 - }, - { - "epoch": 4.666532469101011, - "grad_norm": 0.009023908525705338, - "learning_rate": 0.0001999892620300988, - "loss": 46.0, - "step": 28977 - }, - { - "epoch": 4.666693506179798, - "grad_norm": 0.0024534212425351143, - "learning_rate": 0.00019998926128870595, - "loss": 46.0, - "step": 28978 - }, - { - "epoch": 4.6668545432585855, - "grad_norm": 0.013477045111358166, - "learning_rate": 0.00019998926054728752, - "loss": 46.0, - "step": 28979 - }, - { - "epoch": 4.667015580337373, - "grad_norm": 0.004790612496435642, - "learning_rate": 0.0001999892598058435, - "loss": 46.0, - "step": 28980 - }, - { - "epoch": 4.66717661741616, - "grad_norm": 0.004339388106018305, - "learning_rate": 0.0001999892590643739, - "loss": 46.0, - "step": 28981 - }, - { - "epoch": 4.667337654494947, - "grad_norm": 0.00179983489215374, - "learning_rate": 0.0001999892583228787, - "loss": 46.0, - "step": 28982 - }, - { - "epoch": 4.667498691573735, - "grad_norm": 0.0025336977560073137, - "learning_rate": 0.0001999892575813579, - "loss": 46.0, - "step": 28983 - }, - { - "epoch": 4.667659728652522, - "grad_norm": 0.009031030349433422, - "learning_rate": 0.00019998925683981154, - "loss": 46.0, - "step": 28984 - }, - { - "epoch": 4.667820765731309, - "grad_norm": 0.002823849208652973, - "learning_rate": 0.00019998925609823956, - "loss": 46.0, - "step": 28985 - }, - { - "epoch": 4.667981802810097, - "grad_norm": 0.0065071796998381615, - "learning_rate": 0.000199989255356642, - "loss": 46.0, - "step": 28986 - }, - { - "epoch": 4.668142839888884, - "grad_norm": 0.0005395651096478105, - "learning_rate": 0.00019998925461501882, - "loss": 46.0, - "step": 28987 - }, - { - "epoch": 4.668303876967672, - "grad_norm": 0.005989130586385727, - "learning_rate": 0.00019998925387337008, - "loss": 46.0, - "step": 28988 - }, - { - "epoch": 4.668464914046459, - "grad_norm": 0.002158117713406682, - "learning_rate": 0.00019998925313169575, - "loss": 46.0, - "step": 28989 - }, - { - "epoch": 4.668625951125247, - "grad_norm": 0.004594181198626757, - "learning_rate": 0.0001999892523899958, - "loss": 46.0, - "step": 28990 - }, - { - "epoch": 4.668786988204034, - "grad_norm": 0.0063536809757351875, - "learning_rate": 0.00019998925164827028, - "loss": 46.0, - "step": 28991 - }, - { - "epoch": 4.6689480252828215, - "grad_norm": 0.002324148081243038, - "learning_rate": 0.00019998925090651914, - "loss": 46.0, - "step": 28992 - }, - { - "epoch": 4.669109062361609, - "grad_norm": 0.0036857454106211662, - "learning_rate": 0.00019998925016474246, - "loss": 46.0, - "step": 28993 - }, - { - "epoch": 4.669270099440396, - "grad_norm": 0.0008407070417888463, - "learning_rate": 0.00019998924942294015, - "loss": 46.0, - "step": 28994 - }, - { - "epoch": 4.669431136519184, - "grad_norm": 0.009376151487231255, - "learning_rate": 0.00019998924868111224, - "loss": 46.0, - "step": 28995 - }, - { - "epoch": 4.669592173597971, - "grad_norm": 0.003775755176320672, - "learning_rate": 0.00019998924793925875, - "loss": 46.0, - "step": 28996 - }, - { - "epoch": 4.669753210676758, - "grad_norm": 0.004688691347837448, - "learning_rate": 0.00019998924719737967, - "loss": 46.0, - "step": 28997 - }, - { - "epoch": 4.669914247755546, - "grad_norm": 0.02092566527426243, - "learning_rate": 0.00019998924645547498, - "loss": 46.0, - "step": 28998 - }, - { - "epoch": 4.670075284834333, - "grad_norm": 0.012101748958230019, - "learning_rate": 0.00019998924571354472, - "loss": 46.0, - "step": 28999 - }, - { - "epoch": 4.67023632191312, - "grad_norm": 0.014231319539248943, - "learning_rate": 0.00019998924497158888, - "loss": 46.0, - "step": 29000 - }, - { - "epoch": 4.670397358991908, - "grad_norm": 0.004863038659095764, - "learning_rate": 0.00019998924422960743, - "loss": 46.0, - "step": 29001 - }, - { - "epoch": 4.670558396070695, - "grad_norm": 0.0008185673505067825, - "learning_rate": 0.0001999892434876004, - "loss": 46.0, - "step": 29002 - }, - { - "epoch": 4.670719433149483, - "grad_norm": 0.009100433439016342, - "learning_rate": 0.00019998924274556776, - "loss": 46.0, - "step": 29003 - }, - { - "epoch": 4.67088047022827, - "grad_norm": 0.004728530067950487, - "learning_rate": 0.00019998924200350952, - "loss": 46.0, - "step": 29004 - }, - { - "epoch": 4.6710415073070575, - "grad_norm": 0.0031026548240333796, - "learning_rate": 0.0001999892412614257, - "loss": 46.0, - "step": 29005 - }, - { - "epoch": 4.671202544385845, - "grad_norm": 0.0032068907748907804, - "learning_rate": 0.00019998924051931632, - "loss": 46.0, - "step": 29006 - }, - { - "epoch": 4.671363581464632, - "grad_norm": 0.005066788289695978, - "learning_rate": 0.0001999892397771813, - "loss": 46.0, - "step": 29007 - }, - { - "epoch": 4.67152461854342, - "grad_norm": 0.004904876463115215, - "learning_rate": 0.00019998923903502073, - "loss": 46.0, - "step": 29008 - }, - { - "epoch": 4.671685655622207, - "grad_norm": 0.0179563257843256, - "learning_rate": 0.00019998923829283452, - "loss": 46.0, - "step": 29009 - }, - { - "epoch": 4.671846692700995, - "grad_norm": 0.004103793762624264, - "learning_rate": 0.00019998923755062275, - "loss": 46.0, - "step": 29010 - }, - { - "epoch": 4.672007729779782, - "grad_norm": 0.0026706280186772346, - "learning_rate": 0.00019998923680838537, - "loss": 46.0, - "step": 29011 - }, - { - "epoch": 4.672168766858569, - "grad_norm": 0.005859110038727522, - "learning_rate": 0.0001999892360661224, - "loss": 46.0, - "step": 29012 - }, - { - "epoch": 4.672329803937356, - "grad_norm": 0.0018263488309457898, - "learning_rate": 0.00019998923532383388, - "loss": 46.0, - "step": 29013 - }, - { - "epoch": 4.672490841016144, - "grad_norm": 0.011946558952331543, - "learning_rate": 0.0001999892345815197, - "loss": 46.0, - "step": 29014 - }, - { - "epoch": 4.672651878094931, - "grad_norm": 0.0055854138918221, - "learning_rate": 0.00019998923383917998, - "loss": 46.0, - "step": 29015 - }, - { - "epoch": 4.6728129151737186, - "grad_norm": 0.0018073191167786717, - "learning_rate": 0.00019998923309681464, - "loss": 46.0, - "step": 29016 - }, - { - "epoch": 4.672973952252506, - "grad_norm": 0.006920395884662867, - "learning_rate": 0.00019998923235442373, - "loss": 46.0, - "step": 29017 - }, - { - "epoch": 4.6731349893312935, - "grad_norm": 0.002207123441621661, - "learning_rate": 0.0001999892316120072, - "loss": 46.0, - "step": 29018 - }, - { - "epoch": 4.673296026410081, - "grad_norm": 0.0018793592462316155, - "learning_rate": 0.00019998923086956508, - "loss": 46.0, - "step": 29019 - }, - { - "epoch": 4.673457063488868, - "grad_norm": 0.0027318818029016256, - "learning_rate": 0.00019998923012709742, - "loss": 46.0, - "step": 29020 - }, - { - "epoch": 4.673618100567656, - "grad_norm": 0.0016887737438082695, - "learning_rate": 0.0001999892293846041, - "loss": 46.0, - "step": 29021 - }, - { - "epoch": 4.673779137646443, - "grad_norm": 0.0023246132768690586, - "learning_rate": 0.00019998922864208522, - "loss": 46.0, - "step": 29022 - }, - { - "epoch": 4.673940174725231, - "grad_norm": 0.004542097914963961, - "learning_rate": 0.00019998922789954076, - "loss": 46.0, - "step": 29023 - }, - { - "epoch": 4.674101211804018, - "grad_norm": 0.02634265273809433, - "learning_rate": 0.0001999892271569707, - "loss": 46.0, - "step": 29024 - }, - { - "epoch": 4.674262248882806, - "grad_norm": 0.003157178871333599, - "learning_rate": 0.00019998922641437504, - "loss": 46.0, - "step": 29025 - }, - { - "epoch": 4.674423285961593, - "grad_norm": 0.004033325240015984, - "learning_rate": 0.00019998922567175377, - "loss": 46.0, - "step": 29026 - }, - { - "epoch": 4.67458432304038, - "grad_norm": 0.01731760799884796, - "learning_rate": 0.00019998922492910691, - "loss": 46.0, - "step": 29027 - }, - { - "epoch": 4.674745360119167, - "grad_norm": 0.002872991608455777, - "learning_rate": 0.0001999892241864345, - "loss": 46.0, - "step": 29028 - }, - { - "epoch": 4.6749063971979545, - "grad_norm": 0.01205460261553526, - "learning_rate": 0.00019998922344373647, - "loss": 46.0, - "step": 29029 - }, - { - "epoch": 4.675067434276742, - "grad_norm": 0.004222155548632145, - "learning_rate": 0.00019998922270101285, - "loss": 46.0, - "step": 29030 - }, - { - "epoch": 4.675228471355529, - "grad_norm": 0.00934133306145668, - "learning_rate": 0.00019998922195826362, - "loss": 46.0, - "step": 29031 - }, - { - "epoch": 4.675389508434317, - "grad_norm": 0.011708592996001244, - "learning_rate": 0.00019998922121548885, - "loss": 46.0, - "step": 29032 - }, - { - "epoch": 4.675550545513104, - "grad_norm": 0.0011638246942311525, - "learning_rate": 0.00019998922047268844, - "loss": 46.0, - "step": 29033 - }, - { - "epoch": 4.675711582591892, - "grad_norm": 0.0007008335669524968, - "learning_rate": 0.00019998921972986245, - "loss": 46.0, - "step": 29034 - }, - { - "epoch": 4.675872619670679, - "grad_norm": 0.006141862832009792, - "learning_rate": 0.00019998921898701087, - "loss": 46.0, - "step": 29035 - }, - { - "epoch": 4.676033656749467, - "grad_norm": 0.004861264023929834, - "learning_rate": 0.00019998921824413367, - "loss": 46.0, - "step": 29036 - }, - { - "epoch": 4.676194693828254, - "grad_norm": 0.009285642765462399, - "learning_rate": 0.00019998921750123092, - "loss": 46.0, - "step": 29037 - }, - { - "epoch": 4.676355730907042, - "grad_norm": 0.0068975286558270454, - "learning_rate": 0.00019998921675830258, - "loss": 46.0, - "step": 29038 - }, - { - "epoch": 4.676516767985829, - "grad_norm": 0.005794209893792868, - "learning_rate": 0.00019998921601534862, - "loss": 46.0, - "step": 29039 - }, - { - "epoch": 4.6766778050646165, - "grad_norm": 0.009501967579126358, - "learning_rate": 0.00019998921527236908, - "loss": 46.0, - "step": 29040 - }, - { - "epoch": 4.676838842143404, - "grad_norm": 0.010775511153042316, - "learning_rate": 0.00019998921452936394, - "loss": 46.0, - "step": 29041 - }, - { - "epoch": 4.6769998792221905, - "grad_norm": 0.0021371403709053993, - "learning_rate": 0.00019998921378633323, - "loss": 46.0, - "step": 29042 - }, - { - "epoch": 4.677160916300978, - "grad_norm": 0.0052638365887105465, - "learning_rate": 0.00019998921304327692, - "loss": 46.0, - "step": 29043 - }, - { - "epoch": 4.677321953379765, - "grad_norm": 0.011134116910398006, - "learning_rate": 0.000199989212300195, - "loss": 46.0, - "step": 29044 - }, - { - "epoch": 4.677482990458553, - "grad_norm": 0.002875329228118062, - "learning_rate": 0.00019998921155708747, - "loss": 46.0, - "step": 29045 - }, - { - "epoch": 4.67764402753734, - "grad_norm": 0.0009744235430844128, - "learning_rate": 0.0001999892108139544, - "loss": 46.0, - "step": 29046 - }, - { - "epoch": 4.677805064616128, - "grad_norm": 0.005998586770147085, - "learning_rate": 0.00019998921007079572, - "loss": 46.0, - "step": 29047 - }, - { - "epoch": 4.677966101694915, - "grad_norm": 0.0025747613981366158, - "learning_rate": 0.00019998920932761142, - "loss": 46.0, - "step": 29048 - }, - { - "epoch": 4.678127138773703, - "grad_norm": 0.005435957573354244, - "learning_rate": 0.00019998920858440157, - "loss": 46.0, - "step": 29049 - }, - { - "epoch": 4.67828817585249, - "grad_norm": 0.002497670240700245, - "learning_rate": 0.0001999892078411661, - "loss": 46.0, - "step": 29050 - }, - { - "epoch": 4.678449212931278, - "grad_norm": 0.004089960362762213, - "learning_rate": 0.00019998920709790504, - "loss": 46.0, - "step": 29051 - }, - { - "epoch": 4.678610250010065, - "grad_norm": 0.0021418847609311342, - "learning_rate": 0.0001999892063546184, - "loss": 46.0, - "step": 29052 - }, - { - "epoch": 4.6787712870888525, - "grad_norm": 0.004259703680872917, - "learning_rate": 0.00019998920561130616, - "loss": 46.0, - "step": 29053 - }, - { - "epoch": 4.67893232416764, - "grad_norm": 0.0005282812635414302, - "learning_rate": 0.00019998920486796832, - "loss": 46.0, - "step": 29054 - }, - { - "epoch": 4.6790933612464265, - "grad_norm": 0.010999402031302452, - "learning_rate": 0.00019998920412460489, - "loss": 46.0, - "step": 29055 - }, - { - "epoch": 4.679254398325215, - "grad_norm": 0.00427471986040473, - "learning_rate": 0.0001999892033812159, - "loss": 46.0, - "step": 29056 - }, - { - "epoch": 4.679415435404001, - "grad_norm": 0.001892294967547059, - "learning_rate": 0.00019998920263780129, - "loss": 46.0, - "step": 29057 - }, - { - "epoch": 4.679576472482789, - "grad_norm": 0.009502123109996319, - "learning_rate": 0.00019998920189436106, - "loss": 46.0, - "step": 29058 - }, - { - "epoch": 4.679737509561576, - "grad_norm": 0.001962902257218957, - "learning_rate": 0.0001999892011508953, - "loss": 46.0, - "step": 29059 - }, - { - "epoch": 4.679898546640364, - "grad_norm": 0.004759308882057667, - "learning_rate": 0.0001999892004074039, - "loss": 46.0, - "step": 29060 - }, - { - "epoch": 4.680059583719151, - "grad_norm": 0.001699597924016416, - "learning_rate": 0.00019998919966388693, - "loss": 46.0, - "step": 29061 - }, - { - "epoch": 4.680220620797939, - "grad_norm": 0.0034312093630433083, - "learning_rate": 0.00019998919892034436, - "loss": 46.0, - "step": 29062 - }, - { - "epoch": 4.680381657876726, - "grad_norm": 0.00467008538544178, - "learning_rate": 0.0001999891981767762, - "loss": 46.0, - "step": 29063 - }, - { - "epoch": 4.680542694955514, - "grad_norm": 0.002616120269522071, - "learning_rate": 0.00019998919743318246, - "loss": 46.0, - "step": 29064 - }, - { - "epoch": 4.680703732034301, - "grad_norm": 0.0087283905595541, - "learning_rate": 0.0001999891966895631, - "loss": 46.0, - "step": 29065 - }, - { - "epoch": 4.6808647691130885, - "grad_norm": 0.006890458986163139, - "learning_rate": 0.00019998919594591818, - "loss": 46.0, - "step": 29066 - }, - { - "epoch": 4.681025806191876, - "grad_norm": 0.0010276816319674253, - "learning_rate": 0.00019998919520224765, - "loss": 46.0, - "step": 29067 - }, - { - "epoch": 4.681186843270663, - "grad_norm": 0.0011364122619852424, - "learning_rate": 0.00019998919445855153, - "loss": 46.0, - "step": 29068 - }, - { - "epoch": 4.681347880349451, - "grad_norm": 0.005245715845376253, - "learning_rate": 0.0001999891937148298, - "loss": 46.0, - "step": 29069 - }, - { - "epoch": 4.681508917428237, - "grad_norm": 0.0015774654457345605, - "learning_rate": 0.00019998919297108253, - "loss": 46.0, - "step": 29070 - }, - { - "epoch": 4.681669954507026, - "grad_norm": 0.004939381033182144, - "learning_rate": 0.00019998919222730962, - "loss": 46.0, - "step": 29071 - }, - { - "epoch": 4.681830991585812, - "grad_norm": 0.010158554650843143, - "learning_rate": 0.00019998919148351112, - "loss": 46.0, - "step": 29072 - }, - { - "epoch": 4.6819920286646, - "grad_norm": 0.013227131217718124, - "learning_rate": 0.00019998919073968704, - "loss": 46.0, - "step": 29073 - }, - { - "epoch": 4.682153065743387, - "grad_norm": 0.010011455975472927, - "learning_rate": 0.00019998918999583737, - "loss": 46.0, - "step": 29074 - }, - { - "epoch": 4.682314102822175, - "grad_norm": 0.002737136324867606, - "learning_rate": 0.0001999891892519621, - "loss": 46.0, - "step": 29075 - }, - { - "epoch": 4.682475139900962, - "grad_norm": 0.0052657099440693855, - "learning_rate": 0.00019998918850806126, - "loss": 46.0, - "step": 29076 - }, - { - "epoch": 4.6826361769797495, - "grad_norm": 0.0023131216876208782, - "learning_rate": 0.0001999891877641348, - "loss": 46.0, - "step": 29077 - }, - { - "epoch": 4.682797214058537, - "grad_norm": 0.0012860159622505307, - "learning_rate": 0.00019998918702018273, - "loss": 46.0, - "step": 29078 - }, - { - "epoch": 4.6829582511373244, - "grad_norm": 0.004784207325428724, - "learning_rate": 0.0001999891862762051, - "loss": 46.0, - "step": 29079 - }, - { - "epoch": 4.683119288216112, - "grad_norm": 0.0028047605883330107, - "learning_rate": 0.0001999891855322019, - "loss": 46.0, - "step": 29080 - }, - { - "epoch": 4.683280325294899, - "grad_norm": 0.0030267657712101936, - "learning_rate": 0.00019998918478817307, - "loss": 46.0, - "step": 29081 - }, - { - "epoch": 4.683441362373687, - "grad_norm": 0.004298881161957979, - "learning_rate": 0.00019998918404411868, - "loss": 46.0, - "step": 29082 - }, - { - "epoch": 4.683602399452474, - "grad_norm": 0.0023336512967944145, - "learning_rate": 0.0001999891833000387, - "loss": 46.0, - "step": 29083 - }, - { - "epoch": 4.683763436531262, - "grad_norm": 0.004733358975499868, - "learning_rate": 0.00019998918255593307, - "loss": 46.0, - "step": 29084 - }, - { - "epoch": 4.683924473610048, - "grad_norm": 0.01219208724796772, - "learning_rate": 0.00019998918181180191, - "loss": 46.0, - "step": 29085 - }, - { - "epoch": 4.684085510688836, - "grad_norm": 0.002086380496621132, - "learning_rate": 0.00019998918106764512, - "loss": 46.0, - "step": 29086 - }, - { - "epoch": 4.684246547767623, - "grad_norm": 0.004757540766149759, - "learning_rate": 0.00019998918032346273, - "loss": 46.0, - "step": 29087 - }, - { - "epoch": 4.684407584846411, - "grad_norm": 0.0022966107353568077, - "learning_rate": 0.00019998917957925479, - "loss": 46.0, - "step": 29088 - }, - { - "epoch": 4.684568621925198, - "grad_norm": 0.006842173635959625, - "learning_rate": 0.00019998917883502123, - "loss": 46.0, - "step": 29089 - }, - { - "epoch": 4.6847296590039855, - "grad_norm": 0.012283913791179657, - "learning_rate": 0.00019998917809076208, - "loss": 46.0, - "step": 29090 - }, - { - "epoch": 4.684890696082773, - "grad_norm": 0.000713318819180131, - "learning_rate": 0.00019998917734647735, - "loss": 46.0, - "step": 29091 - }, - { - "epoch": 4.68505173316156, - "grad_norm": 0.004016088787466288, - "learning_rate": 0.000199989176602167, - "loss": 46.0, - "step": 29092 - }, - { - "epoch": 4.685212770240348, - "grad_norm": 0.006681870669126511, - "learning_rate": 0.0001999891758578311, - "loss": 46.0, - "step": 29093 - }, - { - "epoch": 4.685373807319135, - "grad_norm": 0.002273829421028495, - "learning_rate": 0.00019998917511346957, - "loss": 46.0, - "step": 29094 - }, - { - "epoch": 4.685534844397923, - "grad_norm": 0.0017024537082761526, - "learning_rate": 0.00019998917436908246, - "loss": 46.0, - "step": 29095 - }, - { - "epoch": 4.68569588147671, - "grad_norm": 0.0027831485494971275, - "learning_rate": 0.0001999891736246698, - "loss": 46.0, - "step": 29096 - }, - { - "epoch": 4.685856918555498, - "grad_norm": 0.0107642887160182, - "learning_rate": 0.00019998917288023148, - "loss": 46.0, - "step": 29097 - }, - { - "epoch": 4.686017955634285, - "grad_norm": 0.003889176994562149, - "learning_rate": 0.00019998917213576758, - "loss": 46.0, - "step": 29098 - }, - { - "epoch": 4.686178992713073, - "grad_norm": 0.0033913159277290106, - "learning_rate": 0.00019998917139127812, - "loss": 46.0, - "step": 29099 - }, - { - "epoch": 4.686340029791859, - "grad_norm": 0.0023318175226449966, - "learning_rate": 0.00019998917064676305, - "loss": 46.0, - "step": 29100 - }, - { - "epoch": 4.686501066870647, - "grad_norm": 0.001813381677493453, - "learning_rate": 0.00019998916990222242, - "loss": 46.0, - "step": 29101 - }, - { - "epoch": 4.686662103949434, - "grad_norm": 0.001382240210659802, - "learning_rate": 0.00019998916915765614, - "loss": 46.0, - "step": 29102 - }, - { - "epoch": 4.6868231410282215, - "grad_norm": 0.0021454552188515663, - "learning_rate": 0.0001999891684130643, - "loss": 46.0, - "step": 29103 - }, - { - "epoch": 4.686984178107009, - "grad_norm": 0.006069576367735863, - "learning_rate": 0.00019998916766844686, - "loss": 46.0, - "step": 29104 - }, - { - "epoch": 4.687145215185796, - "grad_norm": 0.01616569608449936, - "learning_rate": 0.00019998916692380383, - "loss": 46.0, - "step": 29105 - }, - { - "epoch": 4.687306252264584, - "grad_norm": 0.006197620648890734, - "learning_rate": 0.0001999891661791352, - "loss": 46.0, - "step": 29106 - }, - { - "epoch": 4.687467289343371, - "grad_norm": 0.00939467828720808, - "learning_rate": 0.000199989165434441, - "loss": 46.0, - "step": 29107 - }, - { - "epoch": 4.687628326422159, - "grad_norm": 0.011693950742483139, - "learning_rate": 0.0001999891646897212, - "loss": 46.0, - "step": 29108 - }, - { - "epoch": 4.687789363500946, - "grad_norm": 0.00068492820719257, - "learning_rate": 0.0001999891639449758, - "loss": 46.0, - "step": 29109 - }, - { - "epoch": 4.687950400579734, - "grad_norm": 0.007851121947169304, - "learning_rate": 0.0001999891632002048, - "loss": 46.0, - "step": 29110 - }, - { - "epoch": 4.688111437658521, - "grad_norm": 0.0035151569172739983, - "learning_rate": 0.00019998916245540823, - "loss": 46.0, - "step": 29111 - }, - { - "epoch": 4.688272474737309, - "grad_norm": 0.0029837789479643106, - "learning_rate": 0.00019998916171058606, - "loss": 46.0, - "step": 29112 - }, - { - "epoch": 4.688433511816096, - "grad_norm": 0.003076000139117241, - "learning_rate": 0.00019998916096573827, - "loss": 46.0, - "step": 29113 - }, - { - "epoch": 4.6885945488948835, - "grad_norm": 0.0014842329546809196, - "learning_rate": 0.00019998916022086492, - "loss": 46.0, - "step": 29114 - }, - { - "epoch": 4.68875558597367, - "grad_norm": 0.0019867236260324717, - "learning_rate": 0.00019998915947596596, - "loss": 46.0, - "step": 29115 - }, - { - "epoch": 4.6889166230524575, - "grad_norm": 0.0016855103895068169, - "learning_rate": 0.00019998915873104144, - "loss": 46.0, - "step": 29116 - }, - { - "epoch": 4.689077660131245, - "grad_norm": 0.009796492755413055, - "learning_rate": 0.0001999891579860913, - "loss": 46.0, - "step": 29117 - }, - { - "epoch": 4.689238697210032, - "grad_norm": 0.0004139193333685398, - "learning_rate": 0.00019998915724111558, - "loss": 46.0, - "step": 29118 - }, - { - "epoch": 4.68939973428882, - "grad_norm": 0.0029851384460926056, - "learning_rate": 0.00019998915649611427, - "loss": 46.0, - "step": 29119 - }, - { - "epoch": 4.689560771367607, - "grad_norm": 0.008399692364037037, - "learning_rate": 0.00019998915575108737, - "loss": 46.0, - "step": 29120 - }, - { - "epoch": 4.689721808446395, - "grad_norm": 0.005843984428793192, - "learning_rate": 0.00019998915500603486, - "loss": 46.0, - "step": 29121 - }, - { - "epoch": 4.689882845525182, - "grad_norm": 0.0018409896874800324, - "learning_rate": 0.00019998915426095676, - "loss": 46.0, - "step": 29122 - }, - { - "epoch": 4.69004388260397, - "grad_norm": 0.0005754969897679985, - "learning_rate": 0.00019998915351585308, - "loss": 46.0, - "step": 29123 - }, - { - "epoch": 4.690204919682757, - "grad_norm": 0.0020168640185147524, - "learning_rate": 0.00019998915277072378, - "loss": 46.0, - "step": 29124 - }, - { - "epoch": 4.6903659567615446, - "grad_norm": 0.0038000138010829687, - "learning_rate": 0.00019998915202556894, - "loss": 46.0, - "step": 29125 - }, - { - "epoch": 4.690526993840332, - "grad_norm": 0.0032192859798669815, - "learning_rate": 0.00019998915128038847, - "loss": 46.0, - "step": 29126 - }, - { - "epoch": 4.6906880309191195, - "grad_norm": 0.0025296914391219616, - "learning_rate": 0.0001999891505351824, - "loss": 46.0, - "step": 29127 - }, - { - "epoch": 4.690849067997906, - "grad_norm": 0.0039049312472343445, - "learning_rate": 0.0001999891497899508, - "loss": 46.0, - "step": 29128 - }, - { - "epoch": 4.691010105076694, - "grad_norm": 0.001298865769058466, - "learning_rate": 0.00019998914904469355, - "loss": 46.0, - "step": 29129 - }, - { - "epoch": 4.691171142155481, - "grad_norm": 0.0031968820840120316, - "learning_rate": 0.0001999891482994107, - "loss": 46.0, - "step": 29130 - }, - { - "epoch": 4.691332179234268, - "grad_norm": 0.0017479286761954427, - "learning_rate": 0.00019998914755410226, - "loss": 46.0, - "step": 29131 - }, - { - "epoch": 4.691493216313056, - "grad_norm": 0.006804362870752811, - "learning_rate": 0.00019998914680876827, - "loss": 46.0, - "step": 29132 - }, - { - "epoch": 4.691654253391843, - "grad_norm": 0.0038377020973712206, - "learning_rate": 0.00019998914606340865, - "loss": 46.0, - "step": 29133 - }, - { - "epoch": 4.691815290470631, - "grad_norm": 0.006302055437117815, - "learning_rate": 0.00019998914531802346, - "loss": 46.0, - "step": 29134 - }, - { - "epoch": 4.691976327549418, - "grad_norm": 0.007912084460258484, - "learning_rate": 0.00019998914457261267, - "loss": 46.0, - "step": 29135 - }, - { - "epoch": 4.692137364628206, - "grad_norm": 0.0017726949881762266, - "learning_rate": 0.0001999891438271763, - "loss": 46.0, - "step": 29136 - }, - { - "epoch": 4.692298401706993, - "grad_norm": 0.0031725927256047726, - "learning_rate": 0.00019998914308171434, - "loss": 46.0, - "step": 29137 - }, - { - "epoch": 4.6924594387857805, - "grad_norm": 0.0046320389956235886, - "learning_rate": 0.00019998914233622673, - "loss": 46.0, - "step": 29138 - }, - { - "epoch": 4.692620475864568, - "grad_norm": 0.0028918073512613773, - "learning_rate": 0.00019998914159071357, - "loss": 46.0, - "step": 29139 - }, - { - "epoch": 4.692781512943355, - "grad_norm": 0.002753194887191057, - "learning_rate": 0.00019998914084517485, - "loss": 46.0, - "step": 29140 - }, - { - "epoch": 4.692942550022143, - "grad_norm": 0.006393604911863804, - "learning_rate": 0.0001999891400996105, - "loss": 46.0, - "step": 29141 - }, - { - "epoch": 4.69310358710093, - "grad_norm": 0.001441923901438713, - "learning_rate": 0.00019998913935402056, - "loss": 46.0, - "step": 29142 - }, - { - "epoch": 4.693264624179717, - "grad_norm": 0.008365838788449764, - "learning_rate": 0.00019998913860840503, - "loss": 46.0, - "step": 29143 - }, - { - "epoch": 4.693425661258505, - "grad_norm": 0.008358891122043133, - "learning_rate": 0.0001999891378627639, - "loss": 46.0, - "step": 29144 - }, - { - "epoch": 4.693586698337292, - "grad_norm": 0.0032135392539203167, - "learning_rate": 0.0001999891371170972, - "loss": 46.0, - "step": 29145 - }, - { - "epoch": 4.693747735416079, - "grad_norm": 0.01464059203863144, - "learning_rate": 0.00019998913637140492, - "loss": 46.0, - "step": 29146 - }, - { - "epoch": 4.693908772494867, - "grad_norm": 0.003912672866135836, - "learning_rate": 0.000199989135625687, - "loss": 46.0, - "step": 29147 - }, - { - "epoch": 4.694069809573654, - "grad_norm": 0.004959849175065756, - "learning_rate": 0.0001999891348799435, - "loss": 46.0, - "step": 29148 - }, - { - "epoch": 4.694230846652442, - "grad_norm": 0.0054310900159180164, - "learning_rate": 0.00019998913413417442, - "loss": 46.0, - "step": 29149 - }, - { - "epoch": 4.694391883731229, - "grad_norm": 0.005952246021479368, - "learning_rate": 0.00019998913338837974, - "loss": 46.0, - "step": 29150 - }, - { - "epoch": 4.6945529208100165, - "grad_norm": 0.006007140036672354, - "learning_rate": 0.0001999891326425595, - "loss": 46.0, - "step": 29151 - }, - { - "epoch": 4.694713957888804, - "grad_norm": 0.0020258279982954264, - "learning_rate": 0.00019998913189671363, - "loss": 46.0, - "step": 29152 - }, - { - "epoch": 4.694874994967591, - "grad_norm": 0.005397609435021877, - "learning_rate": 0.0001999891311508422, - "loss": 46.0, - "step": 29153 - }, - { - "epoch": 4.695036032046379, - "grad_norm": 0.011329378932714462, - "learning_rate": 0.00019998913040494514, - "loss": 46.0, - "step": 29154 - }, - { - "epoch": 4.695197069125166, - "grad_norm": 0.0007836871664039791, - "learning_rate": 0.00019998912965902253, - "loss": 46.0, - "step": 29155 - }, - { - "epoch": 4.695358106203954, - "grad_norm": 0.01549210213124752, - "learning_rate": 0.00019998912891307428, - "loss": 46.0, - "step": 29156 - }, - { - "epoch": 4.695519143282741, - "grad_norm": 0.0009757225052453578, - "learning_rate": 0.00019998912816710047, - "loss": 46.0, - "step": 29157 - }, - { - "epoch": 4.695680180361528, - "grad_norm": 0.007227322552353144, - "learning_rate": 0.00019998912742110107, - "loss": 46.0, - "step": 29158 - }, - { - "epoch": 4.695841217440316, - "grad_norm": 0.005283804144710302, - "learning_rate": 0.00019998912667507605, - "loss": 46.0, - "step": 29159 - }, - { - "epoch": 4.696002254519103, - "grad_norm": 0.002289191819727421, - "learning_rate": 0.00019998912592902545, - "loss": 46.0, - "step": 29160 - }, - { - "epoch": 4.69616329159789, - "grad_norm": 0.004192507825791836, - "learning_rate": 0.00019998912518294926, - "loss": 46.0, - "step": 29161 - }, - { - "epoch": 4.696324328676678, - "grad_norm": 0.00043169312993995845, - "learning_rate": 0.00019998912443684752, - "loss": 46.0, - "step": 29162 - }, - { - "epoch": 4.696485365755465, - "grad_norm": 0.006443547084927559, - "learning_rate": 0.00019998912369072013, - "loss": 46.0, - "step": 29163 - }, - { - "epoch": 4.6966464028342525, - "grad_norm": 0.00981169193983078, - "learning_rate": 0.00019998912294456718, - "loss": 46.0, - "step": 29164 - }, - { - "epoch": 4.69680743991304, - "grad_norm": 0.008081848733127117, - "learning_rate": 0.00019998912219838864, - "loss": 46.0, - "step": 29165 - }, - { - "epoch": 4.696968476991827, - "grad_norm": 0.0027035672683268785, - "learning_rate": 0.00019998912145218446, - "loss": 46.0, - "step": 29166 - }, - { - "epoch": 4.697129514070615, - "grad_norm": 0.014703537337481976, - "learning_rate": 0.00019998912070595475, - "loss": 46.0, - "step": 29167 - }, - { - "epoch": 4.697290551149402, - "grad_norm": 0.004574276506900787, - "learning_rate": 0.0001999891199596994, - "loss": 46.0, - "step": 29168 - }, - { - "epoch": 4.69745158822819, - "grad_norm": 0.0051938071846961975, - "learning_rate": 0.0001999891192134185, - "loss": 46.0, - "step": 29169 - }, - { - "epoch": 4.697612625306977, - "grad_norm": 0.0034741077106446028, - "learning_rate": 0.00019998911846711196, - "loss": 46.0, - "step": 29170 - }, - { - "epoch": 4.697773662385765, - "grad_norm": 0.0009269822621718049, - "learning_rate": 0.00019998911772077987, - "loss": 46.0, - "step": 29171 - }, - { - "epoch": 4.697934699464552, - "grad_norm": 0.00685663940384984, - "learning_rate": 0.00019998911697442217, - "loss": 46.0, - "step": 29172 - }, - { - "epoch": 4.698095736543339, - "grad_norm": 0.0053397961892187595, - "learning_rate": 0.00019998911622803888, - "loss": 46.0, - "step": 29173 - }, - { - "epoch": 4.698256773622126, - "grad_norm": 0.02413760870695114, - "learning_rate": 0.00019998911548163, - "loss": 46.0, - "step": 29174 - }, - { - "epoch": 4.698417810700914, - "grad_norm": 0.003894147463142872, - "learning_rate": 0.00019998911473519552, - "loss": 46.0, - "step": 29175 - }, - { - "epoch": 4.698578847779701, - "grad_norm": 0.000604710599873215, - "learning_rate": 0.00019998911398873544, - "loss": 46.0, - "step": 29176 - }, - { - "epoch": 4.6987398848584885, - "grad_norm": 0.0025112025905400515, - "learning_rate": 0.0001999891132422498, - "loss": 46.0, - "step": 29177 - }, - { - "epoch": 4.698900921937276, - "grad_norm": 0.010967286303639412, - "learning_rate": 0.00019998911249573855, - "loss": 46.0, - "step": 29178 - }, - { - "epoch": 4.699061959016063, - "grad_norm": 0.0010603993432596326, - "learning_rate": 0.0001999891117492017, - "loss": 46.0, - "step": 29179 - }, - { - "epoch": 4.699222996094851, - "grad_norm": 0.001997011713683605, - "learning_rate": 0.0001999891110026393, - "loss": 46.0, - "step": 29180 - }, - { - "epoch": 4.699384033173638, - "grad_norm": 0.007600634824484587, - "learning_rate": 0.00019998911025605125, - "loss": 46.0, - "step": 29181 - }, - { - "epoch": 4.699545070252426, - "grad_norm": 0.008752559311687946, - "learning_rate": 0.00019998910950943762, - "loss": 46.0, - "step": 29182 - }, - { - "epoch": 4.699706107331213, - "grad_norm": 0.006707174703478813, - "learning_rate": 0.0001999891087627984, - "loss": 46.0, - "step": 29183 - }, - { - "epoch": 4.699867144410001, - "grad_norm": 0.003188934875652194, - "learning_rate": 0.0001999891080161336, - "loss": 46.0, - "step": 29184 - }, - { - "epoch": 4.700028181488788, - "grad_norm": 0.0010247660102322698, - "learning_rate": 0.00019998910726944322, - "loss": 46.0, - "step": 29185 - }, - { - "epoch": 4.7001892185675755, - "grad_norm": 0.003599941497668624, - "learning_rate": 0.00019998910652272724, - "loss": 46.0, - "step": 29186 - }, - { - "epoch": 4.700350255646363, - "grad_norm": 0.005215603392571211, - "learning_rate": 0.00019998910577598565, - "loss": 46.0, - "step": 29187 - }, - { - "epoch": 4.70051129272515, - "grad_norm": 0.0033436438534408808, - "learning_rate": 0.00019998910502921848, - "loss": 46.0, - "step": 29188 - }, - { - "epoch": 4.700672329803937, - "grad_norm": 0.00279989093542099, - "learning_rate": 0.0001999891042824257, - "loss": 46.0, - "step": 29189 - }, - { - "epoch": 4.7008333668827245, - "grad_norm": 0.005491578485816717, - "learning_rate": 0.00019998910353560736, - "loss": 46.0, - "step": 29190 - }, - { - "epoch": 4.700994403961512, - "grad_norm": 0.004783495794981718, - "learning_rate": 0.00019998910278876342, - "loss": 46.0, - "step": 29191 - }, - { - "epoch": 4.701155441040299, - "grad_norm": 0.0027589083183556795, - "learning_rate": 0.00019998910204189387, - "loss": 46.0, - "step": 29192 - }, - { - "epoch": 4.701316478119087, - "grad_norm": 0.007185005582869053, - "learning_rate": 0.00019998910129499876, - "loss": 46.0, - "step": 29193 - }, - { - "epoch": 4.701477515197874, - "grad_norm": 0.0050863465294241905, - "learning_rate": 0.00019998910054807803, - "loss": 46.0, - "step": 29194 - }, - { - "epoch": 4.701638552276662, - "grad_norm": 0.0014377750921994448, - "learning_rate": 0.00019998909980113169, - "loss": 46.0, - "step": 29195 - }, - { - "epoch": 4.701799589355449, - "grad_norm": 0.0034104082733392715, - "learning_rate": 0.00019998909905415978, - "loss": 46.0, - "step": 29196 - }, - { - "epoch": 4.701960626434237, - "grad_norm": 0.008074586279690266, - "learning_rate": 0.0001999890983071623, - "loss": 46.0, - "step": 29197 - }, - { - "epoch": 4.702121663513024, - "grad_norm": 0.0018546972423791885, - "learning_rate": 0.0001999890975601392, - "loss": 46.0, - "step": 29198 - }, - { - "epoch": 4.7022827005918115, - "grad_norm": 0.005170183256268501, - "learning_rate": 0.00019998909681309053, - "loss": 46.0, - "step": 29199 - }, - { - "epoch": 4.702443737670599, - "grad_norm": 0.0027615155559033155, - "learning_rate": 0.00019998909606601622, - "loss": 46.0, - "step": 29200 - }, - { - "epoch": 4.702604774749386, - "grad_norm": 0.002034719567745924, - "learning_rate": 0.0001999890953189164, - "loss": 46.0, - "step": 29201 - }, - { - "epoch": 4.702765811828174, - "grad_norm": 0.010149680078029633, - "learning_rate": 0.0001999890945717909, - "loss": 46.0, - "step": 29202 - }, - { - "epoch": 4.7029268489069604, - "grad_norm": 0.003622225718572736, - "learning_rate": 0.00019998909382463987, - "loss": 46.0, - "step": 29203 - }, - { - "epoch": 4.703087885985748, - "grad_norm": 0.0034116932656615973, - "learning_rate": 0.00019998909307746322, - "loss": 46.0, - "step": 29204 - }, - { - "epoch": 4.703248923064535, - "grad_norm": 0.006020493805408478, - "learning_rate": 0.00019998909233026098, - "loss": 46.0, - "step": 29205 - }, - { - "epoch": 4.703409960143323, - "grad_norm": 0.002413938520476222, - "learning_rate": 0.00019998909158303315, - "loss": 46.0, - "step": 29206 - }, - { - "epoch": 4.70357099722211, - "grad_norm": 0.006398188881576061, - "learning_rate": 0.00019998909083577973, - "loss": 46.0, - "step": 29207 - }, - { - "epoch": 4.703732034300898, - "grad_norm": 0.00417255237698555, - "learning_rate": 0.00019998909008850073, - "loss": 46.0, - "step": 29208 - }, - { - "epoch": 4.703893071379685, - "grad_norm": 0.002781786723062396, - "learning_rate": 0.00019998908934119612, - "loss": 46.0, - "step": 29209 - }, - { - "epoch": 4.704054108458473, - "grad_norm": 0.003387131029739976, - "learning_rate": 0.0001999890885938659, - "loss": 46.0, - "step": 29210 - }, - { - "epoch": 4.70421514553726, - "grad_norm": 0.003240004414692521, - "learning_rate": 0.00019998908784651015, - "loss": 46.0, - "step": 29211 - }, - { - "epoch": 4.7043761826160475, - "grad_norm": 0.0025013363920152187, - "learning_rate": 0.00019998908709912874, - "loss": 46.0, - "step": 29212 - }, - { - "epoch": 4.704537219694835, - "grad_norm": 0.002901485189795494, - "learning_rate": 0.00019998908635172178, - "loss": 46.0, - "step": 29213 - }, - { - "epoch": 4.704698256773622, - "grad_norm": 0.00529779028147459, - "learning_rate": 0.0001999890856042892, - "loss": 46.0, - "step": 29214 - }, - { - "epoch": 4.70485929385241, - "grad_norm": 0.012361081317067146, - "learning_rate": 0.00019998908485683106, - "loss": 46.0, - "step": 29215 - }, - { - "epoch": 4.705020330931196, - "grad_norm": 0.01101479772478342, - "learning_rate": 0.00019998908410934728, - "loss": 46.0, - "step": 29216 - }, - { - "epoch": 4.705181368009985, - "grad_norm": 0.017921939492225647, - "learning_rate": 0.00019998908336183794, - "loss": 46.0, - "step": 29217 - }, - { - "epoch": 4.705342405088771, - "grad_norm": 0.0027513159438967705, - "learning_rate": 0.000199989082614303, - "loss": 46.0, - "step": 29218 - }, - { - "epoch": 4.705503442167559, - "grad_norm": 0.0020752442069351673, - "learning_rate": 0.0001999890818667425, - "loss": 46.0, - "step": 29219 - }, - { - "epoch": 4.705664479246346, - "grad_norm": 0.009334743954241276, - "learning_rate": 0.0001999890811191564, - "loss": 46.0, - "step": 29220 - }, - { - "epoch": 4.705825516325134, - "grad_norm": 0.006994061172008514, - "learning_rate": 0.00019998908037154468, - "loss": 46.0, - "step": 29221 - }, - { - "epoch": 4.705986553403921, - "grad_norm": 0.007131069432944059, - "learning_rate": 0.00019998907962390735, - "loss": 46.0, - "step": 29222 - }, - { - "epoch": 4.706147590482709, - "grad_norm": 0.002933935495093465, - "learning_rate": 0.00019998907887624448, - "loss": 46.0, - "step": 29223 - }, - { - "epoch": 4.706308627561496, - "grad_norm": 0.006688784807920456, - "learning_rate": 0.000199989078128556, - "loss": 46.0, - "step": 29224 - }, - { - "epoch": 4.7064696646402835, - "grad_norm": 0.0032244024332612753, - "learning_rate": 0.0001999890773808419, - "loss": 46.0, - "step": 29225 - }, - { - "epoch": 4.706630701719071, - "grad_norm": 0.009899741038680077, - "learning_rate": 0.00019998907663310223, - "loss": 46.0, - "step": 29226 - }, - { - "epoch": 4.706791738797858, - "grad_norm": 0.004525924101471901, - "learning_rate": 0.00019998907588533697, - "loss": 46.0, - "step": 29227 - }, - { - "epoch": 4.706952775876646, - "grad_norm": 0.0018723371904343367, - "learning_rate": 0.0001999890751375461, - "loss": 46.0, - "step": 29228 - }, - { - "epoch": 4.707113812955433, - "grad_norm": 0.0017093382775783539, - "learning_rate": 0.0001999890743897297, - "loss": 46.0, - "step": 29229 - }, - { - "epoch": 4.707274850034221, - "grad_norm": 0.003120125737041235, - "learning_rate": 0.00019998907364188764, - "loss": 46.0, - "step": 29230 - }, - { - "epoch": 4.707435887113007, - "grad_norm": 0.001238172990269959, - "learning_rate": 0.00019998907289402, - "loss": 46.0, - "step": 29231 - }, - { - "epoch": 4.707596924191796, - "grad_norm": 0.0034787170588970184, - "learning_rate": 0.0001999890721461268, - "loss": 46.0, - "step": 29232 - }, - { - "epoch": 4.707757961270582, - "grad_norm": 0.004233032464981079, - "learning_rate": 0.00019998907139820798, - "loss": 46.0, - "step": 29233 - }, - { - "epoch": 4.70791899834937, - "grad_norm": 0.002697734860703349, - "learning_rate": 0.00019998907065026358, - "loss": 46.0, - "step": 29234 - }, - { - "epoch": 4.708080035428157, - "grad_norm": 0.005337487440556288, - "learning_rate": 0.0001999890699022936, - "loss": 46.0, - "step": 29235 - }, - { - "epoch": 4.708241072506945, - "grad_norm": 0.0030276046600192785, - "learning_rate": 0.00019998906915429798, - "loss": 46.0, - "step": 29236 - }, - { - "epoch": 4.708402109585732, - "grad_norm": 0.0014151401119306684, - "learning_rate": 0.0001999890684062768, - "loss": 46.0, - "step": 29237 - }, - { - "epoch": 4.7085631466645195, - "grad_norm": 0.002331359777599573, - "learning_rate": 0.00019998906765823, - "loss": 46.0, - "step": 29238 - }, - { - "epoch": 4.708724183743307, - "grad_norm": 0.007948044687509537, - "learning_rate": 0.00019998906691015767, - "loss": 46.0, - "step": 29239 - }, - { - "epoch": 4.708885220822094, - "grad_norm": 0.003867212450131774, - "learning_rate": 0.0001999890661620597, - "loss": 46.0, - "step": 29240 - }, - { - "epoch": 4.709046257900882, - "grad_norm": 0.007837440818548203, - "learning_rate": 0.00019998906541393615, - "loss": 46.0, - "step": 29241 - }, - { - "epoch": 4.709207294979669, - "grad_norm": 0.003048261860385537, - "learning_rate": 0.000199989064665787, - "loss": 46.0, - "step": 29242 - }, - { - "epoch": 4.709368332058457, - "grad_norm": 0.0028265388682484627, - "learning_rate": 0.00019998906391761225, - "loss": 46.0, - "step": 29243 - }, - { - "epoch": 4.709529369137244, - "grad_norm": 0.0034669069573283195, - "learning_rate": 0.00019998906316941195, - "loss": 46.0, - "step": 29244 - }, - { - "epoch": 4.709690406216032, - "grad_norm": 0.0012782543199136853, - "learning_rate": 0.00019998906242118603, - "loss": 46.0, - "step": 29245 - }, - { - "epoch": 4.709851443294818, - "grad_norm": 0.0034581669606268406, - "learning_rate": 0.00019998906167293453, - "loss": 46.0, - "step": 29246 - }, - { - "epoch": 4.710012480373606, - "grad_norm": 0.0022560099605470896, - "learning_rate": 0.00019998906092465738, - "loss": 46.0, - "step": 29247 - }, - { - "epoch": 4.710173517452393, - "grad_norm": 0.00527240289375186, - "learning_rate": 0.0001999890601763547, - "loss": 46.0, - "step": 29248 - }, - { - "epoch": 4.7103345545311806, - "grad_norm": 0.004832895006984472, - "learning_rate": 0.0001999890594280264, - "loss": 46.0, - "step": 29249 - }, - { - "epoch": 4.710495591609968, - "grad_norm": 0.0005298354080878198, - "learning_rate": 0.00019998905867967253, - "loss": 46.0, - "step": 29250 - }, - { - "epoch": 4.7106566286887555, - "grad_norm": 0.0026842583902180195, - "learning_rate": 0.0001999890579312931, - "loss": 46.0, - "step": 29251 - }, - { - "epoch": 4.710817665767543, - "grad_norm": 0.0022797740530222654, - "learning_rate": 0.000199989057182888, - "loss": 46.0, - "step": 29252 - }, - { - "epoch": 4.71097870284633, - "grad_norm": 0.0024207711685448885, - "learning_rate": 0.00019998905643445737, - "loss": 46.0, - "step": 29253 - }, - { - "epoch": 4.711139739925118, - "grad_norm": 0.0023152839858084917, - "learning_rate": 0.0001999890556860011, - "loss": 46.0, - "step": 29254 - }, - { - "epoch": 4.711300777003905, - "grad_norm": 0.007540998049080372, - "learning_rate": 0.00019998905493751927, - "loss": 46.0, - "step": 29255 - }, - { - "epoch": 4.711461814082693, - "grad_norm": 0.007167678326368332, - "learning_rate": 0.00019998905418901184, - "loss": 46.0, - "step": 29256 - }, - { - "epoch": 4.71162285116148, - "grad_norm": 0.002133867470547557, - "learning_rate": 0.0001999890534404788, - "loss": 46.0, - "step": 29257 - }, - { - "epoch": 4.711783888240268, - "grad_norm": 0.0020729033276438713, - "learning_rate": 0.0001999890526919202, - "loss": 46.0, - "step": 29258 - }, - { - "epoch": 4.711944925319055, - "grad_norm": 0.0018183771753683686, - "learning_rate": 0.00019998905194333597, - "loss": 46.0, - "step": 29259 - }, - { - "epoch": 4.7121059623978425, - "grad_norm": 0.003653302090242505, - "learning_rate": 0.00019998905119472616, - "loss": 46.0, - "step": 29260 - }, - { - "epoch": 4.712266999476629, - "grad_norm": 0.0081028426066041, - "learning_rate": 0.0001999890504460908, - "loss": 46.0, - "step": 29261 - }, - { - "epoch": 4.7124280365554165, - "grad_norm": 0.004289961885660887, - "learning_rate": 0.0001999890496974298, - "loss": 46.0, - "step": 29262 - }, - { - "epoch": 4.712589073634204, - "grad_norm": 0.00216194661334157, - "learning_rate": 0.00019998904894874322, - "loss": 46.0, - "step": 29263 - }, - { - "epoch": 4.712750110712991, - "grad_norm": 0.004811721853911877, - "learning_rate": 0.00019998904820003107, - "loss": 46.0, - "step": 29264 - }, - { - "epoch": 4.712911147791779, - "grad_norm": 0.0019144603284075856, - "learning_rate": 0.00019998904745129327, - "loss": 46.0, - "step": 29265 - }, - { - "epoch": 4.713072184870566, - "grad_norm": 0.01709693856537342, - "learning_rate": 0.00019998904670252994, - "loss": 46.0, - "step": 29266 - }, - { - "epoch": 4.713233221949354, - "grad_norm": 0.0048108892515301704, - "learning_rate": 0.000199989045953741, - "loss": 46.0, - "step": 29267 - }, - { - "epoch": 4.713394259028141, - "grad_norm": 0.0031093235593289137, - "learning_rate": 0.00019998904520492644, - "loss": 46.0, - "step": 29268 - }, - { - "epoch": 4.713555296106929, - "grad_norm": 0.007624708581715822, - "learning_rate": 0.00019998904445608633, - "loss": 46.0, - "step": 29269 - }, - { - "epoch": 4.713716333185716, - "grad_norm": 0.0036514580715447664, - "learning_rate": 0.0001999890437072206, - "loss": 46.0, - "step": 29270 - }, - { - "epoch": 4.713877370264504, - "grad_norm": 0.0028236950747668743, - "learning_rate": 0.0001999890429583293, - "loss": 46.0, - "step": 29271 - }, - { - "epoch": 4.714038407343291, - "grad_norm": 0.000693981593940407, - "learning_rate": 0.0001999890422094124, - "loss": 46.0, - "step": 29272 - }, - { - "epoch": 4.7141994444220785, - "grad_norm": 0.004591328091919422, - "learning_rate": 0.00019998904146046988, - "loss": 46.0, - "step": 29273 - }, - { - "epoch": 4.714360481500866, - "grad_norm": 0.003871242282912135, - "learning_rate": 0.0001999890407115018, - "loss": 46.0, - "step": 29274 - }, - { - "epoch": 4.714521518579653, - "grad_norm": 0.0035130351316183805, - "learning_rate": 0.00019998903996250813, - "loss": 46.0, - "step": 29275 - }, - { - "epoch": 4.71468255565844, - "grad_norm": 0.00098819297272712, - "learning_rate": 0.00019998903921348885, - "loss": 46.0, - "step": 29276 - }, - { - "epoch": 4.714843592737227, - "grad_norm": 0.0017879941733554006, - "learning_rate": 0.00019998903846444398, - "loss": 46.0, - "step": 29277 - }, - { - "epoch": 4.715004629816015, - "grad_norm": 0.005383829586207867, - "learning_rate": 0.00019998903771537352, - "loss": 46.0, - "step": 29278 - }, - { - "epoch": 4.715165666894802, - "grad_norm": 0.002313616219907999, - "learning_rate": 0.00019998903696627748, - "loss": 46.0, - "step": 29279 - }, - { - "epoch": 4.71532670397359, - "grad_norm": 0.002853547688573599, - "learning_rate": 0.00019998903621715585, - "loss": 46.0, - "step": 29280 - }, - { - "epoch": 4.715487741052377, - "grad_norm": 0.004320763982832432, - "learning_rate": 0.0001999890354680086, - "loss": 46.0, - "step": 29281 - }, - { - "epoch": 4.715648778131165, - "grad_norm": 0.0020063386764377356, - "learning_rate": 0.00019998903471883577, - "loss": 46.0, - "step": 29282 - }, - { - "epoch": 4.715809815209952, - "grad_norm": 0.0028386530466377735, - "learning_rate": 0.00019998903396963735, - "loss": 46.0, - "step": 29283 - }, - { - "epoch": 4.71597085228874, - "grad_norm": 0.020207369700074196, - "learning_rate": 0.00019998903322041334, - "loss": 46.0, - "step": 29284 - }, - { - "epoch": 4.716131889367527, - "grad_norm": 0.0034559525083750486, - "learning_rate": 0.00019998903247116375, - "loss": 46.0, - "step": 29285 - }, - { - "epoch": 4.7162929264463145, - "grad_norm": 0.00772892776876688, - "learning_rate": 0.00019998903172188854, - "loss": 46.0, - "step": 29286 - }, - { - "epoch": 4.716453963525102, - "grad_norm": 0.012257229536771774, - "learning_rate": 0.00019998903097258777, - "loss": 46.0, - "step": 29287 - }, - { - "epoch": 4.716615000603889, - "grad_norm": 0.01703144982457161, - "learning_rate": 0.0001999890302232614, - "loss": 46.0, - "step": 29288 - }, - { - "epoch": 4.716776037682676, - "grad_norm": 0.0026686512865126133, - "learning_rate": 0.0001999890294739094, - "loss": 46.0, - "step": 29289 - }, - { - "epoch": 4.716937074761464, - "grad_norm": 0.0013272403739392757, - "learning_rate": 0.00019998902872453184, - "loss": 46.0, - "step": 29290 - }, - { - "epoch": 4.717098111840251, - "grad_norm": 0.0008416443015448749, - "learning_rate": 0.0001999890279751287, - "loss": 46.0, - "step": 29291 - }, - { - "epoch": 4.717259148919038, - "grad_norm": 0.011600710451602936, - "learning_rate": 0.00019998902722569994, - "loss": 46.0, - "step": 29292 - }, - { - "epoch": 4.717420185997826, - "grad_norm": 0.003983397968113422, - "learning_rate": 0.00019998902647624562, - "loss": 46.0, - "step": 29293 - }, - { - "epoch": 4.717581223076613, - "grad_norm": 0.00986267626285553, - "learning_rate": 0.0001999890257267657, - "loss": 46.0, - "step": 29294 - }, - { - "epoch": 4.717742260155401, - "grad_norm": 0.0030516362749040127, - "learning_rate": 0.00019998902497726014, - "loss": 46.0, - "step": 29295 - }, - { - "epoch": 4.717903297234188, - "grad_norm": 0.0014918227680027485, - "learning_rate": 0.00019998902422772906, - "loss": 46.0, - "step": 29296 - }, - { - "epoch": 4.718064334312976, - "grad_norm": 0.003218598896637559, - "learning_rate": 0.0001999890234781723, - "loss": 46.0, - "step": 29297 - }, - { - "epoch": 4.718225371391763, - "grad_norm": 0.005359306000173092, - "learning_rate": 0.00019998902272859003, - "loss": 46.0, - "step": 29298 - }, - { - "epoch": 4.7183864084705505, - "grad_norm": 0.0026564777363091707, - "learning_rate": 0.00019998902197898214, - "loss": 46.0, - "step": 29299 - }, - { - "epoch": 4.718547445549338, - "grad_norm": 0.0033268441911786795, - "learning_rate": 0.00019998902122934865, - "loss": 46.0, - "step": 29300 - }, - { - "epoch": 4.718708482628125, - "grad_norm": 0.003321029245853424, - "learning_rate": 0.00019998902047968956, - "loss": 46.0, - "step": 29301 - }, - { - "epoch": 4.718869519706913, - "grad_norm": 0.004557216074317694, - "learning_rate": 0.0001999890197300049, - "loss": 46.0, - "step": 29302 - }, - { - "epoch": 4.7190305567857, - "grad_norm": 0.005502277985215187, - "learning_rate": 0.00019998901898029466, - "loss": 46.0, - "step": 29303 - }, - { - "epoch": 4.719191593864487, - "grad_norm": 0.0010434914147481322, - "learning_rate": 0.00019998901823055883, - "loss": 46.0, - "step": 29304 - }, - { - "epoch": 4.719352630943275, - "grad_norm": 0.01131720095872879, - "learning_rate": 0.00019998901748079738, - "loss": 46.0, - "step": 29305 - }, - { - "epoch": 4.719513668022062, - "grad_norm": 0.003906423691660166, - "learning_rate": 0.00019998901673101035, - "loss": 46.0, - "step": 29306 - }, - { - "epoch": 4.719674705100849, - "grad_norm": 0.005399438086897135, - "learning_rate": 0.0001999890159811977, - "loss": 46.0, - "step": 29307 - }, - { - "epoch": 4.719835742179637, - "grad_norm": 0.003501476952806115, - "learning_rate": 0.0001999890152313595, - "loss": 46.0, - "step": 29308 - }, - { - "epoch": 4.719996779258424, - "grad_norm": 0.002397339791059494, - "learning_rate": 0.00019998901448149567, - "loss": 46.0, - "step": 29309 - }, - { - "epoch": 4.7201578163372115, - "grad_norm": 0.011559433303773403, - "learning_rate": 0.0001999890137316063, - "loss": 46.0, - "step": 29310 - }, - { - "epoch": 4.720318853415999, - "grad_norm": 0.004457653500139713, - "learning_rate": 0.00019998901298169127, - "loss": 46.0, - "step": 29311 - }, - { - "epoch": 4.7204798904947864, - "grad_norm": 0.002904606517404318, - "learning_rate": 0.00019998901223175069, - "loss": 46.0, - "step": 29312 - }, - { - "epoch": 4.720640927573574, - "grad_norm": 0.003907191101461649, - "learning_rate": 0.00019998901148178452, - "loss": 46.0, - "step": 29313 - }, - { - "epoch": 4.720801964652361, - "grad_norm": 0.0123291639611125, - "learning_rate": 0.00019998901073179273, - "loss": 46.0, - "step": 29314 - }, - { - "epoch": 4.720963001731149, - "grad_norm": 0.0017887142021209002, - "learning_rate": 0.00019998900998177536, - "loss": 46.0, - "step": 29315 - }, - { - "epoch": 4.721124038809936, - "grad_norm": 0.00621689623221755, - "learning_rate": 0.0001999890092317324, - "loss": 46.0, - "step": 29316 - }, - { - "epoch": 4.721285075888724, - "grad_norm": 0.0029398349579423666, - "learning_rate": 0.00019998900848166386, - "loss": 46.0, - "step": 29317 - }, - { - "epoch": 4.721446112967511, - "grad_norm": 0.0005799714126624167, - "learning_rate": 0.00019998900773156972, - "loss": 46.0, - "step": 29318 - }, - { - "epoch": 4.721607150046298, - "grad_norm": 0.0037565052043646574, - "learning_rate": 0.00019998900698144998, - "loss": 46.0, - "step": 29319 - }, - { - "epoch": 4.721768187125085, - "grad_norm": 0.004496282432228327, - "learning_rate": 0.00019998900623130464, - "loss": 46.0, - "step": 29320 - }, - { - "epoch": 4.721929224203873, - "grad_norm": 0.003233895171433687, - "learning_rate": 0.00019998900548113375, - "loss": 46.0, - "step": 29321 - }, - { - "epoch": 4.72209026128266, - "grad_norm": 0.003190218936651945, - "learning_rate": 0.0001999890047309372, - "loss": 46.0, - "step": 29322 - }, - { - "epoch": 4.7222512983614475, - "grad_norm": 0.005417706910520792, - "learning_rate": 0.00019998900398071514, - "loss": 46.0, - "step": 29323 - }, - { - "epoch": 4.722412335440235, - "grad_norm": 0.0026388815604150295, - "learning_rate": 0.00019998900323046743, - "loss": 46.0, - "step": 29324 - }, - { - "epoch": 4.722573372519022, - "grad_norm": 0.002630612114444375, - "learning_rate": 0.00019998900248019416, - "loss": 46.0, - "step": 29325 - }, - { - "epoch": 4.72273440959781, - "grad_norm": 0.0053198328241705894, - "learning_rate": 0.00019998900172989528, - "loss": 46.0, - "step": 29326 - }, - { - "epoch": 4.722895446676597, - "grad_norm": 0.002506533870473504, - "learning_rate": 0.0001999890009795708, - "loss": 46.0, - "step": 29327 - }, - { - "epoch": 4.723056483755385, - "grad_norm": 0.003889096202328801, - "learning_rate": 0.00019998900022922075, - "loss": 46.0, - "step": 29328 - }, - { - "epoch": 4.723217520834172, - "grad_norm": 0.0018801233964040875, - "learning_rate": 0.0001999889994788451, - "loss": 46.0, - "step": 29329 - }, - { - "epoch": 4.72337855791296, - "grad_norm": 0.010475135408341885, - "learning_rate": 0.00019998899872844384, - "loss": 46.0, - "step": 29330 - }, - { - "epoch": 4.723539594991747, - "grad_norm": 0.0016895242733880877, - "learning_rate": 0.00019998899797801702, - "loss": 46.0, - "step": 29331 - }, - { - "epoch": 4.723700632070535, - "grad_norm": 0.0015805610455572605, - "learning_rate": 0.0001999889972275646, - "loss": 46.0, - "step": 29332 - }, - { - "epoch": 4.723861669149322, - "grad_norm": 0.001714044134132564, - "learning_rate": 0.00019998899647708657, - "loss": 46.0, - "step": 29333 - }, - { - "epoch": 4.724022706228109, - "grad_norm": 0.00274398154579103, - "learning_rate": 0.00019998899572658296, - "loss": 46.0, - "step": 29334 - }, - { - "epoch": 4.724183743306896, - "grad_norm": 0.004670158494263887, - "learning_rate": 0.00019998899497605373, - "loss": 46.0, - "step": 29335 - }, - { - "epoch": 4.7243447803856835, - "grad_norm": 0.00502600846812129, - "learning_rate": 0.00019998899422549898, - "loss": 46.0, - "step": 29336 - }, - { - "epoch": 4.724505817464471, - "grad_norm": 0.004184090998023748, - "learning_rate": 0.00019998899347491858, - "loss": 46.0, - "step": 29337 - }, - { - "epoch": 4.724666854543258, - "grad_norm": 0.0057343426160514355, - "learning_rate": 0.0001999889927243126, - "loss": 46.0, - "step": 29338 - }, - { - "epoch": 4.724827891622046, - "grad_norm": 0.014873027801513672, - "learning_rate": 0.00019998899197368102, - "loss": 46.0, - "step": 29339 - }, - { - "epoch": 4.724988928700833, - "grad_norm": 0.0017248686635866761, - "learning_rate": 0.00019998899122302386, - "loss": 46.0, - "step": 29340 - }, - { - "epoch": 4.725149965779621, - "grad_norm": 0.008649676106870174, - "learning_rate": 0.0001999889904723411, - "loss": 46.0, - "step": 29341 - }, - { - "epoch": 4.725311002858408, - "grad_norm": 0.008922945708036423, - "learning_rate": 0.00019998898972163276, - "loss": 46.0, - "step": 29342 - }, - { - "epoch": 4.725472039937196, - "grad_norm": 0.004946904256939888, - "learning_rate": 0.00019998898897089878, - "loss": 46.0, - "step": 29343 - }, - { - "epoch": 4.725633077015983, - "grad_norm": 0.0019709193147718906, - "learning_rate": 0.00019998898822013925, - "loss": 46.0, - "step": 29344 - }, - { - "epoch": 4.725794114094771, - "grad_norm": 0.005183913744986057, - "learning_rate": 0.00019998898746935413, - "loss": 46.0, - "step": 29345 - }, - { - "epoch": 4.725955151173558, - "grad_norm": 0.01176372542977333, - "learning_rate": 0.00019998898671854342, - "loss": 46.0, - "step": 29346 - }, - { - "epoch": 4.7261161882523455, - "grad_norm": 0.0020438064821064472, - "learning_rate": 0.0001999889859677071, - "loss": 46.0, - "step": 29347 - }, - { - "epoch": 4.726277225331133, - "grad_norm": 0.0004885127418674529, - "learning_rate": 0.0001999889852168452, - "loss": 46.0, - "step": 29348 - }, - { - "epoch": 4.7264382624099195, - "grad_norm": 0.010794468224048615, - "learning_rate": 0.0001999889844659577, - "loss": 46.0, - "step": 29349 - }, - { - "epoch": 4.726599299488707, - "grad_norm": 0.01155570987612009, - "learning_rate": 0.00019998898371504465, - "loss": 46.0, - "step": 29350 - }, - { - "epoch": 4.726760336567494, - "grad_norm": 0.003462859196588397, - "learning_rate": 0.00019998898296410595, - "loss": 46.0, - "step": 29351 - }, - { - "epoch": 4.726921373646282, - "grad_norm": 0.0027673481963574886, - "learning_rate": 0.0001999889822131417, - "loss": 46.0, - "step": 29352 - }, - { - "epoch": 4.727082410725069, - "grad_norm": 0.002472318010404706, - "learning_rate": 0.00019998898146215185, - "loss": 46.0, - "step": 29353 - }, - { - "epoch": 4.727243447803857, - "grad_norm": 0.0022224565036594868, - "learning_rate": 0.00019998898071113636, - "loss": 46.0, - "step": 29354 - }, - { - "epoch": 4.727404484882644, - "grad_norm": 0.0022227480076253414, - "learning_rate": 0.0001999889799600953, - "loss": 46.0, - "step": 29355 - }, - { - "epoch": 4.727565521961432, - "grad_norm": 0.0015774141065776348, - "learning_rate": 0.00019998897920902868, - "loss": 46.0, - "step": 29356 - }, - { - "epoch": 4.727726559040219, - "grad_norm": 0.005216037388890982, - "learning_rate": 0.00019998897845793646, - "loss": 46.0, - "step": 29357 - }, - { - "epoch": 4.7278875961190066, - "grad_norm": 0.0028596031479537487, - "learning_rate": 0.00019998897770681865, - "loss": 46.0, - "step": 29358 - }, - { - "epoch": 4.728048633197794, - "grad_norm": 0.0021019771229475737, - "learning_rate": 0.00019998897695567522, - "loss": 46.0, - "step": 29359 - }, - { - "epoch": 4.7282096702765815, - "grad_norm": 0.00817108154296875, - "learning_rate": 0.0001999889762045062, - "loss": 46.0, - "step": 29360 - }, - { - "epoch": 4.728370707355369, - "grad_norm": 0.004792229272425175, - "learning_rate": 0.00019998897545331161, - "loss": 46.0, - "step": 29361 - }, - { - "epoch": 4.7285317444341555, - "grad_norm": 0.0008420338854193687, - "learning_rate": 0.0001999889747020914, - "loss": 46.0, - "step": 29362 - }, - { - "epoch": 4.728692781512944, - "grad_norm": 0.015027117915451527, - "learning_rate": 0.00019998897395084563, - "loss": 46.0, - "step": 29363 - }, - { - "epoch": 4.72885381859173, - "grad_norm": 0.0037622731178998947, - "learning_rate": 0.00019998897319957427, - "loss": 46.0, - "step": 29364 - }, - { - "epoch": 4.729014855670518, - "grad_norm": 0.0054428973235189915, - "learning_rate": 0.0001999889724482773, - "loss": 46.0, - "step": 29365 - }, - { - "epoch": 4.729175892749305, - "grad_norm": 0.0036208373494446278, - "learning_rate": 0.00019998897169695473, - "loss": 46.0, - "step": 29366 - }, - { - "epoch": 4.729336929828093, - "grad_norm": 0.0008238909067586064, - "learning_rate": 0.0001999889709456066, - "loss": 46.0, - "step": 29367 - }, - { - "epoch": 4.72949796690688, - "grad_norm": 0.0014410164440050721, - "learning_rate": 0.00019998897019423285, - "loss": 46.0, - "step": 29368 - }, - { - "epoch": 4.729659003985668, - "grad_norm": 0.0032045100815594196, - "learning_rate": 0.00019998896944283353, - "loss": 46.0, - "step": 29369 - }, - { - "epoch": 4.729820041064455, - "grad_norm": 0.0012403222499415278, - "learning_rate": 0.00019998896869140856, - "loss": 46.0, - "step": 29370 - }, - { - "epoch": 4.7299810781432425, - "grad_norm": 0.011964606121182442, - "learning_rate": 0.00019998896793995807, - "loss": 46.0, - "step": 29371 - }, - { - "epoch": 4.73014211522203, - "grad_norm": 0.0021035645622760057, - "learning_rate": 0.00019998896718848196, - "loss": 46.0, - "step": 29372 - }, - { - "epoch": 4.730303152300817, - "grad_norm": 0.0036605617497116327, - "learning_rate": 0.00019998896643698023, - "loss": 46.0, - "step": 29373 - }, - { - "epoch": 4.730464189379605, - "grad_norm": 0.020130915567278862, - "learning_rate": 0.00019998896568545295, - "loss": 46.0, - "step": 29374 - }, - { - "epoch": 4.730625226458392, - "grad_norm": 0.001779328566044569, - "learning_rate": 0.00019998896493390007, - "loss": 46.0, - "step": 29375 - }, - { - "epoch": 4.73078626353718, - "grad_norm": 0.002841960871592164, - "learning_rate": 0.0001999889641823216, - "loss": 46.0, - "step": 29376 - }, - { - "epoch": 4.730947300615966, - "grad_norm": 0.003420154331251979, - "learning_rate": 0.00019998896343071754, - "loss": 46.0, - "step": 29377 - }, - { - "epoch": 4.731108337694755, - "grad_norm": 0.0027154451236128807, - "learning_rate": 0.00019998896267908785, - "loss": 46.0, - "step": 29378 - }, - { - "epoch": 4.731269374773541, - "grad_norm": 0.0006392297800630331, - "learning_rate": 0.00019998896192743263, - "loss": 46.0, - "step": 29379 - }, - { - "epoch": 4.731430411852329, - "grad_norm": 0.003128868294879794, - "learning_rate": 0.00019998896117575177, - "loss": 46.0, - "step": 29380 - }, - { - "epoch": 4.731591448931116, - "grad_norm": 0.002107715466991067, - "learning_rate": 0.00019998896042404532, - "loss": 46.0, - "step": 29381 - }, - { - "epoch": 4.731752486009904, - "grad_norm": 0.002084210980683565, - "learning_rate": 0.00019998895967231328, - "loss": 46.0, - "step": 29382 - }, - { - "epoch": 4.731913523088691, - "grad_norm": 0.005268099717795849, - "learning_rate": 0.00019998895892055566, - "loss": 46.0, - "step": 29383 - }, - { - "epoch": 4.7320745601674785, - "grad_norm": 0.011741421185433865, - "learning_rate": 0.00019998895816877244, - "loss": 46.0, - "step": 29384 - }, - { - "epoch": 4.732235597246266, - "grad_norm": 0.007093553431332111, - "learning_rate": 0.00019998895741696365, - "loss": 46.0, - "step": 29385 - }, - { - "epoch": 4.732396634325053, - "grad_norm": 0.002017336431890726, - "learning_rate": 0.00019998895666512923, - "loss": 46.0, - "step": 29386 - }, - { - "epoch": 4.732557671403841, - "grad_norm": 0.009207571856677532, - "learning_rate": 0.00019998895591326926, - "loss": 46.0, - "step": 29387 - }, - { - "epoch": 4.732718708482628, - "grad_norm": 0.0025740114506334066, - "learning_rate": 0.00019998895516138365, - "loss": 46.0, - "step": 29388 - }, - { - "epoch": 4.732879745561416, - "grad_norm": 0.0072769238613545895, - "learning_rate": 0.0001999889544094725, - "loss": 46.0, - "step": 29389 - }, - { - "epoch": 4.733040782640203, - "grad_norm": 0.004357141442596912, - "learning_rate": 0.0001999889536575357, - "loss": 46.0, - "step": 29390 - }, - { - "epoch": 4.733201819718991, - "grad_norm": 0.005643466021865606, - "learning_rate": 0.00019998895290557336, - "loss": 46.0, - "step": 29391 - }, - { - "epoch": 4.733362856797777, - "grad_norm": 0.003422433976083994, - "learning_rate": 0.00019998895215358542, - "loss": 46.0, - "step": 29392 - }, - { - "epoch": 4.733523893876565, - "grad_norm": 0.008912176825106144, - "learning_rate": 0.00019998895140157185, - "loss": 46.0, - "step": 29393 - }, - { - "epoch": 4.733684930955352, - "grad_norm": 0.002388990018516779, - "learning_rate": 0.0001999889506495327, - "loss": 46.0, - "step": 29394 - }, - { - "epoch": 4.73384596803414, - "grad_norm": 0.0008180595468729734, - "learning_rate": 0.00019998894989746799, - "loss": 46.0, - "step": 29395 - }, - { - "epoch": 4.734007005112927, - "grad_norm": 0.005835089832544327, - "learning_rate": 0.0001999889491453777, - "loss": 46.0, - "step": 29396 - }, - { - "epoch": 4.7341680421917145, - "grad_norm": 0.003755220677703619, - "learning_rate": 0.00019998894839326177, - "loss": 46.0, - "step": 29397 - }, - { - "epoch": 4.734329079270502, - "grad_norm": 0.003420136636123061, - "learning_rate": 0.00019998894764112026, - "loss": 46.0, - "step": 29398 - }, - { - "epoch": 4.734490116349289, - "grad_norm": 0.016514291986823082, - "learning_rate": 0.0001999889468889532, - "loss": 46.0, - "step": 29399 - }, - { - "epoch": 4.734651153428077, - "grad_norm": 0.0060422117821872234, - "learning_rate": 0.00019998894613676047, - "loss": 46.0, - "step": 29400 - }, - { - "epoch": 4.734812190506864, - "grad_norm": 0.009305658750236034, - "learning_rate": 0.0001999889453845422, - "loss": 46.0, - "step": 29401 - }, - { - "epoch": 4.734973227585652, - "grad_norm": 0.004312990698963404, - "learning_rate": 0.0001999889446322983, - "loss": 46.0, - "step": 29402 - }, - { - "epoch": 4.735134264664439, - "grad_norm": 0.0009957398287951946, - "learning_rate": 0.00019998894388002886, - "loss": 46.0, - "step": 29403 - }, - { - "epoch": 4.735295301743227, - "grad_norm": 0.009375032968819141, - "learning_rate": 0.0001999889431277338, - "loss": 46.0, - "step": 29404 - }, - { - "epoch": 4.735456338822014, - "grad_norm": 0.003856866853311658, - "learning_rate": 0.00019998894237541315, - "loss": 46.0, - "step": 29405 - }, - { - "epoch": 4.735617375900802, - "grad_norm": 0.001658300287090242, - "learning_rate": 0.0001999889416230669, - "loss": 46.0, - "step": 29406 - }, - { - "epoch": 4.735778412979588, - "grad_norm": 0.0038576764054596424, - "learning_rate": 0.00019998894087069509, - "loss": 46.0, - "step": 29407 - }, - { - "epoch": 4.735939450058376, - "grad_norm": 0.004734646063297987, - "learning_rate": 0.00019998894011829767, - "loss": 46.0, - "step": 29408 - }, - { - "epoch": 4.736100487137163, - "grad_norm": 0.002241219161078334, - "learning_rate": 0.00019998893936587467, - "loss": 46.0, - "step": 29409 - }, - { - "epoch": 4.7362615242159505, - "grad_norm": 0.0013894643634557724, - "learning_rate": 0.00019998893861342603, - "loss": 46.0, - "step": 29410 - }, - { - "epoch": 4.736422561294738, - "grad_norm": 0.010313589125871658, - "learning_rate": 0.00019998893786095186, - "loss": 46.0, - "step": 29411 - }, - { - "epoch": 4.736583598373525, - "grad_norm": 0.002238770015537739, - "learning_rate": 0.00019998893710845205, - "loss": 46.0, - "step": 29412 - }, - { - "epoch": 4.736744635452313, - "grad_norm": 0.004633741453289986, - "learning_rate": 0.00019998893635592667, - "loss": 46.0, - "step": 29413 - }, - { - "epoch": 4.7369056725311, - "grad_norm": 0.01556902565062046, - "learning_rate": 0.00019998893560337568, - "loss": 46.0, - "step": 29414 - }, - { - "epoch": 4.737066709609888, - "grad_norm": 0.028797954320907593, - "learning_rate": 0.0001999889348507991, - "loss": 46.0, - "step": 29415 - }, - { - "epoch": 4.737227746688675, - "grad_norm": 0.004877353087067604, - "learning_rate": 0.00019998893409819694, - "loss": 46.0, - "step": 29416 - }, - { - "epoch": 4.737388783767463, - "grad_norm": 0.0019912421703338623, - "learning_rate": 0.00019998893334556922, - "loss": 46.0, - "step": 29417 - }, - { - "epoch": 4.73754982084625, - "grad_norm": 0.007213990204036236, - "learning_rate": 0.00019998893259291585, - "loss": 46.0, - "step": 29418 - }, - { - "epoch": 4.7377108579250375, - "grad_norm": 0.0038081426173448563, - "learning_rate": 0.00019998893184023693, - "loss": 46.0, - "step": 29419 - }, - { - "epoch": 4.737871895003825, - "grad_norm": 0.0012381550623103976, - "learning_rate": 0.0001999889310875324, - "loss": 46.0, - "step": 29420 - }, - { - "epoch": 4.7380329320826124, - "grad_norm": 0.005331635940819979, - "learning_rate": 0.0001999889303348023, - "loss": 46.0, - "step": 29421 - }, - { - "epoch": 4.738193969161399, - "grad_norm": 0.0043566869571805, - "learning_rate": 0.00019998892958204655, - "loss": 46.0, - "step": 29422 - }, - { - "epoch": 4.7383550062401865, - "grad_norm": 0.002686574589461088, - "learning_rate": 0.00019998892882926525, - "loss": 46.0, - "step": 29423 - }, - { - "epoch": 4.738516043318974, - "grad_norm": 0.011207504197955132, - "learning_rate": 0.00019998892807645836, - "loss": 46.0, - "step": 29424 - }, - { - "epoch": 4.738677080397761, - "grad_norm": 0.003423671703785658, - "learning_rate": 0.00019998892732362586, - "loss": 46.0, - "step": 29425 - }, - { - "epoch": 4.738838117476549, - "grad_norm": 0.0017669046064838767, - "learning_rate": 0.00019998892657076777, - "loss": 46.0, - "step": 29426 - }, - { - "epoch": 4.738999154555336, - "grad_norm": 0.004224743228405714, - "learning_rate": 0.00019998892581788412, - "loss": 46.0, - "step": 29427 - }, - { - "epoch": 4.739160191634124, - "grad_norm": 0.0030419223476201296, - "learning_rate": 0.00019998892506497485, - "loss": 46.0, - "step": 29428 - }, - { - "epoch": 4.739321228712911, - "grad_norm": 0.0019750739447772503, - "learning_rate": 0.00019998892431204, - "loss": 46.0, - "step": 29429 - }, - { - "epoch": 4.739482265791699, - "grad_norm": 0.0037552407011389732, - "learning_rate": 0.00019998892355907954, - "loss": 46.0, - "step": 29430 - }, - { - "epoch": 4.739643302870486, - "grad_norm": 0.01344646792858839, - "learning_rate": 0.0001999889228060935, - "loss": 46.0, - "step": 29431 - }, - { - "epoch": 4.7398043399492735, - "grad_norm": 0.010561018250882626, - "learning_rate": 0.00019998892205308187, - "loss": 46.0, - "step": 29432 - }, - { - "epoch": 4.739965377028061, - "grad_norm": 0.001239353441633284, - "learning_rate": 0.00019998892130004462, - "loss": 46.0, - "step": 29433 - }, - { - "epoch": 4.740126414106848, - "grad_norm": 0.004819825757294893, - "learning_rate": 0.0001999889205469818, - "loss": 46.0, - "step": 29434 - }, - { - "epoch": 4.740287451185636, - "grad_norm": 0.008641612716019154, - "learning_rate": 0.00019998891979389343, - "loss": 46.0, - "step": 29435 - }, - { - "epoch": 4.740448488264423, - "grad_norm": 0.002989055821672082, - "learning_rate": 0.00019998891904077942, - "loss": 46.0, - "step": 29436 - }, - { - "epoch": 4.74060952534321, - "grad_norm": 0.0026798087637871504, - "learning_rate": 0.00019998891828763981, - "loss": 46.0, - "step": 29437 - }, - { - "epoch": 4.740770562421997, - "grad_norm": 0.0035659109707921743, - "learning_rate": 0.00019998891753447465, - "loss": 46.0, - "step": 29438 - }, - { - "epoch": 4.740931599500785, - "grad_norm": 0.0047483728267252445, - "learning_rate": 0.00019998891678128387, - "loss": 46.0, - "step": 29439 - }, - { - "epoch": 4.741092636579572, - "grad_norm": 0.002634359523653984, - "learning_rate": 0.00019998891602806748, - "loss": 46.0, - "step": 29440 - }, - { - "epoch": 4.74125367365836, - "grad_norm": 0.005374438129365444, - "learning_rate": 0.00019998891527482553, - "loss": 46.0, - "step": 29441 - }, - { - "epoch": 4.741414710737147, - "grad_norm": 0.00490146316587925, - "learning_rate": 0.00019998891452155797, - "loss": 46.0, - "step": 29442 - }, - { - "epoch": 4.741575747815935, - "grad_norm": 0.001773351919837296, - "learning_rate": 0.00019998891376826484, - "loss": 46.0, - "step": 29443 - }, - { - "epoch": 4.741736784894722, - "grad_norm": 0.009983288124203682, - "learning_rate": 0.0001999889130149461, - "loss": 46.0, - "step": 29444 - }, - { - "epoch": 4.7418978219735095, - "grad_norm": 0.005572212394326925, - "learning_rate": 0.00019998891226160175, - "loss": 46.0, - "step": 29445 - }, - { - "epoch": 4.742058859052297, - "grad_norm": 0.007713311351835728, - "learning_rate": 0.00019998891150823186, - "loss": 46.0, - "step": 29446 - }, - { - "epoch": 4.742219896131084, - "grad_norm": 0.002674366580322385, - "learning_rate": 0.00019998891075483633, - "loss": 46.0, - "step": 29447 - }, - { - "epoch": 4.742380933209872, - "grad_norm": 0.008505702018737793, - "learning_rate": 0.00019998891000141522, - "loss": 46.0, - "step": 29448 - }, - { - "epoch": 4.742541970288659, - "grad_norm": 0.004731995053589344, - "learning_rate": 0.00019998890924796854, - "loss": 46.0, - "step": 29449 - }, - { - "epoch": 4.742703007367446, - "grad_norm": 0.007525719236582518, - "learning_rate": 0.00019998890849449622, - "loss": 46.0, - "step": 29450 - }, - { - "epoch": 4.742864044446234, - "grad_norm": 0.006710035726428032, - "learning_rate": 0.00019998890774099835, - "loss": 46.0, - "step": 29451 - }, - { - "epoch": 4.743025081525021, - "grad_norm": 0.0023106341250240803, - "learning_rate": 0.00019998890698747488, - "loss": 46.0, - "step": 29452 - }, - { - "epoch": 4.743186118603808, - "grad_norm": 0.010832172818481922, - "learning_rate": 0.0001999889062339258, - "loss": 46.0, - "step": 29453 - }, - { - "epoch": 4.743347155682596, - "grad_norm": 0.001380281406454742, - "learning_rate": 0.00019998890548035116, - "loss": 46.0, - "step": 29454 - }, - { - "epoch": 4.743508192761383, - "grad_norm": 0.007656737230718136, - "learning_rate": 0.0001999889047267509, - "loss": 46.0, - "step": 29455 - }, - { - "epoch": 4.743669229840171, - "grad_norm": 0.0022427341900765896, - "learning_rate": 0.00019998890397312507, - "loss": 46.0, - "step": 29456 - }, - { - "epoch": 4.743830266918958, - "grad_norm": 0.005935526918619871, - "learning_rate": 0.00019998890321947364, - "loss": 46.0, - "step": 29457 - }, - { - "epoch": 4.7439913039977455, - "grad_norm": 0.007354166824370623, - "learning_rate": 0.0001999889024657966, - "loss": 46.0, - "step": 29458 - }, - { - "epoch": 4.744152341076533, - "grad_norm": 0.0018673599697649479, - "learning_rate": 0.000199988901712094, - "loss": 46.0, - "step": 29459 - }, - { - "epoch": 4.74431337815532, - "grad_norm": 0.003711445489898324, - "learning_rate": 0.00019998890095836578, - "loss": 46.0, - "step": 29460 - }, - { - "epoch": 4.744474415234108, - "grad_norm": 0.00214279699139297, - "learning_rate": 0.00019998890020461198, - "loss": 46.0, - "step": 29461 - }, - { - "epoch": 4.744635452312895, - "grad_norm": 0.006456930655986071, - "learning_rate": 0.0001999888994508326, - "loss": 46.0, - "step": 29462 - }, - { - "epoch": 4.744796489391683, - "grad_norm": 0.0016828099032863975, - "learning_rate": 0.0001999888986970276, - "loss": 46.0, - "step": 29463 - }, - { - "epoch": 4.74495752647047, - "grad_norm": 0.006650710012763739, - "learning_rate": 0.00019998889794319702, - "loss": 46.0, - "step": 29464 - }, - { - "epoch": 4.745118563549257, - "grad_norm": 0.0035515003837645054, - "learning_rate": 0.00019998889718934084, - "loss": 46.0, - "step": 29465 - }, - { - "epoch": 4.745279600628045, - "grad_norm": 0.0035023754462599754, - "learning_rate": 0.00019998889643545908, - "loss": 46.0, - "step": 29466 - }, - { - "epoch": 4.745440637706832, - "grad_norm": 0.0028557120822370052, - "learning_rate": 0.00019998889568155172, - "loss": 46.0, - "step": 29467 - }, - { - "epoch": 4.745601674785619, - "grad_norm": 0.01697532832622528, - "learning_rate": 0.00019998889492761878, - "loss": 46.0, - "step": 29468 - }, - { - "epoch": 4.745762711864407, - "grad_norm": 0.01211035717278719, - "learning_rate": 0.00019998889417366026, - "loss": 46.0, - "step": 29469 - }, - { - "epoch": 4.745923748943194, - "grad_norm": 0.005862020421773195, - "learning_rate": 0.00019998889341967611, - "loss": 46.0, - "step": 29470 - }, - { - "epoch": 4.7460847860219815, - "grad_norm": 0.007999500259757042, - "learning_rate": 0.00019998889266566638, - "loss": 46.0, - "step": 29471 - }, - { - "epoch": 4.746245823100769, - "grad_norm": 0.0044660987332463264, - "learning_rate": 0.00019998889191163107, - "loss": 46.0, - "step": 29472 - }, - { - "epoch": 4.746406860179556, - "grad_norm": 0.010455731302499771, - "learning_rate": 0.00019998889115757017, - "loss": 46.0, - "step": 29473 - }, - { - "epoch": 4.746567897258344, - "grad_norm": 0.004530957899987698, - "learning_rate": 0.00019998889040348367, - "loss": 46.0, - "step": 29474 - }, - { - "epoch": 4.746728934337131, - "grad_norm": 0.006481084506958723, - "learning_rate": 0.00019998888964937157, - "loss": 46.0, - "step": 29475 - }, - { - "epoch": 4.746889971415919, - "grad_norm": 0.0032839993946254253, - "learning_rate": 0.00019998888889523388, - "loss": 46.0, - "step": 29476 - }, - { - "epoch": 4.747051008494706, - "grad_norm": 0.002434573834761977, - "learning_rate": 0.00019998888814107063, - "loss": 46.0, - "step": 29477 - }, - { - "epoch": 4.747212045573494, - "grad_norm": 0.0016336912522092462, - "learning_rate": 0.00019998888738688176, - "loss": 46.0, - "step": 29478 - }, - { - "epoch": 4.747373082652281, - "grad_norm": 0.005440335255116224, - "learning_rate": 0.0001999888866326673, - "loss": 46.0, - "step": 29479 - }, - { - "epoch": 4.747534119731068, - "grad_norm": 0.010634477250277996, - "learning_rate": 0.00019998888587842727, - "loss": 46.0, - "step": 29480 - }, - { - "epoch": 4.747695156809855, - "grad_norm": 0.002356134122237563, - "learning_rate": 0.0001999888851241616, - "loss": 46.0, - "step": 29481 - }, - { - "epoch": 4.7478561938886426, - "grad_norm": 0.004118389915674925, - "learning_rate": 0.00019998888436987037, - "loss": 46.0, - "step": 29482 - }, - { - "epoch": 4.74801723096743, - "grad_norm": 0.009017317555844784, - "learning_rate": 0.00019998888361555354, - "loss": 46.0, - "step": 29483 - }, - { - "epoch": 4.7481782680462175, - "grad_norm": 0.001375826308503747, - "learning_rate": 0.00019998888286121112, - "loss": 46.0, - "step": 29484 - }, - { - "epoch": 4.748339305125005, - "grad_norm": 0.0009405128075741231, - "learning_rate": 0.0001999888821068431, - "loss": 46.0, - "step": 29485 - }, - { - "epoch": 4.748500342203792, - "grad_norm": 0.0032468524295836687, - "learning_rate": 0.0001999888813524495, - "loss": 46.0, - "step": 29486 - }, - { - "epoch": 4.74866137928258, - "grad_norm": 0.01063726469874382, - "learning_rate": 0.00019998888059803032, - "loss": 46.0, - "step": 29487 - }, - { - "epoch": 4.748822416361367, - "grad_norm": 0.0031899053137749434, - "learning_rate": 0.00019998887984358553, - "loss": 46.0, - "step": 29488 - }, - { - "epoch": 4.748983453440155, - "grad_norm": 0.02001010812819004, - "learning_rate": 0.00019998887908911515, - "loss": 46.0, - "step": 29489 - }, - { - "epoch": 4.749144490518942, - "grad_norm": 0.0026898193173110485, - "learning_rate": 0.00019998887833461919, - "loss": 46.0, - "step": 29490 - }, - { - "epoch": 4.74930552759773, - "grad_norm": 0.012236927635967731, - "learning_rate": 0.0001999888775800976, - "loss": 46.0, - "step": 29491 - }, - { - "epoch": 4.749466564676517, - "grad_norm": 0.005238463170826435, - "learning_rate": 0.00019998887682555046, - "loss": 46.0, - "step": 29492 - }, - { - "epoch": 4.7496276017553045, - "grad_norm": 0.0017704227939248085, - "learning_rate": 0.0001999888760709777, - "loss": 46.0, - "step": 29493 - }, - { - "epoch": 4.749788638834092, - "grad_norm": 0.0015627461252734065, - "learning_rate": 0.00019998887531637937, - "loss": 46.0, - "step": 29494 - }, - { - "epoch": 4.7499496759128785, - "grad_norm": 0.0007592069450765848, - "learning_rate": 0.00019998887456175544, - "loss": 46.0, - "step": 29495 - }, - { - "epoch": 4.750110712991666, - "grad_norm": 0.003729273797944188, - "learning_rate": 0.00019998887380710592, - "loss": 46.0, - "step": 29496 - }, - { - "epoch": 4.750271750070453, - "grad_norm": 0.00501279579475522, - "learning_rate": 0.00019998887305243082, - "loss": 46.0, - "step": 29497 - }, - { - "epoch": 4.750432787149241, - "grad_norm": 0.0038480714429169893, - "learning_rate": 0.00019998887229773012, - "loss": 46.0, - "step": 29498 - }, - { - "epoch": 4.750593824228028, - "grad_norm": 0.0017705995123833418, - "learning_rate": 0.0001999888715430038, - "loss": 46.0, - "step": 29499 - }, - { - "epoch": 4.750754861306816, - "grad_norm": 0.0019863443449139595, - "learning_rate": 0.0001999888707882519, - "loss": 46.0, - "step": 29500 - }, - { - "epoch": 4.750915898385603, - "grad_norm": 0.008791308850049973, - "learning_rate": 0.00019998887003347445, - "loss": 46.0, - "step": 29501 - }, - { - "epoch": 4.751076935464391, - "grad_norm": 0.0016202365513890982, - "learning_rate": 0.00019998886927867135, - "loss": 46.0, - "step": 29502 - }, - { - "epoch": 4.751237972543178, - "grad_norm": 0.004478315357118845, - "learning_rate": 0.0001999888685238427, - "loss": 46.0, - "step": 29503 - }, - { - "epoch": 4.751399009621966, - "grad_norm": 0.0031289015896618366, - "learning_rate": 0.00019998886776898843, - "loss": 46.0, - "step": 29504 - }, - { - "epoch": 4.751560046700753, - "grad_norm": 0.0047536808997392654, - "learning_rate": 0.00019998886701410858, - "loss": 46.0, - "step": 29505 - }, - { - "epoch": 4.7517210837795405, - "grad_norm": 0.0050652530044317245, - "learning_rate": 0.00019998886625920313, - "loss": 46.0, - "step": 29506 - }, - { - "epoch": 4.751882120858328, - "grad_norm": 0.004600538872182369, - "learning_rate": 0.0001999888655042721, - "loss": 46.0, - "step": 29507 - }, - { - "epoch": 4.752043157937115, - "grad_norm": 0.005968911107629538, - "learning_rate": 0.00019998886474931546, - "loss": 46.0, - "step": 29508 - }, - { - "epoch": 4.752204195015903, - "grad_norm": 0.002788045909255743, - "learning_rate": 0.00019998886399433326, - "loss": 46.0, - "step": 29509 - }, - { - "epoch": 4.752365232094689, - "grad_norm": 0.0011554601369425654, - "learning_rate": 0.0001999888632393254, - "loss": 46.0, - "step": 29510 - }, - { - "epoch": 4.752526269173477, - "grad_norm": 0.003151525044813752, - "learning_rate": 0.00019998886248429203, - "loss": 46.0, - "step": 29511 - }, - { - "epoch": 4.752687306252264, - "grad_norm": 0.0034211769234389067, - "learning_rate": 0.00019998886172923304, - "loss": 46.0, - "step": 29512 - }, - { - "epoch": 4.752848343331052, - "grad_norm": 0.013129165396094322, - "learning_rate": 0.00019998886097414843, - "loss": 46.0, - "step": 29513 - }, - { - "epoch": 4.753009380409839, - "grad_norm": 0.007085001096129417, - "learning_rate": 0.00019998886021903826, - "loss": 46.0, - "step": 29514 - }, - { - "epoch": 4.753170417488627, - "grad_norm": 0.0033717367332428694, - "learning_rate": 0.0001999888594639025, - "loss": 46.0, - "step": 29515 - }, - { - "epoch": 4.753331454567414, - "grad_norm": 0.007327381055802107, - "learning_rate": 0.00019998885870874114, - "loss": 46.0, - "step": 29516 - }, - { - "epoch": 4.753492491646202, - "grad_norm": 0.0016730257775634527, - "learning_rate": 0.00019998885795355419, - "loss": 46.0, - "step": 29517 - }, - { - "epoch": 4.753653528724989, - "grad_norm": 0.0070864129811525345, - "learning_rate": 0.00019998885719834162, - "loss": 46.0, - "step": 29518 - }, - { - "epoch": 4.7538145658037765, - "grad_norm": 0.002798378234729171, - "learning_rate": 0.00019998885644310349, - "loss": 46.0, - "step": 29519 - }, - { - "epoch": 4.753975602882564, - "grad_norm": 0.002048699650913477, - "learning_rate": 0.00019998885568783977, - "loss": 46.0, - "step": 29520 - }, - { - "epoch": 4.754136639961351, - "grad_norm": 0.00618006894364953, - "learning_rate": 0.00019998885493255044, - "loss": 46.0, - "step": 29521 - }, - { - "epoch": 4.754297677040139, - "grad_norm": 0.007030103355646133, - "learning_rate": 0.00019998885417723552, - "loss": 46.0, - "step": 29522 - }, - { - "epoch": 4.754458714118925, - "grad_norm": 0.004415110219269991, - "learning_rate": 0.000199988853421895, - "loss": 46.0, - "step": 29523 - }, - { - "epoch": 4.754619751197714, - "grad_norm": 0.006786801386624575, - "learning_rate": 0.00019998885266652892, - "loss": 46.0, - "step": 29524 - }, - { - "epoch": 4.7547807882765, - "grad_norm": 0.012729967013001442, - "learning_rate": 0.0001999888519111372, - "loss": 46.0, - "step": 29525 - }, - { - "epoch": 4.754941825355288, - "grad_norm": 0.0026426208205521107, - "learning_rate": 0.00019998885115571992, - "loss": 46.0, - "step": 29526 - }, - { - "epoch": 4.755102862434075, - "grad_norm": 0.00648786686360836, - "learning_rate": 0.00019998885040027706, - "loss": 46.0, - "step": 29527 - }, - { - "epoch": 4.755263899512863, - "grad_norm": 0.005379085429012775, - "learning_rate": 0.0001999888496448086, - "loss": 46.0, - "step": 29528 - }, - { - "epoch": 4.75542493659165, - "grad_norm": 0.010481463745236397, - "learning_rate": 0.00019998884888931454, - "loss": 46.0, - "step": 29529 - }, - { - "epoch": 4.755585973670438, - "grad_norm": 0.01833781599998474, - "learning_rate": 0.0001999888481337949, - "loss": 46.0, - "step": 29530 - }, - { - "epoch": 4.755747010749225, - "grad_norm": 0.002483892487362027, - "learning_rate": 0.00019998884737824964, - "loss": 46.0, - "step": 29531 - }, - { - "epoch": 4.7559080478280125, - "grad_norm": 0.0020950946491211653, - "learning_rate": 0.0001999888466226788, - "loss": 46.0, - "step": 29532 - }, - { - "epoch": 4.7560690849068, - "grad_norm": 0.004345500376075506, - "learning_rate": 0.0001999888458670824, - "loss": 46.0, - "step": 29533 - }, - { - "epoch": 4.756230121985587, - "grad_norm": 0.0018428814364597201, - "learning_rate": 0.00019998884511146034, - "loss": 46.0, - "step": 29534 - }, - { - "epoch": 4.756391159064375, - "grad_norm": 0.015519008040428162, - "learning_rate": 0.00019998884435581276, - "loss": 46.0, - "step": 29535 - }, - { - "epoch": 4.756552196143162, - "grad_norm": 0.006286387797445059, - "learning_rate": 0.00019998884360013954, - "loss": 46.0, - "step": 29536 - }, - { - "epoch": 4.75671323322195, - "grad_norm": 0.0033569256775081158, - "learning_rate": 0.00019998884284444076, - "loss": 46.0, - "step": 29537 - }, - { - "epoch": 4.756874270300736, - "grad_norm": 0.00317478203214705, - "learning_rate": 0.00019998884208871637, - "loss": 46.0, - "step": 29538 - }, - { - "epoch": 4.757035307379525, - "grad_norm": 0.004013438709080219, - "learning_rate": 0.00019998884133296639, - "loss": 46.0, - "step": 29539 - }, - { - "epoch": 4.757196344458311, - "grad_norm": 0.001528636901639402, - "learning_rate": 0.00019998884057719082, - "loss": 46.0, - "step": 29540 - }, - { - "epoch": 4.757357381537099, - "grad_norm": 0.005556702613830566, - "learning_rate": 0.00019998883982138966, - "loss": 46.0, - "step": 29541 - }, - { - "epoch": 4.757518418615886, - "grad_norm": 0.004374017007648945, - "learning_rate": 0.0001999888390655629, - "loss": 46.0, - "step": 29542 - }, - { - "epoch": 4.7576794556946735, - "grad_norm": 0.003192405914887786, - "learning_rate": 0.00019998883830971056, - "loss": 46.0, - "step": 29543 - }, - { - "epoch": 4.757840492773461, - "grad_norm": 0.0022569317370653152, - "learning_rate": 0.0001999888375538326, - "loss": 46.0, - "step": 29544 - }, - { - "epoch": 4.7580015298522484, - "grad_norm": 0.0034018929582089186, - "learning_rate": 0.00019998883679792906, - "loss": 46.0, - "step": 29545 - }, - { - "epoch": 4.758162566931036, - "grad_norm": 0.002782811876386404, - "learning_rate": 0.00019998883604199996, - "loss": 46.0, - "step": 29546 - }, - { - "epoch": 4.758323604009823, - "grad_norm": 0.0008092928910627961, - "learning_rate": 0.00019998883528604523, - "loss": 46.0, - "step": 29547 - }, - { - "epoch": 4.758484641088611, - "grad_norm": 0.0011253795819357038, - "learning_rate": 0.00019998883453006494, - "loss": 46.0, - "step": 29548 - }, - { - "epoch": 4.758645678167398, - "grad_norm": 0.0011445381678640842, - "learning_rate": 0.000199988833774059, - "loss": 46.0, - "step": 29549 - }, - { - "epoch": 4.758806715246186, - "grad_norm": 0.0024955698754638433, - "learning_rate": 0.00019998883301802753, - "loss": 46.0, - "step": 29550 - }, - { - "epoch": 4.758967752324973, - "grad_norm": 0.012237120419740677, - "learning_rate": 0.00019998883226197045, - "loss": 46.0, - "step": 29551 - }, - { - "epoch": 4.759128789403761, - "grad_norm": 0.02256646752357483, - "learning_rate": 0.00019998883150588775, - "loss": 46.0, - "step": 29552 - }, - { - "epoch": 4.759289826482547, - "grad_norm": 0.013496628031134605, - "learning_rate": 0.0001999888307497795, - "loss": 46.0, - "step": 29553 - }, - { - "epoch": 4.759450863561335, - "grad_norm": 0.012984919361770153, - "learning_rate": 0.00019998882999364565, - "loss": 46.0, - "step": 29554 - }, - { - "epoch": 4.759611900640122, - "grad_norm": 0.0016839482123032212, - "learning_rate": 0.0001999888292374862, - "loss": 46.0, - "step": 29555 - }, - { - "epoch": 4.7597729377189095, - "grad_norm": 0.0036112088710069656, - "learning_rate": 0.00019998882848130115, - "loss": 46.0, - "step": 29556 - }, - { - "epoch": 4.759933974797697, - "grad_norm": 0.0018007522448897362, - "learning_rate": 0.00019998882772509052, - "loss": 46.0, - "step": 29557 - }, - { - "epoch": 4.760095011876484, - "grad_norm": 0.00039856089279055595, - "learning_rate": 0.00019998882696885427, - "loss": 46.0, - "step": 29558 - }, - { - "epoch": 4.760256048955272, - "grad_norm": 0.0016185863642022014, - "learning_rate": 0.00019998882621259244, - "loss": 46.0, - "step": 29559 - }, - { - "epoch": 4.760417086034059, - "grad_norm": 0.0032558897510170937, - "learning_rate": 0.00019998882545630507, - "loss": 46.0, - "step": 29560 - }, - { - "epoch": 4.760578123112847, - "grad_norm": 0.0011197688290849328, - "learning_rate": 0.00019998882469999204, - "loss": 46.0, - "step": 29561 - }, - { - "epoch": 4.760739160191634, - "grad_norm": 0.00581378536298871, - "learning_rate": 0.00019998882394365347, - "loss": 46.0, - "step": 29562 - }, - { - "epoch": 4.760900197270422, - "grad_norm": 0.0063275909051299095, - "learning_rate": 0.00019998882318728926, - "loss": 46.0, - "step": 29563 - }, - { - "epoch": 4.761061234349209, - "grad_norm": 0.0014789545675739646, - "learning_rate": 0.0001999888224308995, - "loss": 46.0, - "step": 29564 - }, - { - "epoch": 4.761222271427997, - "grad_norm": 0.003132209647446871, - "learning_rate": 0.0001999888216744841, - "loss": 46.0, - "step": 29565 - }, - { - "epoch": 4.761383308506784, - "grad_norm": 0.002272936748340726, - "learning_rate": 0.00019998882091804316, - "loss": 46.0, - "step": 29566 - }, - { - "epoch": 4.7615443455855715, - "grad_norm": 0.004940418992191553, - "learning_rate": 0.00019998882016157658, - "loss": 46.0, - "step": 29567 - }, - { - "epoch": 4.761705382664358, - "grad_norm": 0.003442492103204131, - "learning_rate": 0.00019998881940508446, - "loss": 46.0, - "step": 29568 - }, - { - "epoch": 4.7618664197431455, - "grad_norm": 0.013806091621518135, - "learning_rate": 0.0001999888186485667, - "loss": 46.0, - "step": 29569 - }, - { - "epoch": 4.762027456821933, - "grad_norm": 0.0017356426687911153, - "learning_rate": 0.00019998881789202338, - "loss": 46.0, - "step": 29570 - }, - { - "epoch": 4.76218849390072, - "grad_norm": 0.001779056154191494, - "learning_rate": 0.00019998881713545444, - "loss": 46.0, - "step": 29571 - }, - { - "epoch": 4.762349530979508, - "grad_norm": 0.0057486738078296185, - "learning_rate": 0.00019998881637885992, - "loss": 46.0, - "step": 29572 - }, - { - "epoch": 4.762510568058295, - "grad_norm": 0.0010489927371963859, - "learning_rate": 0.00019998881562223981, - "loss": 46.0, - "step": 29573 - }, - { - "epoch": 4.762671605137083, - "grad_norm": 0.0031836184207350016, - "learning_rate": 0.00019998881486559412, - "loss": 46.0, - "step": 29574 - }, - { - "epoch": 4.76283264221587, - "grad_norm": 0.006618183571845293, - "learning_rate": 0.00019998881410892284, - "loss": 46.0, - "step": 29575 - }, - { - "epoch": 4.762993679294658, - "grad_norm": 0.0011528455652296543, - "learning_rate": 0.00019998881335222594, - "loss": 46.0, - "step": 29576 - }, - { - "epoch": 4.763154716373445, - "grad_norm": 0.02076326310634613, - "learning_rate": 0.00019998881259550348, - "loss": 46.0, - "step": 29577 - }, - { - "epoch": 4.763315753452233, - "grad_norm": 0.011185937561094761, - "learning_rate": 0.00019998881183875538, - "loss": 46.0, - "step": 29578 - }, - { - "epoch": 4.76347679053102, - "grad_norm": 0.006465322803705931, - "learning_rate": 0.00019998881108198173, - "loss": 46.0, - "step": 29579 - }, - { - "epoch": 4.7636378276098075, - "grad_norm": 0.00529022142291069, - "learning_rate": 0.00019998881032518248, - "loss": 46.0, - "step": 29580 - }, - { - "epoch": 4.763798864688595, - "grad_norm": 0.0014770481502637267, - "learning_rate": 0.00019998880956835765, - "loss": 46.0, - "step": 29581 - }, - { - "epoch": 4.763959901767382, - "grad_norm": 0.008048774674534798, - "learning_rate": 0.0001999888088115072, - "loss": 46.0, - "step": 29582 - }, - { - "epoch": 4.764120938846169, - "grad_norm": 0.0027655549347400665, - "learning_rate": 0.0001999888080546312, - "loss": 46.0, - "step": 29583 - }, - { - "epoch": 4.764281975924956, - "grad_norm": 0.0013003175845369697, - "learning_rate": 0.00019998880729772957, - "loss": 46.0, - "step": 29584 - }, - { - "epoch": 4.764443013003744, - "grad_norm": 0.002102261409163475, - "learning_rate": 0.00019998880654080234, - "loss": 46.0, - "step": 29585 - }, - { - "epoch": 4.764604050082531, - "grad_norm": 0.0018356706714257598, - "learning_rate": 0.00019998880578384954, - "loss": 46.0, - "step": 29586 - }, - { - "epoch": 4.764765087161319, - "grad_norm": 0.001584004727192223, - "learning_rate": 0.00019998880502687113, - "loss": 46.0, - "step": 29587 - }, - { - "epoch": 4.764926124240106, - "grad_norm": 0.005989392288029194, - "learning_rate": 0.00019998880426986716, - "loss": 46.0, - "step": 29588 - }, - { - "epoch": 4.765087161318894, - "grad_norm": 0.0026728089433163404, - "learning_rate": 0.00019998880351283758, - "loss": 46.0, - "step": 29589 - }, - { - "epoch": 4.765248198397681, - "grad_norm": 0.0032795623410493135, - "learning_rate": 0.0001999888027557824, - "loss": 46.0, - "step": 29590 - }, - { - "epoch": 4.7654092354764686, - "grad_norm": 0.007996369153261185, - "learning_rate": 0.00019998880199870164, - "loss": 46.0, - "step": 29591 - }, - { - "epoch": 4.765570272555256, - "grad_norm": 0.011026730760931969, - "learning_rate": 0.0001999888012415953, - "loss": 46.0, - "step": 29592 - }, - { - "epoch": 4.7657313096340435, - "grad_norm": 0.003427068470045924, - "learning_rate": 0.00019998880048446337, - "loss": 46.0, - "step": 29593 - }, - { - "epoch": 4.765892346712831, - "grad_norm": 0.009349680505692959, - "learning_rate": 0.0001999887997273058, - "loss": 46.0, - "step": 29594 - }, - { - "epoch": 4.766053383791618, - "grad_norm": 0.0038277243729680777, - "learning_rate": 0.00019998879897012266, - "loss": 46.0, - "step": 29595 - }, - { - "epoch": 4.766214420870405, - "grad_norm": 0.005733050871640444, - "learning_rate": 0.00019998879821291394, - "loss": 46.0, - "step": 29596 - }, - { - "epoch": 4.766375457949193, - "grad_norm": 0.0020615593530237675, - "learning_rate": 0.00019998879745567965, - "loss": 46.0, - "step": 29597 - }, - { - "epoch": 4.76653649502798, - "grad_norm": 0.00975791085511446, - "learning_rate": 0.00019998879669841973, - "loss": 46.0, - "step": 29598 - }, - { - "epoch": 4.766697532106767, - "grad_norm": 0.0031067838426679373, - "learning_rate": 0.00019998879594113422, - "loss": 46.0, - "step": 29599 - }, - { - "epoch": 4.766858569185555, - "grad_norm": 0.005844696890562773, - "learning_rate": 0.00019998879518382312, - "loss": 46.0, - "step": 29600 - }, - { - "epoch": 4.767019606264342, - "grad_norm": 0.005044762510806322, - "learning_rate": 0.00019998879442648644, - "loss": 46.0, - "step": 29601 - }, - { - "epoch": 4.76718064334313, - "grad_norm": 0.0058305230922997, - "learning_rate": 0.00019998879366912417, - "loss": 46.0, - "step": 29602 - }, - { - "epoch": 4.767341680421917, - "grad_norm": 0.01473147887736559, - "learning_rate": 0.00019998879291173628, - "loss": 46.0, - "step": 29603 - }, - { - "epoch": 4.7675027175007045, - "grad_norm": 0.0036365208216011524, - "learning_rate": 0.00019998879215432283, - "loss": 46.0, - "step": 29604 - }, - { - "epoch": 4.767663754579492, - "grad_norm": 0.004284884314984083, - "learning_rate": 0.00019998879139688377, - "loss": 46.0, - "step": 29605 - }, - { - "epoch": 4.767824791658279, - "grad_norm": 0.0049233026802539825, - "learning_rate": 0.00019998879063941915, - "loss": 46.0, - "step": 29606 - }, - { - "epoch": 4.767985828737067, - "grad_norm": 0.007909219712018967, - "learning_rate": 0.00019998878988192892, - "loss": 46.0, - "step": 29607 - }, - { - "epoch": 4.768146865815854, - "grad_norm": 0.010383238084614277, - "learning_rate": 0.00019998878912441307, - "loss": 46.0, - "step": 29608 - }, - { - "epoch": 4.768307902894642, - "grad_norm": 0.0022008235100656748, - "learning_rate": 0.00019998878836687166, - "loss": 46.0, - "step": 29609 - }, - { - "epoch": 4.768468939973429, - "grad_norm": 0.005689368117600679, - "learning_rate": 0.00019998878760930464, - "loss": 46.0, - "step": 29610 - }, - { - "epoch": 4.768629977052216, - "grad_norm": 0.0069381617940962315, - "learning_rate": 0.00019998878685171203, - "loss": 46.0, - "step": 29611 - }, - { - "epoch": 4.768791014131004, - "grad_norm": 0.0028861570172011852, - "learning_rate": 0.00019998878609409383, - "loss": 46.0, - "step": 29612 - }, - { - "epoch": 4.768952051209791, - "grad_norm": 0.011469135992228985, - "learning_rate": 0.00019998878533645007, - "loss": 46.0, - "step": 29613 - }, - { - "epoch": 4.769113088288578, - "grad_norm": 0.0030142394825816154, - "learning_rate": 0.00019998878457878067, - "loss": 46.0, - "step": 29614 - }, - { - "epoch": 4.769274125367366, - "grad_norm": 0.01402412448078394, - "learning_rate": 0.0001999887838210857, - "loss": 46.0, - "step": 29615 - }, - { - "epoch": 4.769435162446153, - "grad_norm": 0.004262520000338554, - "learning_rate": 0.00019998878306336514, - "loss": 46.0, - "step": 29616 - }, - { - "epoch": 4.7695961995249405, - "grad_norm": 0.002674600575119257, - "learning_rate": 0.00019998878230561898, - "loss": 46.0, - "step": 29617 - }, - { - "epoch": 4.769757236603728, - "grad_norm": 0.015288958325982094, - "learning_rate": 0.00019998878154784723, - "loss": 46.0, - "step": 29618 - }, - { - "epoch": 4.769918273682515, - "grad_norm": 0.0068214405328035355, - "learning_rate": 0.0001999887807900499, - "loss": 46.0, - "step": 29619 - }, - { - "epoch": 4.770079310761303, - "grad_norm": 0.006429290398955345, - "learning_rate": 0.00019998878003222697, - "loss": 46.0, - "step": 29620 - }, - { - "epoch": 4.77024034784009, - "grad_norm": 0.0014157138066366315, - "learning_rate": 0.00019998877927437844, - "loss": 46.0, - "step": 29621 - }, - { - "epoch": 4.770401384918878, - "grad_norm": 0.008382156491279602, - "learning_rate": 0.00019998877851650434, - "loss": 46.0, - "step": 29622 - }, - { - "epoch": 4.770562421997665, - "grad_norm": 0.0020573085639625788, - "learning_rate": 0.0001999887777586046, - "loss": 46.0, - "step": 29623 - }, - { - "epoch": 4.770723459076453, - "grad_norm": 0.0018315042834728956, - "learning_rate": 0.00019998877700067933, - "loss": 46.0, - "step": 29624 - }, - { - "epoch": 4.77088449615524, - "grad_norm": 0.0009660259820520878, - "learning_rate": 0.00019998877624272842, - "loss": 46.0, - "step": 29625 - }, - { - "epoch": 4.771045533234027, - "grad_norm": 0.0021420144475996494, - "learning_rate": 0.00019998877548475194, - "loss": 46.0, - "step": 29626 - }, - { - "epoch": 4.771206570312814, - "grad_norm": 0.0023009763099253178, - "learning_rate": 0.00019998877472674986, - "loss": 46.0, - "step": 29627 - }, - { - "epoch": 4.771367607391602, - "grad_norm": 0.0018324163975194097, - "learning_rate": 0.0001999887739687222, - "loss": 46.0, - "step": 29628 - }, - { - "epoch": 4.771528644470389, - "grad_norm": 0.020726684480905533, - "learning_rate": 0.00019998877321066892, - "loss": 46.0, - "step": 29629 - }, - { - "epoch": 4.7716896815491765, - "grad_norm": 0.005250627640634775, - "learning_rate": 0.0001999887724525901, - "loss": 46.0, - "step": 29630 - }, - { - "epoch": 4.771850718627964, - "grad_norm": 0.009093799628317356, - "learning_rate": 0.0001999887716944856, - "loss": 46.0, - "step": 29631 - }, - { - "epoch": 4.772011755706751, - "grad_norm": 0.0037383155431598425, - "learning_rate": 0.00019998877093635562, - "loss": 46.0, - "step": 29632 - }, - { - "epoch": 4.772172792785539, - "grad_norm": 0.008891158737242222, - "learning_rate": 0.00019998877017819998, - "loss": 46.0, - "step": 29633 - }, - { - "epoch": 4.772333829864326, - "grad_norm": 0.0023776928428560495, - "learning_rate": 0.00019998876942001873, - "loss": 46.0, - "step": 29634 - }, - { - "epoch": 4.772494866943114, - "grad_norm": 0.0023169522173702717, - "learning_rate": 0.00019998876866181195, - "loss": 46.0, - "step": 29635 - }, - { - "epoch": 4.772655904021901, - "grad_norm": 0.009000053629279137, - "learning_rate": 0.00019998876790357955, - "loss": 46.0, - "step": 29636 - }, - { - "epoch": 4.772816941100689, - "grad_norm": 0.0033633296843618155, - "learning_rate": 0.00019998876714532156, - "loss": 46.0, - "step": 29637 - }, - { - "epoch": 4.772977978179476, - "grad_norm": 0.014860347844660282, - "learning_rate": 0.00019998876638703793, - "loss": 46.0, - "step": 29638 - }, - { - "epoch": 4.773139015258264, - "grad_norm": 0.0018532221438363194, - "learning_rate": 0.00019998876562872877, - "loss": 46.0, - "step": 29639 - }, - { - "epoch": 4.773300052337051, - "grad_norm": 0.01112237386405468, - "learning_rate": 0.000199988764870394, - "loss": 46.0, - "step": 29640 - }, - { - "epoch": 4.773461089415838, - "grad_norm": 0.007151414640247822, - "learning_rate": 0.0001999887641120336, - "loss": 46.0, - "step": 29641 - }, - { - "epoch": 4.773622126494625, - "grad_norm": 0.0030269986018538475, - "learning_rate": 0.0001999887633536477, - "loss": 46.0, - "step": 29642 - }, - { - "epoch": 4.7737831635734125, - "grad_norm": 0.010572108440101147, - "learning_rate": 0.00019998876259523613, - "loss": 46.0, - "step": 29643 - }, - { - "epoch": 4.7739442006522, - "grad_norm": 0.015476390719413757, - "learning_rate": 0.000199988761836799, - "loss": 46.0, - "step": 29644 - }, - { - "epoch": 4.774105237730987, - "grad_norm": 0.008253595791757107, - "learning_rate": 0.00019998876107833627, - "loss": 46.0, - "step": 29645 - }, - { - "epoch": 4.774266274809775, - "grad_norm": 0.0030193838756531477, - "learning_rate": 0.00019998876031984794, - "loss": 46.0, - "step": 29646 - }, - { - "epoch": 4.774427311888562, - "grad_norm": 0.0036472859792411327, - "learning_rate": 0.00019998875956133403, - "loss": 46.0, - "step": 29647 - }, - { - "epoch": 4.77458834896735, - "grad_norm": 0.00540729146450758, - "learning_rate": 0.00019998875880279453, - "loss": 46.0, - "step": 29648 - }, - { - "epoch": 4.774749386046137, - "grad_norm": 0.001582067459821701, - "learning_rate": 0.0001999887580442294, - "loss": 46.0, - "step": 29649 - }, - { - "epoch": 4.774910423124925, - "grad_norm": 0.005634675268083811, - "learning_rate": 0.0001999887572856387, - "loss": 46.0, - "step": 29650 - }, - { - "epoch": 4.775071460203712, - "grad_norm": 0.019446276128292084, - "learning_rate": 0.00019998875652702243, - "loss": 46.0, - "step": 29651 - }, - { - "epoch": 4.7752324972824995, - "grad_norm": 0.0036128598731011152, - "learning_rate": 0.00019998875576838056, - "loss": 46.0, - "step": 29652 - }, - { - "epoch": 4.775393534361287, - "grad_norm": 0.004637226928025484, - "learning_rate": 0.00019998875500971307, - "loss": 46.0, - "step": 29653 - }, - { - "epoch": 4.7755545714400744, - "grad_norm": 0.005163829308003187, - "learning_rate": 0.00019998875425102002, - "loss": 46.0, - "step": 29654 - }, - { - "epoch": 4.775715608518862, - "grad_norm": 0.0034166902769356966, - "learning_rate": 0.00019998875349230136, - "loss": 46.0, - "step": 29655 - }, - { - "epoch": 4.7758766455976485, - "grad_norm": 0.0013359319418668747, - "learning_rate": 0.00019998875273355714, - "loss": 46.0, - "step": 29656 - }, - { - "epoch": 4.776037682676436, - "grad_norm": 0.010175767354667187, - "learning_rate": 0.0001999887519747873, - "loss": 46.0, - "step": 29657 - }, - { - "epoch": 4.776198719755223, - "grad_norm": 0.004424629267305136, - "learning_rate": 0.00019998875121599187, - "loss": 46.0, - "step": 29658 - }, - { - "epoch": 4.776359756834011, - "grad_norm": 0.0019444164354354143, - "learning_rate": 0.00019998875045717084, - "loss": 46.0, - "step": 29659 - }, - { - "epoch": 4.776520793912798, - "grad_norm": 0.003968354314565659, - "learning_rate": 0.00019998874969832424, - "loss": 46.0, - "step": 29660 - }, - { - "epoch": 4.776681830991586, - "grad_norm": 0.0024845667649060488, - "learning_rate": 0.00019998874893945202, - "loss": 46.0, - "step": 29661 - }, - { - "epoch": 4.776842868070373, - "grad_norm": 0.0068671079352498055, - "learning_rate": 0.00019998874818055425, - "loss": 46.0, - "step": 29662 - }, - { - "epoch": 4.777003905149161, - "grad_norm": 0.002554807346314192, - "learning_rate": 0.00019998874742163086, - "loss": 46.0, - "step": 29663 - }, - { - "epoch": 4.777164942227948, - "grad_norm": 0.0033804159611463547, - "learning_rate": 0.0001999887466626819, - "loss": 46.0, - "step": 29664 - }, - { - "epoch": 4.7773259793067355, - "grad_norm": 0.006721459329128265, - "learning_rate": 0.0001999887459037073, - "loss": 46.0, - "step": 29665 - }, - { - "epoch": 4.777487016385523, - "grad_norm": 0.0022280775010585785, - "learning_rate": 0.00019998874514470712, - "loss": 46.0, - "step": 29666 - }, - { - "epoch": 4.77764805346431, - "grad_norm": 0.0026382971554994583, - "learning_rate": 0.0001999887443856814, - "loss": 46.0, - "step": 29667 - }, - { - "epoch": 4.777809090543098, - "grad_norm": 0.0062921964563429356, - "learning_rate": 0.00019998874362663004, - "loss": 46.0, - "step": 29668 - }, - { - "epoch": 4.7779701276218844, - "grad_norm": 0.005004643928259611, - "learning_rate": 0.0001999887428675531, - "loss": 46.0, - "step": 29669 - }, - { - "epoch": 4.778131164700673, - "grad_norm": 0.0064356098882853985, - "learning_rate": 0.00019998874210845057, - "loss": 46.0, - "step": 29670 - }, - { - "epoch": 4.778292201779459, - "grad_norm": 0.007435160223394632, - "learning_rate": 0.00019998874134932244, - "loss": 46.0, - "step": 29671 - }, - { - "epoch": 4.778453238858247, - "grad_norm": 0.004613068420439959, - "learning_rate": 0.0001999887405901687, - "loss": 46.0, - "step": 29672 - }, - { - "epoch": 4.778614275937034, - "grad_norm": 0.010184877552092075, - "learning_rate": 0.00019998873983098942, - "loss": 46.0, - "step": 29673 - }, - { - "epoch": 4.778775313015822, - "grad_norm": 0.0011658653384074569, - "learning_rate": 0.0001999887390717845, - "loss": 46.0, - "step": 29674 - }, - { - "epoch": 4.778936350094609, - "grad_norm": 0.006491041276603937, - "learning_rate": 0.00019998873831255404, - "loss": 46.0, - "step": 29675 - }, - { - "epoch": 4.779097387173397, - "grad_norm": 0.008794019930064678, - "learning_rate": 0.00019998873755329793, - "loss": 46.0, - "step": 29676 - }, - { - "epoch": 4.779258424252184, - "grad_norm": 0.004227577243000269, - "learning_rate": 0.00019998873679401627, - "loss": 46.0, - "step": 29677 - }, - { - "epoch": 4.7794194613309715, - "grad_norm": 0.005861061159521341, - "learning_rate": 0.000199988736034709, - "loss": 46.0, - "step": 29678 - }, - { - "epoch": 4.779580498409759, - "grad_norm": 0.01167288888245821, - "learning_rate": 0.00019998873527537613, - "loss": 46.0, - "step": 29679 - }, - { - "epoch": 4.779741535488546, - "grad_norm": 0.0011174395913258195, - "learning_rate": 0.00019998873451601768, - "loss": 46.0, - "step": 29680 - }, - { - "epoch": 4.779902572567334, - "grad_norm": 0.0061180549673736095, - "learning_rate": 0.00019998873375663364, - "loss": 46.0, - "step": 29681 - }, - { - "epoch": 4.780063609646121, - "grad_norm": 0.023945102468132973, - "learning_rate": 0.00019998873299722402, - "loss": 46.0, - "step": 29682 - }, - { - "epoch": 4.780224646724909, - "grad_norm": 0.01428929902613163, - "learning_rate": 0.00019998873223778878, - "loss": 46.0, - "step": 29683 - }, - { - "epoch": 4.780385683803695, - "grad_norm": 0.00374038890004158, - "learning_rate": 0.00019998873147832798, - "loss": 46.0, - "step": 29684 - }, - { - "epoch": 4.780546720882484, - "grad_norm": 0.005625316873192787, - "learning_rate": 0.00019998873071884154, - "loss": 46.0, - "step": 29685 - }, - { - "epoch": 4.78070775796127, - "grad_norm": 0.010602965019643307, - "learning_rate": 0.00019998872995932954, - "loss": 46.0, - "step": 29686 - }, - { - "epoch": 4.780868795040058, - "grad_norm": 0.004356399178504944, - "learning_rate": 0.00019998872919979195, - "loss": 46.0, - "step": 29687 - }, - { - "epoch": 4.781029832118845, - "grad_norm": 0.005907748360186815, - "learning_rate": 0.00019998872844022875, - "loss": 46.0, - "step": 29688 - }, - { - "epoch": 4.781190869197633, - "grad_norm": 0.0007467250688932836, - "learning_rate": 0.00019998872768063998, - "loss": 46.0, - "step": 29689 - }, - { - "epoch": 4.78135190627642, - "grad_norm": 0.000649077002890408, - "learning_rate": 0.0001999887269210256, - "loss": 46.0, - "step": 29690 - }, - { - "epoch": 4.7815129433552075, - "grad_norm": 0.008154094219207764, - "learning_rate": 0.00019998872616138564, - "loss": 46.0, - "step": 29691 - }, - { - "epoch": 4.781673980433995, - "grad_norm": 0.011132962070405483, - "learning_rate": 0.0001999887254017201, - "loss": 46.0, - "step": 29692 - }, - { - "epoch": 4.781835017512782, - "grad_norm": 0.00263105146586895, - "learning_rate": 0.00019998872464202893, - "loss": 46.0, - "step": 29693 - }, - { - "epoch": 4.78199605459157, - "grad_norm": 0.01146366260945797, - "learning_rate": 0.0001999887238823122, - "loss": 46.0, - "step": 29694 - }, - { - "epoch": 4.782157091670357, - "grad_norm": 0.003947129473090172, - "learning_rate": 0.0001999887231225699, - "loss": 46.0, - "step": 29695 - }, - { - "epoch": 4.782318128749145, - "grad_norm": 0.0026669823564589024, - "learning_rate": 0.00019998872236280196, - "loss": 46.0, - "step": 29696 - }, - { - "epoch": 4.782479165827932, - "grad_norm": 0.0018738452345132828, - "learning_rate": 0.00019998872160300842, - "loss": 46.0, - "step": 29697 - }, - { - "epoch": 4.78264020290672, - "grad_norm": 0.004479184281080961, - "learning_rate": 0.00019998872084318935, - "loss": 46.0, - "step": 29698 - }, - { - "epoch": 4.782801239985506, - "grad_norm": 0.0032427734695374966, - "learning_rate": 0.0001999887200833446, - "loss": 46.0, - "step": 29699 - }, - { - "epoch": 4.782962277064294, - "grad_norm": 0.001625141710974276, - "learning_rate": 0.00019998871932347433, - "loss": 46.0, - "step": 29700 - }, - { - "epoch": 4.783123314143081, - "grad_norm": 0.006443955469876528, - "learning_rate": 0.00019998871856357844, - "loss": 46.0, - "step": 29701 - }, - { - "epoch": 4.783284351221869, - "grad_norm": 0.002301170490682125, - "learning_rate": 0.00019998871780365696, - "loss": 46.0, - "step": 29702 - }, - { - "epoch": 4.783445388300656, - "grad_norm": 0.004346245899796486, - "learning_rate": 0.0001999887170437099, - "loss": 46.0, - "step": 29703 - }, - { - "epoch": 4.7836064253794435, - "grad_norm": 0.0034879206214100122, - "learning_rate": 0.00019998871628373725, - "loss": 46.0, - "step": 29704 - }, - { - "epoch": 4.783767462458231, - "grad_norm": 0.005502117332071066, - "learning_rate": 0.00019998871552373898, - "loss": 46.0, - "step": 29705 - }, - { - "epoch": 4.783928499537018, - "grad_norm": 0.0031630699522793293, - "learning_rate": 0.00019998871476371516, - "loss": 46.0, - "step": 29706 - }, - { - "epoch": 4.784089536615806, - "grad_norm": 0.003849920816719532, - "learning_rate": 0.00019998871400366572, - "loss": 46.0, - "step": 29707 - }, - { - "epoch": 4.784250573694593, - "grad_norm": 0.01764640025794506, - "learning_rate": 0.0001999887132435907, - "loss": 46.0, - "step": 29708 - }, - { - "epoch": 4.784411610773381, - "grad_norm": 0.005238171201199293, - "learning_rate": 0.00019998871248349008, - "loss": 46.0, - "step": 29709 - }, - { - "epoch": 4.784572647852168, - "grad_norm": 0.004160532262176275, - "learning_rate": 0.00019998871172336388, - "loss": 46.0, - "step": 29710 - }, - { - "epoch": 4.784733684930956, - "grad_norm": 0.002927604829892516, - "learning_rate": 0.00019998871096321206, - "loss": 46.0, - "step": 29711 - }, - { - "epoch": 4.784894722009743, - "grad_norm": 0.005752460565418005, - "learning_rate": 0.00019998871020303466, - "loss": 46.0, - "step": 29712 - }, - { - "epoch": 4.7850557590885305, - "grad_norm": 0.0006801657727919519, - "learning_rate": 0.00019998870944283167, - "loss": 46.0, - "step": 29713 - }, - { - "epoch": 4.785216796167317, - "grad_norm": 0.0007681837887503207, - "learning_rate": 0.00019998870868260312, - "loss": 46.0, - "step": 29714 - }, - { - "epoch": 4.7853778332461046, - "grad_norm": 0.0038328443188220263, - "learning_rate": 0.00019998870792234893, - "loss": 46.0, - "step": 29715 - }, - { - "epoch": 4.785538870324892, - "grad_norm": 0.004479486029595137, - "learning_rate": 0.00019998870716206917, - "loss": 46.0, - "step": 29716 - }, - { - "epoch": 4.7856999074036795, - "grad_norm": 0.003548318985849619, - "learning_rate": 0.0001999887064017638, - "loss": 46.0, - "step": 29717 - }, - { - "epoch": 4.785860944482467, - "grad_norm": 0.0008701508049853146, - "learning_rate": 0.00019998870564143286, - "loss": 46.0, - "step": 29718 - }, - { - "epoch": 4.786021981561254, - "grad_norm": 0.0025080766063183546, - "learning_rate": 0.00019998870488107632, - "loss": 46.0, - "step": 29719 - }, - { - "epoch": 4.786183018640042, - "grad_norm": 0.008069365285336971, - "learning_rate": 0.0001999887041206942, - "loss": 46.0, - "step": 29720 - }, - { - "epoch": 4.786344055718829, - "grad_norm": 0.0009555853321217, - "learning_rate": 0.00019998870336028647, - "loss": 46.0, - "step": 29721 - }, - { - "epoch": 4.786505092797617, - "grad_norm": 0.0037297927774488926, - "learning_rate": 0.00019998870259985317, - "loss": 46.0, - "step": 29722 - }, - { - "epoch": 4.786666129876404, - "grad_norm": 0.007247936446219683, - "learning_rate": 0.00019998870183939426, - "loss": 46.0, - "step": 29723 - }, - { - "epoch": 4.786827166955192, - "grad_norm": 0.008992508985102177, - "learning_rate": 0.00019998870107890975, - "loss": 46.0, - "step": 29724 - }, - { - "epoch": 4.786988204033979, - "grad_norm": 0.001904545584693551, - "learning_rate": 0.00019998870031839964, - "loss": 46.0, - "step": 29725 - }, - { - "epoch": 4.7871492411127665, - "grad_norm": 0.022919410839676857, - "learning_rate": 0.00019998869955786396, - "loss": 46.0, - "step": 29726 - }, - { - "epoch": 4.787310278191554, - "grad_norm": 0.0012247557751834393, - "learning_rate": 0.00019998869879730272, - "loss": 46.0, - "step": 29727 - }, - { - "epoch": 4.787471315270341, - "grad_norm": 0.0011363471858203411, - "learning_rate": 0.00019998869803671581, - "loss": 46.0, - "step": 29728 - }, - { - "epoch": 4.787632352349128, - "grad_norm": 0.00351770780980587, - "learning_rate": 0.00019998869727610338, - "loss": 46.0, - "step": 29729 - }, - { - "epoch": 4.787793389427915, - "grad_norm": 0.02019556611776352, - "learning_rate": 0.00019998869651546532, - "loss": 46.0, - "step": 29730 - }, - { - "epoch": 4.787954426506703, - "grad_norm": 0.0014853174798190594, - "learning_rate": 0.00019998869575480168, - "loss": 46.0, - "step": 29731 - }, - { - "epoch": 4.78811546358549, - "grad_norm": 0.0020423715468496084, - "learning_rate": 0.00019998869499411245, - "loss": 46.0, - "step": 29732 - }, - { - "epoch": 4.788276500664278, - "grad_norm": 0.011394595727324486, - "learning_rate": 0.00019998869423339764, - "loss": 46.0, - "step": 29733 - }, - { - "epoch": 4.788437537743065, - "grad_norm": 0.004124745726585388, - "learning_rate": 0.0001999886934726572, - "loss": 46.0, - "step": 29734 - }, - { - "epoch": 4.788598574821853, - "grad_norm": 0.0058473628014326096, - "learning_rate": 0.0001999886927118912, - "loss": 46.0, - "step": 29735 - }, - { - "epoch": 4.78875961190064, - "grad_norm": 0.005639522336423397, - "learning_rate": 0.0001999886919510996, - "loss": 46.0, - "step": 29736 - }, - { - "epoch": 4.788920648979428, - "grad_norm": 0.00618215836584568, - "learning_rate": 0.0001999886911902824, - "loss": 46.0, - "step": 29737 - }, - { - "epoch": 4.789081686058215, - "grad_norm": 0.0023963251151144505, - "learning_rate": 0.0001999886904294396, - "loss": 46.0, - "step": 29738 - }, - { - "epoch": 4.7892427231370025, - "grad_norm": 0.008571038953959942, - "learning_rate": 0.00019998868966857123, - "loss": 46.0, - "step": 29739 - }, - { - "epoch": 4.78940376021579, - "grad_norm": 0.004283445421606302, - "learning_rate": 0.00019998868890767725, - "loss": 46.0, - "step": 29740 - }, - { - "epoch": 4.789564797294577, - "grad_norm": 0.003792413044720888, - "learning_rate": 0.00019998868814675769, - "loss": 46.0, - "step": 29741 - }, - { - "epoch": 4.789725834373365, - "grad_norm": 0.001151659176684916, - "learning_rate": 0.00019998868738581256, - "loss": 46.0, - "step": 29742 - }, - { - "epoch": 4.789886871452152, - "grad_norm": 0.006588222924619913, - "learning_rate": 0.0001999886866248418, - "loss": 46.0, - "step": 29743 - }, - { - "epoch": 4.790047908530939, - "grad_norm": 0.00819449219852686, - "learning_rate": 0.00019998868586384546, - "loss": 46.0, - "step": 29744 - }, - { - "epoch": 4.790208945609726, - "grad_norm": 0.00936033483594656, - "learning_rate": 0.00019998868510282352, - "loss": 46.0, - "step": 29745 - }, - { - "epoch": 4.790369982688514, - "grad_norm": 0.006183766294270754, - "learning_rate": 0.000199988684341776, - "loss": 46.0, - "step": 29746 - }, - { - "epoch": 4.790531019767301, - "grad_norm": 0.009324228391051292, - "learning_rate": 0.00019998868358070288, - "loss": 46.0, - "step": 29747 - }, - { - "epoch": 4.790692056846089, - "grad_norm": 0.008201288990676403, - "learning_rate": 0.0001999886828196042, - "loss": 46.0, - "step": 29748 - }, - { - "epoch": 4.790853093924876, - "grad_norm": 0.0046630133874714375, - "learning_rate": 0.00019998868205847988, - "loss": 46.0, - "step": 29749 - }, - { - "epoch": 4.791014131003664, - "grad_norm": 0.007685188669711351, - "learning_rate": 0.00019998868129733, - "loss": 46.0, - "step": 29750 - }, - { - "epoch": 4.791175168082451, - "grad_norm": 0.013726707547903061, - "learning_rate": 0.0001999886805361545, - "loss": 46.0, - "step": 29751 - }, - { - "epoch": 4.7913362051612385, - "grad_norm": 0.0086591225117445, - "learning_rate": 0.00019998867977495343, - "loss": 46.0, - "step": 29752 - }, - { - "epoch": 4.791497242240026, - "grad_norm": 0.01896575838327408, - "learning_rate": 0.00019998867901372677, - "loss": 46.0, - "step": 29753 - }, - { - "epoch": 4.791658279318813, - "grad_norm": 0.005633487366139889, - "learning_rate": 0.0001999886782524745, - "loss": 46.0, - "step": 29754 - }, - { - "epoch": 4.791819316397601, - "grad_norm": 0.005812176037579775, - "learning_rate": 0.00019998867749119667, - "loss": 46.0, - "step": 29755 - }, - { - "epoch": 4.791980353476388, - "grad_norm": 0.00278187426738441, - "learning_rate": 0.00019998867672989322, - "loss": 46.0, - "step": 29756 - }, - { - "epoch": 4.792141390555175, - "grad_norm": 0.008168050087988377, - "learning_rate": 0.00019998867596856417, - "loss": 46.0, - "step": 29757 - }, - { - "epoch": 4.792302427633963, - "grad_norm": 0.007299512624740601, - "learning_rate": 0.00019998867520720957, - "loss": 46.0, - "step": 29758 - }, - { - "epoch": 4.79246346471275, - "grad_norm": 0.011872775852680206, - "learning_rate": 0.00019998867444582933, - "loss": 46.0, - "step": 29759 - }, - { - "epoch": 4.792624501791537, - "grad_norm": 0.0030426299199461937, - "learning_rate": 0.00019998867368442352, - "loss": 46.0, - "step": 29760 - }, - { - "epoch": 4.792785538870325, - "grad_norm": 0.0028889866080135107, - "learning_rate": 0.00019998867292299216, - "loss": 46.0, - "step": 29761 - }, - { - "epoch": 4.792946575949112, - "grad_norm": 0.0021509702783077955, - "learning_rate": 0.00019998867216153513, - "loss": 46.0, - "step": 29762 - }, - { - "epoch": 4.7931076130279, - "grad_norm": 0.0053243329748511314, - "learning_rate": 0.00019998867140005256, - "loss": 46.0, - "step": 29763 - }, - { - "epoch": 4.793268650106687, - "grad_norm": 0.0022179947700351477, - "learning_rate": 0.00019998867063854436, - "loss": 46.0, - "step": 29764 - }, - { - "epoch": 4.7934296871854745, - "grad_norm": 0.0023544016294181347, - "learning_rate": 0.0001999886698770106, - "loss": 46.0, - "step": 29765 - }, - { - "epoch": 4.793590724264262, - "grad_norm": 0.003395197680220008, - "learning_rate": 0.00019998866911545123, - "loss": 46.0, - "step": 29766 - }, - { - "epoch": 4.793751761343049, - "grad_norm": 0.0029666482005268335, - "learning_rate": 0.0001999886683538663, - "loss": 46.0, - "step": 29767 - }, - { - "epoch": 4.793912798421837, - "grad_norm": 0.004383180756121874, - "learning_rate": 0.00019998866759225574, - "loss": 46.0, - "step": 29768 - }, - { - "epoch": 4.794073835500624, - "grad_norm": 0.016333680599927902, - "learning_rate": 0.0001999886668306196, - "loss": 46.0, - "step": 29769 - }, - { - "epoch": 4.794234872579412, - "grad_norm": 0.009403948672115803, - "learning_rate": 0.00019998866606895787, - "loss": 46.0, - "step": 29770 - }, - { - "epoch": 4.794395909658199, - "grad_norm": 0.004911371041089296, - "learning_rate": 0.00019998866530727055, - "loss": 46.0, - "step": 29771 - }, - { - "epoch": 4.794556946736986, - "grad_norm": 0.004054257180541754, - "learning_rate": 0.00019998866454555764, - "loss": 46.0, - "step": 29772 - }, - { - "epoch": 4.794717983815774, - "grad_norm": 0.0054565006867051125, - "learning_rate": 0.00019998866378381913, - "loss": 46.0, - "step": 29773 - }, - { - "epoch": 4.794879020894561, - "grad_norm": 0.012302064336836338, - "learning_rate": 0.00019998866302205505, - "loss": 46.0, - "step": 29774 - }, - { - "epoch": 4.795040057973348, - "grad_norm": 0.0037337648682296276, - "learning_rate": 0.00019998866226026536, - "loss": 46.0, - "step": 29775 - }, - { - "epoch": 4.7952010950521355, - "grad_norm": 0.005812222603708506, - "learning_rate": 0.00019998866149845008, - "loss": 46.0, - "step": 29776 - }, - { - "epoch": 4.795362132130923, - "grad_norm": 0.002526002237573266, - "learning_rate": 0.00019998866073660918, - "loss": 46.0, - "step": 29777 - }, - { - "epoch": 4.7955231692097104, - "grad_norm": 0.002947915345430374, - "learning_rate": 0.00019998865997474275, - "loss": 46.0, - "step": 29778 - }, - { - "epoch": 4.795684206288498, - "grad_norm": 0.010503734461963177, - "learning_rate": 0.00019998865921285069, - "loss": 46.0, - "step": 29779 - }, - { - "epoch": 4.795845243367285, - "grad_norm": 0.014623298309743404, - "learning_rate": 0.00019998865845093303, - "loss": 46.0, - "step": 29780 - }, - { - "epoch": 4.796006280446073, - "grad_norm": 0.004749680869281292, - "learning_rate": 0.0001999886576889898, - "loss": 46.0, - "step": 29781 - }, - { - "epoch": 4.79616731752486, - "grad_norm": 0.02176261506974697, - "learning_rate": 0.00019998865692702093, - "loss": 46.0, - "step": 29782 - }, - { - "epoch": 4.796328354603648, - "grad_norm": 0.0008449893211945891, - "learning_rate": 0.0001999886561650265, - "loss": 46.0, - "step": 29783 - }, - { - "epoch": 4.796489391682435, - "grad_norm": 0.010199923068284988, - "learning_rate": 0.0001999886554030065, - "loss": 46.0, - "step": 29784 - }, - { - "epoch": 4.796650428761223, - "grad_norm": 0.0008555551758036017, - "learning_rate": 0.0001999886546409609, - "loss": 46.0, - "step": 29785 - }, - { - "epoch": 4.79681146584001, - "grad_norm": 0.02349877543747425, - "learning_rate": 0.0001999886538788897, - "loss": 46.0, - "step": 29786 - }, - { - "epoch": 4.796972502918797, - "grad_norm": 0.011679049581289291, - "learning_rate": 0.0001999886531167929, - "loss": 46.0, - "step": 29787 - }, - { - "epoch": 4.797133539997584, - "grad_norm": 0.0020645330660045147, - "learning_rate": 0.00019998865235467054, - "loss": 46.0, - "step": 29788 - }, - { - "epoch": 4.7972945770763715, - "grad_norm": 0.004109121859073639, - "learning_rate": 0.00019998865159252254, - "loss": 46.0, - "step": 29789 - }, - { - "epoch": 4.797455614155159, - "grad_norm": 0.0012318691005930305, - "learning_rate": 0.00019998865083034896, - "loss": 46.0, - "step": 29790 - }, - { - "epoch": 4.797616651233946, - "grad_norm": 0.01024674903601408, - "learning_rate": 0.00019998865006814982, - "loss": 46.0, - "step": 29791 - }, - { - "epoch": 4.797777688312734, - "grad_norm": 0.0024883365258574486, - "learning_rate": 0.00019998864930592504, - "loss": 46.0, - "step": 29792 - }, - { - "epoch": 4.797938725391521, - "grad_norm": 0.00452685821801424, - "learning_rate": 0.00019998864854367472, - "loss": 46.0, - "step": 29793 - }, - { - "epoch": 4.798099762470309, - "grad_norm": 0.004877728410065174, - "learning_rate": 0.00019998864778139876, - "loss": 46.0, - "step": 29794 - }, - { - "epoch": 4.798260799549096, - "grad_norm": 0.0031970189884305, - "learning_rate": 0.00019998864701909725, - "loss": 46.0, - "step": 29795 - }, - { - "epoch": 4.798421836627884, - "grad_norm": 0.002675888128578663, - "learning_rate": 0.00019998864625677011, - "loss": 46.0, - "step": 29796 - }, - { - "epoch": 4.798582873706671, - "grad_norm": 0.0044942288659513, - "learning_rate": 0.0001999886454944174, - "loss": 46.0, - "step": 29797 - }, - { - "epoch": 4.798743910785459, - "grad_norm": 0.018630048260092735, - "learning_rate": 0.00019998864473203912, - "loss": 46.0, - "step": 29798 - }, - { - "epoch": 4.798904947864246, - "grad_norm": 0.0017461758106946945, - "learning_rate": 0.00019998864396963522, - "loss": 46.0, - "step": 29799 - }, - { - "epoch": 4.7990659849430335, - "grad_norm": 0.0017961953999474645, - "learning_rate": 0.00019998864320720574, - "loss": 46.0, - "step": 29800 - }, - { - "epoch": 4.799227022021821, - "grad_norm": 0.003009121399372816, - "learning_rate": 0.00019998864244475065, - "loss": 46.0, - "step": 29801 - }, - { - "epoch": 4.7993880591006075, - "grad_norm": 0.00737546943128109, - "learning_rate": 0.00019998864168226997, - "loss": 46.0, - "step": 29802 - }, - { - "epoch": 4.799549096179395, - "grad_norm": 0.0007850078400224447, - "learning_rate": 0.0001999886409197637, - "loss": 46.0, - "step": 29803 - }, - { - "epoch": 4.799710133258182, - "grad_norm": 0.0029610597994178534, - "learning_rate": 0.00019998864015723187, - "loss": 46.0, - "step": 29804 - }, - { - "epoch": 4.79987117033697, - "grad_norm": 0.0028766151517629623, - "learning_rate": 0.0001999886393946744, - "loss": 46.0, - "step": 29805 - }, - { - "epoch": 4.800032207415757, - "grad_norm": 0.003073472296819091, - "learning_rate": 0.00019998863863209137, - "loss": 46.0, - "step": 29806 - }, - { - "epoch": 4.800193244494545, - "grad_norm": 0.014405116438865662, - "learning_rate": 0.00019998863786948272, - "loss": 46.0, - "step": 29807 - }, - { - "epoch": 4.800354281573332, - "grad_norm": 0.005622957367449999, - "learning_rate": 0.0001999886371068485, - "loss": 46.0, - "step": 29808 - }, - { - "epoch": 4.80051531865212, - "grad_norm": 0.0038253404200077057, - "learning_rate": 0.0001999886363441887, - "loss": 46.0, - "step": 29809 - }, - { - "epoch": 4.800676355730907, - "grad_norm": 0.002177529502660036, - "learning_rate": 0.00019998863558150326, - "loss": 46.0, - "step": 29810 - }, - { - "epoch": 4.800837392809695, - "grad_norm": 0.0028617591597139835, - "learning_rate": 0.0001999886348187923, - "loss": 46.0, - "step": 29811 - }, - { - "epoch": 4.800998429888482, - "grad_norm": 0.003431614488363266, - "learning_rate": 0.0001999886340560557, - "loss": 46.0, - "step": 29812 - }, - { - "epoch": 4.8011594669672695, - "grad_norm": 0.001781382830813527, - "learning_rate": 0.00019998863329329352, - "loss": 46.0, - "step": 29813 - }, - { - "epoch": 4.801320504046057, - "grad_norm": 0.004645872861146927, - "learning_rate": 0.00019998863253050574, - "loss": 46.0, - "step": 29814 - }, - { - "epoch": 4.801481541124844, - "grad_norm": 0.0049195787869393826, - "learning_rate": 0.00019998863176769237, - "loss": 46.0, - "step": 29815 - }, - { - "epoch": 4.801642578203632, - "grad_norm": 0.004815096501260996, - "learning_rate": 0.00019998863100485341, - "loss": 46.0, - "step": 29816 - }, - { - "epoch": 4.801803615282418, - "grad_norm": 0.003890766529366374, - "learning_rate": 0.00019998863024198884, - "loss": 46.0, - "step": 29817 - }, - { - "epoch": 4.801964652361206, - "grad_norm": 0.008310000412166119, - "learning_rate": 0.0001999886294790987, - "loss": 46.0, - "step": 29818 - }, - { - "epoch": 4.802125689439993, - "grad_norm": 0.008424951694905758, - "learning_rate": 0.00019998862871618297, - "loss": 46.0, - "step": 29819 - }, - { - "epoch": 4.802286726518781, - "grad_norm": 0.006671548821032047, - "learning_rate": 0.00019998862795324163, - "loss": 46.0, - "step": 29820 - }, - { - "epoch": 4.802447763597568, - "grad_norm": 0.0012449411442503333, - "learning_rate": 0.00019998862719027472, - "loss": 46.0, - "step": 29821 - }, - { - "epoch": 4.802608800676356, - "grad_norm": 0.004106082487851381, - "learning_rate": 0.0001999886264272822, - "loss": 46.0, - "step": 29822 - }, - { - "epoch": 4.802769837755143, - "grad_norm": 0.004259491804987192, - "learning_rate": 0.00019998862566426411, - "loss": 46.0, - "step": 29823 - }, - { - "epoch": 4.8029308748339306, - "grad_norm": 0.004347400274127722, - "learning_rate": 0.0001999886249012204, - "loss": 46.0, - "step": 29824 - }, - { - "epoch": 4.803091911912718, - "grad_norm": 0.008030601777136326, - "learning_rate": 0.00019998862413815114, - "loss": 46.0, - "step": 29825 - }, - { - "epoch": 4.8032529489915055, - "grad_norm": 0.005460289306938648, - "learning_rate": 0.00019998862337505626, - "loss": 46.0, - "step": 29826 - }, - { - "epoch": 4.803413986070293, - "grad_norm": 0.0029346952214837074, - "learning_rate": 0.00019998862261193576, - "loss": 46.0, - "step": 29827 - }, - { - "epoch": 4.80357502314908, - "grad_norm": 0.006770648993551731, - "learning_rate": 0.00019998862184878973, - "loss": 46.0, - "step": 29828 - }, - { - "epoch": 4.803736060227868, - "grad_norm": 0.003570184577256441, - "learning_rate": 0.00019998862108561806, - "loss": 46.0, - "step": 29829 - }, - { - "epoch": 4.803897097306654, - "grad_norm": 0.001085524563677609, - "learning_rate": 0.0001999886203224208, - "loss": 46.0, - "step": 29830 - }, - { - "epoch": 4.804058134385443, - "grad_norm": 0.004232665058225393, - "learning_rate": 0.00019998861955919796, - "loss": 46.0, - "step": 29831 - }, - { - "epoch": 4.804219171464229, - "grad_norm": 0.001190761337056756, - "learning_rate": 0.00019998861879594952, - "loss": 46.0, - "step": 29832 - }, - { - "epoch": 4.804380208543017, - "grad_norm": 0.0029191027861088514, - "learning_rate": 0.0001999886180326755, - "loss": 46.0, - "step": 29833 - }, - { - "epoch": 4.804541245621804, - "grad_norm": 0.004711335524916649, - "learning_rate": 0.0001999886172693759, - "loss": 46.0, - "step": 29834 - }, - { - "epoch": 4.804702282700592, - "grad_norm": 0.002555497922003269, - "learning_rate": 0.0001999886165060507, - "loss": 46.0, - "step": 29835 - }, - { - "epoch": 4.804863319779379, - "grad_norm": 0.0087977834045887, - "learning_rate": 0.0001999886157426999, - "loss": 46.0, - "step": 29836 - }, - { - "epoch": 4.8050243568581665, - "grad_norm": 0.004582275170832872, - "learning_rate": 0.0001999886149793235, - "loss": 46.0, - "step": 29837 - }, - { - "epoch": 4.805185393936954, - "grad_norm": 0.01322112139314413, - "learning_rate": 0.00019998861421592152, - "loss": 46.0, - "step": 29838 - }, - { - "epoch": 4.805346431015741, - "grad_norm": 0.0019608517177402973, - "learning_rate": 0.00019998861345249395, - "loss": 46.0, - "step": 29839 - }, - { - "epoch": 4.805507468094529, - "grad_norm": 0.002760443137958646, - "learning_rate": 0.00019998861268904076, - "loss": 46.0, - "step": 29840 - }, - { - "epoch": 4.805668505173316, - "grad_norm": 0.001646880991756916, - "learning_rate": 0.00019998861192556202, - "loss": 46.0, - "step": 29841 - }, - { - "epoch": 4.805829542252104, - "grad_norm": 0.007906120270490646, - "learning_rate": 0.00019998861116205766, - "loss": 46.0, - "step": 29842 - }, - { - "epoch": 4.805990579330891, - "grad_norm": 0.02050287462770939, - "learning_rate": 0.0001999886103985277, - "loss": 46.0, - "step": 29843 - }, - { - "epoch": 4.806151616409679, - "grad_norm": 0.01248864084482193, - "learning_rate": 0.00019998860963497218, - "loss": 46.0, - "step": 29844 - }, - { - "epoch": 4.806312653488465, - "grad_norm": 0.004966906271874905, - "learning_rate": 0.00019998860887139106, - "loss": 46.0, - "step": 29845 - }, - { - "epoch": 4.806473690567254, - "grad_norm": 0.0009638596093282104, - "learning_rate": 0.00019998860810778435, - "loss": 46.0, - "step": 29846 - }, - { - "epoch": 4.80663472764604, - "grad_norm": 0.0024966069031506777, - "learning_rate": 0.00019998860734415203, - "loss": 46.0, - "step": 29847 - }, - { - "epoch": 4.806795764724828, - "grad_norm": 0.018226562067866325, - "learning_rate": 0.00019998860658049412, - "loss": 46.0, - "step": 29848 - }, - { - "epoch": 4.806956801803615, - "grad_norm": 0.007932223379611969, - "learning_rate": 0.00019998860581681065, - "loss": 46.0, - "step": 29849 - }, - { - "epoch": 4.8071178388824025, - "grad_norm": 0.007319622207432985, - "learning_rate": 0.00019998860505310154, - "loss": 46.0, - "step": 29850 - }, - { - "epoch": 4.80727887596119, - "grad_norm": 0.0018009509658440948, - "learning_rate": 0.00019998860428936687, - "loss": 46.0, - "step": 29851 - }, - { - "epoch": 4.807439913039977, - "grad_norm": 0.01569865643978119, - "learning_rate": 0.0001999886035256066, - "loss": 46.0, - "step": 29852 - }, - { - "epoch": 4.807600950118765, - "grad_norm": 0.0026742275804281235, - "learning_rate": 0.00019998860276182075, - "loss": 46.0, - "step": 29853 - }, - { - "epoch": 4.807761987197552, - "grad_norm": 0.004862932953983545, - "learning_rate": 0.0001999886019980093, - "loss": 46.0, - "step": 29854 - }, - { - "epoch": 4.80792302427634, - "grad_norm": 0.0064040180295705795, - "learning_rate": 0.00019998860123417224, - "loss": 46.0, - "step": 29855 - }, - { - "epoch": 4.808084061355127, - "grad_norm": 0.008049490861594677, - "learning_rate": 0.00019998860047030958, - "loss": 46.0, - "step": 29856 - }, - { - "epoch": 4.808245098433915, - "grad_norm": 0.005625725258141756, - "learning_rate": 0.0001999885997064214, - "loss": 46.0, - "step": 29857 - }, - { - "epoch": 4.808406135512702, - "grad_norm": 0.001973808743059635, - "learning_rate": 0.00019998859894250755, - "loss": 46.0, - "step": 29858 - }, - { - "epoch": 4.80856717259149, - "grad_norm": 0.01069825142621994, - "learning_rate": 0.00019998859817856813, - "loss": 46.0, - "step": 29859 - }, - { - "epoch": 4.808728209670276, - "grad_norm": 0.0028028555680066347, - "learning_rate": 0.00019998859741460315, - "loss": 46.0, - "step": 29860 - }, - { - "epoch": 4.808889246749064, - "grad_norm": 0.0026830544229596853, - "learning_rate": 0.00019998859665061253, - "loss": 46.0, - "step": 29861 - }, - { - "epoch": 4.809050283827851, - "grad_norm": 0.00322624109685421, - "learning_rate": 0.00019998859588659634, - "loss": 46.0, - "step": 29862 - }, - { - "epoch": 4.8092113209066385, - "grad_norm": 0.0058891805820167065, - "learning_rate": 0.00019998859512255455, - "loss": 46.0, - "step": 29863 - }, - { - "epoch": 4.809372357985426, - "grad_norm": 0.010692720301449299, - "learning_rate": 0.0001999885943584872, - "loss": 46.0, - "step": 29864 - }, - { - "epoch": 4.809533395064213, - "grad_norm": 0.0008916780934669077, - "learning_rate": 0.00019998859359439424, - "loss": 46.0, - "step": 29865 - }, - { - "epoch": 4.809694432143001, - "grad_norm": 0.0034505638759583235, - "learning_rate": 0.00019998859283027566, - "loss": 46.0, - "step": 29866 - }, - { - "epoch": 4.809855469221788, - "grad_norm": 0.00544386263936758, - "learning_rate": 0.0001999885920661315, - "loss": 46.0, - "step": 29867 - }, - { - "epoch": 4.810016506300576, - "grad_norm": 0.011903595179319382, - "learning_rate": 0.00019998859130196178, - "loss": 46.0, - "step": 29868 - }, - { - "epoch": 4.810177543379363, - "grad_norm": 0.008608249947428703, - "learning_rate": 0.00019998859053776643, - "loss": 46.0, - "step": 29869 - }, - { - "epoch": 4.810338580458151, - "grad_norm": 0.005572275724261999, - "learning_rate": 0.00019998858977354552, - "loss": 46.0, - "step": 29870 - }, - { - "epoch": 4.810499617536938, - "grad_norm": 0.002035081386566162, - "learning_rate": 0.000199988589009299, - "loss": 46.0, - "step": 29871 - }, - { - "epoch": 4.810660654615726, - "grad_norm": 0.012300747446715832, - "learning_rate": 0.0001999885882450269, - "loss": 46.0, - "step": 29872 - }, - { - "epoch": 4.810821691694513, - "grad_norm": 0.005571066867560148, - "learning_rate": 0.0001999885874807292, - "loss": 46.0, - "step": 29873 - }, - { - "epoch": 4.8109827287733005, - "grad_norm": 0.0026481610257178545, - "learning_rate": 0.00019998858671640588, - "loss": 46.0, - "step": 29874 - }, - { - "epoch": 4.811143765852087, - "grad_norm": 0.01004584040492773, - "learning_rate": 0.000199988585952057, - "loss": 46.0, - "step": 29875 - }, - { - "epoch": 4.8113048029308745, - "grad_norm": 0.005056702997535467, - "learning_rate": 0.00019998858518768253, - "loss": 46.0, - "step": 29876 - }, - { - "epoch": 4.811465840009662, - "grad_norm": 0.007630080915987492, - "learning_rate": 0.00019998858442328246, - "loss": 46.0, - "step": 29877 - }, - { - "epoch": 4.811626877088449, - "grad_norm": 0.01838238537311554, - "learning_rate": 0.0001999885836588568, - "loss": 46.0, - "step": 29878 - }, - { - "epoch": 4.811787914167237, - "grad_norm": 0.01748710870742798, - "learning_rate": 0.00019998858289440555, - "loss": 46.0, - "step": 29879 - }, - { - "epoch": 4.811948951246024, - "grad_norm": 0.002137694275006652, - "learning_rate": 0.0001999885821299287, - "loss": 46.0, - "step": 29880 - }, - { - "epoch": 4.812109988324812, - "grad_norm": 0.007220414467155933, - "learning_rate": 0.00019998858136542627, - "loss": 46.0, - "step": 29881 - }, - { - "epoch": 4.812271025403599, - "grad_norm": 0.0009207912953570485, - "learning_rate": 0.00019998858060089823, - "loss": 46.0, - "step": 29882 - }, - { - "epoch": 4.812432062482387, - "grad_norm": 0.0033582462929189205, - "learning_rate": 0.0001999885798363446, - "loss": 46.0, - "step": 29883 - }, - { - "epoch": 4.812593099561174, - "grad_norm": 0.0026050391606986523, - "learning_rate": 0.0001999885790717654, - "loss": 46.0, - "step": 29884 - }, - { - "epoch": 4.8127541366399615, - "grad_norm": 0.009508085437119007, - "learning_rate": 0.0001999885783071606, - "loss": 46.0, - "step": 29885 - }, - { - "epoch": 4.812915173718749, - "grad_norm": 0.003897358663380146, - "learning_rate": 0.0001999885775425302, - "loss": 46.0, - "step": 29886 - }, - { - "epoch": 4.8130762107975364, - "grad_norm": 0.008441883139312267, - "learning_rate": 0.00019998857677787422, - "loss": 46.0, - "step": 29887 - }, - { - "epoch": 4.813237247876324, - "grad_norm": 0.0009966617217287421, - "learning_rate": 0.00019998857601319264, - "loss": 46.0, - "step": 29888 - }, - { - "epoch": 4.813398284955111, - "grad_norm": 0.006653158459812403, - "learning_rate": 0.00019998857524848544, - "loss": 46.0, - "step": 29889 - }, - { - "epoch": 4.813559322033898, - "grad_norm": 0.003060739254578948, - "learning_rate": 0.00019998857448375268, - "loss": 46.0, - "step": 29890 - }, - { - "epoch": 4.813720359112685, - "grad_norm": 0.0009679802460595965, - "learning_rate": 0.00019998857371899433, - "loss": 46.0, - "step": 29891 - }, - { - "epoch": 4.813881396191473, - "grad_norm": 0.0020888003055006266, - "learning_rate": 0.00019998857295421037, - "loss": 46.0, - "step": 29892 - }, - { - "epoch": 4.81404243327026, - "grad_norm": 0.008264970034360886, - "learning_rate": 0.00019998857218940085, - "loss": 46.0, - "step": 29893 - }, - { - "epoch": 4.814203470349048, - "grad_norm": 0.005687571130692959, - "learning_rate": 0.00019998857142456572, - "loss": 46.0, - "step": 29894 - }, - { - "epoch": 4.814364507427835, - "grad_norm": 0.005523642525076866, - "learning_rate": 0.00019998857065970497, - "loss": 46.0, - "step": 29895 - }, - { - "epoch": 4.814525544506623, - "grad_norm": 0.004472050350159407, - "learning_rate": 0.00019998856989481868, - "loss": 46.0, - "step": 29896 - }, - { - "epoch": 4.81468658158541, - "grad_norm": 0.0013236034428700805, - "learning_rate": 0.0001999885691299068, - "loss": 46.0, - "step": 29897 - }, - { - "epoch": 4.8148476186641975, - "grad_norm": 0.005061403848230839, - "learning_rate": 0.00019998856836496928, - "loss": 46.0, - "step": 29898 - }, - { - "epoch": 4.815008655742985, - "grad_norm": 0.0015141351614147425, - "learning_rate": 0.00019998856760000618, - "loss": 46.0, - "step": 29899 - }, - { - "epoch": 4.815169692821772, - "grad_norm": 0.0074175018817186356, - "learning_rate": 0.00019998856683501746, - "loss": 46.0, - "step": 29900 - }, - { - "epoch": 4.81533072990056, - "grad_norm": 0.0039913528598845005, - "learning_rate": 0.0001999885660700032, - "loss": 46.0, - "step": 29901 - }, - { - "epoch": 4.815491766979347, - "grad_norm": 0.0038621709682047367, - "learning_rate": 0.00019998856530496336, - "loss": 46.0, - "step": 29902 - }, - { - "epoch": 4.815652804058134, - "grad_norm": 0.004586962517350912, - "learning_rate": 0.00019998856453989789, - "loss": 46.0, - "step": 29903 - }, - { - "epoch": 4.815813841136922, - "grad_norm": 0.0020976581145077944, - "learning_rate": 0.00019998856377480682, - "loss": 46.0, - "step": 29904 - }, - { - "epoch": 4.815974878215709, - "grad_norm": 0.017379876226186752, - "learning_rate": 0.0001999885630096902, - "loss": 46.0, - "step": 29905 - }, - { - "epoch": 4.816135915294496, - "grad_norm": 0.004537864588201046, - "learning_rate": 0.00019998856224454797, - "loss": 46.0, - "step": 29906 - }, - { - "epoch": 4.816296952373284, - "grad_norm": 0.001742177177220583, - "learning_rate": 0.00019998856147938012, - "loss": 46.0, - "step": 29907 - }, - { - "epoch": 4.816457989452071, - "grad_norm": 0.01234701182693243, - "learning_rate": 0.0001999885607141867, - "loss": 46.0, - "step": 29908 - }, - { - "epoch": 4.816619026530859, - "grad_norm": 0.0011545680463314056, - "learning_rate": 0.0001999885599489677, - "loss": 46.0, - "step": 29909 - }, - { - "epoch": 4.816780063609646, - "grad_norm": 0.011560438200831413, - "learning_rate": 0.0001999885591837231, - "loss": 46.0, - "step": 29910 - }, - { - "epoch": 4.8169411006884335, - "grad_norm": 0.00048677678569220006, - "learning_rate": 0.0001999885584184529, - "loss": 46.0, - "step": 29911 - }, - { - "epoch": 4.817102137767221, - "grad_norm": 0.0067277164198458195, - "learning_rate": 0.00019998855765315711, - "loss": 46.0, - "step": 29912 - }, - { - "epoch": 4.817263174846008, - "grad_norm": 0.0041071283631026745, - "learning_rate": 0.00019998855688783574, - "loss": 46.0, - "step": 29913 - }, - { - "epoch": 4.817424211924796, - "grad_norm": 0.0024829935282468796, - "learning_rate": 0.00019998855612248878, - "loss": 46.0, - "step": 29914 - }, - { - "epoch": 4.817585249003583, - "grad_norm": 0.00551704503595829, - "learning_rate": 0.0001999885553571162, - "loss": 46.0, - "step": 29915 - }, - { - "epoch": 4.817746286082371, - "grad_norm": 0.017143117263913155, - "learning_rate": 0.00019998855459171805, - "loss": 46.0, - "step": 29916 - }, - { - "epoch": 4.817907323161158, - "grad_norm": 0.010960116051137447, - "learning_rate": 0.00019998855382629432, - "loss": 46.0, - "step": 29917 - }, - { - "epoch": 4.818068360239945, - "grad_norm": 0.0022764671593904495, - "learning_rate": 0.00019998855306084496, - "loss": 46.0, - "step": 29918 - }, - { - "epoch": 4.818229397318733, - "grad_norm": 0.008843803778290749, - "learning_rate": 0.00019998855229537004, - "loss": 46.0, - "step": 29919 - }, - { - "epoch": 4.81839043439752, - "grad_norm": 0.005262303631752729, - "learning_rate": 0.0001999885515298695, - "loss": 46.0, - "step": 29920 - }, - { - "epoch": 4.818551471476307, - "grad_norm": 0.0019062585197389126, - "learning_rate": 0.0001999885507643434, - "loss": 46.0, - "step": 29921 - }, - { - "epoch": 4.818712508555095, - "grad_norm": 0.01564912311732769, - "learning_rate": 0.0001999885499987917, - "loss": 46.0, - "step": 29922 - }, - { - "epoch": 4.818873545633882, - "grad_norm": 0.001540483208373189, - "learning_rate": 0.0001999885492332144, - "loss": 46.0, - "step": 29923 - }, - { - "epoch": 4.8190345827126695, - "grad_norm": 0.0037821594160050154, - "learning_rate": 0.0001999885484676115, - "loss": 46.0, - "step": 29924 - }, - { - "epoch": 4.819195619791457, - "grad_norm": 0.0028067342936992645, - "learning_rate": 0.00019998854770198303, - "loss": 46.0, - "step": 29925 - }, - { - "epoch": 4.819356656870244, - "grad_norm": 0.021875912323594093, - "learning_rate": 0.00019998854693632895, - "loss": 46.0, - "step": 29926 - }, - { - "epoch": 4.819517693949032, - "grad_norm": 0.0023984243161976337, - "learning_rate": 0.00019998854617064927, - "loss": 46.0, - "step": 29927 - }, - { - "epoch": 4.819678731027819, - "grad_norm": 0.0029302360489964485, - "learning_rate": 0.00019998854540494404, - "loss": 46.0, - "step": 29928 - }, - { - "epoch": 4.819839768106607, - "grad_norm": 0.0022596800699830055, - "learning_rate": 0.00019998854463921316, - "loss": 46.0, - "step": 29929 - }, - { - "epoch": 4.820000805185394, - "grad_norm": 0.0018841740675270557, - "learning_rate": 0.00019998854387345673, - "loss": 46.0, - "step": 29930 - }, - { - "epoch": 4.820161842264182, - "grad_norm": 0.002793577965348959, - "learning_rate": 0.0001999885431076747, - "loss": 46.0, - "step": 29931 - }, - { - "epoch": 4.820322879342969, - "grad_norm": 0.004283386282622814, - "learning_rate": 0.0001999885423418671, - "loss": 46.0, - "step": 29932 - }, - { - "epoch": 4.820483916421756, - "grad_norm": 0.005727067124098539, - "learning_rate": 0.00019998854157603387, - "loss": 46.0, - "step": 29933 - }, - { - "epoch": 4.820644953500543, - "grad_norm": 0.007798322010785341, - "learning_rate": 0.00019998854081017503, - "loss": 46.0, - "step": 29934 - }, - { - "epoch": 4.820805990579331, - "grad_norm": 0.002008601790294051, - "learning_rate": 0.00019998854004429063, - "loss": 46.0, - "step": 29935 - }, - { - "epoch": 4.820967027658118, - "grad_norm": 0.0019837799482047558, - "learning_rate": 0.00019998853927838064, - "loss": 46.0, - "step": 29936 - }, - { - "epoch": 4.8211280647369055, - "grad_norm": 0.00276742922142148, - "learning_rate": 0.00019998853851244507, - "loss": 46.0, - "step": 29937 - }, - { - "epoch": 4.821289101815693, - "grad_norm": 0.007150628138333559, - "learning_rate": 0.0001999885377464839, - "loss": 46.0, - "step": 29938 - }, - { - "epoch": 4.82145013889448, - "grad_norm": 0.006005027797073126, - "learning_rate": 0.0001999885369804971, - "loss": 46.0, - "step": 29939 - }, - { - "epoch": 4.821611175973268, - "grad_norm": 0.0085380170494318, - "learning_rate": 0.00019998853621448474, - "loss": 46.0, - "step": 29940 - }, - { - "epoch": 4.821772213052055, - "grad_norm": 0.004049663431942463, - "learning_rate": 0.0001999885354484468, - "loss": 46.0, - "step": 29941 - }, - { - "epoch": 4.821933250130843, - "grad_norm": 0.0049303132109344006, - "learning_rate": 0.00019998853468238326, - "loss": 46.0, - "step": 29942 - }, - { - "epoch": 4.82209428720963, - "grad_norm": 0.009177030995488167, - "learning_rate": 0.00019998853391629413, - "loss": 46.0, - "step": 29943 - }, - { - "epoch": 4.822255324288418, - "grad_norm": 0.0006188903353177011, - "learning_rate": 0.00019998853315017937, - "loss": 46.0, - "step": 29944 - }, - { - "epoch": 4.822416361367205, - "grad_norm": 0.006716664414852858, - "learning_rate": 0.00019998853238403905, - "loss": 46.0, - "step": 29945 - }, - { - "epoch": 4.8225773984459925, - "grad_norm": 0.00544294249266386, - "learning_rate": 0.00019998853161787316, - "loss": 46.0, - "step": 29946 - }, - { - "epoch": 4.82273843552478, - "grad_norm": 0.0007383980555459857, - "learning_rate": 0.00019998853085168166, - "loss": 46.0, - "step": 29947 - }, - { - "epoch": 4.8228994726035666, - "grad_norm": 0.0011091517517343163, - "learning_rate": 0.00019998853008546457, - "loss": 46.0, - "step": 29948 - }, - { - "epoch": 4.823060509682354, - "grad_norm": 0.007518439553678036, - "learning_rate": 0.00019998852931922185, - "loss": 46.0, - "step": 29949 - }, - { - "epoch": 4.8232215467611415, - "grad_norm": 0.003972018137574196, - "learning_rate": 0.00019998852855295359, - "loss": 46.0, - "step": 29950 - }, - { - "epoch": 4.823382583839929, - "grad_norm": 0.009267158806324005, - "learning_rate": 0.0001999885277866597, - "loss": 46.0, - "step": 29951 - }, - { - "epoch": 4.823543620918716, - "grad_norm": 0.0057826293632388115, - "learning_rate": 0.00019998852702034022, - "loss": 46.0, - "step": 29952 - }, - { - "epoch": 4.823704657997504, - "grad_norm": 0.008165759034454823, - "learning_rate": 0.0001999885262539952, - "loss": 46.0, - "step": 29953 - }, - { - "epoch": 4.823865695076291, - "grad_norm": 0.006560510955750942, - "learning_rate": 0.00019998852548762453, - "loss": 46.0, - "step": 29954 - }, - { - "epoch": 4.824026732155079, - "grad_norm": 0.006294541526585817, - "learning_rate": 0.0001999885247212283, - "loss": 46.0, - "step": 29955 - }, - { - "epoch": 4.824187769233866, - "grad_norm": 0.015574057586491108, - "learning_rate": 0.00019998852395480645, - "loss": 46.0, - "step": 29956 - }, - { - "epoch": 4.824348806312654, - "grad_norm": 0.0024759573861956596, - "learning_rate": 0.00019998852318835905, - "loss": 46.0, - "step": 29957 - }, - { - "epoch": 4.824509843391441, - "grad_norm": 0.015002661384642124, - "learning_rate": 0.00019998852242188603, - "loss": 46.0, - "step": 29958 - }, - { - "epoch": 4.8246708804702285, - "grad_norm": 0.002515319036319852, - "learning_rate": 0.0001999885216553874, - "loss": 46.0, - "step": 29959 - }, - { - "epoch": 4.824831917549016, - "grad_norm": 0.0063401684165000916, - "learning_rate": 0.00019998852088886322, - "loss": 46.0, - "step": 29960 - }, - { - "epoch": 4.824992954627803, - "grad_norm": 0.01542993076145649, - "learning_rate": 0.00019998852012231342, - "loss": 46.0, - "step": 29961 - }, - { - "epoch": 4.825153991706591, - "grad_norm": 0.002219581510871649, - "learning_rate": 0.00019998851935573803, - "loss": 46.0, - "step": 29962 - }, - { - "epoch": 4.825315028785377, - "grad_norm": 0.004082961939275265, - "learning_rate": 0.00019998851858913708, - "loss": 46.0, - "step": 29963 - }, - { - "epoch": 4.825476065864165, - "grad_norm": 0.011154497042298317, - "learning_rate": 0.0001999885178225105, - "loss": 46.0, - "step": 29964 - }, - { - "epoch": 4.825637102942952, - "grad_norm": 0.004594013094902039, - "learning_rate": 0.00019998851705585832, - "loss": 46.0, - "step": 29965 - }, - { - "epoch": 4.82579814002174, - "grad_norm": 0.0013535520993173122, - "learning_rate": 0.00019998851628918058, - "loss": 46.0, - "step": 29966 - }, - { - "epoch": 4.825959177100527, - "grad_norm": 0.0030616759322583675, - "learning_rate": 0.00019998851552247723, - "loss": 46.0, - "step": 29967 - }, - { - "epoch": 4.826120214179315, - "grad_norm": 0.00381903862580657, - "learning_rate": 0.00019998851475574832, - "loss": 46.0, - "step": 29968 - }, - { - "epoch": 4.826281251258102, - "grad_norm": 0.010005421936511993, - "learning_rate": 0.0001999885139889938, - "loss": 46.0, - "step": 29969 - }, - { - "epoch": 4.82644228833689, - "grad_norm": 0.006665172521024942, - "learning_rate": 0.00019998851322221365, - "loss": 46.0, - "step": 29970 - }, - { - "epoch": 4.826603325415677, - "grad_norm": 0.003194404998794198, - "learning_rate": 0.00019998851245540795, - "loss": 46.0, - "step": 29971 - }, - { - "epoch": 4.8267643624944645, - "grad_norm": 0.018488630652427673, - "learning_rate": 0.00019998851168857664, - "loss": 46.0, - "step": 29972 - }, - { - "epoch": 4.826925399573252, - "grad_norm": 0.003305562539026141, - "learning_rate": 0.00019998851092171976, - "loss": 46.0, - "step": 29973 - }, - { - "epoch": 4.827086436652039, - "grad_norm": 0.00537894805893302, - "learning_rate": 0.00019998851015483725, - "loss": 46.0, - "step": 29974 - }, - { - "epoch": 4.827247473730827, - "grad_norm": 0.0015342716360464692, - "learning_rate": 0.0001999885093879292, - "loss": 46.0, - "step": 29975 - }, - { - "epoch": 4.827408510809613, - "grad_norm": 0.0036027287133038044, - "learning_rate": 0.0001999885086209955, - "loss": 46.0, - "step": 29976 - }, - { - "epoch": 4.827569547888402, - "grad_norm": 0.001188427908346057, - "learning_rate": 0.00019998850785403626, - "loss": 46.0, - "step": 29977 - }, - { - "epoch": 4.827730584967188, - "grad_norm": 0.0009356028749607503, - "learning_rate": 0.0001999885070870514, - "loss": 46.0, - "step": 29978 - }, - { - "epoch": 4.827891622045976, - "grad_norm": 0.015714846551418304, - "learning_rate": 0.00019998850632004094, - "loss": 46.0, - "step": 29979 - }, - { - "epoch": 4.828052659124763, - "grad_norm": 0.008404687978327274, - "learning_rate": 0.0001999885055530049, - "loss": 46.0, - "step": 29980 - }, - { - "epoch": 4.828213696203551, - "grad_norm": 0.004202132113277912, - "learning_rate": 0.00019998850478594328, - "loss": 46.0, - "step": 29981 - }, - { - "epoch": 4.828374733282338, - "grad_norm": 0.0030121146701276302, - "learning_rate": 0.00019998850401885604, - "loss": 46.0, - "step": 29982 - }, - { - "epoch": 4.828535770361126, - "grad_norm": 0.004591251723468304, - "learning_rate": 0.00019998850325174324, - "loss": 46.0, - "step": 29983 - }, - { - "epoch": 4.828696807439913, - "grad_norm": 0.006869460456073284, - "learning_rate": 0.00019998850248460483, - "loss": 46.0, - "step": 29984 - }, - { - "epoch": 4.8288578445187005, - "grad_norm": 0.001769445720128715, - "learning_rate": 0.00019998850171744082, - "loss": 46.0, - "step": 29985 - }, - { - "epoch": 4.829018881597488, - "grad_norm": 0.0017876693746075034, - "learning_rate": 0.00019998850095025126, - "loss": 46.0, - "step": 29986 - }, - { - "epoch": 4.829179918676275, - "grad_norm": 0.0012478568824008107, - "learning_rate": 0.00019998850018303609, - "loss": 46.0, - "step": 29987 - }, - { - "epoch": 4.829340955755063, - "grad_norm": 0.0021852231584489346, - "learning_rate": 0.0001999884994157953, - "loss": 46.0, - "step": 29988 - }, - { - "epoch": 4.82950199283385, - "grad_norm": 0.0010365054477006197, - "learning_rate": 0.00019998849864852895, - "loss": 46.0, - "step": 29989 - }, - { - "epoch": 4.829663029912638, - "grad_norm": 0.0026975921355187893, - "learning_rate": 0.00019998849788123698, - "loss": 46.0, - "step": 29990 - }, - { - "epoch": 4.829824066991424, - "grad_norm": 0.00727562140673399, - "learning_rate": 0.00019998849711391943, - "loss": 46.0, - "step": 29991 - }, - { - "epoch": 4.829985104070213, - "grad_norm": 0.0065017263405025005, - "learning_rate": 0.0001999884963465763, - "loss": 46.0, - "step": 29992 - }, - { - "epoch": 4.830146141148999, - "grad_norm": 0.008813540451228619, - "learning_rate": 0.00019998849557920756, - "loss": 46.0, - "step": 29993 - }, - { - "epoch": 4.830307178227787, - "grad_norm": 0.002243209630250931, - "learning_rate": 0.00019998849481181322, - "loss": 46.0, - "step": 29994 - }, - { - "epoch": 4.830468215306574, - "grad_norm": 0.002013383898884058, - "learning_rate": 0.0001999884940443933, - "loss": 46.0, - "step": 29995 - }, - { - "epoch": 4.830629252385362, - "grad_norm": 0.005970244761556387, - "learning_rate": 0.0001999884932769478, - "loss": 46.0, - "step": 29996 - }, - { - "epoch": 4.830790289464149, - "grad_norm": 0.007251771632581949, - "learning_rate": 0.00019998849250947668, - "loss": 46.0, - "step": 29997 - }, - { - "epoch": 4.8309513265429365, - "grad_norm": 0.00426494050770998, - "learning_rate": 0.00019998849174198, - "loss": 46.0, - "step": 29998 - }, - { - "epoch": 4.831112363621724, - "grad_norm": 0.0014846059493720531, - "learning_rate": 0.00019998849097445771, - "loss": 46.0, - "step": 29999 - }, - { - "epoch": 4.831273400700511, - "grad_norm": 0.006002186331897974, - "learning_rate": 0.00019998849020690985, - "loss": 46.0, - "step": 30000 - }, - { - "epoch": 4.831434437779299, - "grad_norm": 0.0027920566499233246, - "learning_rate": 0.00019998848943933637, - "loss": 46.0, - "step": 30001 - }, - { - "epoch": 4.831595474858086, - "grad_norm": 0.004230475518852472, - "learning_rate": 0.0001999884886717373, - "loss": 46.0, - "step": 30002 - }, - { - "epoch": 4.831756511936874, - "grad_norm": 0.008627585135400295, - "learning_rate": 0.00019998848790411263, - "loss": 46.0, - "step": 30003 - }, - { - "epoch": 4.831917549015661, - "grad_norm": 0.003110413206741214, - "learning_rate": 0.00019998848713646242, - "loss": 46.0, - "step": 30004 - }, - { - "epoch": 4.832078586094449, - "grad_norm": 0.0030101905576884747, - "learning_rate": 0.00019998848636878656, - "loss": 46.0, - "step": 30005 - }, - { - "epoch": 4.832239623173235, - "grad_norm": 0.003456271719187498, - "learning_rate": 0.00019998848560108515, - "loss": 46.0, - "step": 30006 - }, - { - "epoch": 4.8324006602520235, - "grad_norm": 0.0014123270520940423, - "learning_rate": 0.00019998848483335812, - "loss": 46.0, - "step": 30007 - }, - { - "epoch": 4.83256169733081, - "grad_norm": 0.012516341172158718, - "learning_rate": 0.0001999884840656055, - "loss": 46.0, - "step": 30008 - }, - { - "epoch": 4.8327227344095975, - "grad_norm": 0.006561226677149534, - "learning_rate": 0.00019998848329782728, - "loss": 46.0, - "step": 30009 - }, - { - "epoch": 4.832883771488385, - "grad_norm": 0.019363123923540115, - "learning_rate": 0.00019998848253002352, - "loss": 46.0, - "step": 30010 - }, - { - "epoch": 4.8330448085671724, - "grad_norm": 0.020547227934002876, - "learning_rate": 0.0001999884817621941, - "loss": 46.0, - "step": 30011 - }, - { - "epoch": 4.83320584564596, - "grad_norm": 0.0028513846918940544, - "learning_rate": 0.00019998848099433912, - "loss": 46.0, - "step": 30012 - }, - { - "epoch": 4.833366882724747, - "grad_norm": 0.006960470695048571, - "learning_rate": 0.00019998848022645857, - "loss": 46.0, - "step": 30013 - }, - { - "epoch": 4.833527919803535, - "grad_norm": 0.004035444930195808, - "learning_rate": 0.00019998847945855238, - "loss": 46.0, - "step": 30014 - }, - { - "epoch": 4.833688956882322, - "grad_norm": 0.010005823336541653, - "learning_rate": 0.00019998847869062065, - "loss": 46.0, - "step": 30015 - }, - { - "epoch": 4.83384999396111, - "grad_norm": 0.010618772357702255, - "learning_rate": 0.0001999884779226633, - "loss": 46.0, - "step": 30016 - }, - { - "epoch": 4.834011031039897, - "grad_norm": 0.000913581345230341, - "learning_rate": 0.00019998847715468036, - "loss": 46.0, - "step": 30017 - }, - { - "epoch": 4.834172068118685, - "grad_norm": 0.001419433276169002, - "learning_rate": 0.00019998847638667182, - "loss": 46.0, - "step": 30018 - }, - { - "epoch": 4.834333105197472, - "grad_norm": 0.002580949105322361, - "learning_rate": 0.0001999884756186377, - "loss": 46.0, - "step": 30019 - }, - { - "epoch": 4.8344941422762595, - "grad_norm": 0.010713114403188229, - "learning_rate": 0.00019998847485057798, - "loss": 46.0, - "step": 30020 - }, - { - "epoch": 4.834655179355046, - "grad_norm": 0.0009170554694719613, - "learning_rate": 0.00019998847408249267, - "loss": 46.0, - "step": 30021 - }, - { - "epoch": 4.8348162164338335, - "grad_norm": 0.0032446573022753, - "learning_rate": 0.00019998847331438178, - "loss": 46.0, - "step": 30022 - }, - { - "epoch": 4.834977253512621, - "grad_norm": 0.002422585152089596, - "learning_rate": 0.00019998847254624528, - "loss": 46.0, - "step": 30023 - }, - { - "epoch": 4.835138290591408, - "grad_norm": 0.01699347421526909, - "learning_rate": 0.0001999884717780832, - "loss": 46.0, - "step": 30024 - }, - { - "epoch": 4.835299327670196, - "grad_norm": 0.013538056053221226, - "learning_rate": 0.00019998847100989554, - "loss": 46.0, - "step": 30025 - }, - { - "epoch": 4.835460364748983, - "grad_norm": 0.002329036360606551, - "learning_rate": 0.00019998847024168227, - "loss": 46.0, - "step": 30026 - }, - { - "epoch": 4.835621401827771, - "grad_norm": 0.0018605185905471444, - "learning_rate": 0.0001999884694734434, - "loss": 46.0, - "step": 30027 - }, - { - "epoch": 4.835782438906558, - "grad_norm": 0.002526079071685672, - "learning_rate": 0.00019998846870517895, - "loss": 46.0, - "step": 30028 - }, - { - "epoch": 4.835943475985346, - "grad_norm": 0.0045343260280787945, - "learning_rate": 0.0001999884679368889, - "loss": 46.0, - "step": 30029 - }, - { - "epoch": 4.836104513064133, - "grad_norm": 0.004781306255608797, - "learning_rate": 0.00019998846716857326, - "loss": 46.0, - "step": 30030 - }, - { - "epoch": 4.836265550142921, - "grad_norm": 0.007476066239178181, - "learning_rate": 0.00019998846640023206, - "loss": 46.0, - "step": 30031 - }, - { - "epoch": 4.836426587221708, - "grad_norm": 0.019446317106485367, - "learning_rate": 0.00019998846563186522, - "loss": 46.0, - "step": 30032 - }, - { - "epoch": 4.8365876243004955, - "grad_norm": 0.008398410864174366, - "learning_rate": 0.0001999884648634728, - "loss": 46.0, - "step": 30033 - }, - { - "epoch": 4.836748661379283, - "grad_norm": 0.00261465716175735, - "learning_rate": 0.0001999884640950548, - "loss": 46.0, - "step": 30034 - }, - { - "epoch": 4.83690969845807, - "grad_norm": 0.009719041176140308, - "learning_rate": 0.00019998846332661122, - "loss": 46.0, - "step": 30035 - }, - { - "epoch": 4.837070735536857, - "grad_norm": 0.015909885987639427, - "learning_rate": 0.00019998846255814203, - "loss": 46.0, - "step": 30036 - }, - { - "epoch": 4.837231772615644, - "grad_norm": 0.0069299242459237576, - "learning_rate": 0.00019998846178964725, - "loss": 46.0, - "step": 30037 - }, - { - "epoch": 4.837392809694432, - "grad_norm": 0.002662136685103178, - "learning_rate": 0.00019998846102112688, - "loss": 46.0, - "step": 30038 - }, - { - "epoch": 4.837553846773219, - "grad_norm": 0.028122657909989357, - "learning_rate": 0.0001999884602525809, - "loss": 46.0, - "step": 30039 - }, - { - "epoch": 4.837714883852007, - "grad_norm": 0.0028424824122339487, - "learning_rate": 0.00019998845948400936, - "loss": 46.0, - "step": 30040 - }, - { - "epoch": 4.837875920930794, - "grad_norm": 0.006363618187606335, - "learning_rate": 0.00019998845871541224, - "loss": 46.0, - "step": 30041 - }, - { - "epoch": 4.838036958009582, - "grad_norm": 0.004169164225459099, - "learning_rate": 0.0001999884579467895, - "loss": 46.0, - "step": 30042 - }, - { - "epoch": 4.838197995088369, - "grad_norm": 0.002681769896298647, - "learning_rate": 0.00019998845717814117, - "loss": 46.0, - "step": 30043 - }, - { - "epoch": 4.838359032167157, - "grad_norm": 0.0017171020153909922, - "learning_rate": 0.00019998845640946725, - "loss": 46.0, - "step": 30044 - }, - { - "epoch": 4.838520069245944, - "grad_norm": 0.0024586499202996492, - "learning_rate": 0.00019998845564076772, - "loss": 46.0, - "step": 30045 - }, - { - "epoch": 4.8386811063247315, - "grad_norm": 0.0016594165936112404, - "learning_rate": 0.00019998845487204263, - "loss": 46.0, - "step": 30046 - }, - { - "epoch": 4.838842143403519, - "grad_norm": 0.004528514109551907, - "learning_rate": 0.00019998845410329193, - "loss": 46.0, - "step": 30047 - }, - { - "epoch": 4.839003180482306, - "grad_norm": 0.0020554414950311184, - "learning_rate": 0.00019998845333451563, - "loss": 46.0, - "step": 30048 - }, - { - "epoch": 4.839164217561094, - "grad_norm": 0.0022366635967046022, - "learning_rate": 0.00019998845256571376, - "loss": 46.0, - "step": 30049 - }, - { - "epoch": 4.839325254639881, - "grad_norm": 0.004545716568827629, - "learning_rate": 0.0001999884517968863, - "loss": 46.0, - "step": 30050 - }, - { - "epoch": 4.839486291718668, - "grad_norm": 0.0018451712094247341, - "learning_rate": 0.0001999884510280332, - "loss": 46.0, - "step": 30051 - }, - { - "epoch": 4.839647328797455, - "grad_norm": 0.004489523824304342, - "learning_rate": 0.00019998845025915457, - "loss": 46.0, - "step": 30052 - }, - { - "epoch": 4.839808365876243, - "grad_norm": 0.0024036963004618883, - "learning_rate": 0.00019998844949025032, - "loss": 46.0, - "step": 30053 - }, - { - "epoch": 4.83996940295503, - "grad_norm": 0.003908371087163687, - "learning_rate": 0.00019998844872132047, - "loss": 46.0, - "step": 30054 - }, - { - "epoch": 4.840130440033818, - "grad_norm": 0.006355820223689079, - "learning_rate": 0.00019998844795236504, - "loss": 46.0, - "step": 30055 - }, - { - "epoch": 4.840291477112605, - "grad_norm": 0.003235979937016964, - "learning_rate": 0.000199988447183384, - "loss": 46.0, - "step": 30056 - }, - { - "epoch": 4.8404525141913926, - "grad_norm": 0.0024941901210695505, - "learning_rate": 0.0001999884464143774, - "loss": 46.0, - "step": 30057 - }, - { - "epoch": 4.84061355127018, - "grad_norm": 0.01161390170454979, - "learning_rate": 0.00019998844564534518, - "loss": 46.0, - "step": 30058 - }, - { - "epoch": 4.8407745883489675, - "grad_norm": 0.003102522809058428, - "learning_rate": 0.00019998844487628738, - "loss": 46.0, - "step": 30059 - }, - { - "epoch": 4.840935625427755, - "grad_norm": 0.001814471441321075, - "learning_rate": 0.000199988444107204, - "loss": 46.0, - "step": 30060 - }, - { - "epoch": 4.841096662506542, - "grad_norm": 0.006588350981473923, - "learning_rate": 0.000199988443338095, - "loss": 46.0, - "step": 30061 - }, - { - "epoch": 4.84125769958533, - "grad_norm": 0.0020115876104682684, - "learning_rate": 0.00019998844256896044, - "loss": 46.0, - "step": 30062 - }, - { - "epoch": 4.841418736664117, - "grad_norm": 0.005327795632183552, - "learning_rate": 0.00019998844179980029, - "loss": 46.0, - "step": 30063 - }, - { - "epoch": 4.841579773742904, - "grad_norm": 0.008031301200389862, - "learning_rate": 0.0001999884410306145, - "loss": 46.0, - "step": 30064 - }, - { - "epoch": 4.841740810821692, - "grad_norm": 0.0035582315176725388, - "learning_rate": 0.00019998844026140316, - "loss": 46.0, - "step": 30065 - }, - { - "epoch": 4.841901847900479, - "grad_norm": 0.019608670845627785, - "learning_rate": 0.00019998843949216622, - "loss": 46.0, - "step": 30066 - }, - { - "epoch": 4.842062884979266, - "grad_norm": 0.0021964400075376034, - "learning_rate": 0.00019998843872290367, - "loss": 46.0, - "step": 30067 - }, - { - "epoch": 4.842223922058054, - "grad_norm": 0.014563900418579578, - "learning_rate": 0.00019998843795361555, - "loss": 46.0, - "step": 30068 - }, - { - "epoch": 4.842384959136841, - "grad_norm": 0.0036409483291208744, - "learning_rate": 0.00019998843718430182, - "loss": 46.0, - "step": 30069 - }, - { - "epoch": 4.8425459962156285, - "grad_norm": 0.010826571844518185, - "learning_rate": 0.00019998843641496253, - "loss": 46.0, - "step": 30070 - }, - { - "epoch": 4.842707033294416, - "grad_norm": 0.009756935760378838, - "learning_rate": 0.00019998843564559762, - "loss": 46.0, - "step": 30071 - }, - { - "epoch": 4.842868070373203, - "grad_norm": 0.006842344533652067, - "learning_rate": 0.0001999884348762071, - "loss": 46.0, - "step": 30072 - }, - { - "epoch": 4.843029107451991, - "grad_norm": 0.0023168267216533422, - "learning_rate": 0.00019998843410679102, - "loss": 46.0, - "step": 30073 - }, - { - "epoch": 4.843190144530778, - "grad_norm": 0.0026655595283955336, - "learning_rate": 0.00019998843333734936, - "loss": 46.0, - "step": 30074 - }, - { - "epoch": 4.843351181609566, - "grad_norm": 0.002843728754669428, - "learning_rate": 0.00019998843256788208, - "loss": 46.0, - "step": 30075 - }, - { - "epoch": 4.843512218688353, - "grad_norm": 0.021774034947156906, - "learning_rate": 0.0001999884317983892, - "loss": 46.0, - "step": 30076 - }, - { - "epoch": 4.843673255767141, - "grad_norm": 0.00770561071112752, - "learning_rate": 0.00019998843102887075, - "loss": 46.0, - "step": 30077 - }, - { - "epoch": 4.843834292845928, - "grad_norm": 0.0008823887910693884, - "learning_rate": 0.0001999884302593267, - "loss": 46.0, - "step": 30078 - }, - { - "epoch": 4.843995329924715, - "grad_norm": 0.0016032927669584751, - "learning_rate": 0.00019998842948975706, - "loss": 46.0, - "step": 30079 - }, - { - "epoch": 4.844156367003503, - "grad_norm": 0.004396903328597546, - "learning_rate": 0.00019998842872016184, - "loss": 46.0, - "step": 30080 - }, - { - "epoch": 4.84431740408229, - "grad_norm": 0.016406282782554626, - "learning_rate": 0.00019998842795054104, - "loss": 46.0, - "step": 30081 - }, - { - "epoch": 4.844478441161077, - "grad_norm": 0.0028925766237080097, - "learning_rate": 0.00019998842718089462, - "loss": 46.0, - "step": 30082 - }, - { - "epoch": 4.8446394782398645, - "grad_norm": 0.006768523715436459, - "learning_rate": 0.0001999884264112226, - "loss": 46.0, - "step": 30083 - }, - { - "epoch": 4.844800515318652, - "grad_norm": 0.0020876924972981215, - "learning_rate": 0.00019998842564152502, - "loss": 46.0, - "step": 30084 - }, - { - "epoch": 4.844961552397439, - "grad_norm": 0.0031605486292392015, - "learning_rate": 0.00019998842487180181, - "loss": 46.0, - "step": 30085 - }, - { - "epoch": 4.845122589476227, - "grad_norm": 0.0017508193850517273, - "learning_rate": 0.00019998842410205305, - "loss": 46.0, - "step": 30086 - }, - { - "epoch": 4.845283626555014, - "grad_norm": 0.0005341361975297332, - "learning_rate": 0.00019998842333227867, - "loss": 46.0, - "step": 30087 - }, - { - "epoch": 4.845444663633802, - "grad_norm": 0.004532895050942898, - "learning_rate": 0.0001999884225624787, - "loss": 46.0, - "step": 30088 - }, - { - "epoch": 4.845605700712589, - "grad_norm": 0.0018501088488847017, - "learning_rate": 0.00019998842179265314, - "loss": 46.0, - "step": 30089 - }, - { - "epoch": 4.845766737791377, - "grad_norm": 0.005854758433997631, - "learning_rate": 0.00019998842102280197, - "loss": 46.0, - "step": 30090 - }, - { - "epoch": 4.845927774870164, - "grad_norm": 0.004057453945279121, - "learning_rate": 0.00019998842025292524, - "loss": 46.0, - "step": 30091 - }, - { - "epoch": 4.846088811948952, - "grad_norm": 0.008809411898255348, - "learning_rate": 0.00019998841948302293, - "loss": 46.0, - "step": 30092 - }, - { - "epoch": 4.846249849027739, - "grad_norm": 0.004371872637420893, - "learning_rate": 0.000199988418713095, - "loss": 46.0, - "step": 30093 - }, - { - "epoch": 4.846410886106526, - "grad_norm": 0.005941818468272686, - "learning_rate": 0.00019998841794314148, - "loss": 46.0, - "step": 30094 - }, - { - "epoch": 4.846571923185313, - "grad_norm": 0.0021971401292830706, - "learning_rate": 0.00019998841717316237, - "loss": 46.0, - "step": 30095 - }, - { - "epoch": 4.8467329602641005, - "grad_norm": 0.0011607641354203224, - "learning_rate": 0.00019998841640315765, - "loss": 46.0, - "step": 30096 - }, - { - "epoch": 4.846893997342888, - "grad_norm": 0.0034573585726320744, - "learning_rate": 0.00019998841563312735, - "loss": 46.0, - "step": 30097 - }, - { - "epoch": 4.847055034421675, - "grad_norm": 0.0053519150242209435, - "learning_rate": 0.00019998841486307148, - "loss": 46.0, - "step": 30098 - }, - { - "epoch": 4.847216071500463, - "grad_norm": 0.007368908263742924, - "learning_rate": 0.00019998841409299, - "loss": 46.0, - "step": 30099 - }, - { - "epoch": 4.84737710857925, - "grad_norm": 0.008754079230129719, - "learning_rate": 0.00019998841332288293, - "loss": 46.0, - "step": 30100 - }, - { - "epoch": 4.847538145658038, - "grad_norm": 0.0011550977360457182, - "learning_rate": 0.0001999884125527503, - "loss": 46.0, - "step": 30101 - }, - { - "epoch": 4.847699182736825, - "grad_norm": 0.0049674613401293755, - "learning_rate": 0.00019998841178259206, - "loss": 46.0, - "step": 30102 - }, - { - "epoch": 4.847860219815613, - "grad_norm": 0.001759710954502225, - "learning_rate": 0.00019998841101240817, - "loss": 46.0, - "step": 30103 - }, - { - "epoch": 4.8480212568944, - "grad_norm": 0.0023341637570410967, - "learning_rate": 0.00019998841024219876, - "loss": 46.0, - "step": 30104 - }, - { - "epoch": 4.848182293973188, - "grad_norm": 0.0013407682999968529, - "learning_rate": 0.00019998840947196372, - "loss": 46.0, - "step": 30105 - }, - { - "epoch": 4.848343331051975, - "grad_norm": 0.0016165488632395864, - "learning_rate": 0.0001999884087017031, - "loss": 46.0, - "step": 30106 - }, - { - "epoch": 4.8485043681307625, - "grad_norm": 0.010297886095941067, - "learning_rate": 0.0001999884079314169, - "loss": 46.0, - "step": 30107 - }, - { - "epoch": 4.84866540520955, - "grad_norm": 0.004696574993431568, - "learning_rate": 0.00019998840716110508, - "loss": 46.0, - "step": 30108 - }, - { - "epoch": 4.8488264422883365, - "grad_norm": 0.013613591901957989, - "learning_rate": 0.00019998840639076767, - "loss": 46.0, - "step": 30109 - }, - { - "epoch": 4.848987479367124, - "grad_norm": 0.0024449399206787348, - "learning_rate": 0.0001999884056204047, - "loss": 46.0, - "step": 30110 - }, - { - "epoch": 4.849148516445911, - "grad_norm": 0.005865130107849836, - "learning_rate": 0.0001999884048500161, - "loss": 46.0, - "step": 30111 - }, - { - "epoch": 4.849309553524699, - "grad_norm": 0.004899154417216778, - "learning_rate": 0.00019998840407960196, - "loss": 46.0, - "step": 30112 - }, - { - "epoch": 4.849470590603486, - "grad_norm": 0.0012283639516681433, - "learning_rate": 0.00019998840330916217, - "loss": 46.0, - "step": 30113 - }, - { - "epoch": 4.849631627682274, - "grad_norm": 0.0009270452428609133, - "learning_rate": 0.00019998840253869683, - "loss": 46.0, - "step": 30114 - }, - { - "epoch": 4.849792664761061, - "grad_norm": 0.005930482409894466, - "learning_rate": 0.00019998840176820587, - "loss": 46.0, - "step": 30115 - }, - { - "epoch": 4.849953701839849, - "grad_norm": 0.005603109486401081, - "learning_rate": 0.00019998840099768933, - "loss": 46.0, - "step": 30116 - }, - { - "epoch": 4.850114738918636, - "grad_norm": 0.004156806971877813, - "learning_rate": 0.00019998840022714722, - "loss": 46.0, - "step": 30117 - }, - { - "epoch": 4.8502757759974235, - "grad_norm": 0.003789585782214999, - "learning_rate": 0.00019998839945657948, - "loss": 46.0, - "step": 30118 - }, - { - "epoch": 4.850436813076211, - "grad_norm": 0.005212790798395872, - "learning_rate": 0.00019998839868598617, - "loss": 46.0, - "step": 30119 - }, - { - "epoch": 4.8505978501549984, - "grad_norm": 0.007081334479153156, - "learning_rate": 0.00019998839791536728, - "loss": 46.0, - "step": 30120 - }, - { - "epoch": 4.850758887233786, - "grad_norm": 0.007732132915407419, - "learning_rate": 0.00019998839714472277, - "loss": 46.0, - "step": 30121 - }, - { - "epoch": 4.850919924312573, - "grad_norm": 0.005993806757032871, - "learning_rate": 0.00019998839637405268, - "loss": 46.0, - "step": 30122 - }, - { - "epoch": 4.851080961391361, - "grad_norm": 0.008535230532288551, - "learning_rate": 0.000199988395603357, - "loss": 46.0, - "step": 30123 - }, - { - "epoch": 4.851241998470147, - "grad_norm": 0.008338553830981255, - "learning_rate": 0.00019998839483263573, - "loss": 46.0, - "step": 30124 - }, - { - "epoch": 4.851403035548935, - "grad_norm": 0.004016068298369646, - "learning_rate": 0.00019998839406188887, - "loss": 46.0, - "step": 30125 - }, - { - "epoch": 4.851564072627722, - "grad_norm": 0.0038904640823602676, - "learning_rate": 0.0001999883932911164, - "loss": 46.0, - "step": 30126 - }, - { - "epoch": 4.85172510970651, - "grad_norm": 0.00815887562930584, - "learning_rate": 0.00019998839252031837, - "loss": 46.0, - "step": 30127 - }, - { - "epoch": 4.851886146785297, - "grad_norm": 0.008666375651955605, - "learning_rate": 0.0001999883917494947, - "loss": 46.0, - "step": 30128 - }, - { - "epoch": 4.852047183864085, - "grad_norm": 0.008966566994786263, - "learning_rate": 0.00019998839097864547, - "loss": 46.0, - "step": 30129 - }, - { - "epoch": 4.852208220942872, - "grad_norm": 0.001107836258597672, - "learning_rate": 0.00019998839020777065, - "loss": 46.0, - "step": 30130 - }, - { - "epoch": 4.8523692580216595, - "grad_norm": 0.012590380385518074, - "learning_rate": 0.00019998838943687024, - "loss": 46.0, - "step": 30131 - }, - { - "epoch": 4.852530295100447, - "grad_norm": 0.003554648719727993, - "learning_rate": 0.00019998838866594422, - "loss": 46.0, - "step": 30132 - }, - { - "epoch": 4.852691332179234, - "grad_norm": 0.007092298939824104, - "learning_rate": 0.00019998838789499264, - "loss": 46.0, - "step": 30133 - }, - { - "epoch": 4.852852369258022, - "grad_norm": 0.0020292927511036396, - "learning_rate": 0.00019998838712401544, - "loss": 46.0, - "step": 30134 - }, - { - "epoch": 4.853013406336809, - "grad_norm": 0.009691545739769936, - "learning_rate": 0.00019998838635301264, - "loss": 46.0, - "step": 30135 - }, - { - "epoch": 4.853174443415597, - "grad_norm": 0.011362004093825817, - "learning_rate": 0.00019998838558198427, - "loss": 46.0, - "step": 30136 - }, - { - "epoch": 4.853335480494383, - "grad_norm": 0.004084752406924963, - "learning_rate": 0.00019998838481093028, - "loss": 46.0, - "step": 30137 - }, - { - "epoch": 4.853496517573172, - "grad_norm": 0.013240302912890911, - "learning_rate": 0.00019998838403985074, - "loss": 46.0, - "step": 30138 - }, - { - "epoch": 4.853657554651958, - "grad_norm": 0.00785786472260952, - "learning_rate": 0.0001999883832687456, - "loss": 46.0, - "step": 30139 - }, - { - "epoch": 4.853818591730746, - "grad_norm": 0.0011068106396123767, - "learning_rate": 0.00019998838249761484, - "loss": 46.0, - "step": 30140 - }, - { - "epoch": 4.853979628809533, - "grad_norm": 0.001177026773802936, - "learning_rate": 0.0001999883817264585, - "loss": 46.0, - "step": 30141 - }, - { - "epoch": 4.854140665888321, - "grad_norm": 0.0051734596490859985, - "learning_rate": 0.00019998838095527658, - "loss": 46.0, - "step": 30142 - }, - { - "epoch": 4.854301702967108, - "grad_norm": 0.0038889681454747915, - "learning_rate": 0.00019998838018406905, - "loss": 46.0, - "step": 30143 - }, - { - "epoch": 4.8544627400458955, - "grad_norm": 0.0060275462456047535, - "learning_rate": 0.00019998837941283596, - "loss": 46.0, - "step": 30144 - }, - { - "epoch": 4.854623777124683, - "grad_norm": 0.011145524680614471, - "learning_rate": 0.00019998837864157725, - "loss": 46.0, - "step": 30145 - }, - { - "epoch": 4.85478481420347, - "grad_norm": 0.0065670073963701725, - "learning_rate": 0.00019998837787029295, - "loss": 46.0, - "step": 30146 - }, - { - "epoch": 4.854945851282258, - "grad_norm": 0.0009768144227564335, - "learning_rate": 0.00019998837709898307, - "loss": 46.0, - "step": 30147 - }, - { - "epoch": 4.855106888361045, - "grad_norm": 0.003639065660536289, - "learning_rate": 0.0001999883763276476, - "loss": 46.0, - "step": 30148 - }, - { - "epoch": 4.855267925439833, - "grad_norm": 0.015276482328772545, - "learning_rate": 0.00019998837555628652, - "loss": 46.0, - "step": 30149 - }, - { - "epoch": 4.85542896251862, - "grad_norm": 0.013655618764460087, - "learning_rate": 0.00019998837478489985, - "loss": 46.0, - "step": 30150 - }, - { - "epoch": 4.855589999597408, - "grad_norm": 0.00477282889187336, - "learning_rate": 0.0001999883740134876, - "loss": 46.0, - "step": 30151 - }, - { - "epoch": 4.855751036676194, - "grad_norm": 0.001977842766791582, - "learning_rate": 0.00019998837324204972, - "loss": 46.0, - "step": 30152 - }, - { - "epoch": 4.855912073754983, - "grad_norm": 0.0033407658338546753, - "learning_rate": 0.0001999883724705863, - "loss": 46.0, - "step": 30153 - }, - { - "epoch": 4.856073110833769, - "grad_norm": 0.0027965158224105835, - "learning_rate": 0.00019998837169909727, - "loss": 46.0, - "step": 30154 - }, - { - "epoch": 4.856234147912557, - "grad_norm": 0.0026018614880740643, - "learning_rate": 0.00019998837092758266, - "loss": 46.0, - "step": 30155 - }, - { - "epoch": 4.856395184991344, - "grad_norm": 0.001808958244509995, - "learning_rate": 0.00019998837015604244, - "loss": 46.0, - "step": 30156 - }, - { - "epoch": 4.8565562220701315, - "grad_norm": 0.023240918293595314, - "learning_rate": 0.00019998836938447663, - "loss": 46.0, - "step": 30157 - }, - { - "epoch": 4.856717259148919, - "grad_norm": 0.0030549464281648397, - "learning_rate": 0.00019998836861288523, - "loss": 46.0, - "step": 30158 - }, - { - "epoch": 4.856878296227706, - "grad_norm": 0.00876065343618393, - "learning_rate": 0.00019998836784126825, - "loss": 46.0, - "step": 30159 - }, - { - "epoch": 4.857039333306494, - "grad_norm": 0.0046285721473395824, - "learning_rate": 0.00019998836706962566, - "loss": 46.0, - "step": 30160 - }, - { - "epoch": 4.857200370385281, - "grad_norm": 0.005354483611881733, - "learning_rate": 0.0001999883662979575, - "loss": 46.0, - "step": 30161 - }, - { - "epoch": 4.857361407464069, - "grad_norm": 0.002823406597599387, - "learning_rate": 0.00019998836552626373, - "loss": 46.0, - "step": 30162 - }, - { - "epoch": 4.857522444542856, - "grad_norm": 0.006698140408843756, - "learning_rate": 0.00019998836475454437, - "loss": 46.0, - "step": 30163 - }, - { - "epoch": 4.857683481621644, - "grad_norm": 0.0060594757087528706, - "learning_rate": 0.0001999883639827994, - "loss": 46.0, - "step": 30164 - }, - { - "epoch": 4.857844518700431, - "grad_norm": 0.0035164153669029474, - "learning_rate": 0.00019998836321102886, - "loss": 46.0, - "step": 30165 - }, - { - "epoch": 4.8580055557792186, - "grad_norm": 0.0018185873050242662, - "learning_rate": 0.00019998836243923274, - "loss": 46.0, - "step": 30166 - }, - { - "epoch": 4.858166592858005, - "grad_norm": 0.0027507031336426735, - "learning_rate": 0.000199988361667411, - "loss": 46.0, - "step": 30167 - }, - { - "epoch": 4.858327629936793, - "grad_norm": 0.0015631286660209298, - "learning_rate": 0.0001999883608955637, - "loss": 46.0, - "step": 30168 - }, - { - "epoch": 4.85848866701558, - "grad_norm": 0.0015366551233455539, - "learning_rate": 0.0001999883601236908, - "loss": 46.0, - "step": 30169 - }, - { - "epoch": 4.8586497040943675, - "grad_norm": 0.00434156646952033, - "learning_rate": 0.00019998835935179228, - "loss": 46.0, - "step": 30170 - }, - { - "epoch": 4.858810741173155, - "grad_norm": 0.02606615424156189, - "learning_rate": 0.00019998835857986817, - "loss": 46.0, - "step": 30171 - }, - { - "epoch": 4.858971778251942, - "grad_norm": 0.0031105487141758204, - "learning_rate": 0.0001999883578079185, - "loss": 46.0, - "step": 30172 - }, - { - "epoch": 4.85913281533073, - "grad_norm": 0.004212117288261652, - "learning_rate": 0.00019998835703594322, - "loss": 46.0, - "step": 30173 - }, - { - "epoch": 4.859293852409517, - "grad_norm": 0.004099614452570677, - "learning_rate": 0.00019998835626394238, - "loss": 46.0, - "step": 30174 - }, - { - "epoch": 4.859454889488305, - "grad_norm": 0.011553723365068436, - "learning_rate": 0.0001999883554919159, - "loss": 46.0, - "step": 30175 - }, - { - "epoch": 4.859615926567092, - "grad_norm": 0.00280005089007318, - "learning_rate": 0.00019998835471986384, - "loss": 46.0, - "step": 30176 - }, - { - "epoch": 4.85977696364588, - "grad_norm": 0.003370868042111397, - "learning_rate": 0.0001999883539477862, - "loss": 46.0, - "step": 30177 - }, - { - "epoch": 4.859938000724667, - "grad_norm": 0.006544739007949829, - "learning_rate": 0.00019998835317568296, - "loss": 46.0, - "step": 30178 - }, - { - "epoch": 4.8600990378034545, - "grad_norm": 0.0028320089913904667, - "learning_rate": 0.00019998835240355413, - "loss": 46.0, - "step": 30179 - }, - { - "epoch": 4.860260074882242, - "grad_norm": 0.006709593813866377, - "learning_rate": 0.0001999883516313997, - "loss": 46.0, - "step": 30180 - }, - { - "epoch": 4.860421111961029, - "grad_norm": 0.009623527526855469, - "learning_rate": 0.0001999883508592197, - "loss": 46.0, - "step": 30181 - }, - { - "epoch": 4.860582149039816, - "grad_norm": 0.011304141022264957, - "learning_rate": 0.0001999883500870141, - "loss": 46.0, - "step": 30182 - }, - { - "epoch": 4.8607431861186035, - "grad_norm": 0.0010971177835017443, - "learning_rate": 0.0001999883493147829, - "loss": 46.0, - "step": 30183 - }, - { - "epoch": 4.860904223197391, - "grad_norm": 0.0016037834575399756, - "learning_rate": 0.0001999883485425261, - "loss": 46.0, - "step": 30184 - }, - { - "epoch": 4.861065260276178, - "grad_norm": 0.004147219937294722, - "learning_rate": 0.00019998834777024374, - "loss": 46.0, - "step": 30185 - }, - { - "epoch": 4.861226297354966, - "grad_norm": 0.00477145379409194, - "learning_rate": 0.00019998834699793577, - "loss": 46.0, - "step": 30186 - }, - { - "epoch": 4.861387334433753, - "grad_norm": 0.005681208334863186, - "learning_rate": 0.0001999883462256022, - "loss": 46.0, - "step": 30187 - }, - { - "epoch": 4.861548371512541, - "grad_norm": 0.011490699835121632, - "learning_rate": 0.00019998834545324306, - "loss": 46.0, - "step": 30188 - }, - { - "epoch": 4.861709408591328, - "grad_norm": 0.00845568347722292, - "learning_rate": 0.0001999883446808583, - "loss": 46.0, - "step": 30189 - }, - { - "epoch": 4.861870445670116, - "grad_norm": 0.005368630401790142, - "learning_rate": 0.00019998834390844795, - "loss": 46.0, - "step": 30190 - }, - { - "epoch": 4.862031482748903, - "grad_norm": 0.004007462877780199, - "learning_rate": 0.00019998834313601202, - "loss": 46.0, - "step": 30191 - }, - { - "epoch": 4.8621925198276905, - "grad_norm": 0.004882803652435541, - "learning_rate": 0.0001999883423635505, - "loss": 46.0, - "step": 30192 - }, - { - "epoch": 4.862353556906478, - "grad_norm": 0.004058310762047768, - "learning_rate": 0.0001999883415910634, - "loss": 46.0, - "step": 30193 - }, - { - "epoch": 4.862514593985265, - "grad_norm": 0.01806306280195713, - "learning_rate": 0.0001999883408185507, - "loss": 46.0, - "step": 30194 - }, - { - "epoch": 4.862675631064053, - "grad_norm": 0.004027595277875662, - "learning_rate": 0.00019998834004601238, - "loss": 46.0, - "step": 30195 - }, - { - "epoch": 4.86283666814284, - "grad_norm": 0.012487724423408508, - "learning_rate": 0.0001999883392734485, - "loss": 46.0, - "step": 30196 - }, - { - "epoch": 4.862997705221627, - "grad_norm": 0.01029012817889452, - "learning_rate": 0.000199988338500859, - "loss": 46.0, - "step": 30197 - }, - { - "epoch": 4.863158742300414, - "grad_norm": 0.002836928702890873, - "learning_rate": 0.00019998833772824393, - "loss": 46.0, - "step": 30198 - }, - { - "epoch": 4.863319779379202, - "grad_norm": 0.0032990477047860622, - "learning_rate": 0.00019998833695560327, - "loss": 46.0, - "step": 30199 - }, - { - "epoch": 4.863480816457989, - "grad_norm": 0.007273085415363312, - "learning_rate": 0.00019998833618293702, - "loss": 46.0, - "step": 30200 - }, - { - "epoch": 4.863641853536777, - "grad_norm": 0.0014787502586841583, - "learning_rate": 0.00019998833541024516, - "loss": 46.0, - "step": 30201 - }, - { - "epoch": 4.863802890615564, - "grad_norm": 0.00284698442555964, - "learning_rate": 0.00019998833463752774, - "loss": 46.0, - "step": 30202 - }, - { - "epoch": 4.863963927694352, - "grad_norm": 0.01878676563501358, - "learning_rate": 0.00019998833386478468, - "loss": 46.0, - "step": 30203 - }, - { - "epoch": 4.864124964773139, - "grad_norm": 0.0051757642067968845, - "learning_rate": 0.00019998833309201608, - "loss": 46.0, - "step": 30204 - }, - { - "epoch": 4.8642860018519265, - "grad_norm": 0.0012672676239162683, - "learning_rate": 0.00019998833231922185, - "loss": 46.0, - "step": 30205 - }, - { - "epoch": 4.864447038930714, - "grad_norm": 0.007116463501006365, - "learning_rate": 0.00019998833154640205, - "loss": 46.0, - "step": 30206 - }, - { - "epoch": 4.864608076009501, - "grad_norm": 0.000850141397677362, - "learning_rate": 0.00019998833077355664, - "loss": 46.0, - "step": 30207 - }, - { - "epoch": 4.864769113088289, - "grad_norm": 0.001165292109362781, - "learning_rate": 0.00019998833000068564, - "loss": 46.0, - "step": 30208 - }, - { - "epoch": 4.864930150167076, - "grad_norm": 0.001940658432431519, - "learning_rate": 0.00019998832922778906, - "loss": 46.0, - "step": 30209 - }, - { - "epoch": 4.865091187245863, - "grad_norm": 0.0036168291699141264, - "learning_rate": 0.00019998832845486688, - "loss": 46.0, - "step": 30210 - }, - { - "epoch": 4.865252224324651, - "grad_norm": 0.0011742841452360153, - "learning_rate": 0.00019998832768191913, - "loss": 46.0, - "step": 30211 - }, - { - "epoch": 4.865413261403438, - "grad_norm": 0.001702725188806653, - "learning_rate": 0.00019998832690894575, - "loss": 46.0, - "step": 30212 - }, - { - "epoch": 4.865574298482225, - "grad_norm": 0.0012292437022551894, - "learning_rate": 0.0001999883261359468, - "loss": 46.0, - "step": 30213 - }, - { - "epoch": 4.865735335561013, - "grad_norm": 0.0007949745049700141, - "learning_rate": 0.00019998832536292227, - "loss": 46.0, - "step": 30214 - }, - { - "epoch": 4.8658963726398, - "grad_norm": 0.007670976221561432, - "learning_rate": 0.0001999883245898721, - "loss": 46.0, - "step": 30215 - }, - { - "epoch": 4.866057409718588, - "grad_norm": 0.004126108251512051, - "learning_rate": 0.00019998832381679639, - "loss": 46.0, - "step": 30216 - }, - { - "epoch": 4.866218446797375, - "grad_norm": 0.0031773000955581665, - "learning_rate": 0.00019998832304369508, - "loss": 46.0, - "step": 30217 - }, - { - "epoch": 4.8663794838761625, - "grad_norm": 0.003949275240302086, - "learning_rate": 0.00019998832227056815, - "loss": 46.0, - "step": 30218 - }, - { - "epoch": 4.86654052095495, - "grad_norm": 0.004697037860751152, - "learning_rate": 0.00019998832149741567, - "loss": 46.0, - "step": 30219 - }, - { - "epoch": 4.866701558033737, - "grad_norm": 0.0032105708960443735, - "learning_rate": 0.00019998832072423757, - "loss": 46.0, - "step": 30220 - }, - { - "epoch": 4.866862595112525, - "grad_norm": 0.0019673542119562626, - "learning_rate": 0.00019998831995103386, - "loss": 46.0, - "step": 30221 - }, - { - "epoch": 4.867023632191312, - "grad_norm": 0.003639364615082741, - "learning_rate": 0.0001999883191778046, - "loss": 46.0, - "step": 30222 - }, - { - "epoch": 4.8671846692701, - "grad_norm": 0.0025361175648868084, - "learning_rate": 0.00019998831840454973, - "loss": 46.0, - "step": 30223 - }, - { - "epoch": 4.867345706348887, - "grad_norm": 0.0024909640196710825, - "learning_rate": 0.00019998831763126925, - "loss": 46.0, - "step": 30224 - }, - { - "epoch": 4.867506743427674, - "grad_norm": 0.0006870125071145594, - "learning_rate": 0.00019998831685796322, - "loss": 46.0, - "step": 30225 - }, - { - "epoch": 4.867667780506462, - "grad_norm": 0.002211096463724971, - "learning_rate": 0.00019998831608463154, - "loss": 46.0, - "step": 30226 - }, - { - "epoch": 4.867828817585249, - "grad_norm": 0.016503984108567238, - "learning_rate": 0.0001999883153112743, - "loss": 46.0, - "step": 30227 - }, - { - "epoch": 4.867989854664036, - "grad_norm": 0.0012430896749719977, - "learning_rate": 0.00019998831453789148, - "loss": 46.0, - "step": 30228 - }, - { - "epoch": 4.868150891742824, - "grad_norm": 0.007462533190846443, - "learning_rate": 0.00019998831376448307, - "loss": 46.0, - "step": 30229 - }, - { - "epoch": 4.868311928821611, - "grad_norm": 0.01666617952287197, - "learning_rate": 0.00019998831299104905, - "loss": 46.0, - "step": 30230 - }, - { - "epoch": 4.8684729659003985, - "grad_norm": 0.002064245520159602, - "learning_rate": 0.00019998831221758944, - "loss": 46.0, - "step": 30231 - }, - { - "epoch": 4.868634002979186, - "grad_norm": 0.005984019488096237, - "learning_rate": 0.00019998831144410427, - "loss": 46.0, - "step": 30232 - }, - { - "epoch": 4.868795040057973, - "grad_norm": 0.006874515675008297, - "learning_rate": 0.00019998831067059345, - "loss": 46.0, - "step": 30233 - }, - { - "epoch": 4.868956077136761, - "grad_norm": 0.0031324748415499926, - "learning_rate": 0.00019998830989705708, - "loss": 46.0, - "step": 30234 - }, - { - "epoch": 4.869117114215548, - "grad_norm": 0.009701925329864025, - "learning_rate": 0.00019998830912349512, - "loss": 46.0, - "step": 30235 - }, - { - "epoch": 4.869278151294336, - "grad_norm": 0.0017760891932994127, - "learning_rate": 0.00019998830834990755, - "loss": 46.0, - "step": 30236 - }, - { - "epoch": 4.869439188373123, - "grad_norm": 0.004007274284958839, - "learning_rate": 0.0001999883075762944, - "loss": 46.0, - "step": 30237 - }, - { - "epoch": 4.869600225451911, - "grad_norm": 0.008442658931016922, - "learning_rate": 0.00019998830680265564, - "loss": 46.0, - "step": 30238 - }, - { - "epoch": 4.869761262530698, - "grad_norm": 0.005054876673966646, - "learning_rate": 0.00019998830602899128, - "loss": 46.0, - "step": 30239 - }, - { - "epoch": 4.869922299609485, - "grad_norm": 0.004223821219056845, - "learning_rate": 0.00019998830525530138, - "loss": 46.0, - "step": 30240 - }, - { - "epoch": 4.870083336688272, - "grad_norm": 0.017017167061567307, - "learning_rate": 0.00019998830448158584, - "loss": 46.0, - "step": 30241 - }, - { - "epoch": 4.8702443737670595, - "grad_norm": 0.0032326721120625734, - "learning_rate": 0.00019998830370784472, - "loss": 46.0, - "step": 30242 - }, - { - "epoch": 4.870405410845847, - "grad_norm": 0.005026225931942463, - "learning_rate": 0.00019998830293407803, - "loss": 46.0, - "step": 30243 - }, - { - "epoch": 4.8705664479246344, - "grad_norm": 0.0020192961674183607, - "learning_rate": 0.0001999883021602857, - "loss": 46.0, - "step": 30244 - }, - { - "epoch": 4.870727485003422, - "grad_norm": 0.0018568228697404265, - "learning_rate": 0.0001999883013864678, - "loss": 46.0, - "step": 30245 - }, - { - "epoch": 4.870888522082209, - "grad_norm": 0.0020218132995069027, - "learning_rate": 0.00019998830061262432, - "loss": 46.0, - "step": 30246 - }, - { - "epoch": 4.871049559160997, - "grad_norm": 0.0018797758966684341, - "learning_rate": 0.00019998829983875526, - "loss": 46.0, - "step": 30247 - }, - { - "epoch": 4.871210596239784, - "grad_norm": 0.008374815806746483, - "learning_rate": 0.0001999882990648606, - "loss": 46.0, - "step": 30248 - }, - { - "epoch": 4.871371633318572, - "grad_norm": 0.0010333530372008681, - "learning_rate": 0.00019998829829094033, - "loss": 46.0, - "step": 30249 - }, - { - "epoch": 4.871532670397359, - "grad_norm": 0.0016382524045184255, - "learning_rate": 0.00019998829751699448, - "loss": 46.0, - "step": 30250 - }, - { - "epoch": 4.871693707476147, - "grad_norm": 0.006623008754104376, - "learning_rate": 0.00019998829674302301, - "loss": 46.0, - "step": 30251 - }, - { - "epoch": 4.871854744554934, - "grad_norm": 0.004867917858064175, - "learning_rate": 0.000199988295969026, - "loss": 46.0, - "step": 30252 - }, - { - "epoch": 4.8720157816337215, - "grad_norm": 0.010020680725574493, - "learning_rate": 0.00019998829519500338, - "loss": 46.0, - "step": 30253 - }, - { - "epoch": 4.872176818712509, - "grad_norm": 0.0018205250380560756, - "learning_rate": 0.00019998829442095516, - "loss": 46.0, - "step": 30254 - }, - { - "epoch": 4.8723378557912955, - "grad_norm": 0.0014964076690375805, - "learning_rate": 0.00019998829364688134, - "loss": 46.0, - "step": 30255 - }, - { - "epoch": 4.872498892870083, - "grad_norm": 0.006149259861558676, - "learning_rate": 0.00019998829287278194, - "loss": 46.0, - "step": 30256 - }, - { - "epoch": 4.87265992994887, - "grad_norm": 0.002652009017765522, - "learning_rate": 0.00019998829209865693, - "loss": 46.0, - "step": 30257 - }, - { - "epoch": 4.872820967027658, - "grad_norm": 0.007546828594058752, - "learning_rate": 0.00019998829132450636, - "loss": 46.0, - "step": 30258 - }, - { - "epoch": 4.872982004106445, - "grad_norm": 0.004417920019477606, - "learning_rate": 0.00019998829055033017, - "loss": 46.0, - "step": 30259 - }, - { - "epoch": 4.873143041185233, - "grad_norm": 0.0015945524210110307, - "learning_rate": 0.0001999882897761284, - "loss": 46.0, - "step": 30260 - }, - { - "epoch": 4.87330407826402, - "grad_norm": 0.0023466814309358597, - "learning_rate": 0.00019998828900190106, - "loss": 46.0, - "step": 30261 - }, - { - "epoch": 4.873465115342808, - "grad_norm": 0.012706050649285316, - "learning_rate": 0.00019998828822764809, - "loss": 46.0, - "step": 30262 - }, - { - "epoch": 4.873626152421595, - "grad_norm": 0.0036380223464220762, - "learning_rate": 0.00019998828745336955, - "loss": 46.0, - "step": 30263 - }, - { - "epoch": 4.873787189500383, - "grad_norm": 0.002249692566692829, - "learning_rate": 0.0001999882866790654, - "loss": 46.0, - "step": 30264 - }, - { - "epoch": 4.87394822657917, - "grad_norm": 0.005754155106842518, - "learning_rate": 0.0001999882859047357, - "loss": 46.0, - "step": 30265 - }, - { - "epoch": 4.8741092636579575, - "grad_norm": 0.0035027097910642624, - "learning_rate": 0.00019998828513038034, - "loss": 46.0, - "step": 30266 - }, - { - "epoch": 4.874270300736745, - "grad_norm": 0.00200637592934072, - "learning_rate": 0.00019998828435599942, - "loss": 46.0, - "step": 30267 - }, - { - "epoch": 4.874431337815532, - "grad_norm": 0.0037039672024548054, - "learning_rate": 0.00019998828358159295, - "loss": 46.0, - "step": 30268 - }, - { - "epoch": 4.87459237489432, - "grad_norm": 0.018483847379684448, - "learning_rate": 0.00019998828280716084, - "loss": 46.0, - "step": 30269 - }, - { - "epoch": 4.874753411973106, - "grad_norm": 0.009647499769926071, - "learning_rate": 0.00019998828203270314, - "loss": 46.0, - "step": 30270 - }, - { - "epoch": 4.874914449051894, - "grad_norm": 0.00447214487940073, - "learning_rate": 0.00019998828125821988, - "loss": 46.0, - "step": 30271 - }, - { - "epoch": 4.875075486130681, - "grad_norm": 0.0038623660802841187, - "learning_rate": 0.00019998828048371097, - "loss": 46.0, - "step": 30272 - }, - { - "epoch": 4.875236523209469, - "grad_norm": 0.0018283635145053267, - "learning_rate": 0.00019998827970917654, - "loss": 46.0, - "step": 30273 - }, - { - "epoch": 4.875397560288256, - "grad_norm": 0.003693078877404332, - "learning_rate": 0.00019998827893461646, - "loss": 46.0, - "step": 30274 - }, - { - "epoch": 4.875558597367044, - "grad_norm": 0.014787347987294197, - "learning_rate": 0.00019998827816003083, - "loss": 46.0, - "step": 30275 - }, - { - "epoch": 4.875719634445831, - "grad_norm": 0.012749990448355675, - "learning_rate": 0.00019998827738541958, - "loss": 46.0, - "step": 30276 - }, - { - "epoch": 4.875880671524619, - "grad_norm": 0.0014343708753585815, - "learning_rate": 0.00019998827661078274, - "loss": 46.0, - "step": 30277 - }, - { - "epoch": 4.876041708603406, - "grad_norm": 0.0012202727375552058, - "learning_rate": 0.0001999882758361203, - "loss": 46.0, - "step": 30278 - }, - { - "epoch": 4.8762027456821935, - "grad_norm": 0.0057300073094666, - "learning_rate": 0.00019998827506143233, - "loss": 46.0, - "step": 30279 - }, - { - "epoch": 4.876363782760981, - "grad_norm": 0.0021433464717119932, - "learning_rate": 0.0001999882742867187, - "loss": 46.0, - "step": 30280 - }, - { - "epoch": 4.876524819839768, - "grad_norm": 0.0040053422562778, - "learning_rate": 0.00019998827351197951, - "loss": 46.0, - "step": 30281 - }, - { - "epoch": 4.876685856918556, - "grad_norm": 0.006644417066127062, - "learning_rate": 0.00019998827273721469, - "loss": 46.0, - "step": 30282 - }, - { - "epoch": 4.876846893997342, - "grad_norm": 0.007489868439733982, - "learning_rate": 0.00019998827196242433, - "loss": 46.0, - "step": 30283 - }, - { - "epoch": 4.877007931076131, - "grad_norm": 0.0031959705520421267, - "learning_rate": 0.00019998827118760835, - "loss": 46.0, - "step": 30284 - }, - { - "epoch": 4.877168968154917, - "grad_norm": 0.009030255489051342, - "learning_rate": 0.0001999882704127668, - "loss": 46.0, - "step": 30285 - }, - { - "epoch": 4.877330005233705, - "grad_norm": 0.006461299955844879, - "learning_rate": 0.00019998826963789964, - "loss": 46.0, - "step": 30286 - }, - { - "epoch": 4.877491042312492, - "grad_norm": 0.00523214740678668, - "learning_rate": 0.00019998826886300687, - "loss": 46.0, - "step": 30287 - }, - { - "epoch": 4.87765207939128, - "grad_norm": 0.008426429703831673, - "learning_rate": 0.00019998826808808855, - "loss": 46.0, - "step": 30288 - }, - { - "epoch": 4.877813116470067, - "grad_norm": 0.016680296510457993, - "learning_rate": 0.00019998826731314458, - "loss": 46.0, - "step": 30289 - }, - { - "epoch": 4.8779741535488546, - "grad_norm": 0.008195668458938599, - "learning_rate": 0.00019998826653817506, - "loss": 46.0, - "step": 30290 - }, - { - "epoch": 4.878135190627642, - "grad_norm": 0.005405410658568144, - "learning_rate": 0.00019998826576317995, - "loss": 46.0, - "step": 30291 - }, - { - "epoch": 4.8782962277064295, - "grad_norm": 0.006686722859740257, - "learning_rate": 0.00019998826498815925, - "loss": 46.0, - "step": 30292 - }, - { - "epoch": 4.878457264785217, - "grad_norm": 0.0007601392571814358, - "learning_rate": 0.00019998826421311293, - "loss": 46.0, - "step": 30293 - }, - { - "epoch": 4.878618301864004, - "grad_norm": 0.017041867598891258, - "learning_rate": 0.00019998826343804106, - "loss": 46.0, - "step": 30294 - }, - { - "epoch": 4.878779338942792, - "grad_norm": 0.005406068172305822, - "learning_rate": 0.00019998826266294354, - "loss": 46.0, - "step": 30295 - }, - { - "epoch": 4.878940376021579, - "grad_norm": 0.0026037776842713356, - "learning_rate": 0.00019998826188782047, - "loss": 46.0, - "step": 30296 - }, - { - "epoch": 4.879101413100367, - "grad_norm": 0.005247111432254314, - "learning_rate": 0.0001999882611126718, - "loss": 46.0, - "step": 30297 - }, - { - "epoch": 4.879262450179153, - "grad_norm": 0.01520408596843481, - "learning_rate": 0.00019998826033749756, - "loss": 46.0, - "step": 30298 - }, - { - "epoch": 4.879423487257942, - "grad_norm": 0.003051883541047573, - "learning_rate": 0.0001999882595622977, - "loss": 46.0, - "step": 30299 - }, - { - "epoch": 4.879584524336728, - "grad_norm": 0.008032537996768951, - "learning_rate": 0.00019998825878707227, - "loss": 46.0, - "step": 30300 - }, - { - "epoch": 4.879745561415516, - "grad_norm": 0.010447860695421696, - "learning_rate": 0.00019998825801182123, - "loss": 46.0, - "step": 30301 - }, - { - "epoch": 4.879906598494303, - "grad_norm": 0.006183580029755831, - "learning_rate": 0.00019998825723654458, - "loss": 46.0, - "step": 30302 - }, - { - "epoch": 4.8800676355730905, - "grad_norm": 0.0260601919144392, - "learning_rate": 0.00019998825646124236, - "loss": 46.0, - "step": 30303 - }, - { - "epoch": 4.880228672651878, - "grad_norm": 0.004913515876978636, - "learning_rate": 0.00019998825568591456, - "loss": 46.0, - "step": 30304 - }, - { - "epoch": 4.880389709730665, - "grad_norm": 0.01729273796081543, - "learning_rate": 0.00019998825491056115, - "loss": 46.0, - "step": 30305 - }, - { - "epoch": 4.880550746809453, - "grad_norm": 0.0011975648812949657, - "learning_rate": 0.00019998825413518217, - "loss": 46.0, - "step": 30306 - }, - { - "epoch": 4.88071178388824, - "grad_norm": 0.0027460732962936163, - "learning_rate": 0.00019998825335977759, - "loss": 46.0, - "step": 30307 - }, - { - "epoch": 4.880872820967028, - "grad_norm": 0.004315624944865704, - "learning_rate": 0.00019998825258434738, - "loss": 46.0, - "step": 30308 - }, - { - "epoch": 4.881033858045815, - "grad_norm": 0.007370648439973593, - "learning_rate": 0.00019998825180889162, - "loss": 46.0, - "step": 30309 - }, - { - "epoch": 4.881194895124603, - "grad_norm": 0.003338668728247285, - "learning_rate": 0.00019998825103341027, - "loss": 46.0, - "step": 30310 - }, - { - "epoch": 4.88135593220339, - "grad_norm": 0.0010761988814920187, - "learning_rate": 0.0001999882502579033, - "loss": 46.0, - "step": 30311 - }, - { - "epoch": 4.881516969282178, - "grad_norm": 0.0032709992956370115, - "learning_rate": 0.00019998824948237073, - "loss": 46.0, - "step": 30312 - }, - { - "epoch": 4.881678006360964, - "grad_norm": 0.002058805199339986, - "learning_rate": 0.00019998824870681261, - "loss": 46.0, - "step": 30313 - }, - { - "epoch": 4.8818390434397525, - "grad_norm": 0.0021465301979333162, - "learning_rate": 0.0001999882479312289, - "loss": 46.0, - "step": 30314 - }, - { - "epoch": 4.882000080518539, - "grad_norm": 0.006435915362089872, - "learning_rate": 0.00019998824715561955, - "loss": 46.0, - "step": 30315 - }, - { - "epoch": 4.8821611175973265, - "grad_norm": 0.0026787524111568928, - "learning_rate": 0.00019998824637998462, - "loss": 46.0, - "step": 30316 - }, - { - "epoch": 4.882322154676114, - "grad_norm": 0.02055901288986206, - "learning_rate": 0.00019998824560432413, - "loss": 46.0, - "step": 30317 - }, - { - "epoch": 4.882483191754901, - "grad_norm": 0.004382592625916004, - "learning_rate": 0.00019998824482863803, - "loss": 46.0, - "step": 30318 - }, - { - "epoch": 4.882644228833689, - "grad_norm": 0.0028473767451941967, - "learning_rate": 0.00019998824405292634, - "loss": 46.0, - "step": 30319 - }, - { - "epoch": 4.882805265912476, - "grad_norm": 0.0022538998164236546, - "learning_rate": 0.00019998824327718907, - "loss": 46.0, - "step": 30320 - }, - { - "epoch": 4.882966302991264, - "grad_norm": 0.006421265192329884, - "learning_rate": 0.00019998824250142618, - "loss": 46.0, - "step": 30321 - }, - { - "epoch": 4.883127340070051, - "grad_norm": 0.02118576690554619, - "learning_rate": 0.0001999882417256377, - "loss": 46.0, - "step": 30322 - }, - { - "epoch": 4.883288377148839, - "grad_norm": 0.009731358848512173, - "learning_rate": 0.00019998824094982366, - "loss": 46.0, - "step": 30323 - }, - { - "epoch": 4.883449414227626, - "grad_norm": 0.0035421494394540787, - "learning_rate": 0.000199988240173984, - "loss": 46.0, - "step": 30324 - }, - { - "epoch": 4.883610451306414, - "grad_norm": 0.0027094639372080564, - "learning_rate": 0.00019998823939811877, - "loss": 46.0, - "step": 30325 - }, - { - "epoch": 4.883771488385201, - "grad_norm": 0.009699389338493347, - "learning_rate": 0.00019998823862222792, - "loss": 46.0, - "step": 30326 - }, - { - "epoch": 4.8839325254639885, - "grad_norm": 0.007638414856046438, - "learning_rate": 0.0001999882378463115, - "loss": 46.0, - "step": 30327 - }, - { - "epoch": 4.884093562542775, - "grad_norm": 0.012364639900624752, - "learning_rate": 0.00019998823707036948, - "loss": 46.0, - "step": 30328 - }, - { - "epoch": 4.8842545996215625, - "grad_norm": 0.003991544712334871, - "learning_rate": 0.0001999882362944019, - "loss": 46.0, - "step": 30329 - }, - { - "epoch": 4.88441563670035, - "grad_norm": 0.0011267001973465085, - "learning_rate": 0.0001999882355184087, - "loss": 46.0, - "step": 30330 - }, - { - "epoch": 4.884576673779137, - "grad_norm": 0.010877440683543682, - "learning_rate": 0.0001999882347423899, - "loss": 46.0, - "step": 30331 - }, - { - "epoch": 4.884737710857925, - "grad_norm": 0.0014555666130036116, - "learning_rate": 0.00019998823396634552, - "loss": 46.0, - "step": 30332 - }, - { - "epoch": 4.884898747936712, - "grad_norm": 0.008220436982810497, - "learning_rate": 0.00019998823319027553, - "loss": 46.0, - "step": 30333 - }, - { - "epoch": 4.8850597850155, - "grad_norm": 0.0010317128617316484, - "learning_rate": 0.00019998823241417995, - "loss": 46.0, - "step": 30334 - }, - { - "epoch": 4.885220822094287, - "grad_norm": 0.007349408231675625, - "learning_rate": 0.0001999882316380588, - "loss": 46.0, - "step": 30335 - }, - { - "epoch": 4.885381859173075, - "grad_norm": 0.004547438118606806, - "learning_rate": 0.00019998823086191206, - "loss": 46.0, - "step": 30336 - }, - { - "epoch": 4.885542896251862, - "grad_norm": 0.007054098881781101, - "learning_rate": 0.0001999882300857397, - "loss": 46.0, - "step": 30337 - }, - { - "epoch": 4.88570393333065, - "grad_norm": 0.005873312707990408, - "learning_rate": 0.00019998822930954177, - "loss": 46.0, - "step": 30338 - }, - { - "epoch": 4.885864970409437, - "grad_norm": 0.01052178256213665, - "learning_rate": 0.00019998822853331826, - "loss": 46.0, - "step": 30339 - }, - { - "epoch": 4.8860260074882245, - "grad_norm": 0.003539759898558259, - "learning_rate": 0.00019998822775706913, - "loss": 46.0, - "step": 30340 - }, - { - "epoch": 4.886187044567012, - "grad_norm": 0.005448522511869669, - "learning_rate": 0.0001999882269807944, - "loss": 46.0, - "step": 30341 - }, - { - "epoch": 4.886348081645799, - "grad_norm": 0.012556903064250946, - "learning_rate": 0.00019998822620449408, - "loss": 46.0, - "step": 30342 - }, - { - "epoch": 4.886509118724586, - "grad_norm": 0.00702200410887599, - "learning_rate": 0.0001999882254281682, - "loss": 46.0, - "step": 30343 - }, - { - "epoch": 4.886670155803373, - "grad_norm": 0.001821403275243938, - "learning_rate": 0.00019998822465181672, - "loss": 46.0, - "step": 30344 - }, - { - "epoch": 4.886831192882161, - "grad_norm": 0.001047898200340569, - "learning_rate": 0.00019998822387543963, - "loss": 46.0, - "step": 30345 - }, - { - "epoch": 4.886992229960948, - "grad_norm": 0.003435407066717744, - "learning_rate": 0.00019998822309903698, - "loss": 46.0, - "step": 30346 - }, - { - "epoch": 4.887153267039736, - "grad_norm": 0.0014386746333912015, - "learning_rate": 0.0001999882223226087, - "loss": 46.0, - "step": 30347 - }, - { - "epoch": 4.887314304118523, - "grad_norm": 0.006209913641214371, - "learning_rate": 0.00019998822154615486, - "loss": 46.0, - "step": 30348 - }, - { - "epoch": 4.887475341197311, - "grad_norm": 0.0028700323309749365, - "learning_rate": 0.0001999882207696754, - "loss": 46.0, - "step": 30349 - }, - { - "epoch": 4.887636378276098, - "grad_norm": 0.005138221196830273, - "learning_rate": 0.00019998821999317037, - "loss": 46.0, - "step": 30350 - }, - { - "epoch": 4.8877974153548855, - "grad_norm": 0.00448189489543438, - "learning_rate": 0.00019998821921663975, - "loss": 46.0, - "step": 30351 - }, - { - "epoch": 4.887958452433673, - "grad_norm": 0.004331518430262804, - "learning_rate": 0.0001999882184400835, - "loss": 46.0, - "step": 30352 - }, - { - "epoch": 4.8881194895124604, - "grad_norm": 0.0023716462310403585, - "learning_rate": 0.0001999882176635017, - "loss": 46.0, - "step": 30353 - }, - { - "epoch": 4.888280526591248, - "grad_norm": 0.002219144022092223, - "learning_rate": 0.00019998821688689428, - "loss": 46.0, - "step": 30354 - }, - { - "epoch": 4.888441563670035, - "grad_norm": 0.00531558133661747, - "learning_rate": 0.00019998821611026132, - "loss": 46.0, - "step": 30355 - }, - { - "epoch": 4.888602600748823, - "grad_norm": 0.0030300694052129984, - "learning_rate": 0.0001999882153336027, - "loss": 46.0, - "step": 30356 - }, - { - "epoch": 4.88876363782761, - "grad_norm": 0.0028026322834193707, - "learning_rate": 0.00019998821455691852, - "loss": 46.0, - "step": 30357 - }, - { - "epoch": 4.888924674906397, - "grad_norm": 0.011993777006864548, - "learning_rate": 0.00019998821378020874, - "loss": 46.0, - "step": 30358 - }, - { - "epoch": 4.889085711985184, - "grad_norm": 0.0022229549940675497, - "learning_rate": 0.0001999882130034734, - "loss": 46.0, - "step": 30359 - }, - { - "epoch": 4.889246749063972, - "grad_norm": 0.007727064657956362, - "learning_rate": 0.00019998821222671245, - "loss": 46.0, - "step": 30360 - }, - { - "epoch": 4.889407786142759, - "grad_norm": 0.006744609214365482, - "learning_rate": 0.0001999882114499259, - "loss": 46.0, - "step": 30361 - }, - { - "epoch": 4.889568823221547, - "grad_norm": 0.004535200539976358, - "learning_rate": 0.00019998821067311376, - "loss": 46.0, - "step": 30362 - }, - { - "epoch": 4.889729860300334, - "grad_norm": 0.008297115564346313, - "learning_rate": 0.00019998820989627602, - "loss": 46.0, - "step": 30363 - }, - { - "epoch": 4.8898908973791215, - "grad_norm": 0.0032407615799456835, - "learning_rate": 0.0001999882091194127, - "loss": 46.0, - "step": 30364 - }, - { - "epoch": 4.890051934457909, - "grad_norm": 0.002680008765310049, - "learning_rate": 0.0001999882083425238, - "loss": 46.0, - "step": 30365 - }, - { - "epoch": 4.890212971536696, - "grad_norm": 0.0033730214927345514, - "learning_rate": 0.00019998820756560924, - "loss": 46.0, - "step": 30366 - }, - { - "epoch": 4.890374008615484, - "grad_norm": 0.004781623836606741, - "learning_rate": 0.00019998820678866915, - "loss": 46.0, - "step": 30367 - }, - { - "epoch": 4.890535045694271, - "grad_norm": 0.005390890873968601, - "learning_rate": 0.00019998820601170347, - "loss": 46.0, - "step": 30368 - }, - { - "epoch": 4.890696082773059, - "grad_norm": 0.002060280181467533, - "learning_rate": 0.00019998820523471218, - "loss": 46.0, - "step": 30369 - }, - { - "epoch": 4.890857119851846, - "grad_norm": 0.006463841535151005, - "learning_rate": 0.00019998820445769533, - "loss": 46.0, - "step": 30370 - }, - { - "epoch": 4.891018156930633, - "grad_norm": 0.005537079647183418, - "learning_rate": 0.00019998820368065284, - "loss": 46.0, - "step": 30371 - }, - { - "epoch": 4.891179194009421, - "grad_norm": 0.006676413584500551, - "learning_rate": 0.00019998820290358479, - "loss": 46.0, - "step": 30372 - }, - { - "epoch": 4.891340231088208, - "grad_norm": 0.004183501936495304, - "learning_rate": 0.00019998820212649115, - "loss": 46.0, - "step": 30373 - }, - { - "epoch": 4.891501268166995, - "grad_norm": 0.002491398248821497, - "learning_rate": 0.00019998820134937192, - "loss": 46.0, - "step": 30374 - }, - { - "epoch": 4.891662305245783, - "grad_norm": 0.014169501140713692, - "learning_rate": 0.00019998820057222708, - "loss": 46.0, - "step": 30375 - }, - { - "epoch": 4.89182334232457, - "grad_norm": 0.0033617732115089893, - "learning_rate": 0.00019998819979505665, - "loss": 46.0, - "step": 30376 - }, - { - "epoch": 4.8919843794033575, - "grad_norm": 0.003594490699470043, - "learning_rate": 0.0001999881990178606, - "loss": 46.0, - "step": 30377 - }, - { - "epoch": 4.892145416482145, - "grad_norm": 0.003582483623176813, - "learning_rate": 0.000199988198240639, - "loss": 46.0, - "step": 30378 - }, - { - "epoch": 4.892306453560932, - "grad_norm": 0.003846733132377267, - "learning_rate": 0.0001999881974633918, - "loss": 46.0, - "step": 30379 - }, - { - "epoch": 4.89246749063972, - "grad_norm": 0.0031894822604954243, - "learning_rate": 0.000199988196686119, - "loss": 46.0, - "step": 30380 - }, - { - "epoch": 4.892628527718507, - "grad_norm": 0.021642964333295822, - "learning_rate": 0.00019998819590882064, - "loss": 46.0, - "step": 30381 - }, - { - "epoch": 4.892789564797295, - "grad_norm": 0.0023222407326102257, - "learning_rate": 0.00019998819513149664, - "loss": 46.0, - "step": 30382 - }, - { - "epoch": 4.892950601876082, - "grad_norm": 0.018712298944592476, - "learning_rate": 0.00019998819435414707, - "loss": 46.0, - "step": 30383 - }, - { - "epoch": 4.89311163895487, - "grad_norm": 0.009970182552933693, - "learning_rate": 0.00019998819357677192, - "loss": 46.0, - "step": 30384 - }, - { - "epoch": 4.893272676033657, - "grad_norm": 0.0011596559779718518, - "learning_rate": 0.00019998819279937115, - "loss": 46.0, - "step": 30385 - }, - { - "epoch": 4.893433713112444, - "grad_norm": 0.005341564305126667, - "learning_rate": 0.0001999881920219448, - "loss": 46.0, - "step": 30386 - }, - { - "epoch": 4.893594750191232, - "grad_norm": 0.0016422390472143888, - "learning_rate": 0.00019998819124449288, - "loss": 46.0, - "step": 30387 - }, - { - "epoch": 4.893755787270019, - "grad_norm": 0.002725841011852026, - "learning_rate": 0.00019998819046701535, - "loss": 46.0, - "step": 30388 - }, - { - "epoch": 4.893916824348806, - "grad_norm": 0.004834026098251343, - "learning_rate": 0.00019998818968951226, - "loss": 46.0, - "step": 30389 - }, - { - "epoch": 4.8940778614275935, - "grad_norm": 0.013006445951759815, - "learning_rate": 0.00019998818891198353, - "loss": 46.0, - "step": 30390 - }, - { - "epoch": 4.894238898506381, - "grad_norm": 0.005409400910139084, - "learning_rate": 0.00019998818813442922, - "loss": 46.0, - "step": 30391 - }, - { - "epoch": 4.894399935585168, - "grad_norm": 0.004750378895550966, - "learning_rate": 0.0001999881873568493, - "loss": 46.0, - "step": 30392 - }, - { - "epoch": 4.894560972663956, - "grad_norm": 0.001828087493777275, - "learning_rate": 0.00019998818657924385, - "loss": 46.0, - "step": 30393 - }, - { - "epoch": 4.894722009742743, - "grad_norm": 0.003092053811997175, - "learning_rate": 0.00019998818580161277, - "loss": 46.0, - "step": 30394 - }, - { - "epoch": 4.894883046821531, - "grad_norm": 0.0020606161560863256, - "learning_rate": 0.0001999881850239561, - "loss": 46.0, - "step": 30395 - }, - { - "epoch": 4.895044083900318, - "grad_norm": 0.003499452257528901, - "learning_rate": 0.00019998818424627382, - "loss": 46.0, - "step": 30396 - }, - { - "epoch": 4.895205120979106, - "grad_norm": 0.004869183525443077, - "learning_rate": 0.00019998818346856595, - "loss": 46.0, - "step": 30397 - }, - { - "epoch": 4.895366158057893, - "grad_norm": 0.011048783548176289, - "learning_rate": 0.00019998818269083252, - "loss": 46.0, - "step": 30398 - }, - { - "epoch": 4.8955271951366806, - "grad_norm": 0.005548267159610987, - "learning_rate": 0.00019998818191307348, - "loss": 46.0, - "step": 30399 - }, - { - "epoch": 4.895688232215468, - "grad_norm": 0.005866540130227804, - "learning_rate": 0.00019998818113528885, - "loss": 46.0, - "step": 30400 - }, - { - "epoch": 4.895849269294255, - "grad_norm": 0.0019323013257235289, - "learning_rate": 0.0001999881803574786, - "loss": 46.0, - "step": 30401 - }, - { - "epoch": 4.896010306373042, - "grad_norm": 0.005561450030654669, - "learning_rate": 0.00019998817957964283, - "loss": 46.0, - "step": 30402 - }, - { - "epoch": 4.8961713434518295, - "grad_norm": 0.013374553062021732, - "learning_rate": 0.0001999881788017814, - "loss": 46.0, - "step": 30403 - }, - { - "epoch": 4.896332380530617, - "grad_norm": 0.0043674553744494915, - "learning_rate": 0.0001999881780238944, - "loss": 46.0, - "step": 30404 - }, - { - "epoch": 4.896493417609404, - "grad_norm": 0.004610069561749697, - "learning_rate": 0.00019998817724598182, - "loss": 46.0, - "step": 30405 - }, - { - "epoch": 4.896654454688192, - "grad_norm": 0.0042014289647340775, - "learning_rate": 0.00019998817646804364, - "loss": 46.0, - "step": 30406 - }, - { - "epoch": 4.896815491766979, - "grad_norm": 0.005477037280797958, - "learning_rate": 0.00019998817569007987, - "loss": 46.0, - "step": 30407 - }, - { - "epoch": 4.896976528845767, - "grad_norm": 0.00274669355712831, - "learning_rate": 0.0001999881749120905, - "loss": 46.0, - "step": 30408 - }, - { - "epoch": 4.897137565924554, - "grad_norm": 0.0032716146670281887, - "learning_rate": 0.00019998817413407552, - "loss": 46.0, - "step": 30409 - }, - { - "epoch": 4.897298603003342, - "grad_norm": 0.003663887968286872, - "learning_rate": 0.000199988173356035, - "loss": 46.0, - "step": 30410 - }, - { - "epoch": 4.897459640082129, - "grad_norm": 0.0022849636152386665, - "learning_rate": 0.00019998817257796888, - "loss": 46.0, - "step": 30411 - }, - { - "epoch": 4.8976206771609165, - "grad_norm": 0.0036398752126842737, - "learning_rate": 0.00019998817179987712, - "loss": 46.0, - "step": 30412 - }, - { - "epoch": 4.897781714239704, - "grad_norm": 0.006911598611623049, - "learning_rate": 0.0001999881710217598, - "loss": 46.0, - "step": 30413 - }, - { - "epoch": 4.897942751318491, - "grad_norm": 0.007757857907563448, - "learning_rate": 0.00019998817024361688, - "loss": 46.0, - "step": 30414 - }, - { - "epoch": 4.898103788397279, - "grad_norm": 0.0018110171658918262, - "learning_rate": 0.00019998816946544836, - "loss": 46.0, - "step": 30415 - }, - { - "epoch": 4.8982648254760655, - "grad_norm": 0.008003453724086285, - "learning_rate": 0.00019998816868725428, - "loss": 46.0, - "step": 30416 - }, - { - "epoch": 4.898425862554853, - "grad_norm": 0.003984391689300537, - "learning_rate": 0.0001999881679090346, - "loss": 46.0, - "step": 30417 - }, - { - "epoch": 4.89858689963364, - "grad_norm": 0.0027316221967339516, - "learning_rate": 0.0001999881671307893, - "loss": 46.0, - "step": 30418 - }, - { - "epoch": 4.898747936712428, - "grad_norm": 0.006737871095538139, - "learning_rate": 0.0001999881663525184, - "loss": 46.0, - "step": 30419 - }, - { - "epoch": 4.898908973791215, - "grad_norm": 0.002492668339982629, - "learning_rate": 0.00019998816557422196, - "loss": 46.0, - "step": 30420 - }, - { - "epoch": 4.899070010870003, - "grad_norm": 0.002593198325484991, - "learning_rate": 0.0001999881647958999, - "loss": 46.0, - "step": 30421 - }, - { - "epoch": 4.89923104794879, - "grad_norm": 0.004081309773027897, - "learning_rate": 0.00019998816401755224, - "loss": 46.0, - "step": 30422 - }, - { - "epoch": 4.899392085027578, - "grad_norm": 0.0021048986818641424, - "learning_rate": 0.000199988163239179, - "loss": 46.0, - "step": 30423 - }, - { - "epoch": 4.899553122106365, - "grad_norm": 0.007963730953633785, - "learning_rate": 0.00019998816246078016, - "loss": 46.0, - "step": 30424 - }, - { - "epoch": 4.8997141591851525, - "grad_norm": 0.0086543383076787, - "learning_rate": 0.00019998816168235575, - "loss": 46.0, - "step": 30425 - }, - { - "epoch": 4.89987519626394, - "grad_norm": 0.0004533409664873034, - "learning_rate": 0.00019998816090390574, - "loss": 46.0, - "step": 30426 - }, - { - "epoch": 4.900036233342727, - "grad_norm": 0.0030244532972574234, - "learning_rate": 0.00019998816012543012, - "loss": 46.0, - "step": 30427 - }, - { - "epoch": 4.900197270421515, - "grad_norm": 0.0015222234651446342, - "learning_rate": 0.00019998815934692892, - "loss": 46.0, - "step": 30428 - }, - { - "epoch": 4.900358307500302, - "grad_norm": 0.0079489229246974, - "learning_rate": 0.00019998815856840213, - "loss": 46.0, - "step": 30429 - }, - { - "epoch": 4.90051934457909, - "grad_norm": 0.010724318213760853, - "learning_rate": 0.00019998815778984975, - "loss": 46.0, - "step": 30430 - }, - { - "epoch": 4.900680381657876, - "grad_norm": 0.015424591489136219, - "learning_rate": 0.00019998815701127172, - "loss": 46.0, - "step": 30431 - }, - { - "epoch": 4.900841418736664, - "grad_norm": 0.0312027707695961, - "learning_rate": 0.00019998815623266817, - "loss": 46.0, - "step": 30432 - }, - { - "epoch": 4.901002455815451, - "grad_norm": 0.0021878459956496954, - "learning_rate": 0.00019998815545403903, - "loss": 46.0, - "step": 30433 - }, - { - "epoch": 4.901163492894239, - "grad_norm": 0.0016812459798529744, - "learning_rate": 0.00019998815467538425, - "loss": 46.0, - "step": 30434 - }, - { - "epoch": 4.901324529973026, - "grad_norm": 0.00404763501137495, - "learning_rate": 0.00019998815389670393, - "loss": 46.0, - "step": 30435 - }, - { - "epoch": 4.901485567051814, - "grad_norm": 0.006528899073600769, - "learning_rate": 0.000199988153117998, - "loss": 46.0, - "step": 30436 - }, - { - "epoch": 4.901646604130601, - "grad_norm": 0.0030419824179261923, - "learning_rate": 0.00019998815233926646, - "loss": 46.0, - "step": 30437 - }, - { - "epoch": 4.9018076412093885, - "grad_norm": 0.00468186242505908, - "learning_rate": 0.00019998815156050932, - "loss": 46.0, - "step": 30438 - }, - { - "epoch": 4.901968678288176, - "grad_norm": 0.01168001163750887, - "learning_rate": 0.00019998815078172663, - "loss": 46.0, - "step": 30439 - }, - { - "epoch": 4.902129715366963, - "grad_norm": 0.004269199445843697, - "learning_rate": 0.00019998815000291833, - "loss": 46.0, - "step": 30440 - }, - { - "epoch": 4.902290752445751, - "grad_norm": 0.0019266594899818301, - "learning_rate": 0.0001999881492240844, - "loss": 46.0, - "step": 30441 - }, - { - "epoch": 4.902451789524538, - "grad_norm": 0.017323721200227737, - "learning_rate": 0.00019998814844522493, - "loss": 46.0, - "step": 30442 - }, - { - "epoch": 4.902612826603326, - "grad_norm": 0.0006327939918264747, - "learning_rate": 0.00019998814766633983, - "loss": 46.0, - "step": 30443 - }, - { - "epoch": 4.902773863682112, - "grad_norm": 0.0005349363200366497, - "learning_rate": 0.00019998814688742918, - "loss": 46.0, - "step": 30444 - }, - { - "epoch": 4.902934900760901, - "grad_norm": 0.0034096436575055122, - "learning_rate": 0.00019998814610849294, - "loss": 46.0, - "step": 30445 - }, - { - "epoch": 4.903095937839687, - "grad_norm": 0.01690039224922657, - "learning_rate": 0.00019998814532953105, - "loss": 46.0, - "step": 30446 - }, - { - "epoch": 4.903256974918475, - "grad_norm": 0.0039896718226373196, - "learning_rate": 0.0001999881445505436, - "loss": 46.0, - "step": 30447 - }, - { - "epoch": 4.903418011997262, - "grad_norm": 0.01275465078651905, - "learning_rate": 0.00019998814377153058, - "loss": 46.0, - "step": 30448 - }, - { - "epoch": 4.90357904907605, - "grad_norm": 0.009657173417508602, - "learning_rate": 0.00019998814299249196, - "loss": 46.0, - "step": 30449 - }, - { - "epoch": 4.903740086154837, - "grad_norm": 0.0028049226384609938, - "learning_rate": 0.00019998814221342773, - "loss": 46.0, - "step": 30450 - }, - { - "epoch": 4.9039011232336245, - "grad_norm": 0.004891689866781235, - "learning_rate": 0.0001999881414343379, - "loss": 46.0, - "step": 30451 - }, - { - "epoch": 4.904062160312412, - "grad_norm": 0.008742043748497963, - "learning_rate": 0.0001999881406552225, - "loss": 46.0, - "step": 30452 - }, - { - "epoch": 4.904223197391199, - "grad_norm": 0.005531701724976301, - "learning_rate": 0.00019998813987608149, - "loss": 46.0, - "step": 30453 - }, - { - "epoch": 4.904384234469987, - "grad_norm": 0.008188373409211636, - "learning_rate": 0.0001999881390969149, - "loss": 46.0, - "step": 30454 - }, - { - "epoch": 4.904545271548774, - "grad_norm": 0.013696274720132351, - "learning_rate": 0.00019998813831772274, - "loss": 46.0, - "step": 30455 - }, - { - "epoch": 4.904706308627562, - "grad_norm": 0.011236791498959064, - "learning_rate": 0.00019998813753850496, - "loss": 46.0, - "step": 30456 - }, - { - "epoch": 4.904867345706349, - "grad_norm": 0.007230163086205721, - "learning_rate": 0.00019998813675926159, - "loss": 46.0, - "step": 30457 - }, - { - "epoch": 4.905028382785137, - "grad_norm": 0.0028765499591827393, - "learning_rate": 0.00019998813597999263, - "loss": 46.0, - "step": 30458 - }, - { - "epoch": 4.905189419863923, - "grad_norm": 0.005269724875688553, - "learning_rate": 0.0001999881352006981, - "loss": 46.0, - "step": 30459 - }, - { - "epoch": 4.9053504569427115, - "grad_norm": 0.006079904735088348, - "learning_rate": 0.00019998813442137796, - "loss": 46.0, - "step": 30460 - }, - { - "epoch": 4.905511494021498, - "grad_norm": 0.0034910067915916443, - "learning_rate": 0.0001999881336420322, - "loss": 46.0, - "step": 30461 - }, - { - "epoch": 4.905672531100286, - "grad_norm": 0.0014764934312552214, - "learning_rate": 0.0001999881328626609, - "loss": 46.0, - "step": 30462 - }, - { - "epoch": 4.905833568179073, - "grad_norm": 0.025733614340424538, - "learning_rate": 0.000199988132083264, - "loss": 46.0, - "step": 30463 - }, - { - "epoch": 4.9059946052578605, - "grad_norm": 0.006812684237957001, - "learning_rate": 0.00019998813130384145, - "loss": 46.0, - "step": 30464 - }, - { - "epoch": 4.906155642336648, - "grad_norm": 0.00285906414501369, - "learning_rate": 0.00019998813052439336, - "loss": 46.0, - "step": 30465 - }, - { - "epoch": 4.906316679415435, - "grad_norm": 0.0029761367477476597, - "learning_rate": 0.00019998812974491968, - "loss": 46.0, - "step": 30466 - }, - { - "epoch": 4.906477716494223, - "grad_norm": 0.008120449259877205, - "learning_rate": 0.00019998812896542038, - "loss": 46.0, - "step": 30467 - }, - { - "epoch": 4.90663875357301, - "grad_norm": 0.00585969490930438, - "learning_rate": 0.00019998812818589553, - "loss": 46.0, - "step": 30468 - }, - { - "epoch": 4.906799790651798, - "grad_norm": 0.006378841586410999, - "learning_rate": 0.00019998812740634506, - "loss": 46.0, - "step": 30469 - }, - { - "epoch": 4.906960827730585, - "grad_norm": 0.0033306467812508345, - "learning_rate": 0.000199988126626769, - "loss": 46.0, - "step": 30470 - }, - { - "epoch": 4.907121864809373, - "grad_norm": 0.0010022582719102502, - "learning_rate": 0.00019998812584716733, - "loss": 46.0, - "step": 30471 - }, - { - "epoch": 4.90728290188816, - "grad_norm": 0.020576857030391693, - "learning_rate": 0.00019998812506754007, - "loss": 46.0, - "step": 30472 - }, - { - "epoch": 4.9074439389669475, - "grad_norm": 0.009301193989813328, - "learning_rate": 0.00019998812428788726, - "loss": 46.0, - "step": 30473 - }, - { - "epoch": 4.907604976045734, - "grad_norm": 0.01914830692112446, - "learning_rate": 0.00019998812350820882, - "loss": 46.0, - "step": 30474 - }, - { - "epoch": 4.9077660131245215, - "grad_norm": 0.0014181635342538357, - "learning_rate": 0.0001999881227285048, - "loss": 46.0, - "step": 30475 - }, - { - "epoch": 4.907927050203309, - "grad_norm": 0.005296367220580578, - "learning_rate": 0.0001999881219487752, - "loss": 46.0, - "step": 30476 - }, - { - "epoch": 4.9080880872820964, - "grad_norm": 0.005020085256546736, - "learning_rate": 0.00019998812116902, - "loss": 46.0, - "step": 30477 - }, - { - "epoch": 4.908249124360884, - "grad_norm": 0.0006146127707324922, - "learning_rate": 0.00019998812038923922, - "loss": 46.0, - "step": 30478 - }, - { - "epoch": 4.908410161439671, - "grad_norm": 0.0041870009154081345, - "learning_rate": 0.00019998811960943283, - "loss": 46.0, - "step": 30479 - }, - { - "epoch": 4.908571198518459, - "grad_norm": 0.001721163745969534, - "learning_rate": 0.00019998811882960085, - "loss": 46.0, - "step": 30480 - }, - { - "epoch": 4.908732235597246, - "grad_norm": 0.017062436789274216, - "learning_rate": 0.00019998811804974328, - "loss": 46.0, - "step": 30481 - }, - { - "epoch": 4.908893272676034, - "grad_norm": 0.0018941735615953803, - "learning_rate": 0.00019998811726986012, - "loss": 46.0, - "step": 30482 - }, - { - "epoch": 4.909054309754821, - "grad_norm": 0.0016774415271356702, - "learning_rate": 0.00019998811648995135, - "loss": 46.0, - "step": 30483 - }, - { - "epoch": 4.909215346833609, - "grad_norm": 0.012793129310011864, - "learning_rate": 0.00019998811571001702, - "loss": 46.0, - "step": 30484 - }, - { - "epoch": 4.909376383912396, - "grad_norm": 0.023086661472916603, - "learning_rate": 0.0001999881149300571, - "loss": 46.0, - "step": 30485 - }, - { - "epoch": 4.9095374209911835, - "grad_norm": 0.0045737880282104015, - "learning_rate": 0.00019998811415007154, - "loss": 46.0, - "step": 30486 - }, - { - "epoch": 4.909698458069971, - "grad_norm": 0.003975625149905682, - "learning_rate": 0.00019998811337006042, - "loss": 46.0, - "step": 30487 - }, - { - "epoch": 4.909859495148758, - "grad_norm": 0.005778086371719837, - "learning_rate": 0.0001999881125900237, - "loss": 46.0, - "step": 30488 - }, - { - "epoch": 4.910020532227545, - "grad_norm": 0.009252060204744339, - "learning_rate": 0.00019998811180996142, - "loss": 46.0, - "step": 30489 - }, - { - "epoch": 4.910181569306332, - "grad_norm": 0.00075905816629529, - "learning_rate": 0.0001999881110298735, - "loss": 46.0, - "step": 30490 - }, - { - "epoch": 4.91034260638512, - "grad_norm": 0.011408671736717224, - "learning_rate": 0.00019998811024976004, - "loss": 46.0, - "step": 30491 - }, - { - "epoch": 4.910503643463907, - "grad_norm": 0.006041468121111393, - "learning_rate": 0.00019998810946962093, - "loss": 46.0, - "step": 30492 - }, - { - "epoch": 4.910664680542695, - "grad_norm": 0.005167999770492315, - "learning_rate": 0.0001999881086894563, - "loss": 46.0, - "step": 30493 - }, - { - "epoch": 4.910825717621482, - "grad_norm": 0.011471273377537727, - "learning_rate": 0.00019998810790926603, - "loss": 46.0, - "step": 30494 - }, - { - "epoch": 4.91098675470027, - "grad_norm": 0.0029366835951805115, - "learning_rate": 0.00019998810712905016, - "loss": 46.0, - "step": 30495 - }, - { - "epoch": 4.911147791779057, - "grad_norm": 0.0074851494282484055, - "learning_rate": 0.0001999881063488087, - "loss": 46.0, - "step": 30496 - }, - { - "epoch": 4.911308828857845, - "grad_norm": 0.011722452938556671, - "learning_rate": 0.00019998810556854166, - "loss": 46.0, - "step": 30497 - }, - { - "epoch": 4.911469865936632, - "grad_norm": 0.0035150835756212473, - "learning_rate": 0.00019998810478824902, - "loss": 46.0, - "step": 30498 - }, - { - "epoch": 4.9116309030154195, - "grad_norm": 0.015447013080120087, - "learning_rate": 0.00019998810400793083, - "loss": 46.0, - "step": 30499 - }, - { - "epoch": 4.911791940094207, - "grad_norm": 0.002246592426672578, - "learning_rate": 0.000199988103227587, - "loss": 46.0, - "step": 30500 - }, - { - "epoch": 4.911952977172994, - "grad_norm": 0.001899111084640026, - "learning_rate": 0.00019998810244721763, - "loss": 46.0, - "step": 30501 - }, - { - "epoch": 4.912114014251782, - "grad_norm": 0.0031420388258993626, - "learning_rate": 0.0001999881016668226, - "loss": 46.0, - "step": 30502 - }, - { - "epoch": 4.912275051330569, - "grad_norm": 0.0027979863807559013, - "learning_rate": 0.000199988100886402, - "loss": 46.0, - "step": 30503 - }, - { - "epoch": 4.912436088409356, - "grad_norm": 0.003839815268293023, - "learning_rate": 0.00019998810010595584, - "loss": 46.0, - "step": 30504 - }, - { - "epoch": 4.912597125488143, - "grad_norm": 0.010382944718003273, - "learning_rate": 0.00019998809932548407, - "loss": 46.0, - "step": 30505 - }, - { - "epoch": 4.912758162566931, - "grad_norm": 0.0017271682154387236, - "learning_rate": 0.0001999880985449867, - "loss": 46.0, - "step": 30506 - }, - { - "epoch": 4.912919199645718, - "grad_norm": 0.006422657053917646, - "learning_rate": 0.00019998809776446374, - "loss": 46.0, - "step": 30507 - }, - { - "epoch": 4.913080236724506, - "grad_norm": 0.0037081504706293344, - "learning_rate": 0.00019998809698391519, - "loss": 46.0, - "step": 30508 - }, - { - "epoch": 4.913241273803293, - "grad_norm": 0.0043160878121852875, - "learning_rate": 0.00019998809620334104, - "loss": 46.0, - "step": 30509 - }, - { - "epoch": 4.913402310882081, - "grad_norm": 0.0044052740558981895, - "learning_rate": 0.00019998809542274133, - "loss": 46.0, - "step": 30510 - }, - { - "epoch": 4.913563347960868, - "grad_norm": 0.0050047775730490685, - "learning_rate": 0.00019998809464211599, - "loss": 46.0, - "step": 30511 - }, - { - "epoch": 4.9137243850396555, - "grad_norm": 0.009869280271232128, - "learning_rate": 0.00019998809386146505, - "loss": 46.0, - "step": 30512 - }, - { - "epoch": 4.913885422118443, - "grad_norm": 0.006626026704907417, - "learning_rate": 0.00019998809308078856, - "loss": 46.0, - "step": 30513 - }, - { - "epoch": 4.91404645919723, - "grad_norm": 0.004311206750571728, - "learning_rate": 0.00019998809230008648, - "loss": 46.0, - "step": 30514 - }, - { - "epoch": 4.914207496276018, - "grad_norm": 0.0018047093180939555, - "learning_rate": 0.00019998809151935878, - "loss": 46.0, - "step": 30515 - }, - { - "epoch": 4.914368533354805, - "grad_norm": 0.0034651169553399086, - "learning_rate": 0.0001999880907386055, - "loss": 46.0, - "step": 30516 - }, - { - "epoch": 4.914529570433592, - "grad_norm": 0.0027213257271796465, - "learning_rate": 0.00019998808995782663, - "loss": 46.0, - "step": 30517 - }, - { - "epoch": 4.91469060751238, - "grad_norm": 0.015327741391956806, - "learning_rate": 0.00019998808917702214, - "loss": 46.0, - "step": 30518 - }, - { - "epoch": 4.914851644591167, - "grad_norm": 0.003087273333221674, - "learning_rate": 0.0001999880883961921, - "loss": 46.0, - "step": 30519 - }, - { - "epoch": 4.915012681669954, - "grad_norm": 0.004218158312141895, - "learning_rate": 0.00019998808761533644, - "loss": 46.0, - "step": 30520 - }, - { - "epoch": 4.915173718748742, - "grad_norm": 0.008186622522771358, - "learning_rate": 0.0001999880868344552, - "loss": 46.0, - "step": 30521 - }, - { - "epoch": 4.915334755827529, - "grad_norm": 0.010806851089000702, - "learning_rate": 0.00019998808605354838, - "loss": 46.0, - "step": 30522 - }, - { - "epoch": 4.9154957929063166, - "grad_norm": 0.005453029647469521, - "learning_rate": 0.00019998808527261594, - "loss": 46.0, - "step": 30523 - }, - { - "epoch": 4.915656829985104, - "grad_norm": 0.001271772664040327, - "learning_rate": 0.0001999880844916579, - "loss": 46.0, - "step": 30524 - }, - { - "epoch": 4.9158178670638915, - "grad_norm": 0.0022089264821261168, - "learning_rate": 0.00019998808371067433, - "loss": 46.0, - "step": 30525 - }, - { - "epoch": 4.915978904142679, - "grad_norm": 0.003307298058643937, - "learning_rate": 0.0001999880829296651, - "loss": 46.0, - "step": 30526 - }, - { - "epoch": 4.916139941221466, - "grad_norm": 0.0011044098064303398, - "learning_rate": 0.00019998808214863033, - "loss": 46.0, - "step": 30527 - }, - { - "epoch": 4.916300978300254, - "grad_norm": 0.0006588261458091438, - "learning_rate": 0.00019998808136756995, - "loss": 46.0, - "step": 30528 - }, - { - "epoch": 4.916462015379041, - "grad_norm": 0.005067430902272463, - "learning_rate": 0.00019998808058648395, - "loss": 46.0, - "step": 30529 - }, - { - "epoch": 4.916623052457829, - "grad_norm": 0.016253668814897537, - "learning_rate": 0.0001999880798053724, - "loss": 46.0, - "step": 30530 - }, - { - "epoch": 4.916784089536616, - "grad_norm": 0.016832856461405754, - "learning_rate": 0.00019998807902423522, - "loss": 46.0, - "step": 30531 - }, - { - "epoch": 4.916945126615403, - "grad_norm": 0.01826532371342182, - "learning_rate": 0.00019998807824307249, - "loss": 46.0, - "step": 30532 - }, - { - "epoch": 4.917106163694191, - "grad_norm": 0.004080309998244047, - "learning_rate": 0.00019998807746188414, - "loss": 46.0, - "step": 30533 - }, - { - "epoch": 4.917267200772978, - "grad_norm": 0.010835053399205208, - "learning_rate": 0.00019998807668067018, - "loss": 46.0, - "step": 30534 - }, - { - "epoch": 4.917428237851765, - "grad_norm": 0.012455137446522713, - "learning_rate": 0.00019998807589943066, - "loss": 46.0, - "step": 30535 - }, - { - "epoch": 4.9175892749305525, - "grad_norm": 0.004329121671617031, - "learning_rate": 0.00019998807511816555, - "loss": 46.0, - "step": 30536 - }, - { - "epoch": 4.91775031200934, - "grad_norm": 0.004221884533762932, - "learning_rate": 0.00019998807433687486, - "loss": 46.0, - "step": 30537 - }, - { - "epoch": 4.917911349088127, - "grad_norm": 0.01379698421806097, - "learning_rate": 0.00019998807355555855, - "loss": 46.0, - "step": 30538 - }, - { - "epoch": 4.918072386166915, - "grad_norm": 0.008747509680688381, - "learning_rate": 0.00019998807277421662, - "loss": 46.0, - "step": 30539 - }, - { - "epoch": 4.918233423245702, - "grad_norm": 0.014827131293714046, - "learning_rate": 0.00019998807199284917, - "loss": 46.0, - "step": 30540 - }, - { - "epoch": 4.91839446032449, - "grad_norm": 0.006563459523022175, - "learning_rate": 0.0001999880712114561, - "loss": 46.0, - "step": 30541 - }, - { - "epoch": 4.918555497403277, - "grad_norm": 0.008277750574052334, - "learning_rate": 0.0001999880704300374, - "loss": 46.0, - "step": 30542 - }, - { - "epoch": 4.918716534482065, - "grad_norm": 0.0018741608364507556, - "learning_rate": 0.00019998806964859314, - "loss": 46.0, - "step": 30543 - }, - { - "epoch": 4.918877571560852, - "grad_norm": 0.0015922526363283396, - "learning_rate": 0.00019998806886712328, - "loss": 46.0, - "step": 30544 - }, - { - "epoch": 4.91903860863964, - "grad_norm": 0.004034386482089758, - "learning_rate": 0.00019998806808562786, - "loss": 46.0, - "step": 30545 - }, - { - "epoch": 4.919199645718427, - "grad_norm": 0.0011017619399353862, - "learning_rate": 0.0001999880673041068, - "loss": 46.0, - "step": 30546 - }, - { - "epoch": 4.919360682797214, - "grad_norm": 0.003767644288018346, - "learning_rate": 0.0001999880665225602, - "loss": 46.0, - "step": 30547 - }, - { - "epoch": 4.919521719876001, - "grad_norm": 0.0010831747204065323, - "learning_rate": 0.00019998806574098797, - "loss": 46.0, - "step": 30548 - }, - { - "epoch": 4.9196827569547885, - "grad_norm": 0.00435589998960495, - "learning_rate": 0.00019998806495939015, - "loss": 46.0, - "step": 30549 - }, - { - "epoch": 4.919843794033576, - "grad_norm": 0.002801221562549472, - "learning_rate": 0.00019998806417776674, - "loss": 46.0, - "step": 30550 - }, - { - "epoch": 4.920004831112363, - "grad_norm": 0.0012493854155763984, - "learning_rate": 0.00019998806339611777, - "loss": 46.0, - "step": 30551 - }, - { - "epoch": 4.920165868191151, - "grad_norm": 0.0074576702900230885, - "learning_rate": 0.00019998806261444315, - "loss": 46.0, - "step": 30552 - }, - { - "epoch": 4.920326905269938, - "grad_norm": 0.0026332444977015257, - "learning_rate": 0.00019998806183274296, - "loss": 46.0, - "step": 30553 - }, - { - "epoch": 4.920487942348726, - "grad_norm": 0.001999326515942812, - "learning_rate": 0.0001999880610510172, - "loss": 46.0, - "step": 30554 - }, - { - "epoch": 4.920648979427513, - "grad_norm": 0.0025490603875368834, - "learning_rate": 0.00019998806026926583, - "loss": 46.0, - "step": 30555 - }, - { - "epoch": 4.920810016506301, - "grad_norm": 0.008558065630495548, - "learning_rate": 0.00019998805948748887, - "loss": 46.0, - "step": 30556 - }, - { - "epoch": 4.920971053585088, - "grad_norm": 0.013050605542957783, - "learning_rate": 0.00019998805870568632, - "loss": 46.0, - "step": 30557 - }, - { - "epoch": 4.921132090663876, - "grad_norm": 0.0031150965951383114, - "learning_rate": 0.00019998805792385818, - "loss": 46.0, - "step": 30558 - }, - { - "epoch": 4.921293127742663, - "grad_norm": 0.0055672600865364075, - "learning_rate": 0.00019998805714200446, - "loss": 46.0, - "step": 30559 - }, - { - "epoch": 4.9214541648214505, - "grad_norm": 0.006387733854353428, - "learning_rate": 0.00019998805636012515, - "loss": 46.0, - "step": 30560 - }, - { - "epoch": 4.921615201900238, - "grad_norm": 0.01056070625782013, - "learning_rate": 0.00019998805557822023, - "loss": 46.0, - "step": 30561 - }, - { - "epoch": 4.9217762389790245, - "grad_norm": 0.00413871556520462, - "learning_rate": 0.0001999880547962897, - "loss": 46.0, - "step": 30562 - }, - { - "epoch": 4.921937276057812, - "grad_norm": 0.002815619111061096, - "learning_rate": 0.0001999880540143336, - "loss": 46.0, - "step": 30563 - }, - { - "epoch": 4.922098313136599, - "grad_norm": 0.011996772140264511, - "learning_rate": 0.00019998805323235191, - "loss": 46.0, - "step": 30564 - }, - { - "epoch": 4.922259350215387, - "grad_norm": 0.0037979588378220797, - "learning_rate": 0.00019998805245034464, - "loss": 46.0, - "step": 30565 - }, - { - "epoch": 4.922420387294174, - "grad_norm": 0.0024226531386375427, - "learning_rate": 0.00019998805166831178, - "loss": 46.0, - "step": 30566 - }, - { - "epoch": 4.922581424372962, - "grad_norm": 0.0066368901170790195, - "learning_rate": 0.0001999880508862533, - "loss": 46.0, - "step": 30567 - }, - { - "epoch": 4.922742461451749, - "grad_norm": 0.0017153420485556126, - "learning_rate": 0.00019998805010416922, - "loss": 46.0, - "step": 30568 - }, - { - "epoch": 4.922903498530537, - "grad_norm": 0.008662872947752476, - "learning_rate": 0.0001999880493220596, - "loss": 46.0, - "step": 30569 - }, - { - "epoch": 4.923064535609324, - "grad_norm": 0.005743962712585926, - "learning_rate": 0.00019998804853992437, - "loss": 46.0, - "step": 30570 - }, - { - "epoch": 4.923225572688112, - "grad_norm": 0.004111960995942354, - "learning_rate": 0.00019998804775776352, - "loss": 46.0, - "step": 30571 - }, - { - "epoch": 4.923386609766899, - "grad_norm": 0.015256236307322979, - "learning_rate": 0.00019998804697557708, - "loss": 46.0, - "step": 30572 - }, - { - "epoch": 4.9235476468456865, - "grad_norm": 0.005067076068371534, - "learning_rate": 0.0001999880461933651, - "loss": 46.0, - "step": 30573 - }, - { - "epoch": 4.923708683924474, - "grad_norm": 0.0018835312221199274, - "learning_rate": 0.00019998804541112748, - "loss": 46.0, - "step": 30574 - }, - { - "epoch": 4.923869721003261, - "grad_norm": 0.009835692122578621, - "learning_rate": 0.00019998804462886428, - "loss": 46.0, - "step": 30575 - }, - { - "epoch": 4.924030758082049, - "grad_norm": 0.01062731072306633, - "learning_rate": 0.00019998804384657547, - "loss": 46.0, - "step": 30576 - }, - { - "epoch": 4.924191795160835, - "grad_norm": 0.0022026204969733953, - "learning_rate": 0.0001999880430642611, - "loss": 46.0, - "step": 30577 - }, - { - "epoch": 4.924352832239623, - "grad_norm": 0.009752246551215649, - "learning_rate": 0.0001999880422819211, - "loss": 46.0, - "step": 30578 - }, - { - "epoch": 4.92451386931841, - "grad_norm": 0.007753752637654543, - "learning_rate": 0.00019998804149955554, - "loss": 46.0, - "step": 30579 - }, - { - "epoch": 4.924674906397198, - "grad_norm": 0.0047276997938752174, - "learning_rate": 0.0001999880407171644, - "loss": 46.0, - "step": 30580 - }, - { - "epoch": 4.924835943475985, - "grad_norm": 0.00304826139472425, - "learning_rate": 0.00019998803993474763, - "loss": 46.0, - "step": 30581 - }, - { - "epoch": 4.924996980554773, - "grad_norm": 0.007551336195319891, - "learning_rate": 0.0001999880391523053, - "loss": 46.0, - "step": 30582 - }, - { - "epoch": 4.92515801763356, - "grad_norm": 0.012198468670248985, - "learning_rate": 0.00019998803836983737, - "loss": 46.0, - "step": 30583 - }, - { - "epoch": 4.9253190547123475, - "grad_norm": 0.006854066625237465, - "learning_rate": 0.00019998803758734384, - "loss": 46.0, - "step": 30584 - }, - { - "epoch": 4.925480091791135, - "grad_norm": 0.002870082389563322, - "learning_rate": 0.00019998803680482471, - "loss": 46.0, - "step": 30585 - }, - { - "epoch": 4.9256411288699224, - "grad_norm": 0.010755311697721481, - "learning_rate": 0.00019998803602228003, - "loss": 46.0, - "step": 30586 - }, - { - "epoch": 4.92580216594871, - "grad_norm": 0.0014694230630993843, - "learning_rate": 0.0001999880352397097, - "loss": 46.0, - "step": 30587 - }, - { - "epoch": 4.925963203027497, - "grad_norm": 0.01283201202750206, - "learning_rate": 0.0001999880344571138, - "loss": 46.0, - "step": 30588 - }, - { - "epoch": 4.926124240106285, - "grad_norm": 0.002975200302898884, - "learning_rate": 0.00019998803367449232, - "loss": 46.0, - "step": 30589 - }, - { - "epoch": 4.926285277185071, - "grad_norm": 0.015369229018688202, - "learning_rate": 0.00019998803289184526, - "loss": 46.0, - "step": 30590 - }, - { - "epoch": 4.92644631426386, - "grad_norm": 0.002160497009754181, - "learning_rate": 0.0001999880321091726, - "loss": 46.0, - "step": 30591 - }, - { - "epoch": 4.926607351342646, - "grad_norm": 0.0012843343429267406, - "learning_rate": 0.00019998803132647433, - "loss": 46.0, - "step": 30592 - }, - { - "epoch": 4.926768388421434, - "grad_norm": 0.007385102566331625, - "learning_rate": 0.00019998803054375048, - "loss": 46.0, - "step": 30593 - }, - { - "epoch": 4.926929425500221, - "grad_norm": 0.0009941896423697472, - "learning_rate": 0.00019998802976100102, - "loss": 46.0, - "step": 30594 - }, - { - "epoch": 4.927090462579009, - "grad_norm": 0.0013169351732358336, - "learning_rate": 0.00019998802897822597, - "loss": 46.0, - "step": 30595 - }, - { - "epoch": 4.927251499657796, - "grad_norm": 0.010834407061338425, - "learning_rate": 0.00019998802819542533, - "loss": 46.0, - "step": 30596 - }, - { - "epoch": 4.9274125367365835, - "grad_norm": 0.002388420980423689, - "learning_rate": 0.00019998802741259914, - "loss": 46.0, - "step": 30597 - }, - { - "epoch": 4.927573573815371, - "grad_norm": 0.004795580171048641, - "learning_rate": 0.00019998802662974733, - "loss": 46.0, - "step": 30598 - }, - { - "epoch": 4.927734610894158, - "grad_norm": 0.002790645696222782, - "learning_rate": 0.0001999880258468699, - "loss": 46.0, - "step": 30599 - }, - { - "epoch": 4.927895647972946, - "grad_norm": 0.0010824664495885372, - "learning_rate": 0.00019998802506396692, - "loss": 46.0, - "step": 30600 - }, - { - "epoch": 4.928056685051733, - "grad_norm": 0.004720304161310196, - "learning_rate": 0.00019998802428103834, - "loss": 46.0, - "step": 30601 - }, - { - "epoch": 4.928217722130521, - "grad_norm": 0.005735001061111689, - "learning_rate": 0.00019998802349808416, - "loss": 46.0, - "step": 30602 - }, - { - "epoch": 4.928378759209308, - "grad_norm": 0.00919886864721775, - "learning_rate": 0.00019998802271510438, - "loss": 46.0, - "step": 30603 - }, - { - "epoch": 4.928539796288096, - "grad_norm": 0.0024084672331809998, - "learning_rate": 0.00019998802193209905, - "loss": 46.0, - "step": 30604 - }, - { - "epoch": 4.928700833366882, - "grad_norm": 0.0016177950892597437, - "learning_rate": 0.00019998802114906808, - "loss": 46.0, - "step": 30605 - }, - { - "epoch": 4.928861870445671, - "grad_norm": 0.011064939200878143, - "learning_rate": 0.0001999880203660115, - "loss": 46.0, - "step": 30606 - }, - { - "epoch": 4.929022907524457, - "grad_norm": 0.030213357880711555, - "learning_rate": 0.0001999880195829294, - "loss": 46.0, - "step": 30607 - }, - { - "epoch": 4.929183944603245, - "grad_norm": 0.004055551718920469, - "learning_rate": 0.00019998801879982168, - "loss": 46.0, - "step": 30608 - }, - { - "epoch": 4.929344981682032, - "grad_norm": 0.01130355428904295, - "learning_rate": 0.00019998801801668833, - "loss": 46.0, - "step": 30609 - }, - { - "epoch": 4.9295060187608195, - "grad_norm": 0.003402838483452797, - "learning_rate": 0.00019998801723352942, - "loss": 46.0, - "step": 30610 - }, - { - "epoch": 4.929667055839607, - "grad_norm": 0.006314252037554979, - "learning_rate": 0.00019998801645034492, - "loss": 46.0, - "step": 30611 - }, - { - "epoch": 4.929828092918394, - "grad_norm": 0.010180431418120861, - "learning_rate": 0.0001999880156671348, - "loss": 46.0, - "step": 30612 - }, - { - "epoch": 4.929989129997182, - "grad_norm": 0.0019255392253398895, - "learning_rate": 0.00019998801488389914, - "loss": 46.0, - "step": 30613 - }, - { - "epoch": 4.930150167075969, - "grad_norm": 0.008755774237215519, - "learning_rate": 0.00019998801410063785, - "loss": 46.0, - "step": 30614 - }, - { - "epoch": 4.930311204154757, - "grad_norm": 0.011707756668329239, - "learning_rate": 0.00019998801331735098, - "loss": 46.0, - "step": 30615 - }, - { - "epoch": 4.930472241233544, - "grad_norm": 0.004657205194234848, - "learning_rate": 0.00019998801253403851, - "loss": 46.0, - "step": 30616 - }, - { - "epoch": 4.930633278312332, - "grad_norm": 0.0028296730015426874, - "learning_rate": 0.00019998801175070047, - "loss": 46.0, - "step": 30617 - }, - { - "epoch": 4.930794315391119, - "grad_norm": 0.0026236700359731913, - "learning_rate": 0.0001999880109673368, - "loss": 46.0, - "step": 30618 - }, - { - "epoch": 4.930955352469907, - "grad_norm": 0.0018111473182216287, - "learning_rate": 0.00019998801018394758, - "loss": 46.0, - "step": 30619 - }, - { - "epoch": 4.931116389548693, - "grad_norm": 0.003679076209664345, - "learning_rate": 0.00019998800940053272, - "loss": 46.0, - "step": 30620 - }, - { - "epoch": 4.9312774266274815, - "grad_norm": 0.012358995154500008, - "learning_rate": 0.0001999880086170923, - "loss": 46.0, - "step": 30621 - }, - { - "epoch": 4.931438463706268, - "grad_norm": 0.0016086430987343192, - "learning_rate": 0.0001999880078336263, - "loss": 46.0, - "step": 30622 - }, - { - "epoch": 4.9315995007850555, - "grad_norm": 0.0025007708463817835, - "learning_rate": 0.00019998800705013468, - "loss": 46.0, - "step": 30623 - }, - { - "epoch": 4.931760537863843, - "grad_norm": 0.004497797694057226, - "learning_rate": 0.0001999880062666175, - "loss": 46.0, - "step": 30624 - }, - { - "epoch": 4.93192157494263, - "grad_norm": 0.005750203039497137, - "learning_rate": 0.0001999880054830747, - "loss": 46.0, - "step": 30625 - }, - { - "epoch": 4.932082612021418, - "grad_norm": 0.0030960417352616787, - "learning_rate": 0.0001999880046995063, - "loss": 46.0, - "step": 30626 - }, - { - "epoch": 4.932243649100205, - "grad_norm": 0.0053208451718091965, - "learning_rate": 0.00019998800391591234, - "loss": 46.0, - "step": 30627 - }, - { - "epoch": 4.932404686178993, - "grad_norm": 0.013970322906970978, - "learning_rate": 0.0001999880031322928, - "loss": 46.0, - "step": 30628 - }, - { - "epoch": 4.93256572325778, - "grad_norm": 0.003336176509037614, - "learning_rate": 0.00019998800234864763, - "loss": 46.0, - "step": 30629 - }, - { - "epoch": 4.932726760336568, - "grad_norm": 0.004502678755670786, - "learning_rate": 0.0001999880015649769, - "loss": 46.0, - "step": 30630 - }, - { - "epoch": 4.932887797415355, - "grad_norm": 0.003126248251646757, - "learning_rate": 0.00019998800078128051, - "loss": 46.0, - "step": 30631 - }, - { - "epoch": 4.9330488344941426, - "grad_norm": 0.004403343889862299, - "learning_rate": 0.0001999879999975586, - "loss": 46.0, - "step": 30632 - }, - { - "epoch": 4.93320987157293, - "grad_norm": 0.004321679472923279, - "learning_rate": 0.00019998799921381108, - "loss": 46.0, - "step": 30633 - }, - { - "epoch": 4.9333709086517175, - "grad_norm": 0.007235975936055183, - "learning_rate": 0.00019998799843003794, - "loss": 46.0, - "step": 30634 - }, - { - "epoch": 4.933531945730504, - "grad_norm": 0.0024102397728711367, - "learning_rate": 0.00019998799764623924, - "loss": 46.0, - "step": 30635 - }, - { - "epoch": 4.9336929828092915, - "grad_norm": 0.0024136831052601337, - "learning_rate": 0.00019998799686241493, - "loss": 46.0, - "step": 30636 - }, - { - "epoch": 4.933854019888079, - "grad_norm": 0.003616488305851817, - "learning_rate": 0.00019998799607856506, - "loss": 46.0, - "step": 30637 - }, - { - "epoch": 4.934015056966866, - "grad_norm": 0.0038302731700241566, - "learning_rate": 0.00019998799529468957, - "loss": 46.0, - "step": 30638 - }, - { - "epoch": 4.934176094045654, - "grad_norm": 0.0060085938312113285, - "learning_rate": 0.0001999879945107885, - "loss": 46.0, - "step": 30639 - }, - { - "epoch": 4.934337131124441, - "grad_norm": 0.008247928693890572, - "learning_rate": 0.00019998799372686183, - "loss": 46.0, - "step": 30640 - }, - { - "epoch": 4.934498168203229, - "grad_norm": 0.0027552242390811443, - "learning_rate": 0.00019998799294290958, - "loss": 46.0, - "step": 30641 - }, - { - "epoch": 4.934659205282016, - "grad_norm": 0.013369555585086346, - "learning_rate": 0.00019998799215893175, - "loss": 46.0, - "step": 30642 - }, - { - "epoch": 4.934820242360804, - "grad_norm": 0.020754095166921616, - "learning_rate": 0.0001999879913749283, - "loss": 46.0, - "step": 30643 - }, - { - "epoch": 4.934981279439591, - "grad_norm": 0.006118000019341707, - "learning_rate": 0.00019998799059089923, - "loss": 46.0, - "step": 30644 - }, - { - "epoch": 4.9351423165183785, - "grad_norm": 0.003953027073293924, - "learning_rate": 0.00019998798980684464, - "loss": 46.0, - "step": 30645 - }, - { - "epoch": 4.935303353597166, - "grad_norm": 0.006661976221948862, - "learning_rate": 0.00019998798902276442, - "loss": 46.0, - "step": 30646 - }, - { - "epoch": 4.935464390675953, - "grad_norm": 0.0008577602566219866, - "learning_rate": 0.0001999879882386586, - "loss": 46.0, - "step": 30647 - }, - { - "epoch": 4.935625427754741, - "grad_norm": 0.0018127582734450698, - "learning_rate": 0.00019998798745452718, - "loss": 46.0, - "step": 30648 - }, - { - "epoch": 4.935786464833528, - "grad_norm": 0.0017990750493481755, - "learning_rate": 0.00019998798667037018, - "loss": 46.0, - "step": 30649 - }, - { - "epoch": 4.935947501912315, - "grad_norm": 0.002674547955393791, - "learning_rate": 0.00019998798588618762, - "loss": 46.0, - "step": 30650 - }, - { - "epoch": 4.936108538991102, - "grad_norm": 0.0059504699893295765, - "learning_rate": 0.00019998798510197942, - "loss": 46.0, - "step": 30651 - }, - { - "epoch": 4.93626957606989, - "grad_norm": 0.005228621419519186, - "learning_rate": 0.0001999879843177457, - "loss": 46.0, - "step": 30652 - }, - { - "epoch": 4.936430613148677, - "grad_norm": 0.008515691384673119, - "learning_rate": 0.0001999879835334863, - "loss": 46.0, - "step": 30653 - }, - { - "epoch": 4.936591650227465, - "grad_norm": 0.001437501166947186, - "learning_rate": 0.00019998798274920137, - "loss": 46.0, - "step": 30654 - }, - { - "epoch": 4.936752687306252, - "grad_norm": 0.0013436836889013648, - "learning_rate": 0.0001999879819648908, - "loss": 46.0, - "step": 30655 - }, - { - "epoch": 4.93691372438504, - "grad_norm": 0.033359453082084656, - "learning_rate": 0.00019998798118055469, - "loss": 46.0, - "step": 30656 - }, - { - "epoch": 4.937074761463827, - "grad_norm": 0.008371630683541298, - "learning_rate": 0.00019998798039619296, - "loss": 46.0, - "step": 30657 - }, - { - "epoch": 4.9372357985426145, - "grad_norm": 0.004646202549338341, - "learning_rate": 0.00019998797961180562, - "loss": 46.0, - "step": 30658 - }, - { - "epoch": 4.937396835621402, - "grad_norm": 0.002892756136134267, - "learning_rate": 0.0001999879788273927, - "loss": 46.0, - "step": 30659 - }, - { - "epoch": 4.937557872700189, - "grad_norm": 0.0036805877462029457, - "learning_rate": 0.00019998797804295418, - "loss": 46.0, - "step": 30660 - }, - { - "epoch": 4.937718909778977, - "grad_norm": 0.009498110972344875, - "learning_rate": 0.0001999879772584901, - "loss": 46.0, - "step": 30661 - }, - { - "epoch": 4.937879946857764, - "grad_norm": 0.010308600030839443, - "learning_rate": 0.00019998797647400042, - "loss": 46.0, - "step": 30662 - }, - { - "epoch": 4.938040983936552, - "grad_norm": 0.0038641702849417925, - "learning_rate": 0.00019998797568948512, - "loss": 46.0, - "step": 30663 - }, - { - "epoch": 4.938202021015339, - "grad_norm": 0.0046220156364142895, - "learning_rate": 0.00019998797490494428, - "loss": 46.0, - "step": 30664 - }, - { - "epoch": 4.938363058094126, - "grad_norm": 0.000665553321596235, - "learning_rate": 0.0001999879741203778, - "loss": 46.0, - "step": 30665 - }, - { - "epoch": 4.938524095172913, - "grad_norm": 0.0027863301802426577, - "learning_rate": 0.00019998797333578577, - "loss": 46.0, - "step": 30666 - }, - { - "epoch": 4.938685132251701, - "grad_norm": 0.00626972783356905, - "learning_rate": 0.0001999879725511681, - "loss": 46.0, - "step": 30667 - }, - { - "epoch": 4.938846169330488, - "grad_norm": 0.001908987876959145, - "learning_rate": 0.00019998797176652486, - "loss": 46.0, - "step": 30668 - }, - { - "epoch": 4.939007206409276, - "grad_norm": 0.012596477754414082, - "learning_rate": 0.00019998797098185603, - "loss": 46.0, - "step": 30669 - }, - { - "epoch": 4.939168243488063, - "grad_norm": 0.00546701205894351, - "learning_rate": 0.00019998797019716162, - "loss": 46.0, - "step": 30670 - }, - { - "epoch": 4.9393292805668505, - "grad_norm": 0.010783938691020012, - "learning_rate": 0.00019998796941244162, - "loss": 46.0, - "step": 30671 - }, - { - "epoch": 4.939490317645638, - "grad_norm": 0.0013946013059467077, - "learning_rate": 0.00019998796862769603, - "loss": 46.0, - "step": 30672 - }, - { - "epoch": 4.939651354724425, - "grad_norm": 0.0008063954301178455, - "learning_rate": 0.0001999879678429248, - "loss": 46.0, - "step": 30673 - }, - { - "epoch": 4.939812391803213, - "grad_norm": 0.001811600406654179, - "learning_rate": 0.00019998796705812804, - "loss": 46.0, - "step": 30674 - }, - { - "epoch": 4.939973428882, - "grad_norm": 0.007117320317775011, - "learning_rate": 0.00019998796627330564, - "loss": 46.0, - "step": 30675 - }, - { - "epoch": 4.940134465960788, - "grad_norm": 0.002142674755305052, - "learning_rate": 0.00019998796548845768, - "loss": 46.0, - "step": 30676 - }, - { - "epoch": 4.940295503039575, - "grad_norm": 0.00502261146903038, - "learning_rate": 0.0001999879647035841, - "loss": 46.0, - "step": 30677 - }, - { - "epoch": 4.940456540118362, - "grad_norm": 0.0016893288120627403, - "learning_rate": 0.00019998796391868496, - "loss": 46.0, - "step": 30678 - }, - { - "epoch": 4.94061757719715, - "grad_norm": 0.012884542346000671, - "learning_rate": 0.0001999879631337602, - "loss": 46.0, - "step": 30679 - }, - { - "epoch": 4.940778614275937, - "grad_norm": 0.0037730729673057795, - "learning_rate": 0.00019998796234880985, - "loss": 46.0, - "step": 30680 - }, - { - "epoch": 4.940939651354724, - "grad_norm": 0.009059794247150421, - "learning_rate": 0.00019998796156383395, - "loss": 46.0, - "step": 30681 - }, - { - "epoch": 4.941100688433512, - "grad_norm": 0.005041288211941719, - "learning_rate": 0.0001999879607788324, - "loss": 46.0, - "step": 30682 - }, - { - "epoch": 4.941261725512299, - "grad_norm": 0.0025501365307718515, - "learning_rate": 0.0001999879599938053, - "loss": 46.0, - "step": 30683 - }, - { - "epoch": 4.9414227625910865, - "grad_norm": 0.003085311036556959, - "learning_rate": 0.00019998795920875257, - "loss": 46.0, - "step": 30684 - }, - { - "epoch": 4.941583799669874, - "grad_norm": 0.001115269260481, - "learning_rate": 0.0001999879584236743, - "loss": 46.0, - "step": 30685 - }, - { - "epoch": 4.941744836748661, - "grad_norm": 0.006796635687351227, - "learning_rate": 0.0001999879576385704, - "loss": 46.0, - "step": 30686 - }, - { - "epoch": 4.941905873827449, - "grad_norm": 0.011606014333665371, - "learning_rate": 0.0001999879568534409, - "loss": 46.0, - "step": 30687 - }, - { - "epoch": 4.942066910906236, - "grad_norm": 0.019449952989816666, - "learning_rate": 0.00019998795606828584, - "loss": 46.0, - "step": 30688 - }, - { - "epoch": 4.942227947985024, - "grad_norm": 0.006146786734461784, - "learning_rate": 0.00019998795528310516, - "loss": 46.0, - "step": 30689 - }, - { - "epoch": 4.942388985063811, - "grad_norm": 0.00306873582303524, - "learning_rate": 0.0001999879544978989, - "loss": 46.0, - "step": 30690 - }, - { - "epoch": 4.942550022142599, - "grad_norm": 0.002787427045404911, - "learning_rate": 0.00019998795371266708, - "loss": 46.0, - "step": 30691 - }, - { - "epoch": 4.942711059221386, - "grad_norm": 0.0028707613237202168, - "learning_rate": 0.00019998795292740964, - "loss": 46.0, - "step": 30692 - }, - { - "epoch": 4.942872096300173, - "grad_norm": 0.008118171244859695, - "learning_rate": 0.0001999879521421266, - "loss": 46.0, - "step": 30693 - }, - { - "epoch": 4.943033133378961, - "grad_norm": 0.0020772370044142008, - "learning_rate": 0.00019998795135681797, - "loss": 46.0, - "step": 30694 - }, - { - "epoch": 4.943194170457748, - "grad_norm": 0.006738371681421995, - "learning_rate": 0.00019998795057148377, - "loss": 46.0, - "step": 30695 - }, - { - "epoch": 4.943355207536535, - "grad_norm": 0.003445185488089919, - "learning_rate": 0.00019998794978612393, - "loss": 46.0, - "step": 30696 - }, - { - "epoch": 4.9435162446153225, - "grad_norm": 0.005194577854126692, - "learning_rate": 0.00019998794900073853, - "loss": 46.0, - "step": 30697 - }, - { - "epoch": 4.94367728169411, - "grad_norm": 0.0015515319537371397, - "learning_rate": 0.00019998794821532757, - "loss": 46.0, - "step": 30698 - }, - { - "epoch": 4.943838318772897, - "grad_norm": 0.012259879149496555, - "learning_rate": 0.00019998794742989096, - "loss": 46.0, - "step": 30699 - }, - { - "epoch": 4.943999355851685, - "grad_norm": 0.0016848270315676928, - "learning_rate": 0.0001999879466444288, - "loss": 46.0, - "step": 30700 - }, - { - "epoch": 4.944160392930472, - "grad_norm": 0.0017575763631612062, - "learning_rate": 0.00019998794585894105, - "loss": 46.0, - "step": 30701 - }, - { - "epoch": 4.94432143000926, - "grad_norm": 0.006004973314702511, - "learning_rate": 0.0001999879450734277, - "loss": 46.0, - "step": 30702 - }, - { - "epoch": 4.944482467088047, - "grad_norm": 0.0045472909696400166, - "learning_rate": 0.00019998794428788874, - "loss": 46.0, - "step": 30703 - }, - { - "epoch": 4.944643504166835, - "grad_norm": 0.003451672615483403, - "learning_rate": 0.0001999879435023242, - "loss": 46.0, - "step": 30704 - }, - { - "epoch": 4.944804541245622, - "grad_norm": 0.0039057403337210417, - "learning_rate": 0.00019998794271673407, - "loss": 46.0, - "step": 30705 - }, - { - "epoch": 4.9449655783244095, - "grad_norm": 0.013285440392792225, - "learning_rate": 0.0001999879419311183, - "loss": 46.0, - "step": 30706 - }, - { - "epoch": 4.945126615403197, - "grad_norm": 0.0026773347053676844, - "learning_rate": 0.000199987941145477, - "loss": 46.0, - "step": 30707 - }, - { - "epoch": 4.9452876524819835, - "grad_norm": 0.004525552503764629, - "learning_rate": 0.0001999879403598101, - "loss": 46.0, - "step": 30708 - }, - { - "epoch": 4.945448689560771, - "grad_norm": 0.004753525368869305, - "learning_rate": 0.0001999879395741176, - "loss": 46.0, - "step": 30709 - }, - { - "epoch": 4.9456097266395584, - "grad_norm": 0.0020778074394911528, - "learning_rate": 0.00019998793878839952, - "loss": 46.0, - "step": 30710 - }, - { - "epoch": 4.945770763718346, - "grad_norm": 0.005345955025404692, - "learning_rate": 0.00019998793800265582, - "loss": 46.0, - "step": 30711 - }, - { - "epoch": 4.945931800797133, - "grad_norm": 0.007204418536275625, - "learning_rate": 0.00019998793721688656, - "loss": 46.0, - "step": 30712 - }, - { - "epoch": 4.946092837875921, - "grad_norm": 0.01259730663150549, - "learning_rate": 0.00019998793643109168, - "loss": 46.0, - "step": 30713 - }, - { - "epoch": 4.946253874954708, - "grad_norm": 0.003062712959945202, - "learning_rate": 0.00019998793564527122, - "loss": 46.0, - "step": 30714 - }, - { - "epoch": 4.946414912033496, - "grad_norm": 0.006511330604553223, - "learning_rate": 0.00019998793485942516, - "loss": 46.0, - "step": 30715 - }, - { - "epoch": 4.946575949112283, - "grad_norm": 0.002218864159658551, - "learning_rate": 0.00019998793407355355, - "loss": 46.0, - "step": 30716 - }, - { - "epoch": 4.946736986191071, - "grad_norm": 0.0012778856325894594, - "learning_rate": 0.00019998793328765627, - "loss": 46.0, - "step": 30717 - }, - { - "epoch": 4.946898023269858, - "grad_norm": 0.0029207635670900345, - "learning_rate": 0.00019998793250173346, - "loss": 46.0, - "step": 30718 - }, - { - "epoch": 4.9470590603486455, - "grad_norm": 0.006804576143622398, - "learning_rate": 0.00019998793171578506, - "loss": 46.0, - "step": 30719 - }, - { - "epoch": 4.947220097427433, - "grad_norm": 0.004211138002574444, - "learning_rate": 0.00019998793092981102, - "loss": 46.0, - "step": 30720 - }, - { - "epoch": 4.94738113450622, - "grad_norm": 0.007896900177001953, - "learning_rate": 0.00019998793014381144, - "loss": 46.0, - "step": 30721 - }, - { - "epoch": 4.947542171585008, - "grad_norm": 0.004960453137755394, - "learning_rate": 0.00019998792935778625, - "loss": 46.0, - "step": 30722 - }, - { - "epoch": 4.947703208663794, - "grad_norm": 0.012006685137748718, - "learning_rate": 0.00019998792857173545, - "loss": 46.0, - "step": 30723 - }, - { - "epoch": 4.947864245742582, - "grad_norm": 0.005531883332878351, - "learning_rate": 0.00019998792778565906, - "loss": 46.0, - "step": 30724 - }, - { - "epoch": 4.948025282821369, - "grad_norm": 0.007913201116025448, - "learning_rate": 0.0001999879269995571, - "loss": 46.0, - "step": 30725 - }, - { - "epoch": 4.948186319900157, - "grad_norm": 0.0027838731184601784, - "learning_rate": 0.00019998792621342954, - "loss": 46.0, - "step": 30726 - }, - { - "epoch": 4.948347356978944, - "grad_norm": 0.00880567915737629, - "learning_rate": 0.0001999879254272764, - "loss": 46.0, - "step": 30727 - }, - { - "epoch": 4.948508394057732, - "grad_norm": 0.010923749767243862, - "learning_rate": 0.00019998792464109765, - "loss": 46.0, - "step": 30728 - }, - { - "epoch": 4.948669431136519, - "grad_norm": 0.00965795386582613, - "learning_rate": 0.00019998792385489332, - "loss": 46.0, - "step": 30729 - }, - { - "epoch": 4.948830468215307, - "grad_norm": 0.005377555266022682, - "learning_rate": 0.00019998792306866338, - "loss": 46.0, - "step": 30730 - }, - { - "epoch": 4.948991505294094, - "grad_norm": 0.002625696361064911, - "learning_rate": 0.00019998792228240786, - "loss": 46.0, - "step": 30731 - }, - { - "epoch": 4.9491525423728815, - "grad_norm": 0.0037672684993594885, - "learning_rate": 0.00019998792149612674, - "loss": 46.0, - "step": 30732 - }, - { - "epoch": 4.949313579451669, - "grad_norm": 0.01357579231262207, - "learning_rate": 0.00019998792070982004, - "loss": 46.0, - "step": 30733 - }, - { - "epoch": 4.949474616530456, - "grad_norm": 0.0016936531756073236, - "learning_rate": 0.00019998791992348775, - "loss": 46.0, - "step": 30734 - }, - { - "epoch": 4.949635653609244, - "grad_norm": 0.00652466993778944, - "learning_rate": 0.00019998791913712987, - "loss": 46.0, - "step": 30735 - }, - { - "epoch": 4.949796690688031, - "grad_norm": 0.0016508083790540695, - "learning_rate": 0.00019998791835074638, - "loss": 46.0, - "step": 30736 - }, - { - "epoch": 4.949957727766819, - "grad_norm": 0.014912517741322517, - "learning_rate": 0.00019998791756433733, - "loss": 46.0, - "step": 30737 - }, - { - "epoch": 4.950118764845605, - "grad_norm": 0.0018059872090816498, - "learning_rate": 0.00019998791677790264, - "loss": 46.0, - "step": 30738 - }, - { - "epoch": 4.950279801924393, - "grad_norm": 0.0017447017598897219, - "learning_rate": 0.00019998791599144239, - "loss": 46.0, - "step": 30739 - }, - { - "epoch": 4.95044083900318, - "grad_norm": 0.0019586447160691023, - "learning_rate": 0.00019998791520495655, - "loss": 46.0, - "step": 30740 - }, - { - "epoch": 4.950601876081968, - "grad_norm": 0.002781867515295744, - "learning_rate": 0.00019998791441844512, - "loss": 46.0, - "step": 30741 - }, - { - "epoch": 4.950762913160755, - "grad_norm": 0.004331487230956554, - "learning_rate": 0.0001999879136319081, - "loss": 46.0, - "step": 30742 - }, - { - "epoch": 4.950923950239543, - "grad_norm": 0.006971669849008322, - "learning_rate": 0.00019998791284534545, - "loss": 46.0, - "step": 30743 - }, - { - "epoch": 4.95108498731833, - "grad_norm": 0.007546829991042614, - "learning_rate": 0.00019998791205875724, - "loss": 46.0, - "step": 30744 - }, - { - "epoch": 4.9512460243971175, - "grad_norm": 0.00620879465714097, - "learning_rate": 0.00019998791127214346, - "loss": 46.0, - "step": 30745 - }, - { - "epoch": 4.951407061475905, - "grad_norm": 0.0024855416268110275, - "learning_rate": 0.00019998791048550404, - "loss": 46.0, - "step": 30746 - }, - { - "epoch": 4.951568098554692, - "grad_norm": 0.0022948093246668577, - "learning_rate": 0.00019998790969883907, - "loss": 46.0, - "step": 30747 - }, - { - "epoch": 4.95172913563348, - "grad_norm": 0.02409781888127327, - "learning_rate": 0.0001999879089121485, - "loss": 46.0, - "step": 30748 - }, - { - "epoch": 4.951890172712267, - "grad_norm": 0.0031440521124750376, - "learning_rate": 0.00019998790812543232, - "loss": 46.0, - "step": 30749 - }, - { - "epoch": 4.952051209791055, - "grad_norm": 0.004547193646430969, - "learning_rate": 0.00019998790733869056, - "loss": 46.0, - "step": 30750 - }, - { - "epoch": 4.952212246869841, - "grad_norm": 0.011125778779387474, - "learning_rate": 0.00019998790655192318, - "loss": 46.0, - "step": 30751 - }, - { - "epoch": 4.95237328394863, - "grad_norm": 0.002236815867945552, - "learning_rate": 0.00019998790576513024, - "loss": 46.0, - "step": 30752 - }, - { - "epoch": 4.952534321027416, - "grad_norm": 0.0036717483308166265, - "learning_rate": 0.0001999879049783117, - "loss": 46.0, - "step": 30753 - }, - { - "epoch": 4.952695358106204, - "grad_norm": 0.00991331972181797, - "learning_rate": 0.00019998790419146757, - "loss": 46.0, - "step": 30754 - }, - { - "epoch": 4.952856395184991, - "grad_norm": 0.0018810078036040068, - "learning_rate": 0.00019998790340459784, - "loss": 46.0, - "step": 30755 - }, - { - "epoch": 4.9530174322637786, - "grad_norm": 0.002276583109050989, - "learning_rate": 0.00019998790261770253, - "loss": 46.0, - "step": 30756 - }, - { - "epoch": 4.953178469342566, - "grad_norm": 0.005891216918826103, - "learning_rate": 0.00019998790183078162, - "loss": 46.0, - "step": 30757 - }, - { - "epoch": 4.9533395064213535, - "grad_norm": 0.001798367011360824, - "learning_rate": 0.0001999879010438351, - "loss": 46.0, - "step": 30758 - }, - { - "epoch": 4.953500543500141, - "grad_norm": 0.0049967155791819096, - "learning_rate": 0.00019998790025686303, - "loss": 46.0, - "step": 30759 - }, - { - "epoch": 4.953661580578928, - "grad_norm": 0.00212867627851665, - "learning_rate": 0.00019998789946986536, - "loss": 46.0, - "step": 30760 - }, - { - "epoch": 4.953822617657716, - "grad_norm": 0.005703443195670843, - "learning_rate": 0.00019998789868284206, - "loss": 46.0, - "step": 30761 - }, - { - "epoch": 4.953983654736503, - "grad_norm": 0.0011965708108618855, - "learning_rate": 0.00019998789789579322, - "loss": 46.0, - "step": 30762 - }, - { - "epoch": 4.954144691815291, - "grad_norm": 0.0067934091202914715, - "learning_rate": 0.00019998789710871874, - "loss": 46.0, - "step": 30763 - }, - { - "epoch": 4.954305728894078, - "grad_norm": 0.015066059306263924, - "learning_rate": 0.00019998789632161873, - "loss": 46.0, - "step": 30764 - }, - { - "epoch": 4.954466765972866, - "grad_norm": 0.005104829557240009, - "learning_rate": 0.00019998789553449307, - "loss": 46.0, - "step": 30765 - }, - { - "epoch": 4.954627803051652, - "grad_norm": 0.004770467057824135, - "learning_rate": 0.00019998789474734183, - "loss": 46.0, - "step": 30766 - }, - { - "epoch": 4.9547888401304405, - "grad_norm": 0.0018297730712220073, - "learning_rate": 0.00019998789396016503, - "loss": 46.0, - "step": 30767 - }, - { - "epoch": 4.954949877209227, - "grad_norm": 0.0027819073293358088, - "learning_rate": 0.00019998789317296261, - "loss": 46.0, - "step": 30768 - }, - { - "epoch": 4.9551109142880145, - "grad_norm": 0.013442805968225002, - "learning_rate": 0.00019998789238573458, - "loss": 46.0, - "step": 30769 - }, - { - "epoch": 4.955271951366802, - "grad_norm": 0.018889401108026505, - "learning_rate": 0.000199987891598481, - "loss": 46.0, - "step": 30770 - }, - { - "epoch": 4.955432988445589, - "grad_norm": 0.010513770394027233, - "learning_rate": 0.0001999878908112018, - "loss": 46.0, - "step": 30771 - }, - { - "epoch": 4.955594025524377, - "grad_norm": 0.0029280823655426502, - "learning_rate": 0.00019998789002389702, - "loss": 46.0, - "step": 30772 - }, - { - "epoch": 4.955755062603164, - "grad_norm": 0.005389626137912273, - "learning_rate": 0.00019998788923656665, - "loss": 46.0, - "step": 30773 - }, - { - "epoch": 4.955916099681952, - "grad_norm": 0.00129792932420969, - "learning_rate": 0.00019998788844921068, - "loss": 46.0, - "step": 30774 - }, - { - "epoch": 4.956077136760739, - "grad_norm": 0.00236967159435153, - "learning_rate": 0.00019998788766182913, - "loss": 46.0, - "step": 30775 - }, - { - "epoch": 4.956238173839527, - "grad_norm": 0.0024803578853607178, - "learning_rate": 0.00019998788687442196, - "loss": 46.0, - "step": 30776 - }, - { - "epoch": 4.956399210918314, - "grad_norm": 0.002935096388682723, - "learning_rate": 0.0001999878860869892, - "loss": 46.0, - "step": 30777 - }, - { - "epoch": 4.956560247997102, - "grad_norm": 0.005948344245553017, - "learning_rate": 0.0001999878852995309, - "loss": 46.0, - "step": 30778 - }, - { - "epoch": 4.956721285075889, - "grad_norm": 0.012712572701275349, - "learning_rate": 0.000199987884512047, - "loss": 46.0, - "step": 30779 - }, - { - "epoch": 4.9568823221546765, - "grad_norm": 0.00227820104919374, - "learning_rate": 0.00019998788372453744, - "loss": 46.0, - "step": 30780 - }, - { - "epoch": 4.957043359233463, - "grad_norm": 0.004744593519717455, - "learning_rate": 0.00019998788293700234, - "loss": 46.0, - "step": 30781 - }, - { - "epoch": 4.9572043963122505, - "grad_norm": 0.003750715870410204, - "learning_rate": 0.00019998788214944165, - "loss": 46.0, - "step": 30782 - }, - { - "epoch": 4.957365433391038, - "grad_norm": 0.0069166370667517185, - "learning_rate": 0.00019998788136185535, - "loss": 46.0, - "step": 30783 - }, - { - "epoch": 4.957526470469825, - "grad_norm": 0.002376657212153077, - "learning_rate": 0.00019998788057424348, - "loss": 46.0, - "step": 30784 - }, - { - "epoch": 4.957687507548613, - "grad_norm": 0.006184377241879702, - "learning_rate": 0.00019998787978660598, - "loss": 46.0, - "step": 30785 - }, - { - "epoch": 4.9578485446274, - "grad_norm": 0.0018443232402205467, - "learning_rate": 0.00019998787899894294, - "loss": 46.0, - "step": 30786 - }, - { - "epoch": 4.958009581706188, - "grad_norm": 0.02072012796998024, - "learning_rate": 0.00019998787821125426, - "loss": 46.0, - "step": 30787 - }, - { - "epoch": 4.958170618784975, - "grad_norm": 0.002755719004198909, - "learning_rate": 0.00019998787742354001, - "loss": 46.0, - "step": 30788 - }, - { - "epoch": 4.958331655863763, - "grad_norm": 0.004709905479103327, - "learning_rate": 0.0001999878766358002, - "loss": 46.0, - "step": 30789 - }, - { - "epoch": 4.95849269294255, - "grad_norm": 0.004154428839683533, - "learning_rate": 0.00019998787584803474, - "loss": 46.0, - "step": 30790 - }, - { - "epoch": 4.958653730021338, - "grad_norm": 0.001521439990028739, - "learning_rate": 0.0001999878750602437, - "loss": 46.0, - "step": 30791 - }, - { - "epoch": 4.958814767100125, - "grad_norm": 0.005281943362206221, - "learning_rate": 0.0001999878742724271, - "loss": 46.0, - "step": 30792 - }, - { - "epoch": 4.9589758041789125, - "grad_norm": 0.00515834242105484, - "learning_rate": 0.0001999878734845849, - "loss": 46.0, - "step": 30793 - }, - { - "epoch": 4.9591368412577, - "grad_norm": 0.0026498320512473583, - "learning_rate": 0.00019998787269671708, - "loss": 46.0, - "step": 30794 - }, - { - "epoch": 4.959297878336487, - "grad_norm": 0.0018939678557217121, - "learning_rate": 0.0001999878719088237, - "loss": 46.0, - "step": 30795 - }, - { - "epoch": 4.959458915415274, - "grad_norm": 0.006592052523046732, - "learning_rate": 0.0001999878711209047, - "loss": 46.0, - "step": 30796 - }, - { - "epoch": 4.959619952494061, - "grad_norm": 0.0020348874386399984, - "learning_rate": 0.0001999878703329601, - "loss": 46.0, - "step": 30797 - }, - { - "epoch": 4.959780989572849, - "grad_norm": 0.008914710022509098, - "learning_rate": 0.00019998786954498996, - "loss": 46.0, - "step": 30798 - }, - { - "epoch": 4.959942026651636, - "grad_norm": 0.0071667516604065895, - "learning_rate": 0.00019998786875699418, - "loss": 46.0, - "step": 30799 - }, - { - "epoch": 4.960103063730424, - "grad_norm": 0.006221315823495388, - "learning_rate": 0.00019998786796897284, - "loss": 46.0, - "step": 30800 - }, - { - "epoch": 4.960264100809211, - "grad_norm": 0.004883614834398031, - "learning_rate": 0.0001999878671809259, - "loss": 46.0, - "step": 30801 - }, - { - "epoch": 4.960425137887999, - "grad_norm": 0.008304298855364323, - "learning_rate": 0.00019998786639285337, - "loss": 46.0, - "step": 30802 - }, - { - "epoch": 4.960586174966786, - "grad_norm": 0.009872698225080967, - "learning_rate": 0.0001999878656047552, - "loss": 46.0, - "step": 30803 - }, - { - "epoch": 4.960747212045574, - "grad_norm": 0.0013597247889265418, - "learning_rate": 0.00019998786481663152, - "loss": 46.0, - "step": 30804 - }, - { - "epoch": 4.960908249124361, - "grad_norm": 0.0041055562905967236, - "learning_rate": 0.0001999878640284822, - "loss": 46.0, - "step": 30805 - }, - { - "epoch": 4.9610692862031485, - "grad_norm": 0.000781733135227114, - "learning_rate": 0.0001999878632403073, - "loss": 46.0, - "step": 30806 - }, - { - "epoch": 4.961230323281936, - "grad_norm": 0.010199559852480888, - "learning_rate": 0.00019998786245210682, - "loss": 46.0, - "step": 30807 - }, - { - "epoch": 4.961391360360723, - "grad_norm": 0.013601471669971943, - "learning_rate": 0.0001999878616638807, - "loss": 46.0, - "step": 30808 - }, - { - "epoch": 4.961552397439511, - "grad_norm": 0.005652326624840498, - "learning_rate": 0.00019998786087562905, - "loss": 46.0, - "step": 30809 - }, - { - "epoch": 4.961713434518298, - "grad_norm": 0.0021816270891577005, - "learning_rate": 0.00019998786008735178, - "loss": 46.0, - "step": 30810 - }, - { - "epoch": 4.961874471597085, - "grad_norm": 0.0051984842866659164, - "learning_rate": 0.0001999878592990489, - "loss": 46.0, - "step": 30811 - }, - { - "epoch": 4.962035508675872, - "grad_norm": 0.0021691059228032827, - "learning_rate": 0.00019998785851072046, - "loss": 46.0, - "step": 30812 - }, - { - "epoch": 4.96219654575466, - "grad_norm": 0.0024269712157547474, - "learning_rate": 0.0001999878577223664, - "loss": 46.0, - "step": 30813 - }, - { - "epoch": 4.962357582833447, - "grad_norm": 0.006437079980969429, - "learning_rate": 0.00019998785693398676, - "loss": 46.0, - "step": 30814 - }, - { - "epoch": 4.962518619912235, - "grad_norm": 0.0037849147338420153, - "learning_rate": 0.00019998785614558156, - "loss": 46.0, - "step": 30815 - }, - { - "epoch": 4.962679656991022, - "grad_norm": 0.006798509042710066, - "learning_rate": 0.00019998785535715072, - "loss": 46.0, - "step": 30816 - }, - { - "epoch": 4.9628406940698095, - "grad_norm": 0.004250046797096729, - "learning_rate": 0.0001999878545686943, - "loss": 46.0, - "step": 30817 - }, - { - "epoch": 4.963001731148597, - "grad_norm": 0.0032048027496784925, - "learning_rate": 0.00019998785378021232, - "loss": 46.0, - "step": 30818 - }, - { - "epoch": 4.9631627682273844, - "grad_norm": 0.008781819604337215, - "learning_rate": 0.00019998785299170472, - "loss": 46.0, - "step": 30819 - }, - { - "epoch": 4.963323805306172, - "grad_norm": 0.004050773102790117, - "learning_rate": 0.00019998785220317152, - "loss": 46.0, - "step": 30820 - }, - { - "epoch": 4.963484842384959, - "grad_norm": 0.00239588413387537, - "learning_rate": 0.00019998785141461274, - "loss": 46.0, - "step": 30821 - }, - { - "epoch": 4.963645879463747, - "grad_norm": 0.008822372183203697, - "learning_rate": 0.00019998785062602838, - "loss": 46.0, - "step": 30822 - }, - { - "epoch": 4.963806916542534, - "grad_norm": 0.003714524209499359, - "learning_rate": 0.0001999878498374184, - "loss": 46.0, - "step": 30823 - }, - { - "epoch": 4.963967953621321, - "grad_norm": 0.02254457026720047, - "learning_rate": 0.00019998784904878285, - "loss": 46.0, - "step": 30824 - }, - { - "epoch": 4.964128990700109, - "grad_norm": 0.001230532769113779, - "learning_rate": 0.00019998784826012173, - "loss": 46.0, - "step": 30825 - }, - { - "epoch": 4.964290027778896, - "grad_norm": 0.006136293988674879, - "learning_rate": 0.00019998784747143498, - "loss": 46.0, - "step": 30826 - }, - { - "epoch": 4.964451064857683, - "grad_norm": 0.005339915864169598, - "learning_rate": 0.00019998784668272265, - "loss": 46.0, - "step": 30827 - }, - { - "epoch": 4.964612101936471, - "grad_norm": 0.013644712045788765, - "learning_rate": 0.00019998784589398474, - "loss": 46.0, - "step": 30828 - }, - { - "epoch": 4.964773139015258, - "grad_norm": 0.008860011585056782, - "learning_rate": 0.0001999878451052212, - "loss": 46.0, - "step": 30829 - }, - { - "epoch": 4.9649341760940455, - "grad_norm": 0.002051092917099595, - "learning_rate": 0.0001999878443164321, - "loss": 46.0, - "step": 30830 - }, - { - "epoch": 4.965095213172833, - "grad_norm": 0.013038805685937405, - "learning_rate": 0.0001999878435276174, - "loss": 46.0, - "step": 30831 - }, - { - "epoch": 4.96525625025162, - "grad_norm": 0.004594277124851942, - "learning_rate": 0.00019998784273877714, - "loss": 46.0, - "step": 30832 - }, - { - "epoch": 4.965417287330408, - "grad_norm": 0.00135852734092623, - "learning_rate": 0.00019998784194991123, - "loss": 46.0, - "step": 30833 - }, - { - "epoch": 4.965578324409195, - "grad_norm": 0.003066158387809992, - "learning_rate": 0.00019998784116101977, - "loss": 46.0, - "step": 30834 - }, - { - "epoch": 4.965739361487983, - "grad_norm": 0.010096262209117413, - "learning_rate": 0.0001999878403721027, - "loss": 46.0, - "step": 30835 - }, - { - "epoch": 4.96590039856677, - "grad_norm": 0.0010591770987957716, - "learning_rate": 0.00019998783958316004, - "loss": 46.0, - "step": 30836 - }, - { - "epoch": 4.966061435645558, - "grad_norm": 0.008141896687448025, - "learning_rate": 0.00019998783879419181, - "loss": 46.0, - "step": 30837 - }, - { - "epoch": 4.966222472724345, - "grad_norm": 0.002683746861293912, - "learning_rate": 0.00019998783800519797, - "loss": 46.0, - "step": 30838 - }, - { - "epoch": 4.966383509803132, - "grad_norm": 0.005121730268001556, - "learning_rate": 0.00019998783721617854, - "loss": 46.0, - "step": 30839 - }, - { - "epoch": 4.96654454688192, - "grad_norm": 0.0031825529877096415, - "learning_rate": 0.00019998783642713352, - "loss": 46.0, - "step": 30840 - }, - { - "epoch": 4.966705583960707, - "grad_norm": 0.002786989789456129, - "learning_rate": 0.0001999878356380629, - "loss": 46.0, - "step": 30841 - }, - { - "epoch": 4.966866621039494, - "grad_norm": 0.010649020783603191, - "learning_rate": 0.00019998783484896667, - "loss": 46.0, - "step": 30842 - }, - { - "epoch": 4.9670276581182815, - "grad_norm": 0.004177742172032595, - "learning_rate": 0.0001999878340598449, - "loss": 46.0, - "step": 30843 - }, - { - "epoch": 4.967188695197069, - "grad_norm": 0.007972175255417824, - "learning_rate": 0.0001999878332706975, - "loss": 46.0, - "step": 30844 - }, - { - "epoch": 4.967349732275856, - "grad_norm": 0.002459653653204441, - "learning_rate": 0.00019998783248152452, - "loss": 46.0, - "step": 30845 - }, - { - "epoch": 4.967510769354644, - "grad_norm": 0.0047114272601902485, - "learning_rate": 0.00019998783169232595, - "loss": 46.0, - "step": 30846 - }, - { - "epoch": 4.967671806433431, - "grad_norm": 0.002555410610511899, - "learning_rate": 0.00019998783090310177, - "loss": 46.0, - "step": 30847 - }, - { - "epoch": 4.967832843512219, - "grad_norm": 0.0028972234576940536, - "learning_rate": 0.00019998783011385203, - "loss": 46.0, - "step": 30848 - }, - { - "epoch": 4.967993880591006, - "grad_norm": 0.01440118532627821, - "learning_rate": 0.00019998782932457667, - "loss": 46.0, - "step": 30849 - }, - { - "epoch": 4.968154917669794, - "grad_norm": 0.0037989113479852676, - "learning_rate": 0.00019998782853527573, - "loss": 46.0, - "step": 30850 - }, - { - "epoch": 4.968315954748581, - "grad_norm": 0.0014283166965469718, - "learning_rate": 0.0001999878277459492, - "loss": 46.0, - "step": 30851 - }, - { - "epoch": 4.968476991827369, - "grad_norm": 0.011698959395289421, - "learning_rate": 0.00019998782695659708, - "loss": 46.0, - "step": 30852 - }, - { - "epoch": 4.968638028906156, - "grad_norm": 0.0020634287502616644, - "learning_rate": 0.00019998782616721935, - "loss": 46.0, - "step": 30853 - }, - { - "epoch": 4.968799065984943, - "grad_norm": 0.015871427953243256, - "learning_rate": 0.00019998782537781606, - "loss": 46.0, - "step": 30854 - }, - { - "epoch": 4.96896010306373, - "grad_norm": 0.008170481771230698, - "learning_rate": 0.00019998782458838715, - "loss": 46.0, - "step": 30855 - }, - { - "epoch": 4.9691211401425175, - "grad_norm": 0.005524741485714912, - "learning_rate": 0.00019998782379893266, - "loss": 46.0, - "step": 30856 - }, - { - "epoch": 4.969282177221305, - "grad_norm": 0.001704831374809146, - "learning_rate": 0.00019998782300945258, - "loss": 46.0, - "step": 30857 - }, - { - "epoch": 4.969443214300092, - "grad_norm": 0.0036982260644435883, - "learning_rate": 0.0001999878222199469, - "loss": 46.0, - "step": 30858 - }, - { - "epoch": 4.96960425137888, - "grad_norm": 0.0019745659083127975, - "learning_rate": 0.00019998782143041565, - "loss": 46.0, - "step": 30859 - }, - { - "epoch": 4.969765288457667, - "grad_norm": 0.002938745077699423, - "learning_rate": 0.00019998782064085878, - "loss": 46.0, - "step": 30860 - }, - { - "epoch": 4.969926325536455, - "grad_norm": 0.008062013424932957, - "learning_rate": 0.00019998781985127635, - "loss": 46.0, - "step": 30861 - }, - { - "epoch": 4.970087362615242, - "grad_norm": 0.006624516565352678, - "learning_rate": 0.00019998781906166828, - "loss": 46.0, - "step": 30862 - }, - { - "epoch": 4.97024839969403, - "grad_norm": 0.0031087002716958523, - "learning_rate": 0.00019998781827203468, - "loss": 46.0, - "step": 30863 - }, - { - "epoch": 4.970409436772817, - "grad_norm": 0.005210024770349264, - "learning_rate": 0.00019998781748237546, - "loss": 46.0, - "step": 30864 - }, - { - "epoch": 4.9705704738516046, - "grad_norm": 0.0054557411931455135, - "learning_rate": 0.00019998781669269063, - "loss": 46.0, - "step": 30865 - }, - { - "epoch": 4.970731510930392, - "grad_norm": 0.005232595838606358, - "learning_rate": 0.0001999878159029802, - "loss": 46.0, - "step": 30866 - }, - { - "epoch": 4.9708925480091795, - "grad_norm": 0.00858256034553051, - "learning_rate": 0.0001999878151132442, - "loss": 46.0, - "step": 30867 - }, - { - "epoch": 4.971053585087967, - "grad_norm": 0.01136784814298153, - "learning_rate": 0.0001999878143234826, - "loss": 46.0, - "step": 30868 - }, - { - "epoch": 4.9712146221667535, - "grad_norm": 0.009713488630950451, - "learning_rate": 0.00019998781353369546, - "loss": 46.0, - "step": 30869 - }, - { - "epoch": 4.971375659245541, - "grad_norm": 0.0037244162522256374, - "learning_rate": 0.00019998781274388266, - "loss": 46.0, - "step": 30870 - }, - { - "epoch": 4.971536696324328, - "grad_norm": 0.010117919184267521, - "learning_rate": 0.00019998781195404428, - "loss": 46.0, - "step": 30871 - }, - { - "epoch": 4.971697733403116, - "grad_norm": 0.0011379426578059793, - "learning_rate": 0.00019998781116418034, - "loss": 46.0, - "step": 30872 - }, - { - "epoch": 4.971858770481903, - "grad_norm": 0.007301435340195894, - "learning_rate": 0.0001999878103742908, - "loss": 46.0, - "step": 30873 - }, - { - "epoch": 4.972019807560691, - "grad_norm": 0.026161599904298782, - "learning_rate": 0.00019998780958437563, - "loss": 46.0, - "step": 30874 - }, - { - "epoch": 4.972180844639478, - "grad_norm": 0.006837194785475731, - "learning_rate": 0.0001999878087944349, - "loss": 46.0, - "step": 30875 - }, - { - "epoch": 4.972341881718266, - "grad_norm": 0.002399842720478773, - "learning_rate": 0.00019998780800446858, - "loss": 46.0, - "step": 30876 - }, - { - "epoch": 4.972502918797053, - "grad_norm": 0.00424237409606576, - "learning_rate": 0.00019998780721447665, - "loss": 46.0, - "step": 30877 - }, - { - "epoch": 4.9726639558758405, - "grad_norm": 0.004987417720258236, - "learning_rate": 0.00019998780642445916, - "loss": 46.0, - "step": 30878 - }, - { - "epoch": 4.972824992954628, - "grad_norm": 0.021086450666189194, - "learning_rate": 0.00019998780563441605, - "loss": 46.0, - "step": 30879 - }, - { - "epoch": 4.972986030033415, - "grad_norm": 0.0005767957773059607, - "learning_rate": 0.00019998780484434736, - "loss": 46.0, - "step": 30880 - }, - { - "epoch": 4.973147067112203, - "grad_norm": 0.005905651021748781, - "learning_rate": 0.00019998780405425308, - "loss": 46.0, - "step": 30881 - }, - { - "epoch": 4.97330810419099, - "grad_norm": 0.006714705843478441, - "learning_rate": 0.0001999878032641332, - "loss": 46.0, - "step": 30882 - }, - { - "epoch": 4.973469141269778, - "grad_norm": 0.006292471196502447, - "learning_rate": 0.00019998780247398773, - "loss": 46.0, - "step": 30883 - }, - { - "epoch": 4.973630178348564, - "grad_norm": 0.005078427493572235, - "learning_rate": 0.00019998780168381668, - "loss": 46.0, - "step": 30884 - }, - { - "epoch": 4.973791215427352, - "grad_norm": 0.0019055759767070413, - "learning_rate": 0.00019998780089362003, - "loss": 46.0, - "step": 30885 - }, - { - "epoch": 4.973952252506139, - "grad_norm": 0.006134196650236845, - "learning_rate": 0.00019998780010339778, - "loss": 46.0, - "step": 30886 - }, - { - "epoch": 4.974113289584927, - "grad_norm": 0.0013564007822424173, - "learning_rate": 0.00019998779931314995, - "loss": 46.0, - "step": 30887 - }, - { - "epoch": 4.974274326663714, - "grad_norm": 0.007288997061550617, - "learning_rate": 0.0001999877985228765, - "loss": 46.0, - "step": 30888 - }, - { - "epoch": 4.974435363742502, - "grad_norm": 0.016209525987505913, - "learning_rate": 0.0001999877977325775, - "loss": 46.0, - "step": 30889 - }, - { - "epoch": 4.974596400821289, - "grad_norm": 0.003899284638464451, - "learning_rate": 0.00019998779694225288, - "loss": 46.0, - "step": 30890 - }, - { - "epoch": 4.9747574379000765, - "grad_norm": 0.010110035538673401, - "learning_rate": 0.00019998779615190267, - "loss": 46.0, - "step": 30891 - }, - { - "epoch": 4.974918474978864, - "grad_norm": 0.009592756628990173, - "learning_rate": 0.0001999877953615269, - "loss": 46.0, - "step": 30892 - }, - { - "epoch": 4.975079512057651, - "grad_norm": 0.00339070288464427, - "learning_rate": 0.0001999877945711255, - "loss": 46.0, - "step": 30893 - }, - { - "epoch": 4.975240549136439, - "grad_norm": 0.005003774538636208, - "learning_rate": 0.00019998779378069853, - "loss": 46.0, - "step": 30894 - }, - { - "epoch": 4.975401586215226, - "grad_norm": 0.005525850225239992, - "learning_rate": 0.00019998779299024597, - "loss": 46.0, - "step": 30895 - }, - { - "epoch": 4.975562623294014, - "grad_norm": 0.00617035198956728, - "learning_rate": 0.0001999877921997678, - "loss": 46.0, - "step": 30896 - }, - { - "epoch": 4.9757236603728, - "grad_norm": 0.00976397842168808, - "learning_rate": 0.00019998779140926405, - "loss": 46.0, - "step": 30897 - }, - { - "epoch": 4.975884697451589, - "grad_norm": 0.0016574239125475287, - "learning_rate": 0.00019998779061873473, - "loss": 46.0, - "step": 30898 - }, - { - "epoch": 4.976045734530375, - "grad_norm": 0.008125197142362595, - "learning_rate": 0.00019998778982817977, - "loss": 46.0, - "step": 30899 - }, - { - "epoch": 4.976206771609163, - "grad_norm": 0.005675680469721556, - "learning_rate": 0.00019998778903759922, - "loss": 46.0, - "step": 30900 - }, - { - "epoch": 4.97636780868795, - "grad_norm": 0.005658462177962065, - "learning_rate": 0.00019998778824699312, - "loss": 46.0, - "step": 30901 - }, - { - "epoch": 4.976528845766738, - "grad_norm": 0.001684830873273313, - "learning_rate": 0.00019998778745636142, - "loss": 46.0, - "step": 30902 - }, - { - "epoch": 4.976689882845525, - "grad_norm": 0.003426732262596488, - "learning_rate": 0.0001999877866657041, - "loss": 46.0, - "step": 30903 - }, - { - "epoch": 4.9768509199243125, - "grad_norm": 0.007448168471455574, - "learning_rate": 0.0001999877858750212, - "loss": 46.0, - "step": 30904 - }, - { - "epoch": 4.9770119570031, - "grad_norm": 0.003538034623488784, - "learning_rate": 0.00019998778508431274, - "loss": 46.0, - "step": 30905 - }, - { - "epoch": 4.977172994081887, - "grad_norm": 0.03049345500767231, - "learning_rate": 0.00019998778429357864, - "loss": 46.0, - "step": 30906 - }, - { - "epoch": 4.977334031160675, - "grad_norm": 0.004986871499568224, - "learning_rate": 0.000199987783502819, - "loss": 46.0, - "step": 30907 - }, - { - "epoch": 4.977495068239462, - "grad_norm": 0.0028877519071102142, - "learning_rate": 0.00019998778271203374, - "loss": 46.0, - "step": 30908 - }, - { - "epoch": 4.97765610531825, - "grad_norm": 0.01014393474906683, - "learning_rate": 0.00019998778192122286, - "loss": 46.0, - "step": 30909 - }, - { - "epoch": 4.977817142397037, - "grad_norm": 0.00386176654137671, - "learning_rate": 0.00019998778113038644, - "loss": 46.0, - "step": 30910 - }, - { - "epoch": 4.977978179475825, - "grad_norm": 0.0066736014559865, - "learning_rate": 0.00019998778033952438, - "loss": 46.0, - "step": 30911 - }, - { - "epoch": 4.978139216554611, - "grad_norm": 0.016126932576298714, - "learning_rate": 0.00019998777954863676, - "loss": 46.0, - "step": 30912 - }, - { - "epoch": 4.9783002536334, - "grad_norm": 0.004830478224903345, - "learning_rate": 0.00019998777875772353, - "loss": 46.0, - "step": 30913 - }, - { - "epoch": 4.978461290712186, - "grad_norm": 0.0030132890678942204, - "learning_rate": 0.0001999877779667847, - "loss": 46.0, - "step": 30914 - }, - { - "epoch": 4.978622327790974, - "grad_norm": 0.017756516113877296, - "learning_rate": 0.0001999877771758203, - "loss": 46.0, - "step": 30915 - }, - { - "epoch": 4.978783364869761, - "grad_norm": 0.012786378152668476, - "learning_rate": 0.0001999877763848303, - "loss": 46.0, - "step": 30916 - }, - { - "epoch": 4.9789444019485485, - "grad_norm": 0.005736165679991245, - "learning_rate": 0.00019998777559381475, - "loss": 46.0, - "step": 30917 - }, - { - "epoch": 4.979105439027336, - "grad_norm": 0.005925099831074476, - "learning_rate": 0.00019998777480277356, - "loss": 46.0, - "step": 30918 - }, - { - "epoch": 4.979266476106123, - "grad_norm": 0.004123123828321695, - "learning_rate": 0.00019998777401170677, - "loss": 46.0, - "step": 30919 - }, - { - "epoch": 4.979427513184911, - "grad_norm": 0.007774327881634235, - "learning_rate": 0.00019998777322061443, - "loss": 46.0, - "step": 30920 - }, - { - "epoch": 4.979588550263698, - "grad_norm": 0.00395645946264267, - "learning_rate": 0.00019998777242949647, - "loss": 46.0, - "step": 30921 - }, - { - "epoch": 4.979749587342486, - "grad_norm": 0.002001544926315546, - "learning_rate": 0.00019998777163835293, - "loss": 46.0, - "step": 30922 - }, - { - "epoch": 4.979910624421273, - "grad_norm": 0.009097103029489517, - "learning_rate": 0.0001999877708471838, - "loss": 46.0, - "step": 30923 - }, - { - "epoch": 4.980071661500061, - "grad_norm": 0.007415729574859142, - "learning_rate": 0.00019998777005598905, - "loss": 46.0, - "step": 30924 - }, - { - "epoch": 4.980232698578848, - "grad_norm": 0.0012105636997148395, - "learning_rate": 0.00019998776926476874, - "loss": 46.0, - "step": 30925 - }, - { - "epoch": 4.9803937356576355, - "grad_norm": 0.0026218751445412636, - "learning_rate": 0.00019998776847352282, - "loss": 46.0, - "step": 30926 - }, - { - "epoch": 4.980554772736422, - "grad_norm": 0.008122207596898079, - "learning_rate": 0.0001999877676822513, - "loss": 46.0, - "step": 30927 - }, - { - "epoch": 4.9807158098152104, - "grad_norm": 0.003486272180452943, - "learning_rate": 0.00019998776689095422, - "loss": 46.0, - "step": 30928 - }, - { - "epoch": 4.980876846893997, - "grad_norm": 0.0024311935994774103, - "learning_rate": 0.00019998776609963154, - "loss": 46.0, - "step": 30929 - }, - { - "epoch": 4.9810378839727845, - "grad_norm": 0.002731007756665349, - "learning_rate": 0.00019998776530828324, - "loss": 46.0, - "step": 30930 - }, - { - "epoch": 4.981198921051572, - "grad_norm": 0.0016660717083141208, - "learning_rate": 0.00019998776451690938, - "loss": 46.0, - "step": 30931 - }, - { - "epoch": 4.981359958130359, - "grad_norm": 0.011875721625983715, - "learning_rate": 0.0001999877637255099, - "loss": 46.0, - "step": 30932 - }, - { - "epoch": 4.981520995209147, - "grad_norm": 0.010018021799623966, - "learning_rate": 0.00019998776293408485, - "loss": 46.0, - "step": 30933 - }, - { - "epoch": 4.981682032287934, - "grad_norm": 0.004446617793291807, - "learning_rate": 0.0001999877621426342, - "loss": 46.0, - "step": 30934 - }, - { - "epoch": 4.981843069366722, - "grad_norm": 0.003570565953850746, - "learning_rate": 0.00019998776135115798, - "loss": 46.0, - "step": 30935 - }, - { - "epoch": 4.982004106445509, - "grad_norm": 0.001409512828104198, - "learning_rate": 0.00019998776055965616, - "loss": 46.0, - "step": 30936 - }, - { - "epoch": 4.982165143524297, - "grad_norm": 0.009436271153390408, - "learning_rate": 0.00019998775976812872, - "loss": 46.0, - "step": 30937 - }, - { - "epoch": 4.982326180603084, - "grad_norm": 0.0013256825041025877, - "learning_rate": 0.0001999877589765757, - "loss": 46.0, - "step": 30938 - }, - { - "epoch": 4.9824872176818715, - "grad_norm": 0.01066936831921339, - "learning_rate": 0.00019998775818499712, - "loss": 46.0, - "step": 30939 - }, - { - "epoch": 4.982648254760659, - "grad_norm": 0.0018738553626462817, - "learning_rate": 0.0001999877573933929, - "loss": 46.0, - "step": 30940 - }, - { - "epoch": 4.982809291839446, - "grad_norm": 0.003846400883048773, - "learning_rate": 0.00019998775660176312, - "loss": 46.0, - "step": 30941 - }, - { - "epoch": 4.982970328918233, - "grad_norm": 0.005156259518116713, - "learning_rate": 0.00019998775581010775, - "loss": 46.0, - "step": 30942 - }, - { - "epoch": 4.9831313659970204, - "grad_norm": 0.0021843810100108385, - "learning_rate": 0.00019998775501842676, - "loss": 46.0, - "step": 30943 - }, - { - "epoch": 4.983292403075808, - "grad_norm": 0.008953774347901344, - "learning_rate": 0.0001999877542267202, - "loss": 46.0, - "step": 30944 - }, - { - "epoch": 4.983453440154595, - "grad_norm": 0.013401445001363754, - "learning_rate": 0.00019998775343498806, - "loss": 46.0, - "step": 30945 - }, - { - "epoch": 4.983614477233383, - "grad_norm": 0.00585845485329628, - "learning_rate": 0.00019998775264323029, - "loss": 46.0, - "step": 30946 - }, - { - "epoch": 4.98377551431217, - "grad_norm": 0.0016057100147008896, - "learning_rate": 0.00019998775185144695, - "loss": 46.0, - "step": 30947 - }, - { - "epoch": 4.983936551390958, - "grad_norm": 0.0034353923983871937, - "learning_rate": 0.00019998775105963803, - "loss": 46.0, - "step": 30948 - }, - { - "epoch": 4.984097588469745, - "grad_norm": 0.0035877306945621967, - "learning_rate": 0.0001999877502678035, - "loss": 46.0, - "step": 30949 - }, - { - "epoch": 4.984258625548533, - "grad_norm": 0.002961766207590699, - "learning_rate": 0.00019998774947594338, - "loss": 46.0, - "step": 30950 - }, - { - "epoch": 4.98441966262732, - "grad_norm": 0.005100170150399208, - "learning_rate": 0.0001999877486840577, - "loss": 46.0, - "step": 30951 - }, - { - "epoch": 4.9845806997061075, - "grad_norm": 0.003901463933289051, - "learning_rate": 0.00019998774789214637, - "loss": 46.0, - "step": 30952 - }, - { - "epoch": 4.984741736784895, - "grad_norm": 0.003433484584093094, - "learning_rate": 0.0001999877471002095, - "loss": 46.0, - "step": 30953 - }, - { - "epoch": 4.984902773863682, - "grad_norm": 0.005060386378318071, - "learning_rate": 0.00019998774630824702, - "loss": 46.0, - "step": 30954 - }, - { - "epoch": 4.98506381094247, - "grad_norm": 0.004352663177996874, - "learning_rate": 0.00019998774551625894, - "loss": 46.0, - "step": 30955 - }, - { - "epoch": 4.985224848021257, - "grad_norm": 0.006874861195683479, - "learning_rate": 0.00019998774472424527, - "loss": 46.0, - "step": 30956 - }, - { - "epoch": 4.985385885100044, - "grad_norm": 0.006450616288930178, - "learning_rate": 0.00019998774393220603, - "loss": 46.0, - "step": 30957 - }, - { - "epoch": 4.985546922178831, - "grad_norm": 0.0057152630761265755, - "learning_rate": 0.00019998774314014116, - "loss": 46.0, - "step": 30958 - }, - { - "epoch": 4.985707959257619, - "grad_norm": 0.012109542265534401, - "learning_rate": 0.00019998774234805073, - "loss": 46.0, - "step": 30959 - }, - { - "epoch": 4.985868996336406, - "grad_norm": 0.0049367258325219154, - "learning_rate": 0.00019998774155593468, - "loss": 46.0, - "step": 30960 - }, - { - "epoch": 4.986030033415194, - "grad_norm": 0.0017551257042214274, - "learning_rate": 0.00019998774076379307, - "loss": 46.0, - "step": 30961 - }, - { - "epoch": 4.986191070493981, - "grad_norm": 0.006023516412824392, - "learning_rate": 0.00019998773997162585, - "loss": 46.0, - "step": 30962 - }, - { - "epoch": 4.986352107572769, - "grad_norm": 0.0025072949938476086, - "learning_rate": 0.00019998773917943304, - "loss": 46.0, - "step": 30963 - }, - { - "epoch": 4.986513144651556, - "grad_norm": 0.0038455044850707054, - "learning_rate": 0.00019998773838721465, - "loss": 46.0, - "step": 30964 - }, - { - "epoch": 4.9866741817303435, - "grad_norm": 0.004335750825703144, - "learning_rate": 0.00019998773759497066, - "loss": 46.0, - "step": 30965 - }, - { - "epoch": 4.986835218809131, - "grad_norm": 0.005389214958995581, - "learning_rate": 0.0001999877368027011, - "loss": 46.0, - "step": 30966 - }, - { - "epoch": 4.986996255887918, - "grad_norm": 0.010922829620540142, - "learning_rate": 0.0001999877360104059, - "loss": 46.0, - "step": 30967 - }, - { - "epoch": 4.987157292966706, - "grad_norm": 0.004410484340041876, - "learning_rate": 0.00019998773521808514, - "loss": 46.0, - "step": 30968 - }, - { - "epoch": 4.987318330045493, - "grad_norm": 0.0010219208197668195, - "learning_rate": 0.00019998773442573878, - "loss": 46.0, - "step": 30969 - }, - { - "epoch": 4.987479367124281, - "grad_norm": 0.004234891384840012, - "learning_rate": 0.00019998773363336683, - "loss": 46.0, - "step": 30970 - }, - { - "epoch": 4.987640404203068, - "grad_norm": 0.004864760208874941, - "learning_rate": 0.0001999877328409693, - "loss": 46.0, - "step": 30971 - }, - { - "epoch": 4.987801441281855, - "grad_norm": 0.007149561308324337, - "learning_rate": 0.00019998773204854615, - "loss": 46.0, - "step": 30972 - }, - { - "epoch": 4.987962478360642, - "grad_norm": 0.013999372720718384, - "learning_rate": 0.00019998773125609742, - "loss": 46.0, - "step": 30973 - }, - { - "epoch": 4.98812351543943, - "grad_norm": 0.0031189939472824335, - "learning_rate": 0.00019998773046362312, - "loss": 46.0, - "step": 30974 - }, - { - "epoch": 4.988284552518217, - "grad_norm": 0.007929399609565735, - "learning_rate": 0.00019998772967112321, - "loss": 46.0, - "step": 30975 - }, - { - "epoch": 4.988445589597005, - "grad_norm": 0.003621111623942852, - "learning_rate": 0.00019998772887859772, - "loss": 46.0, - "step": 30976 - }, - { - "epoch": 4.988606626675792, - "grad_norm": 0.0033273661974817514, - "learning_rate": 0.00019998772808604663, - "loss": 46.0, - "step": 30977 - }, - { - "epoch": 4.9887676637545795, - "grad_norm": 0.003927677869796753, - "learning_rate": 0.00019998772729346996, - "loss": 46.0, - "step": 30978 - }, - { - "epoch": 4.988928700833367, - "grad_norm": 0.001456919126212597, - "learning_rate": 0.00019998772650086765, - "loss": 46.0, - "step": 30979 - }, - { - "epoch": 4.989089737912154, - "grad_norm": 0.00237159407697618, - "learning_rate": 0.00019998772570823978, - "loss": 46.0, - "step": 30980 - }, - { - "epoch": 4.989250774990942, - "grad_norm": 0.01632472313940525, - "learning_rate": 0.00019998772491558632, - "loss": 46.0, - "step": 30981 - }, - { - "epoch": 4.989411812069729, - "grad_norm": 0.004613370168954134, - "learning_rate": 0.0001999877241229073, - "loss": 46.0, - "step": 30982 - }, - { - "epoch": 4.989572849148517, - "grad_norm": 0.006446159444749355, - "learning_rate": 0.00019998772333020264, - "loss": 46.0, - "step": 30983 - }, - { - "epoch": 4.989733886227304, - "grad_norm": 0.0079695088788867, - "learning_rate": 0.0001999877225374724, - "loss": 46.0, - "step": 30984 - }, - { - "epoch": 4.989894923306091, - "grad_norm": 0.0008088378235697746, - "learning_rate": 0.00019998772174471659, - "loss": 46.0, - "step": 30985 - }, - { - "epoch": 4.990055960384879, - "grad_norm": 0.003061580238863826, - "learning_rate": 0.0001999877209519352, - "loss": 46.0, - "step": 30986 - }, - { - "epoch": 4.990216997463666, - "grad_norm": 0.0015336141223087907, - "learning_rate": 0.00019998772015912815, - "loss": 46.0, - "step": 30987 - }, - { - "epoch": 4.990378034542453, - "grad_norm": 0.016620026901364326, - "learning_rate": 0.00019998771936629556, - "loss": 46.0, - "step": 30988 - }, - { - "epoch": 4.9905390716212406, - "grad_norm": 0.004312083590775728, - "learning_rate": 0.00019998771857343737, - "loss": 46.0, - "step": 30989 - }, - { - "epoch": 4.990700108700028, - "grad_norm": 0.012261553667485714, - "learning_rate": 0.00019998771778055358, - "loss": 46.0, - "step": 30990 - }, - { - "epoch": 4.9908611457788155, - "grad_norm": 0.0023435086477547884, - "learning_rate": 0.0001999877169876442, - "loss": 46.0, - "step": 30991 - }, - { - "epoch": 4.991022182857603, - "grad_norm": 0.0034246023278683424, - "learning_rate": 0.00019998771619470925, - "loss": 46.0, - "step": 30992 - }, - { - "epoch": 4.99118321993639, - "grad_norm": 0.0020012909080833197, - "learning_rate": 0.00019998771540174869, - "loss": 46.0, - "step": 30993 - }, - { - "epoch": 4.991344257015178, - "grad_norm": 0.001958669163286686, - "learning_rate": 0.00019998771460876254, - "loss": 46.0, - "step": 30994 - }, - { - "epoch": 4.991505294093965, - "grad_norm": 0.006679236888885498, - "learning_rate": 0.00019998771381575078, - "loss": 46.0, - "step": 30995 - }, - { - "epoch": 4.991666331172753, - "grad_norm": 0.004908496048301458, - "learning_rate": 0.00019998771302271346, - "loss": 46.0, - "step": 30996 - }, - { - "epoch": 4.99182736825154, - "grad_norm": 0.009609609842300415, - "learning_rate": 0.00019998771222965052, - "loss": 46.0, - "step": 30997 - }, - { - "epoch": 4.991988405330328, - "grad_norm": 0.008011335507035255, - "learning_rate": 0.000199987711436562, - "loss": 46.0, - "step": 30998 - }, - { - "epoch": 4.992149442409115, - "grad_norm": 0.007766086608171463, - "learning_rate": 0.0001999877106434479, - "loss": 46.0, - "step": 30999 - }, - { - "epoch": 4.992310479487902, - "grad_norm": 0.0015139388851821423, - "learning_rate": 0.0001999877098503082, - "loss": 46.0, - "step": 31000 - }, - { - "epoch": 4.99247151656669, - "grad_norm": 0.00339633715339005, - "learning_rate": 0.00019998770905714294, - "loss": 46.0, - "step": 31001 - }, - { - "epoch": 4.9926325536454765, - "grad_norm": 0.003292802022770047, - "learning_rate": 0.00019998770826395204, - "loss": 46.0, - "step": 31002 - }, - { - "epoch": 4.992793590724264, - "grad_norm": 0.008728443644940853, - "learning_rate": 0.00019998770747073555, - "loss": 46.0, - "step": 31003 - }, - { - "epoch": 4.992954627803051, - "grad_norm": 0.0023864663671702147, - "learning_rate": 0.00019998770667749348, - "loss": 46.0, - "step": 31004 - }, - { - "epoch": 4.993115664881839, - "grad_norm": 0.008173982612788677, - "learning_rate": 0.00019998770588422582, - "loss": 46.0, - "step": 31005 - }, - { - "epoch": 4.993276701960626, - "grad_norm": 0.0024232121650129557, - "learning_rate": 0.00019998770509093257, - "loss": 46.0, - "step": 31006 - }, - { - "epoch": 4.993437739039414, - "grad_norm": 0.008254874497652054, - "learning_rate": 0.00019998770429761374, - "loss": 46.0, - "step": 31007 - }, - { - "epoch": 4.993598776118201, - "grad_norm": 0.003584672464057803, - "learning_rate": 0.0001999877035042693, - "loss": 46.0, - "step": 31008 - }, - { - "epoch": 4.993759813196989, - "grad_norm": 0.04514848440885544, - "learning_rate": 0.00019998770271089926, - "loss": 46.0, - "step": 31009 - }, - { - "epoch": 4.993920850275776, - "grad_norm": 0.0050653438083827496, - "learning_rate": 0.00019998770191750363, - "loss": 46.0, - "step": 31010 - }, - { - "epoch": 4.994081887354564, - "grad_norm": 0.0041606230661273, - "learning_rate": 0.00019998770112408245, - "loss": 46.0, - "step": 31011 - }, - { - "epoch": 4.994242924433351, - "grad_norm": 0.008867884054780006, - "learning_rate": 0.00019998770033063563, - "loss": 46.0, - "step": 31012 - }, - { - "epoch": 4.9944039615121385, - "grad_norm": 0.0016287073958665133, - "learning_rate": 0.00019998769953716324, - "loss": 46.0, - "step": 31013 - }, - { - "epoch": 4.994564998590926, - "grad_norm": 0.0017289791721850634, - "learning_rate": 0.00019998769874366525, - "loss": 46.0, - "step": 31014 - }, - { - "epoch": 4.9947260356697125, - "grad_norm": 0.010679534636437893, - "learning_rate": 0.0001999876979501417, - "loss": 46.0, - "step": 31015 - }, - { - "epoch": 4.9948870727485, - "grad_norm": 0.006222427356988192, - "learning_rate": 0.00019998769715659251, - "loss": 46.0, - "step": 31016 - }, - { - "epoch": 4.995048109827287, - "grad_norm": 0.010056688450276852, - "learning_rate": 0.00019998769636301775, - "loss": 46.0, - "step": 31017 - }, - { - "epoch": 4.995209146906075, - "grad_norm": 0.004496424458920956, - "learning_rate": 0.0001999876955694174, - "loss": 46.0, - "step": 31018 - }, - { - "epoch": 4.995370183984862, - "grad_norm": 0.009411857463419437, - "learning_rate": 0.00019998769477579145, - "loss": 46.0, - "step": 31019 - }, - { - "epoch": 4.99553122106365, - "grad_norm": 0.0017789442790672183, - "learning_rate": 0.00019998769398213992, - "loss": 46.0, - "step": 31020 - }, - { - "epoch": 4.995692258142437, - "grad_norm": 0.005408520344644785, - "learning_rate": 0.0001999876931884628, - "loss": 46.0, - "step": 31021 - }, - { - "epoch": 4.995853295221225, - "grad_norm": 0.001626838929951191, - "learning_rate": 0.00019998769239476007, - "loss": 46.0, - "step": 31022 - }, - { - "epoch": 4.996014332300012, - "grad_norm": 0.017273297533392906, - "learning_rate": 0.00019998769160103176, - "loss": 46.0, - "step": 31023 - }, - { - "epoch": 4.9961753693788, - "grad_norm": 0.008846103213727474, - "learning_rate": 0.00019998769080727786, - "loss": 46.0, - "step": 31024 - }, - { - "epoch": 4.996336406457587, - "grad_norm": 0.0014638167340308428, - "learning_rate": 0.00019998769001349835, - "loss": 46.0, - "step": 31025 - }, - { - "epoch": 4.9964974435363745, - "grad_norm": 0.001910537132062018, - "learning_rate": 0.00019998768921969327, - "loss": 46.0, - "step": 31026 - }, - { - "epoch": 4.996658480615162, - "grad_norm": 0.009062094613909721, - "learning_rate": 0.0001999876884258626, - "loss": 46.0, - "step": 31027 - }, - { - "epoch": 4.996819517693949, - "grad_norm": 0.011714388616383076, - "learning_rate": 0.00019998768763200632, - "loss": 46.0, - "step": 31028 - }, - { - "epoch": 4.996980554772737, - "grad_norm": 0.0028681657277047634, - "learning_rate": 0.00019998768683812448, - "loss": 46.0, - "step": 31029 - }, - { - "epoch": 4.997141591851523, - "grad_norm": 0.0024218233302235603, - "learning_rate": 0.00019998768604421704, - "loss": 46.0, - "step": 31030 - }, - { - "epoch": 4.997302628930311, - "grad_norm": 0.012475757859647274, - "learning_rate": 0.00019998768525028397, - "loss": 46.0, - "step": 31031 - }, - { - "epoch": 4.997463666009098, - "grad_norm": 0.002611500909551978, - "learning_rate": 0.00019998768445632535, - "loss": 46.0, - "step": 31032 - }, - { - "epoch": 4.997624703087886, - "grad_norm": 0.0012553916312754154, - "learning_rate": 0.00019998768366234112, - "loss": 46.0, - "step": 31033 - }, - { - "epoch": 4.997785740166673, - "grad_norm": 0.0017350241541862488, - "learning_rate": 0.0001999876828683313, - "loss": 46.0, - "step": 31034 - }, - { - "epoch": 4.997946777245461, - "grad_norm": 0.005065203178673983, - "learning_rate": 0.00019998768207429588, - "loss": 46.0, - "step": 31035 - }, - { - "epoch": 4.998107814324248, - "grad_norm": 0.0024507571943104267, - "learning_rate": 0.00019998768128023489, - "loss": 46.0, - "step": 31036 - }, - { - "epoch": 4.998268851403036, - "grad_norm": 0.00270538660697639, - "learning_rate": 0.0001999876804861483, - "loss": 46.0, - "step": 31037 - }, - { - "epoch": 4.998429888481823, - "grad_norm": 0.006515856832265854, - "learning_rate": 0.0001999876796920361, - "loss": 46.0, - "step": 31038 - }, - { - "epoch": 4.9985909255606105, - "grad_norm": 0.007227238267660141, - "learning_rate": 0.00019998767889789832, - "loss": 46.0, - "step": 31039 - }, - { - "epoch": 4.998751962639398, - "grad_norm": 0.00120845518540591, - "learning_rate": 0.00019998767810373494, - "loss": 46.0, - "step": 31040 - }, - { - "epoch": 4.998912999718185, - "grad_norm": 0.0017949381144717336, - "learning_rate": 0.00019998767730954598, - "loss": 46.0, - "step": 31041 - }, - { - "epoch": 4.999074036796973, - "grad_norm": 0.001391031313687563, - "learning_rate": 0.00019998767651533143, - "loss": 46.0, - "step": 31042 - }, - { - "epoch": 4.99923507387576, - "grad_norm": 0.005238562356680632, - "learning_rate": 0.0001999876757210913, - "loss": 46.0, - "step": 31043 - }, - { - "epoch": 4.999396110954548, - "grad_norm": 0.001440562424249947, - "learning_rate": 0.00019998767492682555, - "loss": 46.0, - "step": 31044 - }, - { - "epoch": 4.999557148033334, - "grad_norm": 0.004359934478998184, - "learning_rate": 0.00019998767413253424, - "loss": 46.0, - "step": 31045 - }, - { - "epoch": 4.999718185112122, - "grad_norm": 0.017080603167414665, - "learning_rate": 0.00019998767333821732, - "loss": 46.0, - "step": 31046 - }, - { - "epoch": 4.999879222190909, - "grad_norm": 0.001398540218360722, - "learning_rate": 0.00019998767254387478, - "loss": 46.0, - "step": 31047 - }, - { - "epoch": 4.999879222190909, - "eval_loss": 11.5, - "eval_runtime": 15.2627, - "eval_samples_per_second": 171.333, - "eval_steps_per_second": 85.699, - "step": 31047 - }, - { - "epoch": 5.000080518539393, - "grad_norm": 0.0047907172702252865, - "learning_rate": 0.0001999876717495067, - "loss": 46.0, - "step": 31048 - }, - { - "epoch": 5.000241555618181, - "grad_norm": 0.003105146111920476, - "learning_rate": 0.000199987670955113, - "loss": 46.0, - "step": 31049 - }, - { - "epoch": 5.000402592696968, - "grad_norm": 0.0012852853396907449, - "learning_rate": 0.0001999876701606937, - "loss": 46.0, - "step": 31050 - }, - { - "epoch": 5.000563629775756, - "grad_norm": 0.005212837364524603, - "learning_rate": 0.00019998766936624884, - "loss": 46.0, - "step": 31051 - }, - { - "epoch": 5.000724666854543, - "grad_norm": 0.005906665697693825, - "learning_rate": 0.00019998766857177836, - "loss": 46.0, - "step": 31052 - }, - { - "epoch": 5.0008857039333305, - "grad_norm": 0.004046909511089325, - "learning_rate": 0.00019998766777728228, - "loss": 46.0, - "step": 31053 - }, - { - "epoch": 5.001046741012118, - "grad_norm": 0.006136893294751644, - "learning_rate": 0.00019998766698276063, - "loss": 46.0, - "step": 31054 - }, - { - "epoch": 5.001207778090905, - "grad_norm": 0.007463934365659952, - "learning_rate": 0.0001999876661882134, - "loss": 46.0, - "step": 31055 - }, - { - "epoch": 5.001368815169693, - "grad_norm": 0.006553539074957371, - "learning_rate": 0.00019998766539364054, - "loss": 46.0, - "step": 31056 - }, - { - "epoch": 5.00152985224848, - "grad_norm": 0.008825916796922684, - "learning_rate": 0.00019998766459904214, - "loss": 46.0, - "step": 31057 - }, - { - "epoch": 5.001690889327268, - "grad_norm": 0.006321832537651062, - "learning_rate": 0.0001999876638044181, - "loss": 46.0, - "step": 31058 - }, - { - "epoch": 5.001851926406055, - "grad_norm": 0.0011174511164426804, - "learning_rate": 0.0001999876630097685, - "loss": 46.0, - "step": 31059 - }, - { - "epoch": 5.002012963484843, - "grad_norm": 0.0032858788035809994, - "learning_rate": 0.0001999876622150933, - "loss": 46.0, - "step": 31060 - }, - { - "epoch": 5.00217400056363, - "grad_norm": 0.0013211389305070043, - "learning_rate": 0.00019998766142039252, - "loss": 46.0, - "step": 31061 - }, - { - "epoch": 5.002335037642418, - "grad_norm": 0.0038677456323057413, - "learning_rate": 0.0001999876606256661, - "loss": 46.0, - "step": 31062 - }, - { - "epoch": 5.002496074721204, - "grad_norm": 0.0029299515299499035, - "learning_rate": 0.00019998765983091413, - "loss": 46.0, - "step": 31063 - }, - { - "epoch": 5.002657111799992, - "grad_norm": 0.0014939854154363275, - "learning_rate": 0.00019998765903613656, - "loss": 46.0, - "step": 31064 - }, - { - "epoch": 5.002818148878779, - "grad_norm": 0.005311173386871815, - "learning_rate": 0.0001999876582413334, - "loss": 46.0, - "step": 31065 - }, - { - "epoch": 5.0029791859575665, - "grad_norm": 0.0051200916059315205, - "learning_rate": 0.00019998765744650462, - "loss": 46.0, - "step": 31066 - }, - { - "epoch": 5.003140223036354, - "grad_norm": 0.009907403960824013, - "learning_rate": 0.0001999876566516503, - "loss": 46.0, - "step": 31067 - }, - { - "epoch": 5.003301260115141, - "grad_norm": 0.003349072765558958, - "learning_rate": 0.00019998765585677036, - "loss": 46.0, - "step": 31068 - }, - { - "epoch": 5.003462297193929, - "grad_norm": 0.0053487312979996204, - "learning_rate": 0.00019998765506186483, - "loss": 46.0, - "step": 31069 - }, - { - "epoch": 5.003623334272716, - "grad_norm": 0.0007635327056050301, - "learning_rate": 0.0001999876542669337, - "loss": 46.0, - "step": 31070 - }, - { - "epoch": 5.003784371351504, - "grad_norm": 0.004554382991045713, - "learning_rate": 0.000199987653471977, - "loss": 46.0, - "step": 31071 - }, - { - "epoch": 5.003945408430291, - "grad_norm": 0.00705056544393301, - "learning_rate": 0.0001999876526769947, - "loss": 46.0, - "step": 31072 - }, - { - "epoch": 5.004106445509079, - "grad_norm": 0.0008869162993505597, - "learning_rate": 0.0001999876518819868, - "loss": 46.0, - "step": 31073 - }, - { - "epoch": 5.004267482587866, - "grad_norm": 0.002091227099299431, - "learning_rate": 0.0001999876510869533, - "loss": 46.0, - "step": 31074 - }, - { - "epoch": 5.004428519666654, - "grad_norm": 0.004489665850996971, - "learning_rate": 0.00019998765029189422, - "loss": 46.0, - "step": 31075 - }, - { - "epoch": 5.004589556745441, - "grad_norm": 0.00648216949775815, - "learning_rate": 0.00019998764949680955, - "loss": 46.0, - "step": 31076 - }, - { - "epoch": 5.004750593824228, - "grad_norm": 0.0076082851737737656, - "learning_rate": 0.0001999876487016993, - "loss": 46.0, - "step": 31077 - }, - { - "epoch": 5.004911630903015, - "grad_norm": 0.0014451676979660988, - "learning_rate": 0.00019998764790656344, - "loss": 46.0, - "step": 31078 - }, - { - "epoch": 5.0050726679818025, - "grad_norm": 0.0011559949489310384, - "learning_rate": 0.00019998764711140197, - "loss": 46.0, - "step": 31079 - }, - { - "epoch": 5.00523370506059, - "grad_norm": 0.008217500522732735, - "learning_rate": 0.00019998764631621495, - "loss": 46.0, - "step": 31080 - }, - { - "epoch": 5.005394742139377, - "grad_norm": 0.002597081009298563, - "learning_rate": 0.00019998764552100231, - "loss": 46.0, - "step": 31081 - }, - { - "epoch": 5.005555779218165, - "grad_norm": 0.002886929316446185, - "learning_rate": 0.0001999876447257641, - "loss": 46.0, - "step": 31082 - }, - { - "epoch": 5.005716816296952, - "grad_norm": 0.011141336522996426, - "learning_rate": 0.00019998764393050028, - "loss": 46.0, - "step": 31083 - }, - { - "epoch": 5.00587785337574, - "grad_norm": 0.008937749080359936, - "learning_rate": 0.00019998764313521088, - "loss": 46.0, - "step": 31084 - }, - { - "epoch": 5.006038890454527, - "grad_norm": 0.011391947977244854, - "learning_rate": 0.0001999876423398959, - "loss": 46.0, - "step": 31085 - }, - { - "epoch": 5.006199927533315, - "grad_norm": 0.0061226957477629185, - "learning_rate": 0.0001999876415445553, - "loss": 46.0, - "step": 31086 - }, - { - "epoch": 5.006360964612102, - "grad_norm": 0.0018338634399697185, - "learning_rate": 0.0001999876407491891, - "loss": 46.0, - "step": 31087 - }, - { - "epoch": 5.00652200169089, - "grad_norm": 0.006106365472078323, - "learning_rate": 0.00019998763995379733, - "loss": 46.0, - "step": 31088 - }, - { - "epoch": 5.006683038769677, - "grad_norm": 0.007486812770366669, - "learning_rate": 0.00019998763915837997, - "loss": 46.0, - "step": 31089 - }, - { - "epoch": 5.0068440758484645, - "grad_norm": 0.0017316878074780107, - "learning_rate": 0.00019998763836293702, - "loss": 46.0, - "step": 31090 - }, - { - "epoch": 5.007005112927252, - "grad_norm": 0.0035680984146893024, - "learning_rate": 0.0001999876375674685, - "loss": 46.0, - "step": 31091 - }, - { - "epoch": 5.0071661500060385, - "grad_norm": 0.00277877738699317, - "learning_rate": 0.00019998763677197434, - "loss": 46.0, - "step": 31092 - }, - { - "epoch": 5.007327187084826, - "grad_norm": 0.004459621850401163, - "learning_rate": 0.00019998763597645463, - "loss": 46.0, - "step": 31093 - }, - { - "epoch": 5.007488224163613, - "grad_norm": 0.006595935672521591, - "learning_rate": 0.0001999876351809093, - "loss": 46.0, - "step": 31094 - }, - { - "epoch": 5.007649261242401, - "grad_norm": 0.0010635115904733539, - "learning_rate": 0.00019998763438533836, - "loss": 46.0, - "step": 31095 - }, - { - "epoch": 5.007810298321188, - "grad_norm": 0.003824312472715974, - "learning_rate": 0.0001999876335897419, - "loss": 46.0, - "step": 31096 - }, - { - "epoch": 5.007971335399976, - "grad_norm": 0.00623067282140255, - "learning_rate": 0.00019998763279411978, - "loss": 46.0, - "step": 31097 - }, - { - "epoch": 5.008132372478763, - "grad_norm": 0.0035313498228788376, - "learning_rate": 0.00019998763199847208, - "loss": 46.0, - "step": 31098 - }, - { - "epoch": 5.008293409557551, - "grad_norm": 0.005680904723703861, - "learning_rate": 0.0001999876312027988, - "loss": 46.0, - "step": 31099 - }, - { - "epoch": 5.008454446636338, - "grad_norm": 0.006670357659459114, - "learning_rate": 0.00019998763040709994, - "loss": 46.0, - "step": 31100 - }, - { - "epoch": 5.0086154837151255, - "grad_norm": 0.004993006121367216, - "learning_rate": 0.00019998762961137548, - "loss": 46.0, - "step": 31101 - }, - { - "epoch": 5.008776520793913, - "grad_norm": 0.007767189759761095, - "learning_rate": 0.0001999876288156254, - "loss": 46.0, - "step": 31102 - }, - { - "epoch": 5.0089375578727005, - "grad_norm": 0.007792117074131966, - "learning_rate": 0.00019998762801984977, - "loss": 46.0, - "step": 31103 - }, - { - "epoch": 5.009098594951488, - "grad_norm": 0.003257140750065446, - "learning_rate": 0.00019998762722404855, - "loss": 46.0, - "step": 31104 - }, - { - "epoch": 5.009259632030275, - "grad_norm": 0.014960839413106441, - "learning_rate": 0.0001999876264282217, - "loss": 46.0, - "step": 31105 - }, - { - "epoch": 5.009420669109062, - "grad_norm": 0.008353774435818195, - "learning_rate": 0.00019998762563236926, - "loss": 46.0, - "step": 31106 - }, - { - "epoch": 5.009581706187849, - "grad_norm": 0.0054909707978367805, - "learning_rate": 0.00019998762483649127, - "loss": 46.0, - "step": 31107 - }, - { - "epoch": 5.009742743266637, - "grad_norm": 0.003979809116572142, - "learning_rate": 0.00019998762404058767, - "loss": 46.0, - "step": 31108 - }, - { - "epoch": 5.009903780345424, - "grad_norm": 0.005869802553206682, - "learning_rate": 0.00019998762324465846, - "loss": 46.0, - "step": 31109 - }, - { - "epoch": 5.010064817424212, - "grad_norm": 0.011984726414084435, - "learning_rate": 0.00019998762244870366, - "loss": 46.0, - "step": 31110 - }, - { - "epoch": 5.010225854502999, - "grad_norm": 0.001436824444681406, - "learning_rate": 0.00019998762165272327, - "loss": 46.0, - "step": 31111 - }, - { - "epoch": 5.010386891581787, - "grad_norm": 0.003839449491351843, - "learning_rate": 0.0001999876208567173, - "loss": 46.0, - "step": 31112 - }, - { - "epoch": 5.010547928660574, - "grad_norm": 0.017565922811627388, - "learning_rate": 0.00019998762006068573, - "loss": 46.0, - "step": 31113 - }, - { - "epoch": 5.0107089657393615, - "grad_norm": 0.0032677168492227793, - "learning_rate": 0.00019998761926462858, - "loss": 46.0, - "step": 31114 - }, - { - "epoch": 5.010870002818149, - "grad_norm": 0.0022214825730770826, - "learning_rate": 0.00019998761846854585, - "loss": 46.0, - "step": 31115 - }, - { - "epoch": 5.011031039896936, - "grad_norm": 0.006211854051798582, - "learning_rate": 0.0001999876176724375, - "loss": 46.0, - "step": 31116 - }, - { - "epoch": 5.011192076975724, - "grad_norm": 0.018349047750234604, - "learning_rate": 0.00019998761687630356, - "loss": 46.0, - "step": 31117 - }, - { - "epoch": 5.011353114054511, - "grad_norm": 0.00869948323816061, - "learning_rate": 0.00019998761608014403, - "loss": 46.0, - "step": 31118 - }, - { - "epoch": 5.011514151133299, - "grad_norm": 0.005436311475932598, - "learning_rate": 0.00019998761528395892, - "loss": 46.0, - "step": 31119 - }, - { - "epoch": 5.011675188212086, - "grad_norm": 0.0035337633453309536, - "learning_rate": 0.0001999876144877482, - "loss": 46.0, - "step": 31120 - }, - { - "epoch": 5.011836225290873, - "grad_norm": 0.003181623062118888, - "learning_rate": 0.00019998761369151194, - "loss": 46.0, - "step": 31121 - }, - { - "epoch": 5.01199726236966, - "grad_norm": 0.0017693424597382545, - "learning_rate": 0.00019998761289525004, - "loss": 46.0, - "step": 31122 - }, - { - "epoch": 5.012158299448448, - "grad_norm": 0.011082902550697327, - "learning_rate": 0.00019998761209896255, - "loss": 46.0, - "step": 31123 - }, - { - "epoch": 5.012319336527235, - "grad_norm": 0.001154599478468299, - "learning_rate": 0.0001999876113026495, - "loss": 46.0, - "step": 31124 - }, - { - "epoch": 5.012480373606023, - "grad_norm": 0.00754148792475462, - "learning_rate": 0.0001999876105063108, - "loss": 46.0, - "step": 31125 - }, - { - "epoch": 5.01264141068481, - "grad_norm": 0.0011923379497602582, - "learning_rate": 0.00019998760970994656, - "loss": 46.0, - "step": 31126 - }, - { - "epoch": 5.0128024477635975, - "grad_norm": 0.00216309423558414, - "learning_rate": 0.0001999876089135567, - "loss": 46.0, - "step": 31127 - }, - { - "epoch": 5.012963484842385, - "grad_norm": 0.014163664542138577, - "learning_rate": 0.00019998760811714127, - "loss": 46.0, - "step": 31128 - }, - { - "epoch": 5.013124521921172, - "grad_norm": 0.002531968289986253, - "learning_rate": 0.00019998760732070023, - "loss": 46.0, - "step": 31129 - }, - { - "epoch": 5.01328555899996, - "grad_norm": 0.0022920039482414722, - "learning_rate": 0.0001999876065242336, - "loss": 46.0, - "step": 31130 - }, - { - "epoch": 5.013446596078747, - "grad_norm": 0.004964747000485659, - "learning_rate": 0.0001999876057277414, - "loss": 46.0, - "step": 31131 - }, - { - "epoch": 5.013607633157535, - "grad_norm": 0.002050135051831603, - "learning_rate": 0.0001999876049312236, - "loss": 46.0, - "step": 31132 - }, - { - "epoch": 5.013768670236322, - "grad_norm": 0.014891921542584896, - "learning_rate": 0.0001999876041346802, - "loss": 46.0, - "step": 31133 - }, - { - "epoch": 5.01392970731511, - "grad_norm": 0.0050053102895617485, - "learning_rate": 0.0001999876033381112, - "loss": 46.0, - "step": 31134 - }, - { - "epoch": 5.014090744393897, - "grad_norm": 0.002103540115058422, - "learning_rate": 0.00019998760254151662, - "loss": 46.0, - "step": 31135 - }, - { - "epoch": 5.014251781472684, - "grad_norm": 0.013208774849772453, - "learning_rate": 0.00019998760174489645, - "loss": 46.0, - "step": 31136 - }, - { - "epoch": 5.014412818551471, - "grad_norm": 0.0017268150113523006, - "learning_rate": 0.00019998760094825068, - "loss": 46.0, - "step": 31137 - }, - { - "epoch": 5.014573855630259, - "grad_norm": 0.0022069367114454508, - "learning_rate": 0.00019998760015157933, - "loss": 46.0, - "step": 31138 - }, - { - "epoch": 5.014734892709046, - "grad_norm": 0.01026224810630083, - "learning_rate": 0.00019998759935488237, - "loss": 46.0, - "step": 31139 - }, - { - "epoch": 5.0148959297878335, - "grad_norm": 0.003364915493875742, - "learning_rate": 0.00019998759855815985, - "loss": 46.0, - "step": 31140 - }, - { - "epoch": 5.015056966866621, - "grad_norm": 0.0015189736150205135, - "learning_rate": 0.0001999875977614117, - "loss": 46.0, - "step": 31141 - }, - { - "epoch": 5.015218003945408, - "grad_norm": 0.001997397281229496, - "learning_rate": 0.000199987596964638, - "loss": 46.0, - "step": 31142 - }, - { - "epoch": 5.015379041024196, - "grad_norm": 0.003704332746565342, - "learning_rate": 0.00019998759616783867, - "loss": 46.0, - "step": 31143 - }, - { - "epoch": 5.015540078102983, - "grad_norm": 0.002264477778226137, - "learning_rate": 0.00019998759537101374, - "loss": 46.0, - "step": 31144 - }, - { - "epoch": 5.015701115181771, - "grad_norm": 0.0014337929897010326, - "learning_rate": 0.00019998759457416325, - "loss": 46.0, - "step": 31145 - }, - { - "epoch": 5.015862152260558, - "grad_norm": 0.0015657455660402775, - "learning_rate": 0.00019998759377728718, - "loss": 46.0, - "step": 31146 - }, - { - "epoch": 5.016023189339346, - "grad_norm": 0.01921684853732586, - "learning_rate": 0.0001999875929803855, - "loss": 46.0, - "step": 31147 - }, - { - "epoch": 5.016184226418133, - "grad_norm": 0.0017174036474898458, - "learning_rate": 0.00019998759218345822, - "loss": 46.0, - "step": 31148 - }, - { - "epoch": 5.016345263496921, - "grad_norm": 0.0022288498003035784, - "learning_rate": 0.00019998759138650535, - "loss": 46.0, - "step": 31149 - }, - { - "epoch": 5.016506300575707, - "grad_norm": 0.002555317245423794, - "learning_rate": 0.0001999875905895269, - "loss": 46.0, - "step": 31150 - }, - { - "epoch": 5.016667337654495, - "grad_norm": 0.00671594450250268, - "learning_rate": 0.00019998758979252284, - "loss": 46.0, - "step": 31151 - }, - { - "epoch": 5.016828374733282, - "grad_norm": 0.001396189909428358, - "learning_rate": 0.0001999875889954932, - "loss": 46.0, - "step": 31152 - }, - { - "epoch": 5.0169894118120695, - "grad_norm": 0.006512317340821028, - "learning_rate": 0.00019998758819843798, - "loss": 46.0, - "step": 31153 - }, - { - "epoch": 5.017150448890857, - "grad_norm": 0.007165046408772469, - "learning_rate": 0.00019998758740135715, - "loss": 46.0, - "step": 31154 - }, - { - "epoch": 5.017311485969644, - "grad_norm": 0.0015913145616650581, - "learning_rate": 0.00019998758660425074, - "loss": 46.0, - "step": 31155 - }, - { - "epoch": 5.017472523048432, - "grad_norm": 0.008072750642895699, - "learning_rate": 0.00019998758580711874, - "loss": 46.0, - "step": 31156 - }, - { - "epoch": 5.017633560127219, - "grad_norm": 0.01012209989130497, - "learning_rate": 0.00019998758500996112, - "loss": 46.0, - "step": 31157 - }, - { - "epoch": 5.017794597206007, - "grad_norm": 0.0016868539387360215, - "learning_rate": 0.00019998758421277795, - "loss": 46.0, - "step": 31158 - }, - { - "epoch": 5.017955634284794, - "grad_norm": 0.0039093708619475365, - "learning_rate": 0.00019998758341556916, - "loss": 46.0, - "step": 31159 - }, - { - "epoch": 5.018116671363582, - "grad_norm": 0.005279083736240864, - "learning_rate": 0.00019998758261833478, - "loss": 46.0, - "step": 31160 - }, - { - "epoch": 5.018277708442369, - "grad_norm": 0.01379369292408228, - "learning_rate": 0.0001999875818210748, - "loss": 46.0, - "step": 31161 - }, - { - "epoch": 5.0184387455211565, - "grad_norm": 0.004628817550837994, - "learning_rate": 0.00019998758102378927, - "loss": 46.0, - "step": 31162 - }, - { - "epoch": 5.018599782599944, - "grad_norm": 0.004593974910676479, - "learning_rate": 0.00019998758022647813, - "loss": 46.0, - "step": 31163 - }, - { - "epoch": 5.018760819678731, - "grad_norm": 0.007389673963189125, - "learning_rate": 0.00019998757942914138, - "loss": 46.0, - "step": 31164 - }, - { - "epoch": 5.018921856757518, - "grad_norm": 0.009068983606994152, - "learning_rate": 0.00019998757863177904, - "loss": 46.0, - "step": 31165 - }, - { - "epoch": 5.0190828938363055, - "grad_norm": 0.005277920980006456, - "learning_rate": 0.00019998757783439112, - "loss": 46.0, - "step": 31166 - }, - { - "epoch": 5.019243930915093, - "grad_norm": 0.0037230681627988815, - "learning_rate": 0.0001999875770369776, - "loss": 46.0, - "step": 31167 - }, - { - "epoch": 5.01940496799388, - "grad_norm": 0.0019074546871706843, - "learning_rate": 0.0001999875762395385, - "loss": 46.0, - "step": 31168 - }, - { - "epoch": 5.019566005072668, - "grad_norm": 0.008009098470211029, - "learning_rate": 0.0001999875754420738, - "loss": 46.0, - "step": 31169 - }, - { - "epoch": 5.019727042151455, - "grad_norm": 0.0022549203131347895, - "learning_rate": 0.00019998757464458349, - "loss": 46.0, - "step": 31170 - }, - { - "epoch": 5.019888079230243, - "grad_norm": 0.001208345522172749, - "learning_rate": 0.00019998757384706762, - "loss": 46.0, - "step": 31171 - }, - { - "epoch": 5.02004911630903, - "grad_norm": 0.00873639713972807, - "learning_rate": 0.00019998757304952612, - "loss": 46.0, - "step": 31172 - }, - { - "epoch": 5.020210153387818, - "grad_norm": 0.012708732858300209, - "learning_rate": 0.00019998757225195908, - "loss": 46.0, - "step": 31173 - }, - { - "epoch": 5.020371190466605, - "grad_norm": 0.008436281234025955, - "learning_rate": 0.0001999875714543664, - "loss": 46.0, - "step": 31174 - }, - { - "epoch": 5.0205322275453925, - "grad_norm": 0.013966712169349194, - "learning_rate": 0.00019998757065674817, - "loss": 46.0, - "step": 31175 - }, - { - "epoch": 5.02069326462418, - "grad_norm": 0.002289323601871729, - "learning_rate": 0.00019998756985910432, - "loss": 46.0, - "step": 31176 - }, - { - "epoch": 5.020854301702967, - "grad_norm": 0.008387654088437557, - "learning_rate": 0.00019998756906143488, - "loss": 46.0, - "step": 31177 - }, - { - "epoch": 5.021015338781755, - "grad_norm": 0.000529229873791337, - "learning_rate": 0.00019998756826373985, - "loss": 46.0, - "step": 31178 - }, - { - "epoch": 5.021176375860542, - "grad_norm": 0.005441870074719191, - "learning_rate": 0.00019998756746601926, - "loss": 46.0, - "step": 31179 - }, - { - "epoch": 5.021337412939329, - "grad_norm": 0.0012339279055595398, - "learning_rate": 0.00019998756666827304, - "loss": 46.0, - "step": 31180 - }, - { - "epoch": 5.021498450018116, - "grad_norm": 0.01439022645354271, - "learning_rate": 0.00019998756587050122, - "loss": 46.0, - "step": 31181 - }, - { - "epoch": 5.021659487096904, - "grad_norm": 0.002546433825045824, - "learning_rate": 0.00019998756507270385, - "loss": 46.0, - "step": 31182 - }, - { - "epoch": 5.021820524175691, - "grad_norm": 0.0054767681285738945, - "learning_rate": 0.00019998756427488088, - "loss": 46.0, - "step": 31183 - }, - { - "epoch": 5.021981561254479, - "grad_norm": 0.012280695140361786, - "learning_rate": 0.00019998756347703228, - "loss": 46.0, - "step": 31184 - }, - { - "epoch": 5.022142598333266, - "grad_norm": 0.013114583678543568, - "learning_rate": 0.00019998756267915812, - "loss": 46.0, - "step": 31185 - }, - { - "epoch": 5.022303635412054, - "grad_norm": 0.00488185603171587, - "learning_rate": 0.00019998756188125837, - "loss": 46.0, - "step": 31186 - }, - { - "epoch": 5.022464672490841, - "grad_norm": 0.0020085610449314117, - "learning_rate": 0.000199987561083333, - "loss": 46.0, - "step": 31187 - }, - { - "epoch": 5.0226257095696285, - "grad_norm": 0.011085419915616512, - "learning_rate": 0.00019998756028538208, - "loss": 46.0, - "step": 31188 - }, - { - "epoch": 5.022786746648416, - "grad_norm": 0.0025856555439531803, - "learning_rate": 0.00019998755948740554, - "loss": 46.0, - "step": 31189 - }, - { - "epoch": 5.022947783727203, - "grad_norm": 0.005422505084425211, - "learning_rate": 0.0001999875586894034, - "loss": 46.0, - "step": 31190 - }, - { - "epoch": 5.023108820805991, - "grad_norm": 0.0019389294320717454, - "learning_rate": 0.0001999875578913757, - "loss": 46.0, - "step": 31191 - }, - { - "epoch": 5.023269857884778, - "grad_norm": 0.005671205930411816, - "learning_rate": 0.0001999875570933224, - "loss": 46.0, - "step": 31192 - }, - { - "epoch": 5.023430894963566, - "grad_norm": 0.013513694517314434, - "learning_rate": 0.00019998755629524348, - "loss": 46.0, - "step": 31193 - }, - { - "epoch": 5.023591932042352, - "grad_norm": 0.005603449884802103, - "learning_rate": 0.000199987555497139, - "loss": 46.0, - "step": 31194 - }, - { - "epoch": 5.02375296912114, - "grad_norm": 0.003135878825560212, - "learning_rate": 0.00019998755469900892, - "loss": 46.0, - "step": 31195 - }, - { - "epoch": 5.023914006199927, - "grad_norm": 0.003170355688780546, - "learning_rate": 0.00019998755390085324, - "loss": 46.0, - "step": 31196 - }, - { - "epoch": 5.024075043278715, - "grad_norm": 0.009705073200166225, - "learning_rate": 0.00019998755310267197, - "loss": 46.0, - "step": 31197 - }, - { - "epoch": 5.024236080357502, - "grad_norm": 0.0016705053858458996, - "learning_rate": 0.0001999875523044651, - "loss": 46.0, - "step": 31198 - }, - { - "epoch": 5.02439711743629, - "grad_norm": 0.0015336641808971763, - "learning_rate": 0.00019998755150623269, - "loss": 46.0, - "step": 31199 - }, - { - "epoch": 5.024558154515077, - "grad_norm": 0.005949017126113176, - "learning_rate": 0.0001999875507079746, - "loss": 46.0, - "step": 31200 - }, - { - "epoch": 5.0247191915938645, - "grad_norm": 0.013331376947462559, - "learning_rate": 0.000199987549909691, - "loss": 46.0, - "step": 31201 - }, - { - "epoch": 5.024880228672652, - "grad_norm": 0.013253557495772839, - "learning_rate": 0.00019998754911138177, - "loss": 46.0, - "step": 31202 - }, - { - "epoch": 5.025041265751439, - "grad_norm": 0.0014974800869822502, - "learning_rate": 0.00019998754831304695, - "loss": 46.0, - "step": 31203 - }, - { - "epoch": 5.025202302830227, - "grad_norm": 0.004102327395230532, - "learning_rate": 0.00019998754751468652, - "loss": 46.0, - "step": 31204 - }, - { - "epoch": 5.025363339909014, - "grad_norm": 0.021181462332606316, - "learning_rate": 0.00019998754671630056, - "loss": 46.0, - "step": 31205 - }, - { - "epoch": 5.025524376987802, - "grad_norm": 0.01931837573647499, - "learning_rate": 0.00019998754591788896, - "loss": 46.0, - "step": 31206 - }, - { - "epoch": 5.025685414066589, - "grad_norm": 0.006445386912673712, - "learning_rate": 0.00019998754511945177, - "loss": 46.0, - "step": 31207 - }, - { - "epoch": 5.025846451145377, - "grad_norm": 0.004176761489361525, - "learning_rate": 0.00019998754432098902, - "loss": 46.0, - "step": 31208 - }, - { - "epoch": 5.026007488224163, - "grad_norm": 0.009595552459359169, - "learning_rate": 0.00019998754352250063, - "loss": 46.0, - "step": 31209 - }, - { - "epoch": 5.026168525302951, - "grad_norm": 0.001910917810164392, - "learning_rate": 0.00019998754272398665, - "loss": 46.0, - "step": 31210 - }, - { - "epoch": 5.026329562381738, - "grad_norm": 0.014299686998128891, - "learning_rate": 0.0001999875419254471, - "loss": 46.0, - "step": 31211 - }, - { - "epoch": 5.026490599460526, - "grad_norm": 0.002501022769138217, - "learning_rate": 0.00019998754112688199, - "loss": 46.0, - "step": 31212 - }, - { - "epoch": 5.026651636539313, - "grad_norm": 0.001806715619750321, - "learning_rate": 0.00019998754032829125, - "loss": 46.0, - "step": 31213 - }, - { - "epoch": 5.0268126736181005, - "grad_norm": 0.006467574741691351, - "learning_rate": 0.00019998753952967492, - "loss": 46.0, - "step": 31214 - }, - { - "epoch": 5.026973710696888, - "grad_norm": 0.005253058858215809, - "learning_rate": 0.000199987538731033, - "loss": 46.0, - "step": 31215 - }, - { - "epoch": 5.027134747775675, - "grad_norm": 0.009899591095745564, - "learning_rate": 0.00019998753793236548, - "loss": 46.0, - "step": 31216 - }, - { - "epoch": 5.027295784854463, - "grad_norm": 0.012147229164838791, - "learning_rate": 0.0001999875371336724, - "loss": 46.0, - "step": 31217 - }, - { - "epoch": 5.02745682193325, - "grad_norm": 0.0035418584011495113, - "learning_rate": 0.0001999875363349537, - "loss": 46.0, - "step": 31218 - }, - { - "epoch": 5.027617859012038, - "grad_norm": 0.007967657409608364, - "learning_rate": 0.00019998753553620942, - "loss": 46.0, - "step": 31219 - }, - { - "epoch": 5.027778896090825, - "grad_norm": 0.004422798287123442, - "learning_rate": 0.00019998753473743954, - "loss": 46.0, - "step": 31220 - }, - { - "epoch": 5.027939933169613, - "grad_norm": 0.0024494112003594637, - "learning_rate": 0.00019998753393864408, - "loss": 46.0, - "step": 31221 - }, - { - "epoch": 5.0281009702484, - "grad_norm": 0.002499592024832964, - "learning_rate": 0.000199987533139823, - "loss": 46.0, - "step": 31222 - }, - { - "epoch": 5.028262007327187, - "grad_norm": 0.017103072255849838, - "learning_rate": 0.0001999875323409764, - "loss": 46.0, - "step": 31223 - }, - { - "epoch": 5.028423044405974, - "grad_norm": 0.0033480413258075714, - "learning_rate": 0.00019998753154210414, - "loss": 46.0, - "step": 31224 - }, - { - "epoch": 5.0285840814847615, - "grad_norm": 0.012788372114300728, - "learning_rate": 0.0001999875307432063, - "loss": 46.0, - "step": 31225 - }, - { - "epoch": 5.028745118563549, - "grad_norm": 0.001563073368743062, - "learning_rate": 0.00019998752994428287, - "loss": 46.0, - "step": 31226 - }, - { - "epoch": 5.0289061556423365, - "grad_norm": 0.003520380472764373, - "learning_rate": 0.00019998752914533385, - "loss": 46.0, - "step": 31227 - }, - { - "epoch": 5.029067192721124, - "grad_norm": 0.0013068087864667177, - "learning_rate": 0.00019998752834635925, - "loss": 46.0, - "step": 31228 - }, - { - "epoch": 5.029228229799911, - "grad_norm": 0.0033790695015341043, - "learning_rate": 0.00019998752754735906, - "loss": 46.0, - "step": 31229 - }, - { - "epoch": 5.029389266878699, - "grad_norm": 0.008708048611879349, - "learning_rate": 0.00019998752674833323, - "loss": 46.0, - "step": 31230 - }, - { - "epoch": 5.029550303957486, - "grad_norm": 0.006931872572749853, - "learning_rate": 0.00019998752594928187, - "loss": 46.0, - "step": 31231 - }, - { - "epoch": 5.029711341036274, - "grad_norm": 0.0027630438562482595, - "learning_rate": 0.0001999875251502049, - "loss": 46.0, - "step": 31232 - }, - { - "epoch": 5.029872378115061, - "grad_norm": 0.009353955276310444, - "learning_rate": 0.00019998752435110233, - "loss": 46.0, - "step": 31233 - }, - { - "epoch": 5.030033415193849, - "grad_norm": 0.0018961328314617276, - "learning_rate": 0.00019998752355197418, - "loss": 46.0, - "step": 31234 - }, - { - "epoch": 5.030194452272636, - "grad_norm": 0.005243843421339989, - "learning_rate": 0.0001999875227528204, - "loss": 46.0, - "step": 31235 - }, - { - "epoch": 5.0303554893514235, - "grad_norm": 0.005383746698498726, - "learning_rate": 0.00019998752195364108, - "loss": 46.0, - "step": 31236 - }, - { - "epoch": 5.030516526430211, - "grad_norm": 0.0013867057859897614, - "learning_rate": 0.00019998752115443614, - "loss": 46.0, - "step": 31237 - }, - { - "epoch": 5.0306775635089975, - "grad_norm": 0.0018150400137528777, - "learning_rate": 0.00019998752035520561, - "loss": 46.0, - "step": 31238 - }, - { - "epoch": 5.030838600587785, - "grad_norm": 0.0033124228939414024, - "learning_rate": 0.00019998751955594947, - "loss": 46.0, - "step": 31239 - }, - { - "epoch": 5.030999637666572, - "grad_norm": 0.00721521582454443, - "learning_rate": 0.00019998751875666777, - "loss": 46.0, - "step": 31240 - }, - { - "epoch": 5.03116067474536, - "grad_norm": 0.007003186736255884, - "learning_rate": 0.00019998751795736048, - "loss": 46.0, - "step": 31241 - }, - { - "epoch": 5.031321711824147, - "grad_norm": 0.0025241137482225895, - "learning_rate": 0.0001999875171580276, - "loss": 46.0, - "step": 31242 - }, - { - "epoch": 5.031482748902935, - "grad_norm": 0.001630175393074751, - "learning_rate": 0.00019998751635866911, - "loss": 46.0, - "step": 31243 - }, - { - "epoch": 5.031643785981722, - "grad_norm": 0.0049359663389623165, - "learning_rate": 0.00019998751555928504, - "loss": 46.0, - "step": 31244 - }, - { - "epoch": 5.03180482306051, - "grad_norm": 0.009411569684743881, - "learning_rate": 0.00019998751475987537, - "loss": 46.0, - "step": 31245 - }, - { - "epoch": 5.031965860139297, - "grad_norm": 0.001628089346922934, - "learning_rate": 0.00019998751396044012, - "loss": 46.0, - "step": 31246 - }, - { - "epoch": 5.032126897218085, - "grad_norm": 0.002978523727506399, - "learning_rate": 0.00019998751316097925, - "loss": 46.0, - "step": 31247 - }, - { - "epoch": 5.032287934296872, - "grad_norm": 0.002056524157524109, - "learning_rate": 0.0001999875123614928, - "loss": 46.0, - "step": 31248 - }, - { - "epoch": 5.0324489713756595, - "grad_norm": 0.0015227656112983823, - "learning_rate": 0.00019998751156198079, - "loss": 46.0, - "step": 31249 - }, - { - "epoch": 5.032610008454447, - "grad_norm": 0.02733725495636463, - "learning_rate": 0.00019998751076244316, - "loss": 46.0, - "step": 31250 - }, - { - "epoch": 5.032771045533234, - "grad_norm": 0.003301442600786686, - "learning_rate": 0.00019998750996287994, - "loss": 46.0, - "step": 31251 - }, - { - "epoch": 5.032932082612022, - "grad_norm": 0.009448815137147903, - "learning_rate": 0.00019998750916329114, - "loss": 46.0, - "step": 31252 - }, - { - "epoch": 5.033093119690808, - "grad_norm": 0.006169270724058151, - "learning_rate": 0.00019998750836367672, - "loss": 46.0, - "step": 31253 - }, - { - "epoch": 5.033254156769596, - "grad_norm": 0.003445628099143505, - "learning_rate": 0.00019998750756403672, - "loss": 46.0, - "step": 31254 - }, - { - "epoch": 5.033415193848383, - "grad_norm": 0.003946996293962002, - "learning_rate": 0.00019998750676437116, - "loss": 46.0, - "step": 31255 - }, - { - "epoch": 5.033576230927171, - "grad_norm": 0.0019527708645910025, - "learning_rate": 0.00019998750596467998, - "loss": 46.0, - "step": 31256 - }, - { - "epoch": 5.033737268005958, - "grad_norm": 0.0045500583946704865, - "learning_rate": 0.0001999875051649632, - "loss": 46.0, - "step": 31257 - }, - { - "epoch": 5.033898305084746, - "grad_norm": 0.0023381556384265423, - "learning_rate": 0.00019998750436522083, - "loss": 46.0, - "step": 31258 - }, - { - "epoch": 5.034059342163533, - "grad_norm": 0.00200643646530807, - "learning_rate": 0.0001999875035654529, - "loss": 46.0, - "step": 31259 - }, - { - "epoch": 5.034220379242321, - "grad_norm": 0.011275083757936954, - "learning_rate": 0.00019998750276565934, - "loss": 46.0, - "step": 31260 - }, - { - "epoch": 5.034381416321108, - "grad_norm": 0.01734207756817341, - "learning_rate": 0.00019998750196584023, - "loss": 46.0, - "step": 31261 - }, - { - "epoch": 5.0345424533998955, - "grad_norm": 0.013983099721372128, - "learning_rate": 0.0001999875011659955, - "loss": 46.0, - "step": 31262 - }, - { - "epoch": 5.034703490478683, - "grad_norm": 0.01058198045939207, - "learning_rate": 0.00019998750036612516, - "loss": 46.0, - "step": 31263 - }, - { - "epoch": 5.03486452755747, - "grad_norm": 0.0030954291578382254, - "learning_rate": 0.00019998749956622928, - "loss": 46.0, - "step": 31264 - }, - { - "epoch": 5.035025564636258, - "grad_norm": 0.0050180512480437756, - "learning_rate": 0.0001999874987663078, - "loss": 46.0, - "step": 31265 - }, - { - "epoch": 5.035186601715045, - "grad_norm": 0.00824425183236599, - "learning_rate": 0.0001999874979663607, - "loss": 46.0, - "step": 31266 - }, - { - "epoch": 5.035347638793832, - "grad_norm": 0.009842642582952976, - "learning_rate": 0.000199987497166388, - "loss": 46.0, - "step": 31267 - }, - { - "epoch": 5.035508675872619, - "grad_norm": 0.0010818196460604668, - "learning_rate": 0.00019998749636638972, - "loss": 46.0, - "step": 31268 - }, - { - "epoch": 5.035669712951407, - "grad_norm": 0.0069105722941458225, - "learning_rate": 0.00019998749556636585, - "loss": 46.0, - "step": 31269 - }, - { - "epoch": 5.035830750030194, - "grad_norm": 0.003831706941127777, - "learning_rate": 0.0001999874947663164, - "loss": 46.0, - "step": 31270 - }, - { - "epoch": 5.035991787108982, - "grad_norm": 0.009741471149027348, - "learning_rate": 0.00019998749396624136, - "loss": 46.0, - "step": 31271 - }, - { - "epoch": 5.036152824187769, - "grad_norm": 0.01142609678208828, - "learning_rate": 0.0001999874931661407, - "loss": 46.0, - "step": 31272 - }, - { - "epoch": 5.036313861266557, - "grad_norm": 0.0040459115989506245, - "learning_rate": 0.00019998749236601446, - "loss": 46.0, - "step": 31273 - }, - { - "epoch": 5.036474898345344, - "grad_norm": 0.004744853358715773, - "learning_rate": 0.00019998749156586266, - "loss": 46.0, - "step": 31274 - }, - { - "epoch": 5.0366359354241315, - "grad_norm": 0.0057984162122011185, - "learning_rate": 0.00019998749076568522, - "loss": 46.0, - "step": 31275 - }, - { - "epoch": 5.036796972502919, - "grad_norm": 0.006191633641719818, - "learning_rate": 0.0001999874899654822, - "loss": 46.0, - "step": 31276 - }, - { - "epoch": 5.036958009581706, - "grad_norm": 0.003779152175411582, - "learning_rate": 0.0001999874891652536, - "loss": 46.0, - "step": 31277 - }, - { - "epoch": 5.037119046660494, - "grad_norm": 0.0015368192689493299, - "learning_rate": 0.00019998748836499942, - "loss": 46.0, - "step": 31278 - }, - { - "epoch": 5.037280083739281, - "grad_norm": 0.007674081716686487, - "learning_rate": 0.00019998748756471963, - "loss": 46.0, - "step": 31279 - }, - { - "epoch": 5.037441120818069, - "grad_norm": 0.0035461331717669964, - "learning_rate": 0.00019998748676441425, - "loss": 46.0, - "step": 31280 - }, - { - "epoch": 5.037602157896856, - "grad_norm": 0.0025449793320149183, - "learning_rate": 0.0001999874859640833, - "loss": 46.0, - "step": 31281 - }, - { - "epoch": 5.037763194975643, - "grad_norm": 0.006803569383919239, - "learning_rate": 0.00019998748516372673, - "loss": 46.0, - "step": 31282 - }, - { - "epoch": 5.03792423205443, - "grad_norm": 0.0013075171737000346, - "learning_rate": 0.00019998748436334457, - "loss": 46.0, - "step": 31283 - }, - { - "epoch": 5.038085269133218, - "grad_norm": 0.0029097353108227253, - "learning_rate": 0.00019998748356293684, - "loss": 46.0, - "step": 31284 - }, - { - "epoch": 5.038246306212005, - "grad_norm": 0.007517886348068714, - "learning_rate": 0.0001999874827625035, - "loss": 46.0, - "step": 31285 - }, - { - "epoch": 5.0384073432907925, - "grad_norm": 0.002215017331764102, - "learning_rate": 0.0001999874819620446, - "loss": 46.0, - "step": 31286 - }, - { - "epoch": 5.03856838036958, - "grad_norm": 0.004603625275194645, - "learning_rate": 0.00019998748116156008, - "loss": 46.0, - "step": 31287 - }, - { - "epoch": 5.038729417448367, - "grad_norm": 0.007704117335379124, - "learning_rate": 0.00019998748036104995, - "loss": 46.0, - "step": 31288 - }, - { - "epoch": 5.038890454527155, - "grad_norm": 0.009899219498038292, - "learning_rate": 0.00019998747956051426, - "loss": 46.0, - "step": 31289 - }, - { - "epoch": 5.039051491605942, - "grad_norm": 0.0007270171772688627, - "learning_rate": 0.00019998747875995295, - "loss": 46.0, - "step": 31290 - }, - { - "epoch": 5.03921252868473, - "grad_norm": 0.0006603830261155963, - "learning_rate": 0.00019998747795936606, - "loss": 46.0, - "step": 31291 - }, - { - "epoch": 5.039373565763517, - "grad_norm": 0.003224874846637249, - "learning_rate": 0.00019998747715875358, - "loss": 46.0, - "step": 31292 - }, - { - "epoch": 5.039534602842305, - "grad_norm": 0.02393312193453312, - "learning_rate": 0.00019998747635811555, - "loss": 46.0, - "step": 31293 - }, - { - "epoch": 5.039695639921092, - "grad_norm": 0.0041695875115692616, - "learning_rate": 0.00019998747555745187, - "loss": 46.0, - "step": 31294 - }, - { - "epoch": 5.03985667699988, - "grad_norm": 0.008520207367837429, - "learning_rate": 0.00019998747475676262, - "loss": 46.0, - "step": 31295 - }, - { - "epoch": 5.040017714078667, - "grad_norm": 0.015758665278553963, - "learning_rate": 0.0001999874739560478, - "loss": 46.0, - "step": 31296 - }, - { - "epoch": 5.040178751157454, - "grad_norm": 0.004376154858618975, - "learning_rate": 0.00019998747315530736, - "loss": 46.0, - "step": 31297 - }, - { - "epoch": 5.040339788236241, - "grad_norm": 0.007451072335243225, - "learning_rate": 0.00019998747235454133, - "loss": 46.0, - "step": 31298 - }, - { - "epoch": 5.0405008253150285, - "grad_norm": 0.005293107591569424, - "learning_rate": 0.0001999874715537497, - "loss": 46.0, - "step": 31299 - }, - { - "epoch": 5.040661862393816, - "grad_norm": 0.00759174395352602, - "learning_rate": 0.00019998747075293248, - "loss": 46.0, - "step": 31300 - }, - { - "epoch": 5.040822899472603, - "grad_norm": 0.0014590921346098185, - "learning_rate": 0.0001999874699520897, - "loss": 46.0, - "step": 31301 - }, - { - "epoch": 5.040983936551391, - "grad_norm": 0.003641568124294281, - "learning_rate": 0.0001999874691512213, - "loss": 46.0, - "step": 31302 - }, - { - "epoch": 5.041144973630178, - "grad_norm": 0.015508061274886131, - "learning_rate": 0.00019998746835032735, - "loss": 46.0, - "step": 31303 - }, - { - "epoch": 5.041306010708966, - "grad_norm": 0.0020176840480417013, - "learning_rate": 0.00019998746754940774, - "loss": 46.0, - "step": 31304 - }, - { - "epoch": 5.041467047787753, - "grad_norm": 0.003791556926444173, - "learning_rate": 0.00019998746674846257, - "loss": 46.0, - "step": 31305 - }, - { - "epoch": 5.041628084866541, - "grad_norm": 0.007079689763486385, - "learning_rate": 0.00019998746594749182, - "loss": 46.0, - "step": 31306 - }, - { - "epoch": 5.041789121945328, - "grad_norm": 0.003946996293962002, - "learning_rate": 0.00019998746514649548, - "loss": 46.0, - "step": 31307 - }, - { - "epoch": 5.041950159024116, - "grad_norm": 0.005046943202614784, - "learning_rate": 0.00019998746434547352, - "loss": 46.0, - "step": 31308 - }, - { - "epoch": 5.042111196102903, - "grad_norm": 0.0015233828453347087, - "learning_rate": 0.00019998746354442598, - "loss": 46.0, - "step": 31309 - }, - { - "epoch": 5.0422722331816905, - "grad_norm": 0.00573785649612546, - "learning_rate": 0.00019998746274335288, - "loss": 46.0, - "step": 31310 - }, - { - "epoch": 5.042433270260477, - "grad_norm": 0.0013896606396883726, - "learning_rate": 0.00019998746194225414, - "loss": 46.0, - "step": 31311 - }, - { - "epoch": 5.0425943073392645, - "grad_norm": 0.003102433867752552, - "learning_rate": 0.00019998746114112983, - "loss": 46.0, - "step": 31312 - }, - { - "epoch": 5.042755344418052, - "grad_norm": 0.0025348858907818794, - "learning_rate": 0.00019998746033997992, - "loss": 46.0, - "step": 31313 - }, - { - "epoch": 5.042916381496839, - "grad_norm": 0.0022277783136814833, - "learning_rate": 0.00019998745953880444, - "loss": 46.0, - "step": 31314 - }, - { - "epoch": 5.043077418575627, - "grad_norm": 0.008986001834273338, - "learning_rate": 0.00019998745873760337, - "loss": 46.0, - "step": 31315 - }, - { - "epoch": 5.043238455654414, - "grad_norm": 0.008521556854248047, - "learning_rate": 0.0001999874579363767, - "loss": 46.0, - "step": 31316 - }, - { - "epoch": 5.043399492733202, - "grad_norm": 0.002612863900139928, - "learning_rate": 0.00019998745713512443, - "loss": 46.0, - "step": 31317 - }, - { - "epoch": 5.043560529811989, - "grad_norm": 0.008150513283908367, - "learning_rate": 0.00019998745633384657, - "loss": 46.0, - "step": 31318 - }, - { - "epoch": 5.043721566890777, - "grad_norm": 0.004577559418976307, - "learning_rate": 0.00019998745553254313, - "loss": 46.0, - "step": 31319 - }, - { - "epoch": 5.043882603969564, - "grad_norm": 0.0018382498528808355, - "learning_rate": 0.00019998745473121408, - "loss": 46.0, - "step": 31320 - }, - { - "epoch": 5.044043641048352, - "grad_norm": 0.0040992083959281445, - "learning_rate": 0.00019998745392985943, - "loss": 46.0, - "step": 31321 - }, - { - "epoch": 5.044204678127139, - "grad_norm": 0.004502686206251383, - "learning_rate": 0.0001999874531284792, - "loss": 46.0, - "step": 31322 - }, - { - "epoch": 5.0443657152059265, - "grad_norm": 0.015286893583834171, - "learning_rate": 0.00019998745232707342, - "loss": 46.0, - "step": 31323 - }, - { - "epoch": 5.044526752284714, - "grad_norm": 0.0019343370804563165, - "learning_rate": 0.000199987451525642, - "loss": 46.0, - "step": 31324 - }, - { - "epoch": 5.044687789363501, - "grad_norm": 0.004592434968799353, - "learning_rate": 0.000199987450724185, - "loss": 46.0, - "step": 31325 - }, - { - "epoch": 5.044848826442288, - "grad_norm": 0.007957251742482185, - "learning_rate": 0.0001999874499227024, - "loss": 46.0, - "step": 31326 - }, - { - "epoch": 5.045009863521075, - "grad_norm": 0.0012921326560899615, - "learning_rate": 0.0001999874491211942, - "loss": 46.0, - "step": 31327 - }, - { - "epoch": 5.045170900599863, - "grad_norm": 0.007931535132229328, - "learning_rate": 0.00019998744831966044, - "loss": 46.0, - "step": 31328 - }, - { - "epoch": 5.04533193767865, - "grad_norm": 0.002153608947992325, - "learning_rate": 0.00019998744751810108, - "loss": 46.0, - "step": 31329 - }, - { - "epoch": 5.045492974757438, - "grad_norm": 0.00686339195817709, - "learning_rate": 0.0001999874467165161, - "loss": 46.0, - "step": 31330 - }, - { - "epoch": 5.045654011836225, - "grad_norm": 0.001382661983370781, - "learning_rate": 0.00019998744591490555, - "loss": 46.0, - "step": 31331 - }, - { - "epoch": 5.045815048915013, - "grad_norm": 0.005815770942717791, - "learning_rate": 0.0001999874451132694, - "loss": 46.0, - "step": 31332 - }, - { - "epoch": 5.0459760859938, - "grad_norm": 0.006721396930515766, - "learning_rate": 0.0001999874443116077, - "loss": 46.0, - "step": 31333 - }, - { - "epoch": 5.0461371230725875, - "grad_norm": 0.004813921172171831, - "learning_rate": 0.00019998744350992035, - "loss": 46.0, - "step": 31334 - }, - { - "epoch": 5.046298160151375, - "grad_norm": 0.003513301257044077, - "learning_rate": 0.00019998744270820744, - "loss": 46.0, - "step": 31335 - }, - { - "epoch": 5.0464591972301625, - "grad_norm": 0.017438331618905067, - "learning_rate": 0.00019998744190646893, - "loss": 46.0, - "step": 31336 - }, - { - "epoch": 5.04662023430895, - "grad_norm": 0.00219210353679955, - "learning_rate": 0.0001999874411047048, - "loss": 46.0, - "step": 31337 - }, - { - "epoch": 5.046781271387737, - "grad_norm": 0.0018554676789790392, - "learning_rate": 0.00019998744030291513, - "loss": 46.0, - "step": 31338 - }, - { - "epoch": 5.046942308466525, - "grad_norm": 0.0037925350479781628, - "learning_rate": 0.00019998743950109987, - "loss": 46.0, - "step": 31339 - }, - { - "epoch": 5.047103345545311, - "grad_norm": 0.002175368135794997, - "learning_rate": 0.000199987438699259, - "loss": 46.0, - "step": 31340 - }, - { - "epoch": 5.047264382624099, - "grad_norm": 0.007592346053570509, - "learning_rate": 0.00019998743789739252, - "loss": 46.0, - "step": 31341 - }, - { - "epoch": 5.047425419702886, - "grad_norm": 0.0024825402069836855, - "learning_rate": 0.00019998743709550044, - "loss": 46.0, - "step": 31342 - }, - { - "epoch": 5.047586456781674, - "grad_norm": 0.0038324189372360706, - "learning_rate": 0.0001999874362935828, - "loss": 46.0, - "step": 31343 - }, - { - "epoch": 5.047747493860461, - "grad_norm": 0.00716969883069396, - "learning_rate": 0.00019998743549163957, - "loss": 46.0, - "step": 31344 - }, - { - "epoch": 5.047908530939249, - "grad_norm": 0.011669233441352844, - "learning_rate": 0.00019998743468967073, - "loss": 46.0, - "step": 31345 - }, - { - "epoch": 5.048069568018036, - "grad_norm": 0.001614892389625311, - "learning_rate": 0.0001999874338876763, - "loss": 46.0, - "step": 31346 - }, - { - "epoch": 5.0482306050968235, - "grad_norm": 0.005077993031591177, - "learning_rate": 0.00019998743308565628, - "loss": 46.0, - "step": 31347 - }, - { - "epoch": 5.048391642175611, - "grad_norm": 0.0030572321265935898, - "learning_rate": 0.00019998743228361068, - "loss": 46.0, - "step": 31348 - }, - { - "epoch": 5.048552679254398, - "grad_norm": 0.015330816619098186, - "learning_rate": 0.00019998743148153948, - "loss": 46.0, - "step": 31349 - }, - { - "epoch": 5.048713716333186, - "grad_norm": 0.0018201855709776282, - "learning_rate": 0.0001999874306794427, - "loss": 46.0, - "step": 31350 - }, - { - "epoch": 5.048874753411973, - "grad_norm": 0.006428643129765987, - "learning_rate": 0.0001999874298773203, - "loss": 46.0, - "step": 31351 - }, - { - "epoch": 5.049035790490761, - "grad_norm": 0.0064857881516218185, - "learning_rate": 0.00019998742907517233, - "loss": 46.0, - "step": 31352 - }, - { - "epoch": 5.049196827569548, - "grad_norm": 0.006932168733328581, - "learning_rate": 0.00019998742827299877, - "loss": 46.0, - "step": 31353 - }, - { - "epoch": 5.049357864648336, - "grad_norm": 0.007753266952931881, - "learning_rate": 0.0001999874274707996, - "loss": 46.0, - "step": 31354 - }, - { - "epoch": 5.049518901727122, - "grad_norm": 0.011997136287391186, - "learning_rate": 0.00019998742666857484, - "loss": 46.0, - "step": 31355 - }, - { - "epoch": 5.04967993880591, - "grad_norm": 0.012536806985735893, - "learning_rate": 0.0001999874258663245, - "loss": 46.0, - "step": 31356 - }, - { - "epoch": 5.049840975884697, - "grad_norm": 0.004379378166049719, - "learning_rate": 0.0001999874250640486, - "loss": 46.0, - "step": 31357 - }, - { - "epoch": 5.050002012963485, - "grad_norm": 0.0015932349488139153, - "learning_rate": 0.00019998742426174707, - "loss": 46.0, - "step": 31358 - }, - { - "epoch": 5.050163050042272, - "grad_norm": 0.011047838255763054, - "learning_rate": 0.00019998742345941995, - "loss": 46.0, - "step": 31359 - }, - { - "epoch": 5.0503240871210595, - "grad_norm": 0.0020418083295226097, - "learning_rate": 0.00019998742265706722, - "loss": 46.0, - "step": 31360 - }, - { - "epoch": 5.050485124199847, - "grad_norm": 0.0030328778084367514, - "learning_rate": 0.00019998742185468893, - "loss": 46.0, - "step": 31361 - }, - { - "epoch": 5.050646161278634, - "grad_norm": 0.003227280220016837, - "learning_rate": 0.00019998742105228505, - "loss": 46.0, - "step": 31362 - }, - { - "epoch": 5.050807198357422, - "grad_norm": 0.004678688477724791, - "learning_rate": 0.00019998742024985555, - "loss": 46.0, - "step": 31363 - }, - { - "epoch": 5.050968235436209, - "grad_norm": 0.00585603155195713, - "learning_rate": 0.00019998741944740047, - "loss": 46.0, - "step": 31364 - }, - { - "epoch": 5.051129272514997, - "grad_norm": 0.004272001795470715, - "learning_rate": 0.0001999874186449198, - "loss": 46.0, - "step": 31365 - }, - { - "epoch": 5.051290309593784, - "grad_norm": 0.005335711408406496, - "learning_rate": 0.00019998741784241358, - "loss": 46.0, - "step": 31366 - }, - { - "epoch": 5.051451346672572, - "grad_norm": 0.005429176148027182, - "learning_rate": 0.0001999874170398817, - "loss": 46.0, - "step": 31367 - }, - { - "epoch": 5.051612383751359, - "grad_norm": 0.0013133641332387924, - "learning_rate": 0.00019998741623732425, - "loss": 46.0, - "step": 31368 - }, - { - "epoch": 5.051773420830147, - "grad_norm": 0.005599755793809891, - "learning_rate": 0.00019998741543474124, - "loss": 46.0, - "step": 31369 - }, - { - "epoch": 5.051934457908933, - "grad_norm": 0.004211513325572014, - "learning_rate": 0.0001999874146321326, - "loss": 46.0, - "step": 31370 - }, - { - "epoch": 5.052095494987721, - "grad_norm": 0.009187194518744946, - "learning_rate": 0.0001999874138294984, - "loss": 46.0, - "step": 31371 - }, - { - "epoch": 5.052256532066508, - "grad_norm": 0.004940877668559551, - "learning_rate": 0.00019998741302683856, - "loss": 46.0, - "step": 31372 - }, - { - "epoch": 5.0524175691452955, - "grad_norm": 0.012068133801221848, - "learning_rate": 0.0001999874122241532, - "loss": 46.0, - "step": 31373 - }, - { - "epoch": 5.052578606224083, - "grad_norm": 0.006456427741795778, - "learning_rate": 0.00019998741142144218, - "loss": 46.0, - "step": 31374 - }, - { - "epoch": 5.05273964330287, - "grad_norm": 0.004046276211738586, - "learning_rate": 0.00019998741061870562, - "loss": 46.0, - "step": 31375 - }, - { - "epoch": 5.052900680381658, - "grad_norm": 0.0027716588228940964, - "learning_rate": 0.0001999874098159434, - "loss": 46.0, - "step": 31376 - }, - { - "epoch": 5.053061717460445, - "grad_norm": 0.0017947101732715964, - "learning_rate": 0.00019998740901315567, - "loss": 46.0, - "step": 31377 - }, - { - "epoch": 5.053222754539233, - "grad_norm": 0.016897421330213547, - "learning_rate": 0.0001999874082103423, - "loss": 46.0, - "step": 31378 - }, - { - "epoch": 5.05338379161802, - "grad_norm": 0.024554461240768433, - "learning_rate": 0.00019998740740750334, - "loss": 46.0, - "step": 31379 - }, - { - "epoch": 5.053544828696808, - "grad_norm": 0.00404990091919899, - "learning_rate": 0.00019998740660463881, - "loss": 46.0, - "step": 31380 - }, - { - "epoch": 5.053705865775595, - "grad_norm": 0.005295802839100361, - "learning_rate": 0.00019998740580174867, - "loss": 46.0, - "step": 31381 - }, - { - "epoch": 5.053866902854383, - "grad_norm": 0.005360559560358524, - "learning_rate": 0.00019998740499883297, - "loss": 46.0, - "step": 31382 - }, - { - "epoch": 5.05402793993317, - "grad_norm": 0.0006004504975862801, - "learning_rate": 0.00019998740419589165, - "loss": 46.0, - "step": 31383 - }, - { - "epoch": 5.054188977011957, - "grad_norm": 0.015216195955872536, - "learning_rate": 0.00019998740339292472, - "loss": 46.0, - "step": 31384 - }, - { - "epoch": 5.054350014090744, - "grad_norm": 0.00952720083296299, - "learning_rate": 0.00019998740258993225, - "loss": 46.0, - "step": 31385 - }, - { - "epoch": 5.0545110511695315, - "grad_norm": 0.014766951091587543, - "learning_rate": 0.00019998740178691415, - "loss": 46.0, - "step": 31386 - }, - { - "epoch": 5.054672088248319, - "grad_norm": 0.0011706594377756119, - "learning_rate": 0.00019998740098387048, - "loss": 46.0, - "step": 31387 - }, - { - "epoch": 5.054833125327106, - "grad_norm": 0.00246440339833498, - "learning_rate": 0.00019998740018080117, - "loss": 46.0, - "step": 31388 - }, - { - "epoch": 5.054994162405894, - "grad_norm": 0.0016380480956286192, - "learning_rate": 0.00019998739937770633, - "loss": 46.0, - "step": 31389 - }, - { - "epoch": 5.055155199484681, - "grad_norm": 0.014208029955625534, - "learning_rate": 0.00019998739857458585, - "loss": 46.0, - "step": 31390 - }, - { - "epoch": 5.055316236563469, - "grad_norm": 0.004792451858520508, - "learning_rate": 0.0001999873977714398, - "loss": 46.0, - "step": 31391 - }, - { - "epoch": 5.055477273642256, - "grad_norm": 0.002148693660274148, - "learning_rate": 0.00019998739696826817, - "loss": 46.0, - "step": 31392 - }, - { - "epoch": 5.055638310721044, - "grad_norm": 0.0014832004671916366, - "learning_rate": 0.00019998739616507093, - "loss": 46.0, - "step": 31393 - }, - { - "epoch": 5.055799347799831, - "grad_norm": 0.004854957107454538, - "learning_rate": 0.00019998739536184813, - "loss": 46.0, - "step": 31394 - }, - { - "epoch": 5.0559603848786185, - "grad_norm": 0.0009185775998048484, - "learning_rate": 0.0001999873945585997, - "loss": 46.0, - "step": 31395 - }, - { - "epoch": 5.056121421957406, - "grad_norm": 0.0025046789087355137, - "learning_rate": 0.0001999873937553257, - "loss": 46.0, - "step": 31396 - }, - { - "epoch": 5.056282459036193, - "grad_norm": 0.008300804533064365, - "learning_rate": 0.00019998739295202608, - "loss": 46.0, - "step": 31397 - }, - { - "epoch": 5.056443496114981, - "grad_norm": 0.001007849583402276, - "learning_rate": 0.0001999873921487009, - "loss": 46.0, - "step": 31398 - }, - { - "epoch": 5.0566045331937675, - "grad_norm": 0.013126351870596409, - "learning_rate": 0.0001999873913453501, - "loss": 46.0, - "step": 31399 - }, - { - "epoch": 5.056765570272555, - "grad_norm": 0.002243891591206193, - "learning_rate": 0.00019998739054197376, - "loss": 46.0, - "step": 31400 - }, - { - "epoch": 5.056926607351342, - "grad_norm": 0.004190828185528517, - "learning_rate": 0.0001999873897385718, - "loss": 46.0, - "step": 31401 - }, - { - "epoch": 5.05708764443013, - "grad_norm": 0.007364446762949228, - "learning_rate": 0.00019998738893514423, - "loss": 46.0, - "step": 31402 - }, - { - "epoch": 5.057248681508917, - "grad_norm": 0.005762541200965643, - "learning_rate": 0.00019998738813169106, - "loss": 46.0, - "step": 31403 - }, - { - "epoch": 5.057409718587705, - "grad_norm": 0.0010841024341061711, - "learning_rate": 0.00019998738732821233, - "loss": 46.0, - "step": 31404 - }, - { - "epoch": 5.057570755666492, - "grad_norm": 0.008369272574782372, - "learning_rate": 0.000199987386524708, - "loss": 46.0, - "step": 31405 - }, - { - "epoch": 5.05773179274528, - "grad_norm": 0.010041013360023499, - "learning_rate": 0.00019998738572117808, - "loss": 46.0, - "step": 31406 - }, - { - "epoch": 5.057892829824067, - "grad_norm": 0.002257033484056592, - "learning_rate": 0.00019998738491762256, - "loss": 46.0, - "step": 31407 - }, - { - "epoch": 5.0580538669028545, - "grad_norm": 0.001278592273592949, - "learning_rate": 0.00019998738411404146, - "loss": 46.0, - "step": 31408 - }, - { - "epoch": 5.058214903981642, - "grad_norm": 0.002209677826613188, - "learning_rate": 0.00019998738331043477, - "loss": 46.0, - "step": 31409 - }, - { - "epoch": 5.058375941060429, - "grad_norm": 0.005247199907898903, - "learning_rate": 0.00019998738250680246, - "loss": 46.0, - "step": 31410 - }, - { - "epoch": 5.058536978139217, - "grad_norm": 0.0017442961689084768, - "learning_rate": 0.00019998738170314456, - "loss": 46.0, - "step": 31411 - }, - { - "epoch": 5.058698015218004, - "grad_norm": 0.0007388941594399512, - "learning_rate": 0.0001999873808994611, - "loss": 46.0, - "step": 31412 - }, - { - "epoch": 5.058859052296791, - "grad_norm": 0.0036482533905655146, - "learning_rate": 0.00019998738009575204, - "loss": 46.0, - "step": 31413 - }, - { - "epoch": 5.059020089375578, - "grad_norm": 0.00955873727798462, - "learning_rate": 0.00019998737929201738, - "loss": 46.0, - "step": 31414 - }, - { - "epoch": 5.059181126454366, - "grad_norm": 0.0017869167495518923, - "learning_rate": 0.00019998737848825714, - "loss": 46.0, - "step": 31415 - }, - { - "epoch": 5.059342163533153, - "grad_norm": 0.0059241121634840965, - "learning_rate": 0.0001999873776844713, - "loss": 46.0, - "step": 31416 - }, - { - "epoch": 5.059503200611941, - "grad_norm": 0.00184874318074435, - "learning_rate": 0.00019998737688065987, - "loss": 46.0, - "step": 31417 - }, - { - "epoch": 5.059664237690728, - "grad_norm": 0.003958504181355238, - "learning_rate": 0.00019998737607682286, - "loss": 46.0, - "step": 31418 - }, - { - "epoch": 5.059825274769516, - "grad_norm": 0.004700548946857452, - "learning_rate": 0.00019998737527296024, - "loss": 46.0, - "step": 31419 - }, - { - "epoch": 5.059986311848303, - "grad_norm": 0.0021934835240244865, - "learning_rate": 0.00019998737446907204, - "loss": 46.0, - "step": 31420 - }, - { - "epoch": 5.0601473489270905, - "grad_norm": 0.0026741705369204283, - "learning_rate": 0.00019998737366515822, - "loss": 46.0, - "step": 31421 - }, - { - "epoch": 5.060308386005878, - "grad_norm": 0.0029577219393104315, - "learning_rate": 0.00019998737286121884, - "loss": 46.0, - "step": 31422 - }, - { - "epoch": 5.060469423084665, - "grad_norm": 0.004920677747577429, - "learning_rate": 0.00019998737205725387, - "loss": 46.0, - "step": 31423 - }, - { - "epoch": 5.060630460163453, - "grad_norm": 0.010909433476626873, - "learning_rate": 0.0001999873712532633, - "loss": 46.0, - "step": 31424 - }, - { - "epoch": 5.06079149724224, - "grad_norm": 0.018570439890027046, - "learning_rate": 0.00019998737044924712, - "loss": 46.0, - "step": 31425 - }, - { - "epoch": 5.060952534321028, - "grad_norm": 0.014057573862373829, - "learning_rate": 0.00019998736964520534, - "loss": 46.0, - "step": 31426 - }, - { - "epoch": 5.061113571399815, - "grad_norm": 0.003790802089497447, - "learning_rate": 0.000199987368841138, - "loss": 46.0, - "step": 31427 - }, - { - "epoch": 5.061274608478602, - "grad_norm": 0.005988498218357563, - "learning_rate": 0.00019998736803704506, - "loss": 46.0, - "step": 31428 - }, - { - "epoch": 5.061435645557389, - "grad_norm": 0.004277796018868685, - "learning_rate": 0.00019998736723292652, - "loss": 46.0, - "step": 31429 - }, - { - "epoch": 5.061596682636177, - "grad_norm": 0.003905894234776497, - "learning_rate": 0.00019998736642878239, - "loss": 46.0, - "step": 31430 - }, - { - "epoch": 5.061757719714964, - "grad_norm": 0.0018447316251695156, - "learning_rate": 0.0001999873656246127, - "loss": 46.0, - "step": 31431 - }, - { - "epoch": 5.061918756793752, - "grad_norm": 0.002647134242579341, - "learning_rate": 0.0001999873648204174, - "loss": 46.0, - "step": 31432 - }, - { - "epoch": 5.062079793872539, - "grad_norm": 0.004798299167305231, - "learning_rate": 0.0001999873640161965, - "loss": 46.0, - "step": 31433 - }, - { - "epoch": 5.0622408309513265, - "grad_norm": 0.0034769484773278236, - "learning_rate": 0.00019998736321194999, - "loss": 46.0, - "step": 31434 - }, - { - "epoch": 5.062401868030114, - "grad_norm": 0.0018220728961750865, - "learning_rate": 0.00019998736240767792, - "loss": 46.0, - "step": 31435 - }, - { - "epoch": 5.062562905108901, - "grad_norm": 0.006823256146162748, - "learning_rate": 0.00019998736160338026, - "loss": 46.0, - "step": 31436 - }, - { - "epoch": 5.062723942187689, - "grad_norm": 0.0007807258516550064, - "learning_rate": 0.00019998736079905697, - "loss": 46.0, - "step": 31437 - }, - { - "epoch": 5.062884979266476, - "grad_norm": 0.0043825022876262665, - "learning_rate": 0.0001999873599947081, - "loss": 46.0, - "step": 31438 - }, - { - "epoch": 5.063046016345264, - "grad_norm": 0.0011262602638453245, - "learning_rate": 0.00019998735919033367, - "loss": 46.0, - "step": 31439 - }, - { - "epoch": 5.063207053424051, - "grad_norm": 0.006850048899650574, - "learning_rate": 0.00019998735838593366, - "loss": 46.0, - "step": 31440 - }, - { - "epoch": 5.063368090502839, - "grad_norm": 0.004780042450875044, - "learning_rate": 0.00019998735758150802, - "loss": 46.0, - "step": 31441 - }, - { - "epoch": 5.063529127581626, - "grad_norm": 0.002735063899308443, - "learning_rate": 0.0001999873567770568, - "loss": 46.0, - "step": 31442 - }, - { - "epoch": 5.063690164660413, - "grad_norm": 0.0023452742025256157, - "learning_rate": 0.00019998735597257997, - "loss": 46.0, - "step": 31443 - }, - { - "epoch": 5.0638512017392, - "grad_norm": 0.002004046458750963, - "learning_rate": 0.00019998735516807756, - "loss": 46.0, - "step": 31444 - }, - { - "epoch": 5.064012238817988, - "grad_norm": 0.010709072463214397, - "learning_rate": 0.00019998735436354957, - "loss": 46.0, - "step": 31445 - }, - { - "epoch": 5.064173275896775, - "grad_norm": 0.002725010272115469, - "learning_rate": 0.00019998735355899596, - "loss": 46.0, - "step": 31446 - }, - { - "epoch": 5.0643343129755625, - "grad_norm": 0.007613108493387699, - "learning_rate": 0.0001999873527544168, - "loss": 46.0, - "step": 31447 - }, - { - "epoch": 5.06449535005435, - "grad_norm": 0.004275319166481495, - "learning_rate": 0.000199987351949812, - "loss": 46.0, - "step": 31448 - }, - { - "epoch": 5.064656387133137, - "grad_norm": 0.005990230478346348, - "learning_rate": 0.00019998735114518167, - "loss": 46.0, - "step": 31449 - }, - { - "epoch": 5.064817424211925, - "grad_norm": 0.0027404134161770344, - "learning_rate": 0.0001999873503405257, - "loss": 46.0, - "step": 31450 - }, - { - "epoch": 5.064978461290712, - "grad_norm": 0.0028265758883208036, - "learning_rate": 0.00019998734953584414, - "loss": 46.0, - "step": 31451 - }, - { - "epoch": 5.0651394983695, - "grad_norm": 0.012204973958432674, - "learning_rate": 0.000199987348731137, - "loss": 46.0, - "step": 31452 - }, - { - "epoch": 5.065300535448287, - "grad_norm": 0.009151040576398373, - "learning_rate": 0.0001999873479264043, - "loss": 46.0, - "step": 31453 - }, - { - "epoch": 5.065461572527075, - "grad_norm": 0.0008236308349296451, - "learning_rate": 0.00019998734712164598, - "loss": 46.0, - "step": 31454 - }, - { - "epoch": 5.065622609605862, - "grad_norm": 0.016653602942824364, - "learning_rate": 0.00019998734631686204, - "loss": 46.0, - "step": 31455 - }, - { - "epoch": 5.0657836466846495, - "grad_norm": 0.0010466242674738169, - "learning_rate": 0.00019998734551205253, - "loss": 46.0, - "step": 31456 - }, - { - "epoch": 5.065944683763436, - "grad_norm": 0.0054195416159927845, - "learning_rate": 0.00019998734470721746, - "loss": 46.0, - "step": 31457 - }, - { - "epoch": 5.0661057208422235, - "grad_norm": 0.0033081627916544676, - "learning_rate": 0.00019998734390235675, - "loss": 46.0, - "step": 31458 - }, - { - "epoch": 5.066266757921011, - "grad_norm": 0.003239066805690527, - "learning_rate": 0.0001999873430974705, - "loss": 46.0, - "step": 31459 - }, - { - "epoch": 5.0664277949997985, - "grad_norm": 0.0018130347598344088, - "learning_rate": 0.0001999873422925586, - "loss": 46.0, - "step": 31460 - }, - { - "epoch": 5.066588832078586, - "grad_norm": 0.016576655209064484, - "learning_rate": 0.00019998734148762113, - "loss": 46.0, - "step": 31461 - }, - { - "epoch": 5.066749869157373, - "grad_norm": 0.008636985905468464, - "learning_rate": 0.0001999873406826581, - "loss": 46.0, - "step": 31462 - }, - { - "epoch": 5.066910906236161, - "grad_norm": 0.01158988382667303, - "learning_rate": 0.00019998733987766943, - "loss": 46.0, - "step": 31463 - }, - { - "epoch": 5.067071943314948, - "grad_norm": 0.0015201332280412316, - "learning_rate": 0.0001999873390726552, - "loss": 46.0, - "step": 31464 - }, - { - "epoch": 5.067232980393736, - "grad_norm": 0.0038251907099038363, - "learning_rate": 0.00019998733826761536, - "loss": 46.0, - "step": 31465 - }, - { - "epoch": 5.067394017472523, - "grad_norm": 0.0038515462074428797, - "learning_rate": 0.00019998733746254995, - "loss": 46.0, - "step": 31466 - }, - { - "epoch": 5.067555054551311, - "grad_norm": 0.011617777869105339, - "learning_rate": 0.00019998733665745896, - "loss": 46.0, - "step": 31467 - }, - { - "epoch": 5.067716091630098, - "grad_norm": 0.002525769639760256, - "learning_rate": 0.00019998733585234232, - "loss": 46.0, - "step": 31468 - }, - { - "epoch": 5.0678771287088855, - "grad_norm": 0.0031561728101223707, - "learning_rate": 0.00019998733504720013, - "loss": 46.0, - "step": 31469 - }, - { - "epoch": 5.068038165787673, - "grad_norm": 0.0010259057162329555, - "learning_rate": 0.00019998733424203232, - "loss": 46.0, - "step": 31470 - }, - { - "epoch": 5.06819920286646, - "grad_norm": 0.005360717419534922, - "learning_rate": 0.00019998733343683895, - "loss": 46.0, - "step": 31471 - }, - { - "epoch": 5.068360239945247, - "grad_norm": 0.001513156108558178, - "learning_rate": 0.00019998733263162, - "loss": 46.0, - "step": 31472 - }, - { - "epoch": 5.068521277024034, - "grad_norm": 0.014202897436916828, - "learning_rate": 0.0001999873318263754, - "loss": 46.0, - "step": 31473 - }, - { - "epoch": 5.068682314102822, - "grad_norm": 0.0009605882223695517, - "learning_rate": 0.00019998733102110527, - "loss": 46.0, - "step": 31474 - }, - { - "epoch": 5.068843351181609, - "grad_norm": 0.0026066009886562824, - "learning_rate": 0.0001999873302158095, - "loss": 46.0, - "step": 31475 - }, - { - "epoch": 5.069004388260397, - "grad_norm": 0.02905389666557312, - "learning_rate": 0.0001999873294104882, - "loss": 46.0, - "step": 31476 - }, - { - "epoch": 5.069165425339184, - "grad_norm": 0.005645869765430689, - "learning_rate": 0.00019998732860514125, - "loss": 46.0, - "step": 31477 - }, - { - "epoch": 5.069326462417972, - "grad_norm": 0.001611900283023715, - "learning_rate": 0.00019998732779976871, - "loss": 46.0, - "step": 31478 - }, - { - "epoch": 5.069487499496759, - "grad_norm": 0.009771431796252728, - "learning_rate": 0.00019998732699437062, - "loss": 46.0, - "step": 31479 - }, - { - "epoch": 5.069648536575547, - "grad_norm": 0.0006704932893626392, - "learning_rate": 0.00019998732618894691, - "loss": 46.0, - "step": 31480 - }, - { - "epoch": 5.069809573654334, - "grad_norm": 0.004354712553322315, - "learning_rate": 0.00019998732538349762, - "loss": 46.0, - "step": 31481 - }, - { - "epoch": 5.0699706107331215, - "grad_norm": 0.009478788822889328, - "learning_rate": 0.00019998732457802274, - "loss": 46.0, - "step": 31482 - }, - { - "epoch": 5.070131647811909, - "grad_norm": 0.002908652415499091, - "learning_rate": 0.00019998732377252224, - "loss": 46.0, - "step": 31483 - }, - { - "epoch": 5.070292684890696, - "grad_norm": 0.012400351464748383, - "learning_rate": 0.00019998732296699619, - "loss": 46.0, - "step": 31484 - }, - { - "epoch": 5.070453721969484, - "grad_norm": 0.00456674350425601, - "learning_rate": 0.00019998732216144452, - "loss": 46.0, - "step": 31485 - }, - { - "epoch": 5.07061475904827, - "grad_norm": 0.015737170353531837, - "learning_rate": 0.00019998732135586726, - "loss": 46.0, - "step": 31486 - }, - { - "epoch": 5.070775796127058, - "grad_norm": 0.007681137416511774, - "learning_rate": 0.00019998732055026441, - "loss": 46.0, - "step": 31487 - }, - { - "epoch": 5.070936833205845, - "grad_norm": 0.010516487993299961, - "learning_rate": 0.00019998731974463598, - "loss": 46.0, - "step": 31488 - }, - { - "epoch": 5.071097870284633, - "grad_norm": 0.012859782204031944, - "learning_rate": 0.00019998731893898194, - "loss": 46.0, - "step": 31489 - }, - { - "epoch": 5.07125890736342, - "grad_norm": 0.0018989465897902846, - "learning_rate": 0.00019998731813330233, - "loss": 46.0, - "step": 31490 - }, - { - "epoch": 5.071419944442208, - "grad_norm": 0.0024115622509270906, - "learning_rate": 0.0001999873173275971, - "loss": 46.0, - "step": 31491 - }, - { - "epoch": 5.071580981520995, - "grad_norm": 0.008649192750453949, - "learning_rate": 0.0001999873165218663, - "loss": 46.0, - "step": 31492 - }, - { - "epoch": 5.071742018599783, - "grad_norm": 0.0024184526409953833, - "learning_rate": 0.0001999873157161099, - "loss": 46.0, - "step": 31493 - }, - { - "epoch": 5.07190305567857, - "grad_norm": 0.009620298631489277, - "learning_rate": 0.00019998731491032793, - "loss": 46.0, - "step": 31494 - }, - { - "epoch": 5.0720640927573575, - "grad_norm": 0.0030542039312422276, - "learning_rate": 0.00019998731410452036, - "loss": 46.0, - "step": 31495 - }, - { - "epoch": 5.072225129836145, - "grad_norm": 0.001520965714007616, - "learning_rate": 0.00019998731329868717, - "loss": 46.0, - "step": 31496 - }, - { - "epoch": 5.072386166914932, - "grad_norm": 0.0036817335058003664, - "learning_rate": 0.00019998731249282843, - "loss": 46.0, - "step": 31497 - }, - { - "epoch": 5.07254720399372, - "grad_norm": 0.003716722596436739, - "learning_rate": 0.00019998731168694404, - "loss": 46.0, - "step": 31498 - }, - { - "epoch": 5.072708241072507, - "grad_norm": 0.0005912923370487988, - "learning_rate": 0.00019998731088103413, - "loss": 46.0, - "step": 31499 - }, - { - "epoch": 5.072869278151295, - "grad_norm": 0.0007993608014658093, - "learning_rate": 0.0001999873100750986, - "loss": 46.0, - "step": 31500 - }, - { - "epoch": 5.073030315230081, - "grad_norm": 0.011456293985247612, - "learning_rate": 0.00019998730926913745, - "loss": 46.0, - "step": 31501 - }, - { - "epoch": 5.073191352308869, - "grad_norm": 0.0019239726243540645, - "learning_rate": 0.00019998730846315071, - "loss": 46.0, - "step": 31502 - }, - { - "epoch": 5.073352389387656, - "grad_norm": 0.0032419771887362003, - "learning_rate": 0.00019998730765713842, - "loss": 46.0, - "step": 31503 - }, - { - "epoch": 5.073513426466444, - "grad_norm": 0.010542859323322773, - "learning_rate": 0.00019998730685110049, - "loss": 46.0, - "step": 31504 - }, - { - "epoch": 5.073674463545231, - "grad_norm": 0.0032794310245662928, - "learning_rate": 0.00019998730604503702, - "loss": 46.0, - "step": 31505 - }, - { - "epoch": 5.073835500624019, - "grad_norm": 0.006397836841642857, - "learning_rate": 0.00019998730523894794, - "loss": 46.0, - "step": 31506 - }, - { - "epoch": 5.073996537702806, - "grad_norm": 0.01285083219408989, - "learning_rate": 0.00019998730443283327, - "loss": 46.0, - "step": 31507 - }, - { - "epoch": 5.0741575747815935, - "grad_norm": 0.0035771497059613466, - "learning_rate": 0.00019998730362669298, - "loss": 46.0, - "step": 31508 - }, - { - "epoch": 5.074318611860381, - "grad_norm": 0.0007602878613397479, - "learning_rate": 0.0001999873028205271, - "loss": 46.0, - "step": 31509 - }, - { - "epoch": 5.074479648939168, - "grad_norm": 0.010783039033412933, - "learning_rate": 0.00019998730201433565, - "loss": 46.0, - "step": 31510 - }, - { - "epoch": 5.074640686017956, - "grad_norm": 0.005767618305981159, - "learning_rate": 0.00019998730120811864, - "loss": 46.0, - "step": 31511 - }, - { - "epoch": 5.074801723096743, - "grad_norm": 0.006642652675509453, - "learning_rate": 0.00019998730040187598, - "loss": 46.0, - "step": 31512 - }, - { - "epoch": 5.074962760175531, - "grad_norm": 0.005331065505743027, - "learning_rate": 0.00019998729959560776, - "loss": 46.0, - "step": 31513 - }, - { - "epoch": 5.075123797254318, - "grad_norm": 0.005481342785060406, - "learning_rate": 0.00019998729878931392, - "loss": 46.0, - "step": 31514 - }, - { - "epoch": 5.075284834333106, - "grad_norm": 0.007799872197210789, - "learning_rate": 0.0001999872979829945, - "loss": 46.0, - "step": 31515 - }, - { - "epoch": 5.075445871411892, - "grad_norm": 0.014986932277679443, - "learning_rate": 0.00019998729717664952, - "loss": 46.0, - "step": 31516 - }, - { - "epoch": 5.07560690849068, - "grad_norm": 0.0039672283455729485, - "learning_rate": 0.00019998729637027893, - "loss": 46.0, - "step": 31517 - }, - { - "epoch": 5.075767945569467, - "grad_norm": 0.009957185946404934, - "learning_rate": 0.00019998729556388274, - "loss": 46.0, - "step": 31518 - }, - { - "epoch": 5.0759289826482545, - "grad_norm": 0.009040186181664467, - "learning_rate": 0.00019998729475746097, - "loss": 46.0, - "step": 31519 - }, - { - "epoch": 5.076090019727042, - "grad_norm": 0.0041587925516068935, - "learning_rate": 0.0001999872939510136, - "loss": 46.0, - "step": 31520 - }, - { - "epoch": 5.076251056805829, - "grad_norm": 0.003826018190011382, - "learning_rate": 0.00019998729314454065, - "loss": 46.0, - "step": 31521 - }, - { - "epoch": 5.076412093884617, - "grad_norm": 0.0040887692011892796, - "learning_rate": 0.0001999872923380421, - "loss": 46.0, - "step": 31522 - }, - { - "epoch": 5.076573130963404, - "grad_norm": 0.009654448367655277, - "learning_rate": 0.00019998729153151794, - "loss": 46.0, - "step": 31523 - }, - { - "epoch": 5.076734168042192, - "grad_norm": 0.0019868065137416124, - "learning_rate": 0.0001999872907249682, - "loss": 46.0, - "step": 31524 - }, - { - "epoch": 5.076895205120979, - "grad_norm": 0.0022918428294360638, - "learning_rate": 0.00019998728991839286, - "loss": 46.0, - "step": 31525 - }, - { - "epoch": 5.077056242199767, - "grad_norm": 0.014718353748321533, - "learning_rate": 0.00019998728911179193, - "loss": 46.0, - "step": 31526 - }, - { - "epoch": 5.077217279278554, - "grad_norm": 0.004916117526590824, - "learning_rate": 0.00019998728830516543, - "loss": 46.0, - "step": 31527 - }, - { - "epoch": 5.077378316357342, - "grad_norm": 0.0077346679754555225, - "learning_rate": 0.00019998728749851335, - "loss": 46.0, - "step": 31528 - }, - { - "epoch": 5.077539353436129, - "grad_norm": 0.0023475319612771273, - "learning_rate": 0.00019998728669183563, - "loss": 46.0, - "step": 31529 - }, - { - "epoch": 5.0777003905149165, - "grad_norm": 0.017003672197461128, - "learning_rate": 0.00019998728588513235, - "loss": 46.0, - "step": 31530 - }, - { - "epoch": 5.077861427593703, - "grad_norm": 0.0027289206627756357, - "learning_rate": 0.00019998728507840348, - "loss": 46.0, - "step": 31531 - }, - { - "epoch": 5.0780224646724905, - "grad_norm": 0.006898499093949795, - "learning_rate": 0.00019998728427164902, - "loss": 46.0, - "step": 31532 - }, - { - "epoch": 5.078183501751278, - "grad_norm": 0.0038688050117343664, - "learning_rate": 0.00019998728346486895, - "loss": 46.0, - "step": 31533 - }, - { - "epoch": 5.078344538830065, - "grad_norm": 0.0016531539149582386, - "learning_rate": 0.0001999872826580633, - "loss": 46.0, - "step": 31534 - }, - { - "epoch": 5.078505575908853, - "grad_norm": 0.0011903924169018865, - "learning_rate": 0.00019998728185123205, - "loss": 46.0, - "step": 31535 - }, - { - "epoch": 5.07866661298764, - "grad_norm": 0.019421108067035675, - "learning_rate": 0.0001999872810443752, - "loss": 46.0, - "step": 31536 - }, - { - "epoch": 5.078827650066428, - "grad_norm": 0.0008002580143511295, - "learning_rate": 0.0001999872802374928, - "loss": 46.0, - "step": 31537 - }, - { - "epoch": 5.078988687145215, - "grad_norm": 0.01161094382405281, - "learning_rate": 0.00019998727943058479, - "loss": 46.0, - "step": 31538 - }, - { - "epoch": 5.079149724224003, - "grad_norm": 0.0034828488714993, - "learning_rate": 0.00019998727862365116, - "loss": 46.0, - "step": 31539 - }, - { - "epoch": 5.07931076130279, - "grad_norm": 0.002845194423571229, - "learning_rate": 0.00019998727781669198, - "loss": 46.0, - "step": 31540 - }, - { - "epoch": 5.079471798381578, - "grad_norm": 0.004946758504956961, - "learning_rate": 0.00019998727700970716, - "loss": 46.0, - "step": 31541 - }, - { - "epoch": 5.079632835460365, - "grad_norm": 0.0028560252394527197, - "learning_rate": 0.00019998727620269678, - "loss": 46.0, - "step": 31542 - }, - { - "epoch": 5.0797938725391525, - "grad_norm": 0.0023897234350442886, - "learning_rate": 0.00019998727539566083, - "loss": 46.0, - "step": 31543 - }, - { - "epoch": 5.07995490961794, - "grad_norm": 0.0018610517727211118, - "learning_rate": 0.00019998727458859925, - "loss": 46.0, - "step": 31544 - }, - { - "epoch": 5.0801159466967265, - "grad_norm": 0.01645711250603199, - "learning_rate": 0.00019998727378151208, - "loss": 46.0, - "step": 31545 - }, - { - "epoch": 5.080276983775514, - "grad_norm": 0.0027641672641038895, - "learning_rate": 0.00019998727297439932, - "loss": 46.0, - "step": 31546 - }, - { - "epoch": 5.080438020854301, - "grad_norm": 0.002907135523855686, - "learning_rate": 0.000199987272167261, - "loss": 46.0, - "step": 31547 - }, - { - "epoch": 5.080599057933089, - "grad_norm": 0.0017631857190281153, - "learning_rate": 0.00019998727136009704, - "loss": 46.0, - "step": 31548 - }, - { - "epoch": 5.080760095011876, - "grad_norm": 0.007124314084649086, - "learning_rate": 0.00019998727055290752, - "loss": 46.0, - "step": 31549 - }, - { - "epoch": 5.080921132090664, - "grad_norm": 0.002711135894060135, - "learning_rate": 0.0001999872697456924, - "loss": 46.0, - "step": 31550 - }, - { - "epoch": 5.081082169169451, - "grad_norm": 0.009224534034729004, - "learning_rate": 0.0001999872689384517, - "loss": 46.0, - "step": 31551 - }, - { - "epoch": 5.081243206248239, - "grad_norm": 0.004644982051104307, - "learning_rate": 0.0001999872681311854, - "loss": 46.0, - "step": 31552 - }, - { - "epoch": 5.081404243327026, - "grad_norm": 0.007895423099398613, - "learning_rate": 0.0001999872673238935, - "loss": 46.0, - "step": 31553 - }, - { - "epoch": 5.081565280405814, - "grad_norm": 0.004799280781298876, - "learning_rate": 0.00019998726651657603, - "loss": 46.0, - "step": 31554 - }, - { - "epoch": 5.081726317484601, - "grad_norm": 0.002282541710883379, - "learning_rate": 0.00019998726570923293, - "loss": 46.0, - "step": 31555 - }, - { - "epoch": 5.0818873545633885, - "grad_norm": 0.0023004882968962193, - "learning_rate": 0.00019998726490186427, - "loss": 46.0, - "step": 31556 - }, - { - "epoch": 5.082048391642176, - "grad_norm": 0.004254021681845188, - "learning_rate": 0.00019998726409447003, - "loss": 46.0, - "step": 31557 - }, - { - "epoch": 5.082209428720963, - "grad_norm": 0.008733365684747696, - "learning_rate": 0.00019998726328705017, - "loss": 46.0, - "step": 31558 - }, - { - "epoch": 5.082370465799751, - "grad_norm": 0.012953191995620728, - "learning_rate": 0.00019998726247960472, - "loss": 46.0, - "step": 31559 - }, - { - "epoch": 5.082531502878537, - "grad_norm": 0.003261252772063017, - "learning_rate": 0.0001999872616721337, - "loss": 46.0, - "step": 31560 - }, - { - "epoch": 5.082692539957325, - "grad_norm": 0.006477107293903828, - "learning_rate": 0.00019998726086463707, - "loss": 46.0, - "step": 31561 - }, - { - "epoch": 5.082853577036112, - "grad_norm": 0.007697482593357563, - "learning_rate": 0.00019998726005711483, - "loss": 46.0, - "step": 31562 - }, - { - "epoch": 5.0830146141149, - "grad_norm": 0.0009260574588552117, - "learning_rate": 0.00019998725924956704, - "loss": 46.0, - "step": 31563 - }, - { - "epoch": 5.083175651193687, - "grad_norm": 0.010477319359779358, - "learning_rate": 0.00019998725844199363, - "loss": 46.0, - "step": 31564 - }, - { - "epoch": 5.083336688272475, - "grad_norm": 0.011995733715593815, - "learning_rate": 0.00019998725763439463, - "loss": 46.0, - "step": 31565 - }, - { - "epoch": 5.083497725351262, - "grad_norm": 0.002470846753567457, - "learning_rate": 0.00019998725682677005, - "loss": 46.0, - "step": 31566 - }, - { - "epoch": 5.0836587624300495, - "grad_norm": 0.009876557625830173, - "learning_rate": 0.00019998725601911988, - "loss": 46.0, - "step": 31567 - }, - { - "epoch": 5.083819799508837, - "grad_norm": 0.006988706067204475, - "learning_rate": 0.00019998725521144412, - "loss": 46.0, - "step": 31568 - }, - { - "epoch": 5.0839808365876245, - "grad_norm": 0.0031932799611240625, - "learning_rate": 0.00019998725440374275, - "loss": 46.0, - "step": 31569 - }, - { - "epoch": 5.084141873666412, - "grad_norm": 0.002237739274278283, - "learning_rate": 0.0001999872535960158, - "loss": 46.0, - "step": 31570 - }, - { - "epoch": 5.084302910745199, - "grad_norm": 0.0008495231741108, - "learning_rate": 0.00019998725278826327, - "loss": 46.0, - "step": 31571 - }, - { - "epoch": 5.084463947823987, - "grad_norm": 0.007848595269024372, - "learning_rate": 0.0001999872519804851, - "loss": 46.0, - "step": 31572 - }, - { - "epoch": 5.084624984902774, - "grad_norm": 0.009854995645582676, - "learning_rate": 0.0001999872511726814, - "loss": 46.0, - "step": 31573 - }, - { - "epoch": 5.084786021981561, - "grad_norm": 0.011838048696517944, - "learning_rate": 0.00019998725036485208, - "loss": 46.0, - "step": 31574 - }, - { - "epoch": 5.084947059060348, - "grad_norm": 0.0029190005734562874, - "learning_rate": 0.00019998724955699716, - "loss": 46.0, - "step": 31575 - }, - { - "epoch": 5.085108096139136, - "grad_norm": 0.001822870341129601, - "learning_rate": 0.00019998724874911668, - "loss": 46.0, - "step": 31576 - }, - { - "epoch": 5.085269133217923, - "grad_norm": 0.003114506136626005, - "learning_rate": 0.0001999872479412106, - "loss": 46.0, - "step": 31577 - }, - { - "epoch": 5.085430170296711, - "grad_norm": 0.0015350612811744213, - "learning_rate": 0.0001999872471332789, - "loss": 46.0, - "step": 31578 - }, - { - "epoch": 5.085591207375498, - "grad_norm": 0.0020557488314807415, - "learning_rate": 0.00019998724632532163, - "loss": 46.0, - "step": 31579 - }, - { - "epoch": 5.0857522444542855, - "grad_norm": 0.004932016599923372, - "learning_rate": 0.00019998724551733877, - "loss": 46.0, - "step": 31580 - }, - { - "epoch": 5.085913281533073, - "grad_norm": 0.019641203805804253, - "learning_rate": 0.0001999872447093303, - "loss": 46.0, - "step": 31581 - }, - { - "epoch": 5.08607431861186, - "grad_norm": 0.004743436351418495, - "learning_rate": 0.00019998724390129624, - "loss": 46.0, - "step": 31582 - }, - { - "epoch": 5.086235355690648, - "grad_norm": 0.009794383309781551, - "learning_rate": 0.00019998724309323662, - "loss": 46.0, - "step": 31583 - }, - { - "epoch": 5.086396392769435, - "grad_norm": 0.0032204575836658478, - "learning_rate": 0.00019998724228515138, - "loss": 46.0, - "step": 31584 - }, - { - "epoch": 5.086557429848223, - "grad_norm": 0.005569989327341318, - "learning_rate": 0.00019998724147704056, - "loss": 46.0, - "step": 31585 - }, - { - "epoch": 5.08671846692701, - "grad_norm": 0.004238417837768793, - "learning_rate": 0.00019998724066890415, - "loss": 46.0, - "step": 31586 - }, - { - "epoch": 5.086879504005798, - "grad_norm": 0.002847732277587056, - "learning_rate": 0.00019998723986074213, - "loss": 46.0, - "step": 31587 - }, - { - "epoch": 5.087040541084585, - "grad_norm": 0.0033774760086089373, - "learning_rate": 0.00019998723905255452, - "loss": 46.0, - "step": 31588 - }, - { - "epoch": 5.087201578163372, - "grad_norm": 0.010860154405236244, - "learning_rate": 0.00019998723824434132, - "loss": 46.0, - "step": 31589 - }, - { - "epoch": 5.087362615242159, - "grad_norm": 0.002917844569310546, - "learning_rate": 0.00019998723743610257, - "loss": 46.0, - "step": 31590 - }, - { - "epoch": 5.087523652320947, - "grad_norm": 0.0014842735836282372, - "learning_rate": 0.00019998723662783817, - "loss": 46.0, - "step": 31591 - }, - { - "epoch": 5.087684689399734, - "grad_norm": 0.018754929304122925, - "learning_rate": 0.0001999872358195482, - "loss": 46.0, - "step": 31592 - }, - { - "epoch": 5.0878457264785215, - "grad_norm": 0.01046660728752613, - "learning_rate": 0.00019998723501123266, - "loss": 46.0, - "step": 31593 - }, - { - "epoch": 5.088006763557309, - "grad_norm": 0.00049332354683429, - "learning_rate": 0.0001999872342028915, - "loss": 46.0, - "step": 31594 - }, - { - "epoch": 5.088167800636096, - "grad_norm": 0.002214283449575305, - "learning_rate": 0.00019998723339452478, - "loss": 46.0, - "step": 31595 - }, - { - "epoch": 5.088328837714884, - "grad_norm": 0.0033477996475994587, - "learning_rate": 0.00019998723258613245, - "loss": 46.0, - "step": 31596 - }, - { - "epoch": 5.088489874793671, - "grad_norm": 0.013349231332540512, - "learning_rate": 0.00019998723177771453, - "loss": 46.0, - "step": 31597 - }, - { - "epoch": 5.088650911872459, - "grad_norm": 0.0033220131881535053, - "learning_rate": 0.00019998723096927102, - "loss": 46.0, - "step": 31598 - }, - { - "epoch": 5.088811948951246, - "grad_norm": 0.0012966557405889034, - "learning_rate": 0.0001999872301608019, - "loss": 46.0, - "step": 31599 - }, - { - "epoch": 5.088972986030034, - "grad_norm": 0.0010098421480506659, - "learning_rate": 0.00019998722935230721, - "loss": 46.0, - "step": 31600 - }, - { - "epoch": 5.089134023108821, - "grad_norm": 0.003335656365379691, - "learning_rate": 0.00019998722854378692, - "loss": 46.0, - "step": 31601 - }, - { - "epoch": 5.089295060187609, - "grad_norm": 0.006485785357654095, - "learning_rate": 0.00019998722773524103, - "loss": 46.0, - "step": 31602 - }, - { - "epoch": 5.089456097266396, - "grad_norm": 0.006169487722218037, - "learning_rate": 0.00019998722692666956, - "loss": 46.0, - "step": 31603 - }, - { - "epoch": 5.089617134345183, - "grad_norm": 0.004332922399044037, - "learning_rate": 0.0001999872261180725, - "loss": 46.0, - "step": 31604 - }, - { - "epoch": 5.08977817142397, - "grad_norm": 0.008071127347648144, - "learning_rate": 0.00019998722530944983, - "loss": 46.0, - "step": 31605 - }, - { - "epoch": 5.0899392085027575, - "grad_norm": 0.008919227868318558, - "learning_rate": 0.0001999872245008016, - "loss": 46.0, - "step": 31606 - }, - { - "epoch": 5.090100245581545, - "grad_norm": 0.002631570678204298, - "learning_rate": 0.00019998722369212775, - "loss": 46.0, - "step": 31607 - }, - { - "epoch": 5.090261282660332, - "grad_norm": 0.004740362521260977, - "learning_rate": 0.00019998722288342832, - "loss": 46.0, - "step": 31608 - }, - { - "epoch": 5.09042231973912, - "grad_norm": 0.001849306165240705, - "learning_rate": 0.0001999872220747033, - "loss": 46.0, - "step": 31609 - }, - { - "epoch": 5.090583356817907, - "grad_norm": 0.0038777433801442385, - "learning_rate": 0.00019998722126595268, - "loss": 46.0, - "step": 31610 - }, - { - "epoch": 5.090744393896695, - "grad_norm": 0.002898280043154955, - "learning_rate": 0.0001999872204571765, - "loss": 46.0, - "step": 31611 - }, - { - "epoch": 5.090905430975482, - "grad_norm": 0.010929821990430355, - "learning_rate": 0.00019998721964837468, - "loss": 46.0, - "step": 31612 - }, - { - "epoch": 5.09106646805427, - "grad_norm": 0.001335539622232318, - "learning_rate": 0.0001999872188395473, - "loss": 46.0, - "step": 31613 - }, - { - "epoch": 5.091227505133057, - "grad_norm": 0.008599543944001198, - "learning_rate": 0.00019998721803069432, - "loss": 46.0, - "step": 31614 - }, - { - "epoch": 5.091388542211845, - "grad_norm": 0.0010156950447708368, - "learning_rate": 0.00019998721722181575, - "loss": 46.0, - "step": 31615 - }, - { - "epoch": 5.091549579290632, - "grad_norm": 0.0017077663214877248, - "learning_rate": 0.00019998721641291157, - "loss": 46.0, - "step": 31616 - }, - { - "epoch": 5.0917106163694195, - "grad_norm": 0.003509596921503544, - "learning_rate": 0.00019998721560398182, - "loss": 46.0, - "step": 31617 - }, - { - "epoch": 5.091871653448206, - "grad_norm": 0.01000924501568079, - "learning_rate": 0.00019998721479502646, - "loss": 46.0, - "step": 31618 - }, - { - "epoch": 5.0920326905269935, - "grad_norm": 0.0036562189925462008, - "learning_rate": 0.0001999872139860455, - "loss": 46.0, - "step": 31619 - }, - { - "epoch": 5.092193727605781, - "grad_norm": 0.0006999990437179804, - "learning_rate": 0.000199987213177039, - "loss": 46.0, - "step": 31620 - }, - { - "epoch": 5.092354764684568, - "grad_norm": 0.007833797484636307, - "learning_rate": 0.00019998721236800688, - "loss": 46.0, - "step": 31621 - }, - { - "epoch": 5.092515801763356, - "grad_norm": 0.0033367800060659647, - "learning_rate": 0.00019998721155894917, - "loss": 46.0, - "step": 31622 - }, - { - "epoch": 5.092676838842143, - "grad_norm": 0.002594214165583253, - "learning_rate": 0.00019998721074986585, - "loss": 46.0, - "step": 31623 - }, - { - "epoch": 5.092837875920931, - "grad_norm": 0.004807803314179182, - "learning_rate": 0.00019998720994075697, - "loss": 46.0, - "step": 31624 - }, - { - "epoch": 5.092998912999718, - "grad_norm": 0.0019444884965196252, - "learning_rate": 0.00019998720913162247, - "loss": 46.0, - "step": 31625 - }, - { - "epoch": 5.093159950078506, - "grad_norm": 0.014486836269497871, - "learning_rate": 0.0001999872083224624, - "loss": 46.0, - "step": 31626 - }, - { - "epoch": 5.093320987157293, - "grad_norm": 0.001850353553891182, - "learning_rate": 0.0001999872075132767, - "loss": 46.0, - "step": 31627 - }, - { - "epoch": 5.0934820242360805, - "grad_norm": 0.003495913464576006, - "learning_rate": 0.00019998720670406545, - "loss": 46.0, - "step": 31628 - }, - { - "epoch": 5.093643061314868, - "grad_norm": 0.008592427708208561, - "learning_rate": 0.0001999872058948286, - "loss": 46.0, - "step": 31629 - }, - { - "epoch": 5.093804098393655, - "grad_norm": 0.0013628288870677352, - "learning_rate": 0.00019998720508556615, - "loss": 46.0, - "step": 31630 - }, - { - "epoch": 5.093965135472443, - "grad_norm": 0.0036802305839955807, - "learning_rate": 0.00019998720427627812, - "loss": 46.0, - "step": 31631 - }, - { - "epoch": 5.09412617255123, - "grad_norm": 0.0013586009154096246, - "learning_rate": 0.0001999872034669645, - "loss": 46.0, - "step": 31632 - }, - { - "epoch": 5.094287209630017, - "grad_norm": 0.0055445656180381775, - "learning_rate": 0.00019998720265762527, - "loss": 46.0, - "step": 31633 - }, - { - "epoch": 5.094448246708804, - "grad_norm": 0.003254644339904189, - "learning_rate": 0.00019998720184826046, - "loss": 46.0, - "step": 31634 - }, - { - "epoch": 5.094609283787592, - "grad_norm": 0.0018056377302855253, - "learning_rate": 0.00019998720103887006, - "loss": 46.0, - "step": 31635 - }, - { - "epoch": 5.094770320866379, - "grad_norm": 0.005883445963263512, - "learning_rate": 0.00019998720022945405, - "loss": 46.0, - "step": 31636 - }, - { - "epoch": 5.094931357945167, - "grad_norm": 0.01950792782008648, - "learning_rate": 0.00019998719942001248, - "loss": 46.0, - "step": 31637 - }, - { - "epoch": 5.095092395023954, - "grad_norm": 0.008549262769520283, - "learning_rate": 0.0001999871986105453, - "loss": 46.0, - "step": 31638 - }, - { - "epoch": 5.095253432102742, - "grad_norm": 0.00631296169012785, - "learning_rate": 0.00019998719780105252, - "loss": 46.0, - "step": 31639 - }, - { - "epoch": 5.095414469181529, - "grad_norm": 0.0015487271593883634, - "learning_rate": 0.00019998719699153416, - "loss": 46.0, - "step": 31640 - }, - { - "epoch": 5.0955755062603165, - "grad_norm": 0.011292981915175915, - "learning_rate": 0.0001999871961819902, - "loss": 46.0, - "step": 31641 - }, - { - "epoch": 5.095736543339104, - "grad_norm": 0.0037516835145652294, - "learning_rate": 0.00019998719537242066, - "loss": 46.0, - "step": 31642 - }, - { - "epoch": 5.095897580417891, - "grad_norm": 0.007287455257028341, - "learning_rate": 0.0001999871945628255, - "loss": 46.0, - "step": 31643 - }, - { - "epoch": 5.096058617496679, - "grad_norm": 0.0011183179449290037, - "learning_rate": 0.00019998719375320477, - "loss": 46.0, - "step": 31644 - }, - { - "epoch": 5.096219654575466, - "grad_norm": 0.0020118425600230694, - "learning_rate": 0.00019998719294355848, - "loss": 46.0, - "step": 31645 - }, - { - "epoch": 5.096380691654254, - "grad_norm": 0.007203317247331142, - "learning_rate": 0.00019998719213388654, - "loss": 46.0, - "step": 31646 - }, - { - "epoch": 5.09654172873304, - "grad_norm": 0.014000486582517624, - "learning_rate": 0.00019998719132418907, - "loss": 46.0, - "step": 31647 - }, - { - "epoch": 5.096702765811828, - "grad_norm": 0.02207367494702339, - "learning_rate": 0.00019998719051446596, - "loss": 46.0, - "step": 31648 - }, - { - "epoch": 5.096863802890615, - "grad_norm": 0.009998398832976818, - "learning_rate": 0.0001999871897047173, - "loss": 46.0, - "step": 31649 - }, - { - "epoch": 5.097024839969403, - "grad_norm": 0.00410973746329546, - "learning_rate": 0.00019998718889494298, - "loss": 46.0, - "step": 31650 - }, - { - "epoch": 5.09718587704819, - "grad_norm": 0.0008798366761766374, - "learning_rate": 0.00019998718808514313, - "loss": 46.0, - "step": 31651 - }, - { - "epoch": 5.097346914126978, - "grad_norm": 0.002491818042472005, - "learning_rate": 0.00019998718727531767, - "loss": 46.0, - "step": 31652 - }, - { - "epoch": 5.097507951205765, - "grad_norm": 0.003479825798422098, - "learning_rate": 0.00019998718646546662, - "loss": 46.0, - "step": 31653 - }, - { - "epoch": 5.0976689882845525, - "grad_norm": 0.0031058855820447206, - "learning_rate": 0.00019998718565559, - "loss": 46.0, - "step": 31654 - }, - { - "epoch": 5.09783002536334, - "grad_norm": 0.015748916193842888, - "learning_rate": 0.00019998718484568774, - "loss": 46.0, - "step": 31655 - }, - { - "epoch": 5.097991062442127, - "grad_norm": 0.006286006420850754, - "learning_rate": 0.00019998718403575988, - "loss": 46.0, - "step": 31656 - }, - { - "epoch": 5.098152099520915, - "grad_norm": 0.003368560690432787, - "learning_rate": 0.00019998718322580648, - "loss": 46.0, - "step": 31657 - }, - { - "epoch": 5.098313136599702, - "grad_norm": 0.001771627925336361, - "learning_rate": 0.00019998718241582747, - "loss": 46.0, - "step": 31658 - }, - { - "epoch": 5.09847417367849, - "grad_norm": 0.0017098980024456978, - "learning_rate": 0.00019998718160582288, - "loss": 46.0, - "step": 31659 - }, - { - "epoch": 5.098635210757277, - "grad_norm": 0.005052240565419197, - "learning_rate": 0.0001999871807957927, - "loss": 46.0, - "step": 31660 - }, - { - "epoch": 5.098796247836065, - "grad_norm": 0.002359638223424554, - "learning_rate": 0.0001999871799857369, - "loss": 46.0, - "step": 31661 - }, - { - "epoch": 5.098957284914851, - "grad_norm": 0.012051573023200035, - "learning_rate": 0.00019998717917565554, - "loss": 46.0, - "step": 31662 - }, - { - "epoch": 5.099118321993639, - "grad_norm": 0.00994238443672657, - "learning_rate": 0.00019998717836554854, - "loss": 46.0, - "step": 31663 - }, - { - "epoch": 5.099279359072426, - "grad_norm": 0.021187372505664825, - "learning_rate": 0.00019998717755541598, - "loss": 46.0, - "step": 31664 - }, - { - "epoch": 5.099440396151214, - "grad_norm": 0.003129806602373719, - "learning_rate": 0.00019998717674525783, - "loss": 46.0, - "step": 31665 - }, - { - "epoch": 5.099601433230001, - "grad_norm": 0.0065110777504742146, - "learning_rate": 0.00019998717593507407, - "loss": 46.0, - "step": 31666 - }, - { - "epoch": 5.0997624703087885, - "grad_norm": 0.006100281607359648, - "learning_rate": 0.00019998717512486475, - "loss": 46.0, - "step": 31667 - }, - { - "epoch": 5.099923507387576, - "grad_norm": 0.001959226094186306, - "learning_rate": 0.00019998717431462984, - "loss": 46.0, - "step": 31668 - }, - { - "epoch": 5.100084544466363, - "grad_norm": 0.0024511960800737143, - "learning_rate": 0.00019998717350436931, - "loss": 46.0, - "step": 31669 - }, - { - "epoch": 5.100245581545151, - "grad_norm": 0.0038314347621053457, - "learning_rate": 0.0001999871726940832, - "loss": 46.0, - "step": 31670 - }, - { - "epoch": 5.100406618623938, - "grad_norm": 0.006789130158722401, - "learning_rate": 0.0001999871718837715, - "loss": 46.0, - "step": 31671 - }, - { - "epoch": 5.100567655702726, - "grad_norm": 0.004750135820358992, - "learning_rate": 0.00019998717107343422, - "loss": 46.0, - "step": 31672 - }, - { - "epoch": 5.100728692781513, - "grad_norm": 0.003990810830146074, - "learning_rate": 0.00019998717026307132, - "loss": 46.0, - "step": 31673 - }, - { - "epoch": 5.100889729860301, - "grad_norm": 0.011149369180202484, - "learning_rate": 0.00019998716945268284, - "loss": 46.0, - "step": 31674 - }, - { - "epoch": 5.101050766939088, - "grad_norm": 0.004834349267184734, - "learning_rate": 0.0001999871686422688, - "loss": 46.0, - "step": 31675 - }, - { - "epoch": 5.1012118040178756, - "grad_norm": 0.0037851883098483086, - "learning_rate": 0.0001999871678318291, - "loss": 46.0, - "step": 31676 - }, - { - "epoch": 5.101372841096662, - "grad_norm": 0.0028146200347691774, - "learning_rate": 0.00019998716702136386, - "loss": 46.0, - "step": 31677 - }, - { - "epoch": 5.10153387817545, - "grad_norm": 0.006069815717637539, - "learning_rate": 0.00019998716621087302, - "loss": 46.0, - "step": 31678 - }, - { - "epoch": 5.101694915254237, - "grad_norm": 0.002303576096892357, - "learning_rate": 0.00019998716540035657, - "loss": 46.0, - "step": 31679 - }, - { - "epoch": 5.1018559523330245, - "grad_norm": 0.0019306502072140574, - "learning_rate": 0.00019998716458981456, - "loss": 46.0, - "step": 31680 - }, - { - "epoch": 5.102016989411812, - "grad_norm": 0.0035780046600848436, - "learning_rate": 0.00019998716377924694, - "loss": 46.0, - "step": 31681 - }, - { - "epoch": 5.102178026490599, - "grad_norm": 0.015230325981974602, - "learning_rate": 0.00019998716296865373, - "loss": 46.0, - "step": 31682 - }, - { - "epoch": 5.102339063569387, - "grad_norm": 0.003726790426298976, - "learning_rate": 0.00019998716215803493, - "loss": 46.0, - "step": 31683 - }, - { - "epoch": 5.102500100648174, - "grad_norm": 0.006447991821914911, - "learning_rate": 0.00019998716134739055, - "loss": 46.0, - "step": 31684 - }, - { - "epoch": 5.102661137726962, - "grad_norm": 0.007664111442863941, - "learning_rate": 0.00019998716053672055, - "loss": 46.0, - "step": 31685 - }, - { - "epoch": 5.102822174805749, - "grad_norm": 0.011768290773034096, - "learning_rate": 0.000199987159726025, - "loss": 46.0, - "step": 31686 - }, - { - "epoch": 5.102983211884537, - "grad_norm": 0.00295541831292212, - "learning_rate": 0.00019998715891530382, - "loss": 46.0, - "step": 31687 - }, - { - "epoch": 5.103144248963324, - "grad_norm": 0.004615542478859425, - "learning_rate": 0.00019998715810455706, - "loss": 46.0, - "step": 31688 - }, - { - "epoch": 5.1033052860421115, - "grad_norm": 0.0015148420352488756, - "learning_rate": 0.0001999871572937847, - "loss": 46.0, - "step": 31689 - }, - { - "epoch": 5.103466323120899, - "grad_norm": 0.0018063432071357965, - "learning_rate": 0.00019998715648298675, - "loss": 46.0, - "step": 31690 - }, - { - "epoch": 5.1036273601996855, - "grad_norm": 0.003022967604920268, - "learning_rate": 0.00019998715567216325, - "loss": 46.0, - "step": 31691 - }, - { - "epoch": 5.103788397278473, - "grad_norm": 0.00608081417158246, - "learning_rate": 0.0001999871548613141, - "loss": 46.0, - "step": 31692 - }, - { - "epoch": 5.1039494343572605, - "grad_norm": 0.01729155145585537, - "learning_rate": 0.0001999871540504394, - "loss": 46.0, - "step": 31693 - }, - { - "epoch": 5.104110471436048, - "grad_norm": 0.0017848373390734196, - "learning_rate": 0.00019998715323953908, - "loss": 46.0, - "step": 31694 - }, - { - "epoch": 5.104271508514835, - "grad_norm": 0.018515024334192276, - "learning_rate": 0.00019998715242861318, - "loss": 46.0, - "step": 31695 - }, - { - "epoch": 5.104432545593623, - "grad_norm": 0.0021290862932801247, - "learning_rate": 0.00019998715161766167, - "loss": 46.0, - "step": 31696 - }, - { - "epoch": 5.10459358267241, - "grad_norm": 0.0026584663428366184, - "learning_rate": 0.00019998715080668463, - "loss": 46.0, - "step": 31697 - }, - { - "epoch": 5.104754619751198, - "grad_norm": 0.0021825479343533516, - "learning_rate": 0.00019998714999568194, - "loss": 46.0, - "step": 31698 - }, - { - "epoch": 5.104915656829985, - "grad_norm": 0.005995746236294508, - "learning_rate": 0.00019998714918465367, - "loss": 46.0, - "step": 31699 - }, - { - "epoch": 5.105076693908773, - "grad_norm": 0.01741694286465645, - "learning_rate": 0.00019998714837359983, - "loss": 46.0, - "step": 31700 - }, - { - "epoch": 5.10523773098756, - "grad_norm": 0.009037435054779053, - "learning_rate": 0.00019998714756252038, - "loss": 46.0, - "step": 31701 - }, - { - "epoch": 5.1053987680663475, - "grad_norm": 0.0029692198149859905, - "learning_rate": 0.00019998714675141535, - "loss": 46.0, - "step": 31702 - }, - { - "epoch": 5.105559805145135, - "grad_norm": 0.004011984448879957, - "learning_rate": 0.0001999871459402847, - "loss": 46.0, - "step": 31703 - }, - { - "epoch": 5.105720842223922, - "grad_norm": 0.011755079962313175, - "learning_rate": 0.00019998714512912846, - "loss": 46.0, - "step": 31704 - }, - { - "epoch": 5.10588187930271, - "grad_norm": 0.0044697984121739864, - "learning_rate": 0.00019998714431794667, - "loss": 46.0, - "step": 31705 - }, - { - "epoch": 5.106042916381496, - "grad_norm": 0.004943686071783304, - "learning_rate": 0.00019998714350673926, - "loss": 46.0, - "step": 31706 - }, - { - "epoch": 5.106203953460284, - "grad_norm": 0.008940238505601883, - "learning_rate": 0.00019998714269550626, - "loss": 46.0, - "step": 31707 - }, - { - "epoch": 5.106364990539071, - "grad_norm": 0.0014674649573862553, - "learning_rate": 0.00019998714188424765, - "loss": 46.0, - "step": 31708 - }, - { - "epoch": 5.106526027617859, - "grad_norm": 0.003657751251012087, - "learning_rate": 0.00019998714107296347, - "loss": 46.0, - "step": 31709 - }, - { - "epoch": 5.106687064696646, - "grad_norm": 0.0029363837093114853, - "learning_rate": 0.0001999871402616537, - "loss": 46.0, - "step": 31710 - }, - { - "epoch": 5.106848101775434, - "grad_norm": 0.010432448238134384, - "learning_rate": 0.00019998713945031834, - "loss": 46.0, - "step": 31711 - }, - { - "epoch": 5.107009138854221, - "grad_norm": 0.006936706602573395, - "learning_rate": 0.0001999871386389574, - "loss": 46.0, - "step": 31712 - }, - { - "epoch": 5.107170175933009, - "grad_norm": 0.013828225433826447, - "learning_rate": 0.00019998713782757083, - "loss": 46.0, - "step": 31713 - }, - { - "epoch": 5.107331213011796, - "grad_norm": 0.013111437670886517, - "learning_rate": 0.0001999871370161587, - "loss": 46.0, - "step": 31714 - }, - { - "epoch": 5.1074922500905835, - "grad_norm": 0.010958925820887089, - "learning_rate": 0.00019998713620472097, - "loss": 46.0, - "step": 31715 - }, - { - "epoch": 5.107653287169371, - "grad_norm": 0.004703730810433626, - "learning_rate": 0.00019998713539325764, - "loss": 46.0, - "step": 31716 - }, - { - "epoch": 5.107814324248158, - "grad_norm": 0.0008039295789785683, - "learning_rate": 0.0001999871345817687, - "loss": 46.0, - "step": 31717 - }, - { - "epoch": 5.107975361326946, - "grad_norm": 0.01591426320374012, - "learning_rate": 0.00019998713377025423, - "loss": 46.0, - "step": 31718 - }, - { - "epoch": 5.108136398405733, - "grad_norm": 0.002451897133141756, - "learning_rate": 0.00019998713295871413, - "loss": 46.0, - "step": 31719 - }, - { - "epoch": 5.10829743548452, - "grad_norm": 0.0022533764131367207, - "learning_rate": 0.00019998713214714844, - "loss": 46.0, - "step": 31720 - }, - { - "epoch": 5.108458472563307, - "grad_norm": 0.017491359263658524, - "learning_rate": 0.00019998713133555714, - "loss": 46.0, - "step": 31721 - }, - { - "epoch": 5.108619509642095, - "grad_norm": 0.0038294882979243994, - "learning_rate": 0.00019998713052394028, - "loss": 46.0, - "step": 31722 - }, - { - "epoch": 5.108780546720882, - "grad_norm": 0.005774224177002907, - "learning_rate": 0.00019998712971229784, - "loss": 46.0, - "step": 31723 - }, - { - "epoch": 5.10894158379967, - "grad_norm": 0.0029772575944662094, - "learning_rate": 0.00019998712890062978, - "loss": 46.0, - "step": 31724 - }, - { - "epoch": 5.109102620878457, - "grad_norm": 0.004000029526650906, - "learning_rate": 0.00019998712808893613, - "loss": 46.0, - "step": 31725 - }, - { - "epoch": 5.109263657957245, - "grad_norm": 0.0035110164899379015, - "learning_rate": 0.00019998712727721686, - "loss": 46.0, - "step": 31726 - }, - { - "epoch": 5.109424695036032, - "grad_norm": 0.0008503383141942322, - "learning_rate": 0.00019998712646547207, - "loss": 46.0, - "step": 31727 - }, - { - "epoch": 5.1095857321148195, - "grad_norm": 0.004292924888432026, - "learning_rate": 0.00019998712565370163, - "loss": 46.0, - "step": 31728 - }, - { - "epoch": 5.109746769193607, - "grad_norm": 0.0012082025641575456, - "learning_rate": 0.00019998712484190563, - "loss": 46.0, - "step": 31729 - }, - { - "epoch": 5.109907806272394, - "grad_norm": 0.0033361774403601885, - "learning_rate": 0.00019998712403008402, - "loss": 46.0, - "step": 31730 - }, - { - "epoch": 5.110068843351182, - "grad_norm": 0.006076530087739229, - "learning_rate": 0.00019998712321823682, - "loss": 46.0, - "step": 31731 - }, - { - "epoch": 5.110229880429969, - "grad_norm": 0.002917871344834566, - "learning_rate": 0.00019998712240636404, - "loss": 46.0, - "step": 31732 - }, - { - "epoch": 5.110390917508757, - "grad_norm": 0.002042217645794153, - "learning_rate": 0.00019998712159446566, - "loss": 46.0, - "step": 31733 - }, - { - "epoch": 5.110551954587544, - "grad_norm": 0.004983257502317429, - "learning_rate": 0.00019998712078254168, - "loss": 46.0, - "step": 31734 - }, - { - "epoch": 5.110712991666331, - "grad_norm": 0.003702867776155472, - "learning_rate": 0.00019998711997059213, - "loss": 46.0, - "step": 31735 - }, - { - "epoch": 5.110874028745118, - "grad_norm": 0.006975711323320866, - "learning_rate": 0.00019998711915861697, - "loss": 46.0, - "step": 31736 - }, - { - "epoch": 5.111035065823906, - "grad_norm": 0.015306190587580204, - "learning_rate": 0.00019998711834661622, - "loss": 46.0, - "step": 31737 - }, - { - "epoch": 5.111196102902693, - "grad_norm": 0.0022727414034307003, - "learning_rate": 0.00019998711753458988, - "loss": 46.0, - "step": 31738 - }, - { - "epoch": 5.111357139981481, - "grad_norm": 0.01086303312331438, - "learning_rate": 0.00019998711672253796, - "loss": 46.0, - "step": 31739 - }, - { - "epoch": 5.111518177060268, - "grad_norm": 0.0005476285587064922, - "learning_rate": 0.00019998711591046045, - "loss": 46.0, - "step": 31740 - }, - { - "epoch": 5.1116792141390555, - "grad_norm": 0.019337452948093414, - "learning_rate": 0.0001999871150983573, - "loss": 46.0, - "step": 31741 - }, - { - "epoch": 5.111840251217843, - "grad_norm": 0.003269969252869487, - "learning_rate": 0.00019998711428622861, - "loss": 46.0, - "step": 31742 - }, - { - "epoch": 5.11200128829663, - "grad_norm": 0.0025042598135769367, - "learning_rate": 0.00019998711347407432, - "loss": 46.0, - "step": 31743 - }, - { - "epoch": 5.112162325375418, - "grad_norm": 0.00527442991733551, - "learning_rate": 0.00019998711266189446, - "loss": 46.0, - "step": 31744 - }, - { - "epoch": 5.112323362454205, - "grad_norm": 0.008749818429350853, - "learning_rate": 0.00019998711184968896, - "loss": 46.0, - "step": 31745 - }, - { - "epoch": 5.112484399532993, - "grad_norm": 0.006924590095877647, - "learning_rate": 0.0001999871110374579, - "loss": 46.0, - "step": 31746 - }, - { - "epoch": 5.11264543661178, - "grad_norm": 0.004711885005235672, - "learning_rate": 0.00019998711022520122, - "loss": 46.0, - "step": 31747 - }, - { - "epoch": 5.112806473690568, - "grad_norm": 0.0006080442690290511, - "learning_rate": 0.00019998710941291896, - "loss": 46.0, - "step": 31748 - }, - { - "epoch": 5.112967510769355, - "grad_norm": 0.0035481364466249943, - "learning_rate": 0.00019998710860061114, - "loss": 46.0, - "step": 31749 - }, - { - "epoch": 5.113128547848142, - "grad_norm": 0.02730635367333889, - "learning_rate": 0.0001999871077882777, - "loss": 46.0, - "step": 31750 - }, - { - "epoch": 5.113289584926929, - "grad_norm": 0.0027243653312325478, - "learning_rate": 0.00019998710697591868, - "loss": 46.0, - "step": 31751 - }, - { - "epoch": 5.1134506220057165, - "grad_norm": 0.01114586740732193, - "learning_rate": 0.00019998710616353404, - "loss": 46.0, - "step": 31752 - }, - { - "epoch": 5.113611659084504, - "grad_norm": 0.010896636173129082, - "learning_rate": 0.00019998710535112384, - "loss": 46.0, - "step": 31753 - }, - { - "epoch": 5.113772696163291, - "grad_norm": 0.007446507923305035, - "learning_rate": 0.00019998710453868803, - "loss": 46.0, - "step": 31754 - }, - { - "epoch": 5.113933733242079, - "grad_norm": 0.0055212113074958324, - "learning_rate": 0.00019998710372622663, - "loss": 46.0, - "step": 31755 - }, - { - "epoch": 5.114094770320866, - "grad_norm": 0.01968042179942131, - "learning_rate": 0.00019998710291373965, - "loss": 46.0, - "step": 31756 - }, - { - "epoch": 5.114255807399654, - "grad_norm": 0.004497861955314875, - "learning_rate": 0.0001999871021012271, - "loss": 46.0, - "step": 31757 - }, - { - "epoch": 5.114416844478441, - "grad_norm": 0.002537531778216362, - "learning_rate": 0.0001999871012886889, - "loss": 46.0, - "step": 31758 - }, - { - "epoch": 5.114577881557229, - "grad_norm": 0.0012752487091347575, - "learning_rate": 0.00019998710047612514, - "loss": 46.0, - "step": 31759 - }, - { - "epoch": 5.114738918636016, - "grad_norm": 0.0038238794077187777, - "learning_rate": 0.0001999870996635358, - "loss": 46.0, - "step": 31760 - }, - { - "epoch": 5.114899955714804, - "grad_norm": 0.002374745672568679, - "learning_rate": 0.00019998709885092085, - "loss": 46.0, - "step": 31761 - }, - { - "epoch": 5.115060992793591, - "grad_norm": 0.0030338556971400976, - "learning_rate": 0.00019998709803828032, - "loss": 46.0, - "step": 31762 - }, - { - "epoch": 5.1152220298723785, - "grad_norm": 0.004380014259368181, - "learning_rate": 0.0001999870972256142, - "loss": 46.0, - "step": 31763 - }, - { - "epoch": 5.115383066951165, - "grad_norm": 0.0029799898620694876, - "learning_rate": 0.00019998709641292245, - "loss": 46.0, - "step": 31764 - }, - { - "epoch": 5.1155441040299525, - "grad_norm": 0.010815626010298729, - "learning_rate": 0.00019998709560020516, - "loss": 46.0, - "step": 31765 - }, - { - "epoch": 5.11570514110874, - "grad_norm": 0.01009557954967022, - "learning_rate": 0.00019998709478746224, - "loss": 46.0, - "step": 31766 - }, - { - "epoch": 5.115866178187527, - "grad_norm": 0.006285614334046841, - "learning_rate": 0.00019998709397469377, - "loss": 46.0, - "step": 31767 - }, - { - "epoch": 5.116027215266315, - "grad_norm": 0.011339014396071434, - "learning_rate": 0.00019998709316189968, - "loss": 46.0, - "step": 31768 - }, - { - "epoch": 5.116188252345102, - "grad_norm": 0.0037707164883613586, - "learning_rate": 0.00019998709234907998, - "loss": 46.0, - "step": 31769 - }, - { - "epoch": 5.11634928942389, - "grad_norm": 0.005328834988176823, - "learning_rate": 0.00019998709153623475, - "loss": 46.0, - "step": 31770 - }, - { - "epoch": 5.116510326502677, - "grad_norm": 0.0035266312770545483, - "learning_rate": 0.00019998709072336387, - "loss": 46.0, - "step": 31771 - }, - { - "epoch": 5.116671363581465, - "grad_norm": 0.0072785671800374985, - "learning_rate": 0.00019998708991046744, - "loss": 46.0, - "step": 31772 - }, - { - "epoch": 5.116832400660252, - "grad_norm": 0.0034306710585951805, - "learning_rate": 0.0001999870890975454, - "loss": 46.0, - "step": 31773 - }, - { - "epoch": 5.11699343773904, - "grad_norm": 0.00595342181622982, - "learning_rate": 0.00019998708828459775, - "loss": 46.0, - "step": 31774 - }, - { - "epoch": 5.117154474817827, - "grad_norm": 0.003118218155577779, - "learning_rate": 0.00019998708747162453, - "loss": 46.0, - "step": 31775 - }, - { - "epoch": 5.1173155118966145, - "grad_norm": 0.010127179324626923, - "learning_rate": 0.00019998708665862572, - "loss": 46.0, - "step": 31776 - }, - { - "epoch": 5.117476548975402, - "grad_norm": 0.01014582347124815, - "learning_rate": 0.00019998708584560132, - "loss": 46.0, - "step": 31777 - }, - { - "epoch": 5.117637586054189, - "grad_norm": 0.00323995528742671, - "learning_rate": 0.0001999870850325513, - "loss": 46.0, - "step": 31778 - }, - { - "epoch": 5.117798623132976, - "grad_norm": 0.004588102921843529, - "learning_rate": 0.0001999870842194757, - "loss": 46.0, - "step": 31779 - }, - { - "epoch": 5.117959660211763, - "grad_norm": 0.006466016639024019, - "learning_rate": 0.00019998708340637452, - "loss": 46.0, - "step": 31780 - }, - { - "epoch": 5.118120697290551, - "grad_norm": 0.0020204796455800533, - "learning_rate": 0.00019998708259324777, - "loss": 46.0, - "step": 31781 - }, - { - "epoch": 5.118281734369338, - "grad_norm": 0.015112536028027534, - "learning_rate": 0.00019998708178009539, - "loss": 46.0, - "step": 31782 - }, - { - "epoch": 5.118442771448126, - "grad_norm": 0.0012880859430879354, - "learning_rate": 0.00019998708096691744, - "loss": 46.0, - "step": 31783 - }, - { - "epoch": 5.118603808526913, - "grad_norm": 0.0015607925597578287, - "learning_rate": 0.00019998708015371387, - "loss": 46.0, - "step": 31784 - }, - { - "epoch": 5.118764845605701, - "grad_norm": 0.004017500206828117, - "learning_rate": 0.00019998707934048472, - "loss": 46.0, - "step": 31785 - }, - { - "epoch": 5.118925882684488, - "grad_norm": 0.004509245976805687, - "learning_rate": 0.00019998707852723001, - "loss": 46.0, - "step": 31786 - }, - { - "epoch": 5.119086919763276, - "grad_norm": 0.005218183156102896, - "learning_rate": 0.0001999870777139497, - "loss": 46.0, - "step": 31787 - }, - { - "epoch": 5.119247956842063, - "grad_norm": 0.006368751637637615, - "learning_rate": 0.00019998707690064378, - "loss": 46.0, - "step": 31788 - }, - { - "epoch": 5.1194089939208505, - "grad_norm": 0.008672593161463737, - "learning_rate": 0.00019998707608731225, - "loss": 46.0, - "step": 31789 - }, - { - "epoch": 5.119570030999638, - "grad_norm": 0.010050750337541103, - "learning_rate": 0.00019998707527395517, - "loss": 46.0, - "step": 31790 - }, - { - "epoch": 5.119731068078425, - "grad_norm": 0.018477262929081917, - "learning_rate": 0.00019998707446057247, - "loss": 46.0, - "step": 31791 - }, - { - "epoch": 5.119892105157213, - "grad_norm": 0.0165387112647295, - "learning_rate": 0.0001999870736471642, - "loss": 46.0, - "step": 31792 - }, - { - "epoch": 5.120053142235999, - "grad_norm": 0.0012520333984866738, - "learning_rate": 0.0001999870728337303, - "loss": 46.0, - "step": 31793 - }, - { - "epoch": 5.120214179314787, - "grad_norm": 0.0046949079260230064, - "learning_rate": 0.00019998707202027085, - "loss": 46.0, - "step": 31794 - }, - { - "epoch": 5.120375216393574, - "grad_norm": 0.010558567009866238, - "learning_rate": 0.0001999870712067858, - "loss": 46.0, - "step": 31795 - }, - { - "epoch": 5.120536253472362, - "grad_norm": 0.024613451212644577, - "learning_rate": 0.00019998707039327513, - "loss": 46.0, - "step": 31796 - }, - { - "epoch": 5.120697290551149, - "grad_norm": 0.0015605506487190723, - "learning_rate": 0.0001999870695797389, - "loss": 46.0, - "step": 31797 - }, - { - "epoch": 5.120858327629937, - "grad_norm": 0.0032269759103655815, - "learning_rate": 0.00019998706876617707, - "loss": 46.0, - "step": 31798 - }, - { - "epoch": 5.121019364708724, - "grad_norm": 0.00508446479216218, - "learning_rate": 0.00019998706795258965, - "loss": 46.0, - "step": 31799 - }, - { - "epoch": 5.1211804017875115, - "grad_norm": 0.0070706079714000225, - "learning_rate": 0.00019998706713897664, - "loss": 46.0, - "step": 31800 - }, - { - "epoch": 5.121341438866299, - "grad_norm": 0.002075758296996355, - "learning_rate": 0.00019998706632533804, - "loss": 46.0, - "step": 31801 - }, - { - "epoch": 5.1215024759450865, - "grad_norm": 0.003729782300069928, - "learning_rate": 0.00019998706551167383, - "loss": 46.0, - "step": 31802 - }, - { - "epoch": 5.121663513023874, - "grad_norm": 0.0024954641703516245, - "learning_rate": 0.00019998706469798405, - "loss": 46.0, - "step": 31803 - }, - { - "epoch": 5.121824550102661, - "grad_norm": 0.0027922680601477623, - "learning_rate": 0.00019998706388426867, - "loss": 46.0, - "step": 31804 - }, - { - "epoch": 5.121985587181449, - "grad_norm": 0.0034518782049417496, - "learning_rate": 0.0001999870630705277, - "loss": 46.0, - "step": 31805 - }, - { - "epoch": 5.122146624260236, - "grad_norm": 0.008538051508367062, - "learning_rate": 0.00019998706225676113, - "loss": 46.0, - "step": 31806 - }, - { - "epoch": 5.122307661339024, - "grad_norm": 0.010760035365819931, - "learning_rate": 0.00019998706144296898, - "loss": 46.0, - "step": 31807 - }, - { - "epoch": 5.12246869841781, - "grad_norm": 0.0022751842625439167, - "learning_rate": 0.00019998706062915122, - "loss": 46.0, - "step": 31808 - }, - { - "epoch": 5.122629735496598, - "grad_norm": 0.0067843664437532425, - "learning_rate": 0.00019998705981530787, - "loss": 46.0, - "step": 31809 - }, - { - "epoch": 5.122790772575385, - "grad_norm": 0.017063351348042488, - "learning_rate": 0.00019998705900143896, - "loss": 46.0, - "step": 31810 - }, - { - "epoch": 5.122951809654173, - "grad_norm": 0.012322777882218361, - "learning_rate": 0.00019998705818754443, - "loss": 46.0, - "step": 31811 - }, - { - "epoch": 5.12311284673296, - "grad_norm": 0.014878259971737862, - "learning_rate": 0.00019998705737362432, - "loss": 46.0, - "step": 31812 - }, - { - "epoch": 5.1232738838117475, - "grad_norm": 0.0020468709990382195, - "learning_rate": 0.00019998705655967862, - "loss": 46.0, - "step": 31813 - }, - { - "epoch": 5.123434920890535, - "grad_norm": 0.0025103818625211716, - "learning_rate": 0.0001999870557457073, - "loss": 46.0, - "step": 31814 - }, - { - "epoch": 5.123595957969322, - "grad_norm": 0.0058688693679869175, - "learning_rate": 0.00019998705493171043, - "loss": 46.0, - "step": 31815 - }, - { - "epoch": 5.12375699504811, - "grad_norm": 0.004198704846203327, - "learning_rate": 0.00019998705411768795, - "loss": 46.0, - "step": 31816 - }, - { - "epoch": 5.123918032126897, - "grad_norm": 0.011426171287894249, - "learning_rate": 0.00019998705330363987, - "loss": 46.0, - "step": 31817 - }, - { - "epoch": 5.124079069205685, - "grad_norm": 0.006741536781191826, - "learning_rate": 0.0001999870524895662, - "loss": 46.0, - "step": 31818 - }, - { - "epoch": 5.124240106284472, - "grad_norm": 0.004225070588290691, - "learning_rate": 0.000199987051675467, - "loss": 46.0, - "step": 31819 - }, - { - "epoch": 5.12440114336326, - "grad_norm": 0.004235315602272749, - "learning_rate": 0.00019998705086134212, - "loss": 46.0, - "step": 31820 - }, - { - "epoch": 5.124562180442047, - "grad_norm": 0.002773455111309886, - "learning_rate": 0.0001999870500471917, - "loss": 46.0, - "step": 31821 - }, - { - "epoch": 5.124723217520835, - "grad_norm": 0.004156128969043493, - "learning_rate": 0.00019998704923301566, - "loss": 46.0, - "step": 31822 - }, - { - "epoch": 5.124884254599621, - "grad_norm": 0.004585111979395151, - "learning_rate": 0.00019998704841881404, - "loss": 46.0, - "step": 31823 - }, - { - "epoch": 5.125045291678409, - "grad_norm": 0.009455375373363495, - "learning_rate": 0.00019998704760458683, - "loss": 46.0, - "step": 31824 - }, - { - "epoch": 5.125206328757196, - "grad_norm": 0.004446776583790779, - "learning_rate": 0.00019998704679033403, - "loss": 46.0, - "step": 31825 - }, - { - "epoch": 5.1253673658359835, - "grad_norm": 0.005561869125813246, - "learning_rate": 0.00019998704597605564, - "loss": 46.0, - "step": 31826 - }, - { - "epoch": 5.125528402914771, - "grad_norm": 0.005660254042595625, - "learning_rate": 0.00019998704516175164, - "loss": 46.0, - "step": 31827 - }, - { - "epoch": 5.125689439993558, - "grad_norm": 0.003981557209044695, - "learning_rate": 0.00019998704434742205, - "loss": 46.0, - "step": 31828 - }, - { - "epoch": 5.125850477072346, - "grad_norm": 0.014550895430147648, - "learning_rate": 0.0001999870435330669, - "loss": 46.0, - "step": 31829 - }, - { - "epoch": 5.126011514151133, - "grad_norm": 0.0021189465187489986, - "learning_rate": 0.00019998704271868614, - "loss": 46.0, - "step": 31830 - }, - { - "epoch": 5.126172551229921, - "grad_norm": 0.01488464791327715, - "learning_rate": 0.0001999870419042798, - "loss": 46.0, - "step": 31831 - }, - { - "epoch": 5.126333588308708, - "grad_norm": 0.018151327967643738, - "learning_rate": 0.00019998704108984785, - "loss": 46.0, - "step": 31832 - }, - { - "epoch": 5.126494625387496, - "grad_norm": 0.011158701963722706, - "learning_rate": 0.0001999870402753903, - "loss": 46.0, - "step": 31833 - }, - { - "epoch": 5.126655662466283, - "grad_norm": 0.006380947772413492, - "learning_rate": 0.00019998703946090717, - "loss": 46.0, - "step": 31834 - }, - { - "epoch": 5.126816699545071, - "grad_norm": 0.0044798413291573524, - "learning_rate": 0.00019998703864639844, - "loss": 46.0, - "step": 31835 - }, - { - "epoch": 5.126977736623858, - "grad_norm": 0.005650964565575123, - "learning_rate": 0.00019998703783186416, - "loss": 46.0, - "step": 31836 - }, - { - "epoch": 5.1271387737026455, - "grad_norm": 0.0026274228002876043, - "learning_rate": 0.00019998703701730426, - "loss": 46.0, - "step": 31837 - }, - { - "epoch": 5.127299810781432, - "grad_norm": 0.0026627432089298964, - "learning_rate": 0.00019998703620271874, - "loss": 46.0, - "step": 31838 - }, - { - "epoch": 5.1274608478602195, - "grad_norm": 0.0013048682594671845, - "learning_rate": 0.00019998703538810767, - "loss": 46.0, - "step": 31839 - }, - { - "epoch": 5.127621884939007, - "grad_norm": 0.006113915238529444, - "learning_rate": 0.00019998703457347098, - "loss": 46.0, - "step": 31840 - }, - { - "epoch": 5.127782922017794, - "grad_norm": 0.0057538640685379505, - "learning_rate": 0.0001999870337588087, - "loss": 46.0, - "step": 31841 - }, - { - "epoch": 5.127943959096582, - "grad_norm": 0.0028739715926349163, - "learning_rate": 0.00019998703294412087, - "loss": 46.0, - "step": 31842 - }, - { - "epoch": 5.128104996175369, - "grad_norm": 0.003068583784624934, - "learning_rate": 0.00019998703212940742, - "loss": 46.0, - "step": 31843 - }, - { - "epoch": 5.128266033254157, - "grad_norm": 0.0021642097271978855, - "learning_rate": 0.00019998703131466838, - "loss": 46.0, - "step": 31844 - }, - { - "epoch": 5.128427070332944, - "grad_norm": 0.0014632302336394787, - "learning_rate": 0.00019998703049990373, - "loss": 46.0, - "step": 31845 - }, - { - "epoch": 5.128588107411732, - "grad_norm": 0.012482680380344391, - "learning_rate": 0.0001999870296851135, - "loss": 46.0, - "step": 31846 - }, - { - "epoch": 5.128749144490519, - "grad_norm": 0.02205357700586319, - "learning_rate": 0.00019998702887029767, - "loss": 46.0, - "step": 31847 - }, - { - "epoch": 5.128910181569307, - "grad_norm": 0.00596151826903224, - "learning_rate": 0.00019998702805545628, - "loss": 46.0, - "step": 31848 - }, - { - "epoch": 5.129071218648094, - "grad_norm": 0.017880428582429886, - "learning_rate": 0.00019998702724058928, - "loss": 46.0, - "step": 31849 - }, - { - "epoch": 5.1292322557268815, - "grad_norm": 0.004431016743183136, - "learning_rate": 0.0001999870264256967, - "loss": 46.0, - "step": 31850 - }, - { - "epoch": 5.129393292805669, - "grad_norm": 0.003324661171063781, - "learning_rate": 0.00019998702561077852, - "loss": 46.0, - "step": 31851 - }, - { - "epoch": 5.1295543298844555, - "grad_norm": 0.014620086178183556, - "learning_rate": 0.00019998702479583473, - "loss": 46.0, - "step": 31852 - }, - { - "epoch": 5.129715366963243, - "grad_norm": 0.0032435583416372538, - "learning_rate": 0.00019998702398086536, - "loss": 46.0, - "step": 31853 - }, - { - "epoch": 5.12987640404203, - "grad_norm": 0.005491221789270639, - "learning_rate": 0.0001999870231658704, - "loss": 46.0, - "step": 31854 - }, - { - "epoch": 5.130037441120818, - "grad_norm": 0.005614907015115023, - "learning_rate": 0.00019998702235084984, - "loss": 46.0, - "step": 31855 - }, - { - "epoch": 5.130198478199605, - "grad_norm": 0.0020053989719599485, - "learning_rate": 0.00019998702153580373, - "loss": 46.0, - "step": 31856 - }, - { - "epoch": 5.130359515278393, - "grad_norm": 0.0018744183471426368, - "learning_rate": 0.00019998702072073198, - "loss": 46.0, - "step": 31857 - }, - { - "epoch": 5.13052055235718, - "grad_norm": 0.015590890310704708, - "learning_rate": 0.00019998701990563467, - "loss": 46.0, - "step": 31858 - }, - { - "epoch": 5.130681589435968, - "grad_norm": 0.0010458175092935562, - "learning_rate": 0.00019998701909051175, - "loss": 46.0, - "step": 31859 - }, - { - "epoch": 5.130842626514755, - "grad_norm": 0.001125580514781177, - "learning_rate": 0.00019998701827536323, - "loss": 46.0, - "step": 31860 - }, - { - "epoch": 5.1310036635935425, - "grad_norm": 0.0012638133484870195, - "learning_rate": 0.00019998701746018913, - "loss": 46.0, - "step": 31861 - }, - { - "epoch": 5.13116470067233, - "grad_norm": 0.0016861631302163005, - "learning_rate": 0.00019998701664498944, - "loss": 46.0, - "step": 31862 - }, - { - "epoch": 5.131325737751117, - "grad_norm": 0.01926904357969761, - "learning_rate": 0.00019998701582976417, - "loss": 46.0, - "step": 31863 - }, - { - "epoch": 5.131486774829905, - "grad_norm": 0.005981662310659885, - "learning_rate": 0.00019998701501451328, - "loss": 46.0, - "step": 31864 - }, - { - "epoch": 5.131647811908692, - "grad_norm": 0.0062086321413517, - "learning_rate": 0.00019998701419923683, - "loss": 46.0, - "step": 31865 - }, - { - "epoch": 5.131808848987479, - "grad_norm": 0.006888432428240776, - "learning_rate": 0.00019998701338393477, - "loss": 46.0, - "step": 31866 - }, - { - "epoch": 5.131969886066266, - "grad_norm": 0.014934870414435863, - "learning_rate": 0.00019998701256860712, - "loss": 46.0, - "step": 31867 - }, - { - "epoch": 5.132130923145054, - "grad_norm": 0.007694324012845755, - "learning_rate": 0.00019998701175325388, - "loss": 46.0, - "step": 31868 - }, - { - "epoch": 5.132291960223841, - "grad_norm": 0.008090621791779995, - "learning_rate": 0.00019998701093787506, - "loss": 46.0, - "step": 31869 - }, - { - "epoch": 5.132452997302629, - "grad_norm": 0.0028628341387957335, - "learning_rate": 0.00019998701012247062, - "loss": 46.0, - "step": 31870 - }, - { - "epoch": 5.132614034381416, - "grad_norm": 0.005956295877695084, - "learning_rate": 0.00019998700930704062, - "loss": 46.0, - "step": 31871 - }, - { - "epoch": 5.132775071460204, - "grad_norm": 0.003277809824794531, - "learning_rate": 0.000199987008491585, - "loss": 46.0, - "step": 31872 - }, - { - "epoch": 5.132936108538991, - "grad_norm": 0.01019643247127533, - "learning_rate": 0.0001999870076761038, - "loss": 46.0, - "step": 31873 - }, - { - "epoch": 5.1330971456177785, - "grad_norm": 0.002147262915968895, - "learning_rate": 0.00019998700686059702, - "loss": 46.0, - "step": 31874 - }, - { - "epoch": 5.133258182696566, - "grad_norm": 0.0026059038937091827, - "learning_rate": 0.00019998700604506464, - "loss": 46.0, - "step": 31875 - }, - { - "epoch": 5.133419219775353, - "grad_norm": 0.003703092923387885, - "learning_rate": 0.00019998700522950666, - "loss": 46.0, - "step": 31876 - }, - { - "epoch": 5.133580256854141, - "grad_norm": 0.0031463506165891886, - "learning_rate": 0.0001999870044139231, - "loss": 46.0, - "step": 31877 - }, - { - "epoch": 5.133741293932928, - "grad_norm": 0.0009380585397593677, - "learning_rate": 0.00019998700359831394, - "loss": 46.0, - "step": 31878 - }, - { - "epoch": 5.133902331011716, - "grad_norm": 0.003604380413889885, - "learning_rate": 0.00019998700278267922, - "loss": 46.0, - "step": 31879 - }, - { - "epoch": 5.134063368090503, - "grad_norm": 0.022177448496222496, - "learning_rate": 0.00019998700196701888, - "loss": 46.0, - "step": 31880 - }, - { - "epoch": 5.13422440516929, - "grad_norm": 0.003096317872405052, - "learning_rate": 0.00019998700115133293, - "loss": 46.0, - "step": 31881 - }, - { - "epoch": 5.134385442248077, - "grad_norm": 0.007766911294311285, - "learning_rate": 0.00019998700033562142, - "loss": 46.0, - "step": 31882 - }, - { - "epoch": 5.134546479326865, - "grad_norm": 0.0021253577433526516, - "learning_rate": 0.0001999869995198843, - "loss": 46.0, - "step": 31883 - }, - { - "epoch": 5.134707516405652, - "grad_norm": 0.010086525231599808, - "learning_rate": 0.00019998699870412158, - "loss": 46.0, - "step": 31884 - }, - { - "epoch": 5.13486855348444, - "grad_norm": 0.0037321089766919613, - "learning_rate": 0.0001999869978883333, - "loss": 46.0, - "step": 31885 - }, - { - "epoch": 5.135029590563227, - "grad_norm": 0.0051113017834723, - "learning_rate": 0.0001999869970725194, - "loss": 46.0, - "step": 31886 - }, - { - "epoch": 5.1351906276420145, - "grad_norm": 0.004103382583707571, - "learning_rate": 0.00019998699625667991, - "loss": 46.0, - "step": 31887 - }, - { - "epoch": 5.135351664720802, - "grad_norm": 0.003515543881803751, - "learning_rate": 0.00019998699544081485, - "loss": 46.0, - "step": 31888 - }, - { - "epoch": 5.135512701799589, - "grad_norm": 0.002703124424442649, - "learning_rate": 0.0001999869946249242, - "loss": 46.0, - "step": 31889 - }, - { - "epoch": 5.135673738878377, - "grad_norm": 0.0009707590797916055, - "learning_rate": 0.00019998699380900794, - "loss": 46.0, - "step": 31890 - }, - { - "epoch": 5.135834775957164, - "grad_norm": 0.0026337311137467623, - "learning_rate": 0.0001999869929930661, - "loss": 46.0, - "step": 31891 - }, - { - "epoch": 5.135995813035952, - "grad_norm": 0.007333251181989908, - "learning_rate": 0.00019998699217709865, - "loss": 46.0, - "step": 31892 - }, - { - "epoch": 5.136156850114739, - "grad_norm": 0.010815009474754333, - "learning_rate": 0.00019998699136110563, - "loss": 46.0, - "step": 31893 - }, - { - "epoch": 5.136317887193527, - "grad_norm": 0.0009113884880207479, - "learning_rate": 0.00019998699054508704, - "loss": 46.0, - "step": 31894 - }, - { - "epoch": 5.136478924272314, - "grad_norm": 0.020153556019067764, - "learning_rate": 0.0001999869897290428, - "loss": 46.0, - "step": 31895 - }, - { - "epoch": 5.136639961351101, - "grad_norm": 0.003549706656485796, - "learning_rate": 0.00019998698891297297, - "loss": 46.0, - "step": 31896 - }, - { - "epoch": 5.136800998429888, - "grad_norm": 0.016075970605015755, - "learning_rate": 0.00019998698809687763, - "loss": 46.0, - "step": 31897 - }, - { - "epoch": 5.136962035508676, - "grad_norm": 0.003938252106308937, - "learning_rate": 0.0001999869872807566, - "loss": 46.0, - "step": 31898 - }, - { - "epoch": 5.137123072587463, - "grad_norm": 0.0031970986165106297, - "learning_rate": 0.00019998698646461004, - "loss": 46.0, - "step": 31899 - }, - { - "epoch": 5.1372841096662505, - "grad_norm": 0.005274391267448664, - "learning_rate": 0.00019998698564843787, - "loss": 46.0, - "step": 31900 - }, - { - "epoch": 5.137445146745038, - "grad_norm": 0.02253166027367115, - "learning_rate": 0.00019998698483224012, - "loss": 46.0, - "step": 31901 - }, - { - "epoch": 5.137606183823825, - "grad_norm": 0.004127433989197016, - "learning_rate": 0.00019998698401601676, - "loss": 46.0, - "step": 31902 - }, - { - "epoch": 5.137767220902613, - "grad_norm": 0.002368879271671176, - "learning_rate": 0.0001999869831997678, - "loss": 46.0, - "step": 31903 - }, - { - "epoch": 5.1379282579814, - "grad_norm": 0.0035266403574496508, - "learning_rate": 0.00019998698238349327, - "loss": 46.0, - "step": 31904 - }, - { - "epoch": 5.138089295060188, - "grad_norm": 0.009408049285411835, - "learning_rate": 0.00019998698156719315, - "loss": 46.0, - "step": 31905 - }, - { - "epoch": 5.138250332138975, - "grad_norm": 0.0008480396354570985, - "learning_rate": 0.00019998698075086743, - "loss": 46.0, - "step": 31906 - }, - { - "epoch": 5.138411369217763, - "grad_norm": 0.00807144958525896, - "learning_rate": 0.0001999869799345161, - "loss": 46.0, - "step": 31907 - }, - { - "epoch": 5.13857240629655, - "grad_norm": 0.009584181942045689, - "learning_rate": 0.00019998697911813922, - "loss": 46.0, - "step": 31908 - }, - { - "epoch": 5.1387334433753376, - "grad_norm": 0.015621276572346687, - "learning_rate": 0.00019998697830173675, - "loss": 46.0, - "step": 31909 - }, - { - "epoch": 5.138894480454125, - "grad_norm": 0.008341421373188496, - "learning_rate": 0.00019998697748530866, - "loss": 46.0, - "step": 31910 - }, - { - "epoch": 5.139055517532912, - "grad_norm": 0.0017939795507118106, - "learning_rate": 0.00019998697666885495, - "loss": 46.0, - "step": 31911 - }, - { - "epoch": 5.139216554611699, - "grad_norm": 0.003648452227935195, - "learning_rate": 0.00019998697585237572, - "loss": 46.0, - "step": 31912 - }, - { - "epoch": 5.1393775916904865, - "grad_norm": 0.002116371877491474, - "learning_rate": 0.00019998697503587084, - "loss": 46.0, - "step": 31913 - }, - { - "epoch": 5.139538628769274, - "grad_norm": 0.006568969693034887, - "learning_rate": 0.0001999869742193404, - "loss": 46.0, - "step": 31914 - }, - { - "epoch": 5.139699665848061, - "grad_norm": 0.007540897000581026, - "learning_rate": 0.00019998697340278433, - "loss": 46.0, - "step": 31915 - }, - { - "epoch": 5.139860702926849, - "grad_norm": 0.013246042653918266, - "learning_rate": 0.00019998697258620272, - "loss": 46.0, - "step": 31916 - }, - { - "epoch": 5.140021740005636, - "grad_norm": 0.0015266399132087827, - "learning_rate": 0.0001999869717695955, - "loss": 46.0, - "step": 31917 - }, - { - "epoch": 5.140182777084424, - "grad_norm": 0.012009619735181332, - "learning_rate": 0.00019998697095296265, - "loss": 46.0, - "step": 31918 - }, - { - "epoch": 5.140343814163211, - "grad_norm": 0.004252308048307896, - "learning_rate": 0.00019998697013630428, - "loss": 46.0, - "step": 31919 - }, - { - "epoch": 5.140504851241999, - "grad_norm": 0.01128622516989708, - "learning_rate": 0.00019998696931962026, - "loss": 46.0, - "step": 31920 - }, - { - "epoch": 5.140665888320786, - "grad_norm": 0.002854184480383992, - "learning_rate": 0.0001999869685029107, - "loss": 46.0, - "step": 31921 - }, - { - "epoch": 5.1408269253995735, - "grad_norm": 0.0050996169447898865, - "learning_rate": 0.00019998696768617547, - "loss": 46.0, - "step": 31922 - }, - { - "epoch": 5.140987962478361, - "grad_norm": 0.0017365822568535805, - "learning_rate": 0.00019998696686941472, - "loss": 46.0, - "step": 31923 - }, - { - "epoch": 5.141148999557148, - "grad_norm": 0.0038486134726554155, - "learning_rate": 0.00019998696605262833, - "loss": 46.0, - "step": 31924 - }, - { - "epoch": 5.141310036635935, - "grad_norm": 0.0014324215007945895, - "learning_rate": 0.00019998696523581638, - "loss": 46.0, - "step": 31925 - }, - { - "epoch": 5.1414710737147225, - "grad_norm": 0.003406471572816372, - "learning_rate": 0.00019998696441897885, - "loss": 46.0, - "step": 31926 - }, - { - "epoch": 5.14163211079351, - "grad_norm": 0.010486254468560219, - "learning_rate": 0.0001999869636021157, - "loss": 46.0, - "step": 31927 - }, - { - "epoch": 5.141793147872297, - "grad_norm": 0.0025951347779482603, - "learning_rate": 0.00019998696278522698, - "loss": 46.0, - "step": 31928 - }, - { - "epoch": 5.141954184951085, - "grad_norm": 0.005481914151459932, - "learning_rate": 0.00019998696196831266, - "loss": 46.0, - "step": 31929 - }, - { - "epoch": 5.142115222029872, - "grad_norm": 0.0021850187331438065, - "learning_rate": 0.00019998696115137272, - "loss": 46.0, - "step": 31930 - }, - { - "epoch": 5.14227625910866, - "grad_norm": 0.0020159154664725065, - "learning_rate": 0.00019998696033440722, - "loss": 46.0, - "step": 31931 - }, - { - "epoch": 5.142437296187447, - "grad_norm": 0.008351726457476616, - "learning_rate": 0.00019998695951741613, - "loss": 46.0, - "step": 31932 - }, - { - "epoch": 5.142598333266235, - "grad_norm": 0.0037066920194774866, - "learning_rate": 0.00019998695870039943, - "loss": 46.0, - "step": 31933 - }, - { - "epoch": 5.142759370345022, - "grad_norm": 0.004341783933341503, - "learning_rate": 0.00019998695788335714, - "loss": 46.0, - "step": 31934 - }, - { - "epoch": 5.1429204074238095, - "grad_norm": 0.015800854191184044, - "learning_rate": 0.0001999869570662893, - "loss": 46.0, - "step": 31935 - }, - { - "epoch": 5.143081444502597, - "grad_norm": 0.003397870808839798, - "learning_rate": 0.00019998695624919583, - "loss": 46.0, - "step": 31936 - }, - { - "epoch": 5.143242481581384, - "grad_norm": 0.0027612776029855013, - "learning_rate": 0.00019998695543207675, - "loss": 46.0, - "step": 31937 - }, - { - "epoch": 5.143403518660172, - "grad_norm": 0.008857367560267448, - "learning_rate": 0.0001999869546149321, - "loss": 46.0, - "step": 31938 - }, - { - "epoch": 5.143564555738959, - "grad_norm": 0.003905584802851081, - "learning_rate": 0.0001999869537977619, - "loss": 46.0, - "step": 31939 - }, - { - "epoch": 5.143725592817746, - "grad_norm": 0.008554146625101566, - "learning_rate": 0.00019998695298056605, - "loss": 46.0, - "step": 31940 - }, - { - "epoch": 5.143886629896533, - "grad_norm": 0.003602387849241495, - "learning_rate": 0.00019998695216334462, - "loss": 46.0, - "step": 31941 - }, - { - "epoch": 5.144047666975321, - "grad_norm": 0.00845616776496172, - "learning_rate": 0.0001999869513460976, - "loss": 46.0, - "step": 31942 - }, - { - "epoch": 5.144208704054108, - "grad_norm": 0.007769668009132147, - "learning_rate": 0.00019998695052882498, - "loss": 46.0, - "step": 31943 - }, - { - "epoch": 5.144369741132896, - "grad_norm": 0.013131647370755672, - "learning_rate": 0.0001999869497115268, - "loss": 46.0, - "step": 31944 - }, - { - "epoch": 5.144530778211683, - "grad_norm": 0.014707699418067932, - "learning_rate": 0.00019998694889420302, - "loss": 46.0, - "step": 31945 - }, - { - "epoch": 5.144691815290471, - "grad_norm": 0.002883515553548932, - "learning_rate": 0.00019998694807685366, - "loss": 46.0, - "step": 31946 - }, - { - "epoch": 5.144852852369258, - "grad_norm": 0.006109686568379402, - "learning_rate": 0.00019998694725947865, - "loss": 46.0, - "step": 31947 - }, - { - "epoch": 5.1450138894480455, - "grad_norm": 0.0022044205106794834, - "learning_rate": 0.0001999869464420781, - "loss": 46.0, - "step": 31948 - }, - { - "epoch": 5.145174926526833, - "grad_norm": 0.0054174126125872135, - "learning_rate": 0.00019998694562465197, - "loss": 46.0, - "step": 31949 - }, - { - "epoch": 5.14533596360562, - "grad_norm": 0.002054027281701565, - "learning_rate": 0.00019998694480720023, - "loss": 46.0, - "step": 31950 - }, - { - "epoch": 5.145497000684408, - "grad_norm": 0.0015779273817315698, - "learning_rate": 0.00019998694398972288, - "loss": 46.0, - "step": 31951 - }, - { - "epoch": 5.145658037763195, - "grad_norm": 0.001287986640818417, - "learning_rate": 0.00019998694317221994, - "loss": 46.0, - "step": 31952 - }, - { - "epoch": 5.145819074841983, - "grad_norm": 0.003587508574128151, - "learning_rate": 0.0001999869423546914, - "loss": 46.0, - "step": 31953 - }, - { - "epoch": 5.145980111920769, - "grad_norm": 0.0014130700146779418, - "learning_rate": 0.0001999869415371373, - "loss": 46.0, - "step": 31954 - }, - { - "epoch": 5.146141148999557, - "grad_norm": 0.010049944743514061, - "learning_rate": 0.0001999869407195576, - "loss": 46.0, - "step": 31955 - }, - { - "epoch": 5.146302186078344, - "grad_norm": 0.0018803478451445699, - "learning_rate": 0.0001999869399019523, - "loss": 46.0, - "step": 31956 - }, - { - "epoch": 5.146463223157132, - "grad_norm": 0.010292558930814266, - "learning_rate": 0.00019998693908432143, - "loss": 46.0, - "step": 31957 - }, - { - "epoch": 5.146624260235919, - "grad_norm": 0.004415625240653753, - "learning_rate": 0.00019998693826666494, - "loss": 46.0, - "step": 31958 - }, - { - "epoch": 5.146785297314707, - "grad_norm": 0.0029766466468572617, - "learning_rate": 0.00019998693744898287, - "loss": 46.0, - "step": 31959 - }, - { - "epoch": 5.146946334393494, - "grad_norm": 0.009527206420898438, - "learning_rate": 0.0001999869366312752, - "loss": 46.0, - "step": 31960 - }, - { - "epoch": 5.1471073714722815, - "grad_norm": 0.0058088358491659164, - "learning_rate": 0.00019998693581354195, - "loss": 46.0, - "step": 31961 - }, - { - "epoch": 5.147268408551069, - "grad_norm": 0.003082311013713479, - "learning_rate": 0.00019998693499578311, - "loss": 46.0, - "step": 31962 - }, - { - "epoch": 5.147429445629856, - "grad_norm": 0.007802627515047789, - "learning_rate": 0.00019998693417799866, - "loss": 46.0, - "step": 31963 - }, - { - "epoch": 5.147590482708644, - "grad_norm": 0.007802153471857309, - "learning_rate": 0.00019998693336018865, - "loss": 46.0, - "step": 31964 - }, - { - "epoch": 5.147751519787431, - "grad_norm": 0.0027400145772844553, - "learning_rate": 0.00019998693254235302, - "loss": 46.0, - "step": 31965 - }, - { - "epoch": 5.147912556866219, - "grad_norm": 0.0021923664025962353, - "learning_rate": 0.00019998693172449184, - "loss": 46.0, - "step": 31966 - }, - { - "epoch": 5.148073593945006, - "grad_norm": 0.0036887081805616617, - "learning_rate": 0.000199986930906605, - "loss": 46.0, - "step": 31967 - }, - { - "epoch": 5.148234631023794, - "grad_norm": 0.001792989089153707, - "learning_rate": 0.00019998693008869262, - "loss": 46.0, - "step": 31968 - }, - { - "epoch": 5.14839566810258, - "grad_norm": 0.010252018459141254, - "learning_rate": 0.00019998692927075464, - "loss": 46.0, - "step": 31969 - }, - { - "epoch": 5.148556705181368, - "grad_norm": 0.002269984222948551, - "learning_rate": 0.00019998692845279105, - "loss": 46.0, - "step": 31970 - }, - { - "epoch": 5.148717742260155, - "grad_norm": 0.002541786292567849, - "learning_rate": 0.0001999869276348019, - "loss": 46.0, - "step": 31971 - }, - { - "epoch": 5.148878779338943, - "grad_norm": 0.0016420090105384588, - "learning_rate": 0.00019998692681678714, - "loss": 46.0, - "step": 31972 - }, - { - "epoch": 5.14903981641773, - "grad_norm": 0.004215634427964687, - "learning_rate": 0.0001999869259987468, - "loss": 46.0, - "step": 31973 - }, - { - "epoch": 5.1492008534965175, - "grad_norm": 0.013805377297103405, - "learning_rate": 0.00019998692518068085, - "loss": 46.0, - "step": 31974 - }, - { - "epoch": 5.149361890575305, - "grad_norm": 0.006287030875682831, - "learning_rate": 0.00019998692436258933, - "loss": 46.0, - "step": 31975 - }, - { - "epoch": 5.149522927654092, - "grad_norm": 0.006337171886116266, - "learning_rate": 0.00019998692354447219, - "loss": 46.0, - "step": 31976 - }, - { - "epoch": 5.14968396473288, - "grad_norm": 0.0035442980006337166, - "learning_rate": 0.0001999869227263295, - "loss": 46.0, - "step": 31977 - }, - { - "epoch": 5.149845001811667, - "grad_norm": 0.004764437209814787, - "learning_rate": 0.00019998692190816115, - "loss": 46.0, - "step": 31978 - }, - { - "epoch": 5.150006038890455, - "grad_norm": 0.011036619544029236, - "learning_rate": 0.00019998692108996727, - "loss": 46.0, - "step": 31979 - }, - { - "epoch": 5.150167075969242, - "grad_norm": 0.0048725781962275505, - "learning_rate": 0.00019998692027174776, - "loss": 46.0, - "step": 31980 - }, - { - "epoch": 5.15032811304803, - "grad_norm": 0.002315012738108635, - "learning_rate": 0.00019998691945350268, - "loss": 46.0, - "step": 31981 - }, - { - "epoch": 5.150489150126817, - "grad_norm": 0.009306208230555058, - "learning_rate": 0.00019998691863523202, - "loss": 46.0, - "step": 31982 - }, - { - "epoch": 5.1506501872056045, - "grad_norm": 0.0018951231613755226, - "learning_rate": 0.00019998691781693574, - "loss": 46.0, - "step": 31983 - }, - { - "epoch": 5.150811224284391, - "grad_norm": 0.02272212691605091, - "learning_rate": 0.0001999869169986139, - "loss": 46.0, - "step": 31984 - }, - { - "epoch": 5.1509722613631785, - "grad_norm": 0.0015805884031578898, - "learning_rate": 0.00019998691618026642, - "loss": 46.0, - "step": 31985 - }, - { - "epoch": 5.151133298441966, - "grad_norm": 0.009673546068370342, - "learning_rate": 0.0001999869153618934, - "loss": 46.0, - "step": 31986 - }, - { - "epoch": 5.151294335520753, - "grad_norm": 0.017125146463513374, - "learning_rate": 0.00019998691454349476, - "loss": 46.0, - "step": 31987 - }, - { - "epoch": 5.151455372599541, - "grad_norm": 0.007747529074549675, - "learning_rate": 0.00019998691372507055, - "loss": 46.0, - "step": 31988 - }, - { - "epoch": 5.151616409678328, - "grad_norm": 0.0019121356308460236, - "learning_rate": 0.00019998691290662072, - "loss": 46.0, - "step": 31989 - }, - { - "epoch": 5.151777446757116, - "grad_norm": 0.020110275596380234, - "learning_rate": 0.00019998691208814533, - "loss": 46.0, - "step": 31990 - }, - { - "epoch": 5.151938483835903, - "grad_norm": 0.03902021422982216, - "learning_rate": 0.00019998691126964433, - "loss": 46.0, - "step": 31991 - }, - { - "epoch": 5.152099520914691, - "grad_norm": 0.003365870099514723, - "learning_rate": 0.00019998691045111774, - "loss": 46.0, - "step": 31992 - }, - { - "epoch": 5.152260557993478, - "grad_norm": 0.0011135706445202231, - "learning_rate": 0.00019998690963256554, - "loss": 46.0, - "step": 31993 - }, - { - "epoch": 5.152421595072266, - "grad_norm": 0.003256438300013542, - "learning_rate": 0.00019998690881398778, - "loss": 46.0, - "step": 31994 - }, - { - "epoch": 5.152582632151053, - "grad_norm": 0.007314857095479965, - "learning_rate": 0.00019998690799538443, - "loss": 46.0, - "step": 31995 - }, - { - "epoch": 5.1527436692298405, - "grad_norm": 0.0039177159778773785, - "learning_rate": 0.00019998690717675546, - "loss": 46.0, - "step": 31996 - }, - { - "epoch": 5.152904706308628, - "grad_norm": 0.0039351144805550575, - "learning_rate": 0.0001999869063581009, - "loss": 46.0, - "step": 31997 - }, - { - "epoch": 5.153065743387415, - "grad_norm": 0.005802639760077, - "learning_rate": 0.0001999869055394208, - "loss": 46.0, - "step": 31998 - }, - { - "epoch": 5.153226780466202, - "grad_norm": 0.0011089701438322663, - "learning_rate": 0.00019998690472071504, - "loss": 46.0, - "step": 31999 - }, - { - "epoch": 5.153387817544989, - "grad_norm": 0.007191171403974295, - "learning_rate": 0.0001999869039019837, - "loss": 46.0, - "step": 32000 - }, - { - "epoch": 5.153548854623777, - "grad_norm": 0.0034574205055832863, - "learning_rate": 0.0001999869030832268, - "loss": 46.0, - "step": 32001 - }, - { - "epoch": 5.153709891702564, - "grad_norm": 0.0017519757384434342, - "learning_rate": 0.0001999869022644443, - "loss": 46.0, - "step": 32002 - }, - { - "epoch": 5.153870928781352, - "grad_norm": 0.002987498650327325, - "learning_rate": 0.00019998690144563621, - "loss": 46.0, - "step": 32003 - }, - { - "epoch": 5.154031965860139, - "grad_norm": 0.007787894457578659, - "learning_rate": 0.00019998690062680253, - "loss": 46.0, - "step": 32004 - }, - { - "epoch": 5.154193002938927, - "grad_norm": 0.0026969073805958033, - "learning_rate": 0.00019998689980794322, - "loss": 46.0, - "step": 32005 - }, - { - "epoch": 5.154354040017714, - "grad_norm": 0.003432060359045863, - "learning_rate": 0.00019998689898905836, - "loss": 46.0, - "step": 32006 - }, - { - "epoch": 5.154515077096502, - "grad_norm": 0.006968228612095118, - "learning_rate": 0.0001999868981701479, - "loss": 46.0, - "step": 32007 - }, - { - "epoch": 5.154676114175289, - "grad_norm": 0.002099679782986641, - "learning_rate": 0.00019998689735121184, - "loss": 46.0, - "step": 32008 - }, - { - "epoch": 5.1548371512540765, - "grad_norm": 0.011155657470226288, - "learning_rate": 0.0001999868965322502, - "loss": 46.0, - "step": 32009 - }, - { - "epoch": 5.154998188332864, - "grad_norm": 0.002716090762987733, - "learning_rate": 0.00019998689571326298, - "loss": 46.0, - "step": 32010 - }, - { - "epoch": 5.155159225411651, - "grad_norm": 0.006175829563289881, - "learning_rate": 0.00019998689489425013, - "loss": 46.0, - "step": 32011 - }, - { - "epoch": 5.155320262490439, - "grad_norm": 0.019180694594979286, - "learning_rate": 0.0001999868940752117, - "loss": 46.0, - "step": 32012 - }, - { - "epoch": 5.155481299569225, - "grad_norm": 0.0019575918558984995, - "learning_rate": 0.0001999868932561477, - "loss": 46.0, - "step": 32013 - }, - { - "epoch": 5.155642336648013, - "grad_norm": 0.0014998972183093429, - "learning_rate": 0.0001999868924370581, - "loss": 46.0, - "step": 32014 - }, - { - "epoch": 5.1558033737268, - "grad_norm": 0.0029361594934016466, - "learning_rate": 0.0001999868916179429, - "loss": 46.0, - "step": 32015 - }, - { - "epoch": 5.155964410805588, - "grad_norm": 0.001516103744506836, - "learning_rate": 0.00019998689079880213, - "loss": 46.0, - "step": 32016 - }, - { - "epoch": 5.156125447884375, - "grad_norm": 0.002657112665474415, - "learning_rate": 0.00019998688997963576, - "loss": 46.0, - "step": 32017 - }, - { - "epoch": 5.156286484963163, - "grad_norm": 0.0008394195465371013, - "learning_rate": 0.00019998688916044377, - "loss": 46.0, - "step": 32018 - }, - { - "epoch": 5.15644752204195, - "grad_norm": 0.008360626175999641, - "learning_rate": 0.00019998688834122624, - "loss": 46.0, - "step": 32019 - }, - { - "epoch": 5.156608559120738, - "grad_norm": 0.0073290071450173855, - "learning_rate": 0.00019998688752198308, - "loss": 46.0, - "step": 32020 - }, - { - "epoch": 5.156769596199525, - "grad_norm": 0.0031881213653832674, - "learning_rate": 0.00019998688670271432, - "loss": 46.0, - "step": 32021 - }, - { - "epoch": 5.1569306332783125, - "grad_norm": 0.006068927701562643, - "learning_rate": 0.00019998688588341999, - "loss": 46.0, - "step": 32022 - }, - { - "epoch": 5.1570916703571, - "grad_norm": 0.012644579634070396, - "learning_rate": 0.00019998688506410006, - "loss": 46.0, - "step": 32023 - }, - { - "epoch": 5.157252707435887, - "grad_norm": 0.0051238052546978, - "learning_rate": 0.00019998688424475454, - "loss": 46.0, - "step": 32024 - }, - { - "epoch": 5.157413744514675, - "grad_norm": 0.0034228686708956957, - "learning_rate": 0.00019998688342538344, - "loss": 46.0, - "step": 32025 - }, - { - "epoch": 5.157574781593462, - "grad_norm": 0.0034452243708074093, - "learning_rate": 0.00019998688260598673, - "loss": 46.0, - "step": 32026 - }, - { - "epoch": 5.157735818672249, - "grad_norm": 0.0025913238059729338, - "learning_rate": 0.00019998688178656443, - "loss": 46.0, - "step": 32027 - }, - { - "epoch": 5.157896855751036, - "grad_norm": 0.006521561648696661, - "learning_rate": 0.00019998688096711656, - "loss": 46.0, - "step": 32028 - }, - { - "epoch": 5.158057892829824, - "grad_norm": 0.0017765399534255266, - "learning_rate": 0.0001999868801476431, - "loss": 46.0, - "step": 32029 - }, - { - "epoch": 5.158218929908611, - "grad_norm": 0.005090164951980114, - "learning_rate": 0.000199986879328144, - "loss": 46.0, - "step": 32030 - }, - { - "epoch": 5.158379966987399, - "grad_norm": 0.0036904604639858007, - "learning_rate": 0.00019998687850861935, - "loss": 46.0, - "step": 32031 - }, - { - "epoch": 5.158541004066186, - "grad_norm": 0.0014547455357387662, - "learning_rate": 0.0001999868776890691, - "loss": 46.0, - "step": 32032 - }, - { - "epoch": 5.1587020411449735, - "grad_norm": 0.0015070633962750435, - "learning_rate": 0.00019998687686949325, - "loss": 46.0, - "step": 32033 - }, - { - "epoch": 5.158863078223761, - "grad_norm": 0.0021063510794192553, - "learning_rate": 0.00019998687604989184, - "loss": 46.0, - "step": 32034 - }, - { - "epoch": 5.1590241153025485, - "grad_norm": 0.001814411603845656, - "learning_rate": 0.00019998687523026482, - "loss": 46.0, - "step": 32035 - }, - { - "epoch": 5.159185152381336, - "grad_norm": 0.003035438247025013, - "learning_rate": 0.0001999868744106122, - "loss": 46.0, - "step": 32036 - }, - { - "epoch": 5.159346189460123, - "grad_norm": 0.007734688930213451, - "learning_rate": 0.00019998687359093397, - "loss": 46.0, - "step": 32037 - }, - { - "epoch": 5.159507226538911, - "grad_norm": 0.008473081514239311, - "learning_rate": 0.00019998687277123018, - "loss": 46.0, - "step": 32038 - }, - { - "epoch": 5.159668263617698, - "grad_norm": 0.0015391348861157894, - "learning_rate": 0.00019998687195150078, - "loss": 46.0, - "step": 32039 - }, - { - "epoch": 5.159829300696486, - "grad_norm": 0.004602321423590183, - "learning_rate": 0.00019998687113174582, - "loss": 46.0, - "step": 32040 - }, - { - "epoch": 5.159990337775273, - "grad_norm": 0.0052553098648786545, - "learning_rate": 0.00019998687031196522, - "loss": 46.0, - "step": 32041 - }, - { - "epoch": 5.16015137485406, - "grad_norm": 0.0014426345005631447, - "learning_rate": 0.00019998686949215905, - "loss": 46.0, - "step": 32042 - }, - { - "epoch": 5.160312411932847, - "grad_norm": 0.0019206246361136436, - "learning_rate": 0.00019998686867232733, - "loss": 46.0, - "step": 32043 - }, - { - "epoch": 5.160473449011635, - "grad_norm": 0.0025787819176912308, - "learning_rate": 0.00019998686785246996, - "loss": 46.0, - "step": 32044 - }, - { - "epoch": 5.160634486090422, - "grad_norm": 0.01073532085865736, - "learning_rate": 0.000199986867032587, - "loss": 46.0, - "step": 32045 - }, - { - "epoch": 5.1607955231692095, - "grad_norm": 0.0022140785586088896, - "learning_rate": 0.00019998686621267847, - "loss": 46.0, - "step": 32046 - }, - { - "epoch": 5.160956560247997, - "grad_norm": 0.003601910313591361, - "learning_rate": 0.00019998686539274437, - "loss": 46.0, - "step": 32047 - }, - { - "epoch": 5.161117597326784, - "grad_norm": 0.007175914477556944, - "learning_rate": 0.00019998686457278465, - "loss": 46.0, - "step": 32048 - }, - { - "epoch": 5.161278634405572, - "grad_norm": 0.004147344268858433, - "learning_rate": 0.00019998686375279933, - "loss": 46.0, - "step": 32049 - }, - { - "epoch": 5.161439671484359, - "grad_norm": 0.004679791163653135, - "learning_rate": 0.00019998686293278844, - "loss": 46.0, - "step": 32050 - }, - { - "epoch": 5.161600708563147, - "grad_norm": 0.001714741694740951, - "learning_rate": 0.00019998686211275196, - "loss": 46.0, - "step": 32051 - }, - { - "epoch": 5.161761745641934, - "grad_norm": 0.0049721552059054375, - "learning_rate": 0.0001999868612926899, - "loss": 46.0, - "step": 32052 - }, - { - "epoch": 5.161922782720722, - "grad_norm": 0.033338431268930435, - "learning_rate": 0.0001999868604726022, - "loss": 46.0, - "step": 32053 - }, - { - "epoch": 5.162083819799509, - "grad_norm": 0.005200261250138283, - "learning_rate": 0.00019998685965248893, - "loss": 46.0, - "step": 32054 - }, - { - "epoch": 5.162244856878297, - "grad_norm": 0.019763480871915817, - "learning_rate": 0.00019998685883235008, - "loss": 46.0, - "step": 32055 - }, - { - "epoch": 5.162405893957084, - "grad_norm": 0.008143062703311443, - "learning_rate": 0.00019998685801218564, - "loss": 46.0, - "step": 32056 - }, - { - "epoch": 5.162566931035871, - "grad_norm": 0.007820661179721355, - "learning_rate": 0.0001999868571919956, - "loss": 46.0, - "step": 32057 - }, - { - "epoch": 5.162727968114658, - "grad_norm": 0.007466123439371586, - "learning_rate": 0.00019998685637177997, - "loss": 46.0, - "step": 32058 - }, - { - "epoch": 5.1628890051934455, - "grad_norm": 0.00197614636272192, - "learning_rate": 0.00019998685555153872, - "loss": 46.0, - "step": 32059 - }, - { - "epoch": 5.163050042272233, - "grad_norm": 0.0017582852160558105, - "learning_rate": 0.00019998685473127193, - "loss": 46.0, - "step": 32060 - }, - { - "epoch": 5.16321107935102, - "grad_norm": 0.002587753813713789, - "learning_rate": 0.00019998685391097953, - "loss": 46.0, - "step": 32061 - }, - { - "epoch": 5.163372116429808, - "grad_norm": 0.0031756702810525894, - "learning_rate": 0.00019998685309066154, - "loss": 46.0, - "step": 32062 - }, - { - "epoch": 5.163533153508595, - "grad_norm": 0.0023196216206997633, - "learning_rate": 0.00019998685227031794, - "loss": 46.0, - "step": 32063 - }, - { - "epoch": 5.163694190587383, - "grad_norm": 0.004183600191026926, - "learning_rate": 0.00019998685144994877, - "loss": 46.0, - "step": 32064 - }, - { - "epoch": 5.16385522766617, - "grad_norm": 0.004171449225395918, - "learning_rate": 0.000199986850629554, - "loss": 46.0, - "step": 32065 - }, - { - "epoch": 5.164016264744958, - "grad_norm": 0.003592939116060734, - "learning_rate": 0.00019998684980913366, - "loss": 46.0, - "step": 32066 - }, - { - "epoch": 5.164177301823745, - "grad_norm": 0.006181646138429642, - "learning_rate": 0.00019998684898868768, - "loss": 46.0, - "step": 32067 - }, - { - "epoch": 5.164338338902533, - "grad_norm": 0.0017797845648601651, - "learning_rate": 0.00019998684816821617, - "loss": 46.0, - "step": 32068 - }, - { - "epoch": 5.16449937598132, - "grad_norm": 0.008590904995799065, - "learning_rate": 0.00019998684734771901, - "loss": 46.0, - "step": 32069 - }, - { - "epoch": 5.1646604130601075, - "grad_norm": 0.006853925529867411, - "learning_rate": 0.00019998684652719627, - "loss": 46.0, - "step": 32070 - }, - { - "epoch": 5.164821450138895, - "grad_norm": 0.006140245124697685, - "learning_rate": 0.00019998684570664797, - "loss": 46.0, - "step": 32071 - }, - { - "epoch": 5.1649824872176815, - "grad_norm": 0.004573285114020109, - "learning_rate": 0.00019998684488607403, - "loss": 46.0, - "step": 32072 - }, - { - "epoch": 5.165143524296469, - "grad_norm": 0.010456668213009834, - "learning_rate": 0.00019998684406547453, - "loss": 46.0, - "step": 32073 - }, - { - "epoch": 5.165304561375256, - "grad_norm": 0.004685725551098585, - "learning_rate": 0.00019998684324484947, - "loss": 46.0, - "step": 32074 - }, - { - "epoch": 5.165465598454044, - "grad_norm": 0.009032877162098885, - "learning_rate": 0.00019998684242419876, - "loss": 46.0, - "step": 32075 - }, - { - "epoch": 5.165626635532831, - "grad_norm": 0.010874249041080475, - "learning_rate": 0.00019998684160352247, - "loss": 46.0, - "step": 32076 - }, - { - "epoch": 5.165787672611619, - "grad_norm": 0.0025444519706070423, - "learning_rate": 0.00019998684078282062, - "loss": 46.0, - "step": 32077 - }, - { - "epoch": 5.165948709690406, - "grad_norm": 0.0036635638680309057, - "learning_rate": 0.00019998683996209319, - "loss": 46.0, - "step": 32078 - }, - { - "epoch": 5.166109746769194, - "grad_norm": 0.012037809938192368, - "learning_rate": 0.0001999868391413401, - "loss": 46.0, - "step": 32079 - }, - { - "epoch": 5.166270783847981, - "grad_norm": 0.002927191089838743, - "learning_rate": 0.00019998683832056147, - "loss": 46.0, - "step": 32080 - }, - { - "epoch": 5.166431820926769, - "grad_norm": 0.01672675646841526, - "learning_rate": 0.00019998683749975724, - "loss": 46.0, - "step": 32081 - }, - { - "epoch": 5.166592858005556, - "grad_norm": 0.002120276214554906, - "learning_rate": 0.00019998683667892743, - "loss": 46.0, - "step": 32082 - }, - { - "epoch": 5.1667538950843435, - "grad_norm": 0.0024933675304055214, - "learning_rate": 0.000199986835858072, - "loss": 46.0, - "step": 32083 - }, - { - "epoch": 5.166914932163131, - "grad_norm": 0.0028369955252856016, - "learning_rate": 0.00019998683503719098, - "loss": 46.0, - "step": 32084 - }, - { - "epoch": 5.167075969241918, - "grad_norm": 0.024100981652736664, - "learning_rate": 0.00019998683421628438, - "loss": 46.0, - "step": 32085 - }, - { - "epoch": 5.167237006320705, - "grad_norm": 0.0043613119050860405, - "learning_rate": 0.0001999868333953522, - "loss": 46.0, - "step": 32086 - }, - { - "epoch": 5.167398043399492, - "grad_norm": 0.007613196037709713, - "learning_rate": 0.00019998683257439442, - "loss": 46.0, - "step": 32087 - }, - { - "epoch": 5.16755908047828, - "grad_norm": 0.005537358112633228, - "learning_rate": 0.00019998683175341105, - "loss": 46.0, - "step": 32088 - }, - { - "epoch": 5.167720117557067, - "grad_norm": 0.00807643961161375, - "learning_rate": 0.00019998683093240207, - "loss": 46.0, - "step": 32089 - }, - { - "epoch": 5.167881154635855, - "grad_norm": 0.0023925926070660353, - "learning_rate": 0.0001999868301113675, - "loss": 46.0, - "step": 32090 - }, - { - "epoch": 5.168042191714642, - "grad_norm": 0.0020383901428431273, - "learning_rate": 0.00019998682929030736, - "loss": 46.0, - "step": 32091 - }, - { - "epoch": 5.16820322879343, - "grad_norm": 0.0017731408588588238, - "learning_rate": 0.00019998682846922164, - "loss": 46.0, - "step": 32092 - }, - { - "epoch": 5.168364265872217, - "grad_norm": 0.006467212922871113, - "learning_rate": 0.00019998682764811026, - "loss": 46.0, - "step": 32093 - }, - { - "epoch": 5.1685253029510045, - "grad_norm": 0.0034874887205660343, - "learning_rate": 0.00019998682682697335, - "loss": 46.0, - "step": 32094 - }, - { - "epoch": 5.168686340029792, - "grad_norm": 0.0034083575010299683, - "learning_rate": 0.00019998682600581085, - "loss": 46.0, - "step": 32095 - }, - { - "epoch": 5.168847377108579, - "grad_norm": 0.003910582046955824, - "learning_rate": 0.00019998682518462273, - "loss": 46.0, - "step": 32096 - }, - { - "epoch": 5.169008414187367, - "grad_norm": 0.01141782570630312, - "learning_rate": 0.00019998682436340906, - "loss": 46.0, - "step": 32097 - }, - { - "epoch": 5.169169451266154, - "grad_norm": 0.003728589043021202, - "learning_rate": 0.00019998682354216977, - "loss": 46.0, - "step": 32098 - }, - { - "epoch": 5.169330488344942, - "grad_norm": 0.0011391510488465428, - "learning_rate": 0.00019998682272090486, - "loss": 46.0, - "step": 32099 - }, - { - "epoch": 5.169491525423728, - "grad_norm": 0.0020659365691244602, - "learning_rate": 0.0001999868218996144, - "loss": 46.0, - "step": 32100 - }, - { - "epoch": 5.169652562502516, - "grad_norm": 0.015897784382104874, - "learning_rate": 0.00019998682107829835, - "loss": 46.0, - "step": 32101 - }, - { - "epoch": 5.169813599581303, - "grad_norm": 0.00939464196562767, - "learning_rate": 0.00019998682025695668, - "loss": 46.0, - "step": 32102 - }, - { - "epoch": 5.169974636660091, - "grad_norm": 0.0017044766573235393, - "learning_rate": 0.00019998681943558943, - "loss": 46.0, - "step": 32103 - }, - { - "epoch": 5.170135673738878, - "grad_norm": 0.007020737510174513, - "learning_rate": 0.0001999868186141966, - "loss": 46.0, - "step": 32104 - }, - { - "epoch": 5.170296710817666, - "grad_norm": 0.001351483864709735, - "learning_rate": 0.00019998681779277816, - "loss": 46.0, - "step": 32105 - }, - { - "epoch": 5.170457747896453, - "grad_norm": 0.009765173308551311, - "learning_rate": 0.00019998681697133415, - "loss": 46.0, - "step": 32106 - }, - { - "epoch": 5.1706187849752405, - "grad_norm": 0.0012136913137510419, - "learning_rate": 0.00019998681614986455, - "loss": 46.0, - "step": 32107 - }, - { - "epoch": 5.170779822054028, - "grad_norm": 0.0009749795426614583, - "learning_rate": 0.0001999868153283693, - "loss": 46.0, - "step": 32108 - }, - { - "epoch": 5.170940859132815, - "grad_norm": 0.003672999795526266, - "learning_rate": 0.00019998681450684853, - "loss": 46.0, - "step": 32109 - }, - { - "epoch": 5.171101896211603, - "grad_norm": 0.010947924107313156, - "learning_rate": 0.00019998681368530214, - "loss": 46.0, - "step": 32110 - }, - { - "epoch": 5.17126293329039, - "grad_norm": 0.006620231084525585, - "learning_rate": 0.00019998681286373016, - "loss": 46.0, - "step": 32111 - }, - { - "epoch": 5.171423970369178, - "grad_norm": 0.015757473185658455, - "learning_rate": 0.0001999868120421326, - "loss": 46.0, - "step": 32112 - }, - { - "epoch": 5.171585007447965, - "grad_norm": 0.0023470637388527393, - "learning_rate": 0.00019998681122050945, - "loss": 46.0, - "step": 32113 - }, - { - "epoch": 5.171746044526753, - "grad_norm": 0.0022487491369247437, - "learning_rate": 0.00019998681039886068, - "loss": 46.0, - "step": 32114 - }, - { - "epoch": 5.171907081605539, - "grad_norm": 0.002365415683016181, - "learning_rate": 0.00019998680957718633, - "loss": 46.0, - "step": 32115 - }, - { - "epoch": 5.172068118684327, - "grad_norm": 0.0034747770987451077, - "learning_rate": 0.00019998680875548642, - "loss": 46.0, - "step": 32116 - }, - { - "epoch": 5.172229155763114, - "grad_norm": 0.0028694013599306345, - "learning_rate": 0.0001999868079337609, - "loss": 46.0, - "step": 32117 - }, - { - "epoch": 5.172390192841902, - "grad_norm": 0.0029435050673782825, - "learning_rate": 0.00019998680711200975, - "loss": 46.0, - "step": 32118 - }, - { - "epoch": 5.172551229920689, - "grad_norm": 0.013902498409152031, - "learning_rate": 0.00019998680629023302, - "loss": 46.0, - "step": 32119 - }, - { - "epoch": 5.1727122669994765, - "grad_norm": 0.0063034528866410255, - "learning_rate": 0.00019998680546843073, - "loss": 46.0, - "step": 32120 - }, - { - "epoch": 5.172873304078264, - "grad_norm": 0.005939032882452011, - "learning_rate": 0.00019998680464660285, - "loss": 46.0, - "step": 32121 - }, - { - "epoch": 5.173034341157051, - "grad_norm": 0.0005871170433238149, - "learning_rate": 0.00019998680382474934, - "loss": 46.0, - "step": 32122 - }, - { - "epoch": 5.173195378235839, - "grad_norm": 0.0053421612828969955, - "learning_rate": 0.00019998680300287029, - "loss": 46.0, - "step": 32123 - }, - { - "epoch": 5.173356415314626, - "grad_norm": 0.003946044947952032, - "learning_rate": 0.00019998680218096562, - "loss": 46.0, - "step": 32124 - }, - { - "epoch": 5.173517452393414, - "grad_norm": 0.012237252667546272, - "learning_rate": 0.00019998680135903537, - "loss": 46.0, - "step": 32125 - }, - { - "epoch": 5.173678489472201, - "grad_norm": 0.008623216301202774, - "learning_rate": 0.0001999868005370795, - "loss": 46.0, - "step": 32126 - }, - { - "epoch": 5.173839526550989, - "grad_norm": 0.006859529297798872, - "learning_rate": 0.00019998679971509805, - "loss": 46.0, - "step": 32127 - }, - { - "epoch": 5.174000563629776, - "grad_norm": 0.004463626071810722, - "learning_rate": 0.000199986798893091, - "loss": 46.0, - "step": 32128 - }, - { - "epoch": 5.174161600708564, - "grad_norm": 0.0021174491848796606, - "learning_rate": 0.0001999867980710584, - "loss": 46.0, - "step": 32129 - }, - { - "epoch": 5.17432263778735, - "grad_norm": 0.0017284222412854433, - "learning_rate": 0.00019998679724900017, - "loss": 46.0, - "step": 32130 - }, - { - "epoch": 5.174483674866138, - "grad_norm": 0.006913302466273308, - "learning_rate": 0.00019998679642691637, - "loss": 46.0, - "step": 32131 - }, - { - "epoch": 5.174644711944925, - "grad_norm": 0.003466990776360035, - "learning_rate": 0.00019998679560480698, - "loss": 46.0, - "step": 32132 - }, - { - "epoch": 5.1748057490237125, - "grad_norm": 0.0019665840081870556, - "learning_rate": 0.000199986794782672, - "loss": 46.0, - "step": 32133 - }, - { - "epoch": 5.1749667861025, - "grad_norm": 0.0018506355118006468, - "learning_rate": 0.00019998679396051138, - "loss": 46.0, - "step": 32134 - }, - { - "epoch": 5.175127823181287, - "grad_norm": 0.014390092343091965, - "learning_rate": 0.00019998679313832523, - "loss": 46.0, - "step": 32135 - }, - { - "epoch": 5.175288860260075, - "grad_norm": 0.003375882050022483, - "learning_rate": 0.00019998679231611344, - "loss": 46.0, - "step": 32136 - }, - { - "epoch": 5.175449897338862, - "grad_norm": 0.002697971649467945, - "learning_rate": 0.0001999867914938761, - "loss": 46.0, - "step": 32137 - }, - { - "epoch": 5.17561093441765, - "grad_norm": 0.00269668223336339, - "learning_rate": 0.00019998679067161315, - "loss": 46.0, - "step": 32138 - }, - { - "epoch": 5.175771971496437, - "grad_norm": 0.00459744269028306, - "learning_rate": 0.00019998678984932462, - "loss": 46.0, - "step": 32139 - }, - { - "epoch": 5.175933008575225, - "grad_norm": 0.013508311472833157, - "learning_rate": 0.00019998678902701046, - "loss": 46.0, - "step": 32140 - }, - { - "epoch": 5.176094045654012, - "grad_norm": 0.010211831890046597, - "learning_rate": 0.00019998678820467073, - "loss": 46.0, - "step": 32141 - }, - { - "epoch": 5.1762550827327996, - "grad_norm": 0.00506754033267498, - "learning_rate": 0.00019998678738230544, - "loss": 46.0, - "step": 32142 - }, - { - "epoch": 5.176416119811587, - "grad_norm": 0.0034224314149469137, - "learning_rate": 0.00019998678655991454, - "loss": 46.0, - "step": 32143 - }, - { - "epoch": 5.1765771568903745, - "grad_norm": 0.0038564172573387623, - "learning_rate": 0.00019998678573749802, - "loss": 46.0, - "step": 32144 - }, - { - "epoch": 5.176738193969161, - "grad_norm": 0.0031078457832336426, - "learning_rate": 0.00019998678491505594, - "loss": 46.0, - "step": 32145 - }, - { - "epoch": 5.1768992310479485, - "grad_norm": 0.006362107582390308, - "learning_rate": 0.00019998678409258825, - "loss": 46.0, - "step": 32146 - }, - { - "epoch": 5.177060268126736, - "grad_norm": 0.008865317329764366, - "learning_rate": 0.00019998678327009498, - "loss": 46.0, - "step": 32147 - }, - { - "epoch": 5.177221305205523, - "grad_norm": 0.0026274453848600388, - "learning_rate": 0.0001999867824475761, - "loss": 46.0, - "step": 32148 - }, - { - "epoch": 5.177382342284311, - "grad_norm": 0.013871009461581707, - "learning_rate": 0.00019998678162503166, - "loss": 46.0, - "step": 32149 - }, - { - "epoch": 5.177543379363098, - "grad_norm": 0.003805729327723384, - "learning_rate": 0.00019998678080246162, - "loss": 46.0, - "step": 32150 - }, - { - "epoch": 5.177704416441886, - "grad_norm": 0.0035232841037213802, - "learning_rate": 0.000199986779979866, - "loss": 46.0, - "step": 32151 - }, - { - "epoch": 5.177865453520673, - "grad_norm": 0.008128591813147068, - "learning_rate": 0.00019998677915724475, - "loss": 46.0, - "step": 32152 - }, - { - "epoch": 5.178026490599461, - "grad_norm": 0.0013356945710256696, - "learning_rate": 0.00019998677833459792, - "loss": 46.0, - "step": 32153 - }, - { - "epoch": 5.178187527678248, - "grad_norm": 0.005026698112487793, - "learning_rate": 0.0001999867775119255, - "loss": 46.0, - "step": 32154 - }, - { - "epoch": 5.1783485647570355, - "grad_norm": 0.00934301782399416, - "learning_rate": 0.0001999867766892275, - "loss": 46.0, - "step": 32155 - }, - { - "epoch": 5.178509601835823, - "grad_norm": 0.014060525223612785, - "learning_rate": 0.00019998677586650392, - "loss": 46.0, - "step": 32156 - }, - { - "epoch": 5.17867063891461, - "grad_norm": 0.005326290149241686, - "learning_rate": 0.00019998677504375474, - "loss": 46.0, - "step": 32157 - }, - { - "epoch": 5.178831675993398, - "grad_norm": 0.00762485945597291, - "learning_rate": 0.00019998677422097995, - "loss": 46.0, - "step": 32158 - }, - { - "epoch": 5.1789927130721845, - "grad_norm": 0.003433982143178582, - "learning_rate": 0.00019998677339817957, - "loss": 46.0, - "step": 32159 - }, - { - "epoch": 5.179153750150972, - "grad_norm": 0.003874999238178134, - "learning_rate": 0.0001999867725753536, - "loss": 46.0, - "step": 32160 - }, - { - "epoch": 5.179314787229759, - "grad_norm": 0.0026599785778671503, - "learning_rate": 0.00019998677175250205, - "loss": 46.0, - "step": 32161 - }, - { - "epoch": 5.179475824308547, - "grad_norm": 0.0011649243533611298, - "learning_rate": 0.0001999867709296249, - "loss": 46.0, - "step": 32162 - }, - { - "epoch": 5.179636861387334, - "grad_norm": 0.005228824447840452, - "learning_rate": 0.00019998677010672219, - "loss": 46.0, - "step": 32163 - }, - { - "epoch": 5.179797898466122, - "grad_norm": 0.009424913674592972, - "learning_rate": 0.00019998676928379384, - "loss": 46.0, - "step": 32164 - }, - { - "epoch": 5.179958935544909, - "grad_norm": 0.0023327148519456387, - "learning_rate": 0.00019998676846083994, - "loss": 46.0, - "step": 32165 - }, - { - "epoch": 5.180119972623697, - "grad_norm": 0.008002068847417831, - "learning_rate": 0.00019998676763786043, - "loss": 46.0, - "step": 32166 - }, - { - "epoch": 5.180281009702484, - "grad_norm": 0.021423954516649246, - "learning_rate": 0.0001999867668148553, - "loss": 46.0, - "step": 32167 - }, - { - "epoch": 5.1804420467812715, - "grad_norm": 0.0049739438109099865, - "learning_rate": 0.0001999867659918246, - "loss": 46.0, - "step": 32168 - }, - { - "epoch": 5.180603083860059, - "grad_norm": 0.0014739732723683119, - "learning_rate": 0.00019998676516876836, - "loss": 46.0, - "step": 32169 - }, - { - "epoch": 5.180764120938846, - "grad_norm": 0.003991612233221531, - "learning_rate": 0.00019998676434568647, - "loss": 46.0, - "step": 32170 - }, - { - "epoch": 5.180925158017634, - "grad_norm": 0.002354171359911561, - "learning_rate": 0.000199986763522579, - "loss": 46.0, - "step": 32171 - }, - { - "epoch": 5.181086195096421, - "grad_norm": 0.012767891399562359, - "learning_rate": 0.00019998676269944595, - "loss": 46.0, - "step": 32172 - }, - { - "epoch": 5.181247232175208, - "grad_norm": 0.0124012790620327, - "learning_rate": 0.0001999867618762873, - "loss": 46.0, - "step": 32173 - }, - { - "epoch": 5.181408269253995, - "grad_norm": 0.012858306989073753, - "learning_rate": 0.00019998676105310303, - "loss": 46.0, - "step": 32174 - }, - { - "epoch": 5.181569306332783, - "grad_norm": 0.003873368725180626, - "learning_rate": 0.00019998676022989323, - "loss": 46.0, - "step": 32175 - }, - { - "epoch": 5.18173034341157, - "grad_norm": 0.005934448912739754, - "learning_rate": 0.0001999867594066578, - "loss": 46.0, - "step": 32176 - }, - { - "epoch": 5.181891380490358, - "grad_norm": 0.014673309400677681, - "learning_rate": 0.00019998675858339676, - "loss": 46.0, - "step": 32177 - }, - { - "epoch": 5.182052417569145, - "grad_norm": 0.003604224883019924, - "learning_rate": 0.00019998675776011014, - "loss": 46.0, - "step": 32178 - }, - { - "epoch": 5.182213454647933, - "grad_norm": 0.012089215219020844, - "learning_rate": 0.00019998675693679797, - "loss": 46.0, - "step": 32179 - }, - { - "epoch": 5.18237449172672, - "grad_norm": 0.0029040127992630005, - "learning_rate": 0.00019998675611346015, - "loss": 46.0, - "step": 32180 - }, - { - "epoch": 5.1825355288055075, - "grad_norm": 0.006171551998704672, - "learning_rate": 0.00019998675529009677, - "loss": 46.0, - "step": 32181 - }, - { - "epoch": 5.182696565884295, - "grad_norm": 0.0026237000711262226, - "learning_rate": 0.0001999867544667078, - "loss": 46.0, - "step": 32182 - }, - { - "epoch": 5.182857602963082, - "grad_norm": 0.0030658748000860214, - "learning_rate": 0.00019998675364329326, - "loss": 46.0, - "step": 32183 - }, - { - "epoch": 5.18301864004187, - "grad_norm": 0.010112238116562366, - "learning_rate": 0.0001999867528198531, - "loss": 46.0, - "step": 32184 - }, - { - "epoch": 5.183179677120657, - "grad_norm": 0.012909832410514355, - "learning_rate": 0.00019998675199638734, - "loss": 46.0, - "step": 32185 - }, - { - "epoch": 5.183340714199445, - "grad_norm": 0.003441684879362583, - "learning_rate": 0.00019998675117289597, - "loss": 46.0, - "step": 32186 - }, - { - "epoch": 5.183501751278232, - "grad_norm": 0.005345614161342382, - "learning_rate": 0.00019998675034937907, - "loss": 46.0, - "step": 32187 - }, - { - "epoch": 5.183662788357019, - "grad_norm": 0.002593819983303547, - "learning_rate": 0.00019998674952583653, - "loss": 46.0, - "step": 32188 - }, - { - "epoch": 5.183823825435806, - "grad_norm": 0.0019140162039548159, - "learning_rate": 0.0001999867487022684, - "loss": 46.0, - "step": 32189 - }, - { - "epoch": 5.183984862514594, - "grad_norm": 0.0022274444345384836, - "learning_rate": 0.00019998674787867474, - "loss": 46.0, - "step": 32190 - }, - { - "epoch": 5.184145899593381, - "grad_norm": 0.006879804190248251, - "learning_rate": 0.00019998674705505544, - "loss": 46.0, - "step": 32191 - }, - { - "epoch": 5.184306936672169, - "grad_norm": 0.0018583649070933461, - "learning_rate": 0.00019998674623141055, - "loss": 46.0, - "step": 32192 - }, - { - "epoch": 5.184467973750956, - "grad_norm": 0.0010316588450223207, - "learning_rate": 0.00019998674540774004, - "loss": 46.0, - "step": 32193 - }, - { - "epoch": 5.1846290108297435, - "grad_norm": 0.015452869236469269, - "learning_rate": 0.00019998674458404398, - "loss": 46.0, - "step": 32194 - }, - { - "epoch": 5.184790047908531, - "grad_norm": 0.008038332685828209, - "learning_rate": 0.00019998674376032233, - "loss": 46.0, - "step": 32195 - }, - { - "epoch": 5.184951084987318, - "grad_norm": 0.0045012845657765865, - "learning_rate": 0.00019998674293657506, - "loss": 46.0, - "step": 32196 - }, - { - "epoch": 5.185112122066106, - "grad_norm": 0.0007633883506059647, - "learning_rate": 0.0001999867421128022, - "loss": 46.0, - "step": 32197 - }, - { - "epoch": 5.185273159144893, - "grad_norm": 0.004212799482047558, - "learning_rate": 0.0001999867412890038, - "loss": 46.0, - "step": 32198 - }, - { - "epoch": 5.185434196223681, - "grad_norm": 0.018131045624613762, - "learning_rate": 0.00019998674046517976, - "loss": 46.0, - "step": 32199 - }, - { - "epoch": 5.185595233302468, - "grad_norm": 0.0029433213640004396, - "learning_rate": 0.00019998673964133012, - "loss": 46.0, - "step": 32200 - }, - { - "epoch": 5.185756270381256, - "grad_norm": 0.005548818968236446, - "learning_rate": 0.00019998673881745492, - "loss": 46.0, - "step": 32201 - }, - { - "epoch": 5.185917307460043, - "grad_norm": 0.03727267310023308, - "learning_rate": 0.0001999867379935541, - "loss": 46.0, - "step": 32202 - }, - { - "epoch": 5.18607834453883, - "grad_norm": 0.0027350359596312046, - "learning_rate": 0.0001999867371696277, - "loss": 46.0, - "step": 32203 - }, - { - "epoch": 5.186239381617617, - "grad_norm": 0.01887637749314308, - "learning_rate": 0.00019998673634567574, - "loss": 46.0, - "step": 32204 - }, - { - "epoch": 5.186400418696405, - "grad_norm": 0.008631446398794651, - "learning_rate": 0.00019998673552169813, - "loss": 46.0, - "step": 32205 - }, - { - "epoch": 5.186561455775192, - "grad_norm": 0.002271883189678192, - "learning_rate": 0.00019998673469769497, - "loss": 46.0, - "step": 32206 - }, - { - "epoch": 5.1867224928539795, - "grad_norm": 0.0039206258952617645, - "learning_rate": 0.0001999867338736662, - "loss": 46.0, - "step": 32207 - }, - { - "epoch": 5.186883529932767, - "grad_norm": 0.0022147258277982473, - "learning_rate": 0.00019998673304961188, - "loss": 46.0, - "step": 32208 - }, - { - "epoch": 5.187044567011554, - "grad_norm": 0.006100933067500591, - "learning_rate": 0.0001999867322255319, - "loss": 46.0, - "step": 32209 - }, - { - "epoch": 5.187205604090342, - "grad_norm": 0.0021571496035903692, - "learning_rate": 0.00019998673140142635, - "loss": 46.0, - "step": 32210 - }, - { - "epoch": 5.187366641169129, - "grad_norm": 0.009839133359491825, - "learning_rate": 0.00019998673057729525, - "loss": 46.0, - "step": 32211 - }, - { - "epoch": 5.187527678247917, - "grad_norm": 0.004839913919568062, - "learning_rate": 0.00019998672975313854, - "loss": 46.0, - "step": 32212 - }, - { - "epoch": 5.187688715326704, - "grad_norm": 0.0029112743213772774, - "learning_rate": 0.00019998672892895624, - "loss": 46.0, - "step": 32213 - }, - { - "epoch": 5.187849752405492, - "grad_norm": 0.004325307905673981, - "learning_rate": 0.00019998672810474832, - "loss": 46.0, - "step": 32214 - }, - { - "epoch": 5.188010789484279, - "grad_norm": 0.001963454531505704, - "learning_rate": 0.00019998672728051484, - "loss": 46.0, - "step": 32215 - }, - { - "epoch": 5.1881718265630665, - "grad_norm": 0.013674069195985794, - "learning_rate": 0.00019998672645625573, - "loss": 46.0, - "step": 32216 - }, - { - "epoch": 5.188332863641854, - "grad_norm": 0.013424782082438469, - "learning_rate": 0.00019998672563197105, - "loss": 46.0, - "step": 32217 - }, - { - "epoch": 5.1884939007206405, - "grad_norm": 0.002722163451835513, - "learning_rate": 0.00019998672480766078, - "loss": 46.0, - "step": 32218 - }, - { - "epoch": 5.188654937799428, - "grad_norm": 0.0076047698967158794, - "learning_rate": 0.00019998672398332493, - "loss": 46.0, - "step": 32219 - }, - { - "epoch": 5.188815974878215, - "grad_norm": 0.004611724056303501, - "learning_rate": 0.00019998672315896346, - "loss": 46.0, - "step": 32220 - }, - { - "epoch": 5.188977011957003, - "grad_norm": 0.006655667908489704, - "learning_rate": 0.00019998672233457644, - "loss": 46.0, - "step": 32221 - }, - { - "epoch": 5.18913804903579, - "grad_norm": 0.0017513770144432783, - "learning_rate": 0.0001999867215101638, - "loss": 46.0, - "step": 32222 - }, - { - "epoch": 5.189299086114578, - "grad_norm": 0.007567969616502523, - "learning_rate": 0.0001999867206857256, - "loss": 46.0, - "step": 32223 - }, - { - "epoch": 5.189460123193365, - "grad_norm": 0.002437217626720667, - "learning_rate": 0.00019998671986126175, - "loss": 46.0, - "step": 32224 - }, - { - "epoch": 5.189621160272153, - "grad_norm": 0.007849694229662418, - "learning_rate": 0.00019998671903677235, - "loss": 46.0, - "step": 32225 - }, - { - "epoch": 5.18978219735094, - "grad_norm": 0.007074286229908466, - "learning_rate": 0.00019998671821225736, - "loss": 46.0, - "step": 32226 - }, - { - "epoch": 5.189943234429728, - "grad_norm": 0.004294555634260178, - "learning_rate": 0.00019998671738771678, - "loss": 46.0, - "step": 32227 - }, - { - "epoch": 5.190104271508515, - "grad_norm": 0.003116513602435589, - "learning_rate": 0.0001999867165631506, - "loss": 46.0, - "step": 32228 - }, - { - "epoch": 5.1902653085873025, - "grad_norm": 0.018665479496121407, - "learning_rate": 0.00019998671573855884, - "loss": 46.0, - "step": 32229 - }, - { - "epoch": 5.19042634566609, - "grad_norm": 0.005529359448701143, - "learning_rate": 0.00019998671491394145, - "loss": 46.0, - "step": 32230 - }, - { - "epoch": 5.190587382744877, - "grad_norm": 0.003929000813513994, - "learning_rate": 0.0001999867140892985, - "loss": 46.0, - "step": 32231 - }, - { - "epoch": 5.190748419823664, - "grad_norm": 0.001755247823894024, - "learning_rate": 0.00019998671326462996, - "loss": 46.0, - "step": 32232 - }, - { - "epoch": 5.190909456902451, - "grad_norm": 0.006418773904442787, - "learning_rate": 0.0001999867124399358, - "loss": 46.0, - "step": 32233 - }, - { - "epoch": 5.191070493981239, - "grad_norm": 0.002701960038393736, - "learning_rate": 0.00019998671161521606, - "loss": 46.0, - "step": 32234 - }, - { - "epoch": 5.191231531060026, - "grad_norm": 0.007042792625725269, - "learning_rate": 0.00019998671079047076, - "loss": 46.0, - "step": 32235 - }, - { - "epoch": 5.191392568138814, - "grad_norm": 0.004141178447753191, - "learning_rate": 0.00019998670996569984, - "loss": 46.0, - "step": 32236 - }, - { - "epoch": 5.191553605217601, - "grad_norm": 0.01399219036102295, - "learning_rate": 0.00019998670914090334, - "loss": 46.0, - "step": 32237 - }, - { - "epoch": 5.191714642296389, - "grad_norm": 0.001908156438730657, - "learning_rate": 0.00019998670831608125, - "loss": 46.0, - "step": 32238 - }, - { - "epoch": 5.191875679375176, - "grad_norm": 0.005616384092718363, - "learning_rate": 0.00019998670749123357, - "loss": 46.0, - "step": 32239 - }, - { - "epoch": 5.192036716453964, - "grad_norm": 0.006891006603837013, - "learning_rate": 0.00019998670666636028, - "loss": 46.0, - "step": 32240 - }, - { - "epoch": 5.192197753532751, - "grad_norm": 0.01339486800134182, - "learning_rate": 0.0001999867058414614, - "loss": 46.0, - "step": 32241 - }, - { - "epoch": 5.1923587906115385, - "grad_norm": 0.0019606389105319977, - "learning_rate": 0.00019998670501653694, - "loss": 46.0, - "step": 32242 - }, - { - "epoch": 5.192519827690326, - "grad_norm": 0.011777014471590519, - "learning_rate": 0.00019998670419158688, - "loss": 46.0, - "step": 32243 - }, - { - "epoch": 5.192680864769113, - "grad_norm": 0.0020849357824772596, - "learning_rate": 0.00019998670336661122, - "loss": 46.0, - "step": 32244 - }, - { - "epoch": 5.192841901847901, - "grad_norm": 0.012270960956811905, - "learning_rate": 0.00019998670254161002, - "loss": 46.0, - "step": 32245 - }, - { - "epoch": 5.193002938926688, - "grad_norm": 0.012650938704609871, - "learning_rate": 0.00019998670171658315, - "loss": 46.0, - "step": 32246 - }, - { - "epoch": 5.193163976005475, - "grad_norm": 0.009674279019236565, - "learning_rate": 0.00019998670089153072, - "loss": 46.0, - "step": 32247 - }, - { - "epoch": 5.193325013084262, - "grad_norm": 0.014590908773243427, - "learning_rate": 0.00019998670006645273, - "loss": 46.0, - "step": 32248 - }, - { - "epoch": 5.19348605016305, - "grad_norm": 0.0007428644457831979, - "learning_rate": 0.00019998669924134913, - "loss": 46.0, - "step": 32249 - }, - { - "epoch": 5.193647087241837, - "grad_norm": 0.014840604737401009, - "learning_rate": 0.00019998669841621994, - "loss": 46.0, - "step": 32250 - }, - { - "epoch": 5.193808124320625, - "grad_norm": 0.0019372176611796021, - "learning_rate": 0.00019998669759106513, - "loss": 46.0, - "step": 32251 - }, - { - "epoch": 5.193969161399412, - "grad_norm": 0.003242619801312685, - "learning_rate": 0.00019998669676588477, - "loss": 46.0, - "step": 32252 - }, - { - "epoch": 5.1941301984782, - "grad_norm": 0.006310049444437027, - "learning_rate": 0.00019998669594067882, - "loss": 46.0, - "step": 32253 - }, - { - "epoch": 5.194291235556987, - "grad_norm": 0.0031712206546217203, - "learning_rate": 0.00019998669511544725, - "loss": 46.0, - "step": 32254 - }, - { - "epoch": 5.1944522726357745, - "grad_norm": 0.026464257389307022, - "learning_rate": 0.0001999866942901901, - "loss": 46.0, - "step": 32255 - }, - { - "epoch": 5.194613309714562, - "grad_norm": 0.011263553984463215, - "learning_rate": 0.00019998669346490736, - "loss": 46.0, - "step": 32256 - }, - { - "epoch": 5.194774346793349, - "grad_norm": 0.0027723631355911493, - "learning_rate": 0.00019998669263959903, - "loss": 46.0, - "step": 32257 - }, - { - "epoch": 5.194935383872137, - "grad_norm": 0.0026018302887678146, - "learning_rate": 0.0001999866918142651, - "loss": 46.0, - "step": 32258 - }, - { - "epoch": 5.195096420950924, - "grad_norm": 0.006990714464336634, - "learning_rate": 0.00019998669098890556, - "loss": 46.0, - "step": 32259 - }, - { - "epoch": 5.195257458029712, - "grad_norm": 0.021532442420721054, - "learning_rate": 0.00019998669016352047, - "loss": 46.0, - "step": 32260 - }, - { - "epoch": 5.195418495108498, - "grad_norm": 0.0025244515854865313, - "learning_rate": 0.00019998668933810977, - "loss": 46.0, - "step": 32261 - }, - { - "epoch": 5.195579532187286, - "grad_norm": 0.0014430736191570759, - "learning_rate": 0.00019998668851267345, - "loss": 46.0, - "step": 32262 - }, - { - "epoch": 5.195740569266073, - "grad_norm": 0.0033355862833559513, - "learning_rate": 0.00019998668768721157, - "loss": 46.0, - "step": 32263 - }, - { - "epoch": 5.195901606344861, - "grad_norm": 0.0025123562663793564, - "learning_rate": 0.0001999866868617241, - "loss": 46.0, - "step": 32264 - }, - { - "epoch": 5.196062643423648, - "grad_norm": 0.003933960106223822, - "learning_rate": 0.00019998668603621105, - "loss": 46.0, - "step": 32265 - }, - { - "epoch": 5.1962236805024355, - "grad_norm": 0.005743119399994612, - "learning_rate": 0.0001999866852106724, - "loss": 46.0, - "step": 32266 - }, - { - "epoch": 5.196384717581223, - "grad_norm": 0.003340854775160551, - "learning_rate": 0.00019998668438510813, - "loss": 46.0, - "step": 32267 - }, - { - "epoch": 5.1965457546600105, - "grad_norm": 0.010091559961438179, - "learning_rate": 0.0001999866835595183, - "loss": 46.0, - "step": 32268 - }, - { - "epoch": 5.196706791738798, - "grad_norm": 0.001893533393740654, - "learning_rate": 0.00019998668273390284, - "loss": 46.0, - "step": 32269 - }, - { - "epoch": 5.196867828817585, - "grad_norm": 0.014940696768462658, - "learning_rate": 0.00019998668190826182, - "loss": 46.0, - "step": 32270 - }, - { - "epoch": 5.197028865896373, - "grad_norm": 0.003325200639665127, - "learning_rate": 0.00019998668108259522, - "loss": 46.0, - "step": 32271 - }, - { - "epoch": 5.19718990297516, - "grad_norm": 0.008352204225957394, - "learning_rate": 0.000199986680256903, - "loss": 46.0, - "step": 32272 - }, - { - "epoch": 5.197350940053948, - "grad_norm": 0.006152221467345953, - "learning_rate": 0.0001999866794311852, - "loss": 46.0, - "step": 32273 - }, - { - "epoch": 5.197511977132735, - "grad_norm": 0.0026792038697749376, - "learning_rate": 0.0001999866786054418, - "loss": 46.0, - "step": 32274 - }, - { - "epoch": 5.197673014211523, - "grad_norm": 0.0010549160651862621, - "learning_rate": 0.00019998667777967283, - "loss": 46.0, - "step": 32275 - }, - { - "epoch": 5.197834051290309, - "grad_norm": 0.03546413406729698, - "learning_rate": 0.00019998667695387827, - "loss": 46.0, - "step": 32276 - }, - { - "epoch": 5.197995088369097, - "grad_norm": 0.006419955287128687, - "learning_rate": 0.00019998667612805812, - "loss": 46.0, - "step": 32277 - }, - { - "epoch": 5.198156125447884, - "grad_norm": 0.00393195915967226, - "learning_rate": 0.00019998667530221232, - "loss": 46.0, - "step": 32278 - }, - { - "epoch": 5.1983171625266715, - "grad_norm": 0.00228667794726789, - "learning_rate": 0.000199986674476341, - "loss": 46.0, - "step": 32279 - }, - { - "epoch": 5.198478199605459, - "grad_norm": 0.013646171428263187, - "learning_rate": 0.00019998667365044405, - "loss": 46.0, - "step": 32280 - }, - { - "epoch": 5.198639236684246, - "grad_norm": 0.007861576974391937, - "learning_rate": 0.00019998667282452153, - "loss": 46.0, - "step": 32281 - }, - { - "epoch": 5.198800273763034, - "grad_norm": 0.021162524819374084, - "learning_rate": 0.0001999866719985734, - "loss": 46.0, - "step": 32282 - }, - { - "epoch": 5.198961310841821, - "grad_norm": 0.0029266648925840855, - "learning_rate": 0.0001999866711725997, - "loss": 46.0, - "step": 32283 - }, - { - "epoch": 5.199122347920609, - "grad_norm": 0.013833215460181236, - "learning_rate": 0.0001999866703466004, - "loss": 46.0, - "step": 32284 - }, - { - "epoch": 5.199283384999396, - "grad_norm": 0.009972468949854374, - "learning_rate": 0.0001999866695205755, - "loss": 46.0, - "step": 32285 - }, - { - "epoch": 5.199444422078184, - "grad_norm": 0.01192058902233839, - "learning_rate": 0.000199986668694525, - "loss": 46.0, - "step": 32286 - }, - { - "epoch": 5.199605459156971, - "grad_norm": 0.0017158373957499862, - "learning_rate": 0.00019998666786844892, - "loss": 46.0, - "step": 32287 - }, - { - "epoch": 5.199766496235759, - "grad_norm": 0.0027622284833341837, - "learning_rate": 0.00019998666704234728, - "loss": 46.0, - "step": 32288 - }, - { - "epoch": 5.199927533314546, - "grad_norm": 0.001802717219106853, - "learning_rate": 0.00019998666621622, - "loss": 46.0, - "step": 32289 - }, - { - "epoch": 5.2000885703933335, - "grad_norm": 0.0036672374699264765, - "learning_rate": 0.00019998666539006713, - "loss": 46.0, - "step": 32290 - }, - { - "epoch": 5.20024960747212, - "grad_norm": 0.012927980162203312, - "learning_rate": 0.0001999866645638887, - "loss": 46.0, - "step": 32291 - }, - { - "epoch": 5.2004106445509075, - "grad_norm": 0.0068999649956822395, - "learning_rate": 0.00019998666373768467, - "loss": 46.0, - "step": 32292 - }, - { - "epoch": 5.200571681629695, - "grad_norm": 0.002394864335656166, - "learning_rate": 0.00019998666291145504, - "loss": 46.0, - "step": 32293 - }, - { - "epoch": 5.200732718708482, - "grad_norm": 0.006971741560846567, - "learning_rate": 0.00019998666208519982, - "loss": 46.0, - "step": 32294 - }, - { - "epoch": 5.20089375578727, - "grad_norm": 0.00999845378100872, - "learning_rate": 0.000199986661258919, - "loss": 46.0, - "step": 32295 - }, - { - "epoch": 5.201054792866057, - "grad_norm": 0.014367658644914627, - "learning_rate": 0.0001999866604326126, - "loss": 46.0, - "step": 32296 - }, - { - "epoch": 5.201215829944845, - "grad_norm": 0.0010793162509799004, - "learning_rate": 0.00019998665960628063, - "loss": 46.0, - "step": 32297 - }, - { - "epoch": 5.201376867023632, - "grad_norm": 0.005863463971763849, - "learning_rate": 0.000199986658779923, - "loss": 46.0, - "step": 32298 - }, - { - "epoch": 5.20153790410242, - "grad_norm": 0.004061252810060978, - "learning_rate": 0.00019998665795353986, - "loss": 46.0, - "step": 32299 - }, - { - "epoch": 5.201698941181207, - "grad_norm": 0.012200465425848961, - "learning_rate": 0.00019998665712713106, - "loss": 46.0, - "step": 32300 - }, - { - "epoch": 5.201859978259995, - "grad_norm": 0.00080094113945961, - "learning_rate": 0.00019998665630069674, - "loss": 46.0, - "step": 32301 - }, - { - "epoch": 5.202021015338782, - "grad_norm": 0.0070303515531122684, - "learning_rate": 0.00019998665547423677, - "loss": 46.0, - "step": 32302 - }, - { - "epoch": 5.2021820524175695, - "grad_norm": 0.0014221030287444592, - "learning_rate": 0.00019998665464775124, - "loss": 46.0, - "step": 32303 - }, - { - "epoch": 5.202343089496357, - "grad_norm": 0.006524026393890381, - "learning_rate": 0.0001999866538212401, - "loss": 46.0, - "step": 32304 - }, - { - "epoch": 5.202504126575144, - "grad_norm": 0.003949557896703482, - "learning_rate": 0.00019998665299470337, - "loss": 46.0, - "step": 32305 - }, - { - "epoch": 5.202665163653931, - "grad_norm": 0.003168280702084303, - "learning_rate": 0.00019998665216814106, - "loss": 46.0, - "step": 32306 - }, - { - "epoch": 5.202826200732718, - "grad_norm": 0.001541856792755425, - "learning_rate": 0.00019998665134155315, - "loss": 46.0, - "step": 32307 - }, - { - "epoch": 5.202987237811506, - "grad_norm": 0.009249972179532051, - "learning_rate": 0.00019998665051493966, - "loss": 46.0, - "step": 32308 - }, - { - "epoch": 5.203148274890293, - "grad_norm": 0.0028835709672421217, - "learning_rate": 0.00019998664968830056, - "loss": 46.0, - "step": 32309 - }, - { - "epoch": 5.203309311969081, - "grad_norm": 0.003160462249070406, - "learning_rate": 0.0001999866488616359, - "loss": 46.0, - "step": 32310 - }, - { - "epoch": 5.203470349047868, - "grad_norm": 0.012947812676429749, - "learning_rate": 0.00019998664803494562, - "loss": 46.0, - "step": 32311 - }, - { - "epoch": 5.203631386126656, - "grad_norm": 0.005402395036071539, - "learning_rate": 0.00019998664720822972, - "loss": 46.0, - "step": 32312 - }, - { - "epoch": 5.203792423205443, - "grad_norm": 0.014578593894839287, - "learning_rate": 0.0001999866463814883, - "loss": 46.0, - "step": 32313 - }, - { - "epoch": 5.203953460284231, - "grad_norm": 0.0032998265232890844, - "learning_rate": 0.00019998664555472123, - "loss": 46.0, - "step": 32314 - }, - { - "epoch": 5.204114497363018, - "grad_norm": 0.00633576326072216, - "learning_rate": 0.0001999866447279286, - "loss": 46.0, - "step": 32315 - }, - { - "epoch": 5.2042755344418055, - "grad_norm": 0.0073896548710763454, - "learning_rate": 0.00019998664390111036, - "loss": 46.0, - "step": 32316 - }, - { - "epoch": 5.204436571520593, - "grad_norm": 0.006716161035001278, - "learning_rate": 0.00019998664307426656, - "loss": 46.0, - "step": 32317 - }, - { - "epoch": 5.20459760859938, - "grad_norm": 0.007239084225147963, - "learning_rate": 0.00019998664224739714, - "loss": 46.0, - "step": 32318 - }, - { - "epoch": 5.204758645678168, - "grad_norm": 0.003839393611997366, - "learning_rate": 0.00019998664142050214, - "loss": 46.0, - "step": 32319 - }, - { - "epoch": 5.204919682756954, - "grad_norm": 0.0026037844363600016, - "learning_rate": 0.00019998664059358152, - "loss": 46.0, - "step": 32320 - }, - { - "epoch": 5.205080719835742, - "grad_norm": 0.013562245294451714, - "learning_rate": 0.00019998663976663535, - "loss": 46.0, - "step": 32321 - }, - { - "epoch": 5.205241756914529, - "grad_norm": 0.0033752210438251495, - "learning_rate": 0.00019998663893966355, - "loss": 46.0, - "step": 32322 - }, - { - "epoch": 5.205402793993317, - "grad_norm": 0.008500763215124607, - "learning_rate": 0.00019998663811266617, - "loss": 46.0, - "step": 32323 - }, - { - "epoch": 5.205563831072104, - "grad_norm": 0.003265803214162588, - "learning_rate": 0.00019998663728564324, - "loss": 46.0, - "step": 32324 - }, - { - "epoch": 5.205724868150892, - "grad_norm": 0.002039253478869796, - "learning_rate": 0.00019998663645859468, - "loss": 46.0, - "step": 32325 - }, - { - "epoch": 5.205885905229679, - "grad_norm": 0.004213891923427582, - "learning_rate": 0.00019998663563152054, - "loss": 46.0, - "step": 32326 - }, - { - "epoch": 5.2060469423084665, - "grad_norm": 0.0023148925974965096, - "learning_rate": 0.0001999866348044208, - "loss": 46.0, - "step": 32327 - }, - { - "epoch": 5.206207979387254, - "grad_norm": 0.010200661607086658, - "learning_rate": 0.00019998663397729547, - "loss": 46.0, - "step": 32328 - }, - { - "epoch": 5.206369016466041, - "grad_norm": 0.0037055055145174265, - "learning_rate": 0.00019998663315014454, - "loss": 46.0, - "step": 32329 - }, - { - "epoch": 5.206530053544829, - "grad_norm": 0.0033718347549438477, - "learning_rate": 0.00019998663232296803, - "loss": 46.0, - "step": 32330 - }, - { - "epoch": 5.206691090623616, - "grad_norm": 0.00187536736484617, - "learning_rate": 0.00019998663149576592, - "loss": 46.0, - "step": 32331 - }, - { - "epoch": 5.206852127702404, - "grad_norm": 0.02099069394171238, - "learning_rate": 0.00019998663066853823, - "loss": 46.0, - "step": 32332 - }, - { - "epoch": 5.207013164781191, - "grad_norm": 0.007152533624321222, - "learning_rate": 0.00019998662984128495, - "loss": 46.0, - "step": 32333 - }, - { - "epoch": 5.207174201859978, - "grad_norm": 0.0036567014176398516, - "learning_rate": 0.0001999866290140061, - "loss": 46.0, - "step": 32334 - }, - { - "epoch": 5.207335238938765, - "grad_norm": 0.003594242734834552, - "learning_rate": 0.0001999866281867016, - "loss": 46.0, - "step": 32335 - }, - { - "epoch": 5.207496276017553, - "grad_norm": 0.0075663370080292225, - "learning_rate": 0.00019998662735937154, - "loss": 46.0, - "step": 32336 - }, - { - "epoch": 5.20765731309634, - "grad_norm": 0.005721709690988064, - "learning_rate": 0.0001999866265320159, - "loss": 46.0, - "step": 32337 - }, - { - "epoch": 5.207818350175128, - "grad_norm": 0.004641632083803415, - "learning_rate": 0.00019998662570463465, - "loss": 46.0, - "step": 32338 - }, - { - "epoch": 5.207979387253915, - "grad_norm": 0.0018439932027831674, - "learning_rate": 0.00019998662487722782, - "loss": 46.0, - "step": 32339 - }, - { - "epoch": 5.2081404243327025, - "grad_norm": 0.00573316216468811, - "learning_rate": 0.0001999866240497954, - "loss": 46.0, - "step": 32340 - }, - { - "epoch": 5.20830146141149, - "grad_norm": 0.007510887458920479, - "learning_rate": 0.00019998662322233738, - "loss": 46.0, - "step": 32341 - }, - { - "epoch": 5.208462498490277, - "grad_norm": 0.00951984990388155, - "learning_rate": 0.00019998662239485376, - "loss": 46.0, - "step": 32342 - }, - { - "epoch": 5.208623535569065, - "grad_norm": 0.0018545144703239202, - "learning_rate": 0.00019998662156734458, - "loss": 46.0, - "step": 32343 - }, - { - "epoch": 5.208784572647852, - "grad_norm": 0.00960698164999485, - "learning_rate": 0.0001999866207398098, - "loss": 46.0, - "step": 32344 - }, - { - "epoch": 5.20894560972664, - "grad_norm": 0.008608940988779068, - "learning_rate": 0.00019998661991224941, - "loss": 46.0, - "step": 32345 - }, - { - "epoch": 5.209106646805427, - "grad_norm": 0.005764259956777096, - "learning_rate": 0.0001999866190846634, - "loss": 46.0, - "step": 32346 - }, - { - "epoch": 5.209267683884215, - "grad_norm": 0.0076390160247683525, - "learning_rate": 0.00019998661825705184, - "loss": 46.0, - "step": 32347 - }, - { - "epoch": 5.209428720963002, - "grad_norm": 0.006310062017291784, - "learning_rate": 0.0001999866174294147, - "loss": 46.0, - "step": 32348 - }, - { - "epoch": 5.209589758041789, - "grad_norm": 0.0040053133852779865, - "learning_rate": 0.00019998661660175192, - "loss": 46.0, - "step": 32349 - }, - { - "epoch": 5.209750795120576, - "grad_norm": 0.002568659605458379, - "learning_rate": 0.0001999866157740636, - "loss": 46.0, - "step": 32350 - }, - { - "epoch": 5.209911832199364, - "grad_norm": 0.01154275145381689, - "learning_rate": 0.00019998661494634968, - "loss": 46.0, - "step": 32351 - }, - { - "epoch": 5.210072869278151, - "grad_norm": 0.012137651443481445, - "learning_rate": 0.00019998661411861014, - "loss": 46.0, - "step": 32352 - }, - { - "epoch": 5.2102339063569385, - "grad_norm": 0.003844765480607748, - "learning_rate": 0.00019998661329084504, - "loss": 46.0, - "step": 32353 - }, - { - "epoch": 5.210394943435726, - "grad_norm": 0.006151413079351187, - "learning_rate": 0.00019998661246305432, - "loss": 46.0, - "step": 32354 - }, - { - "epoch": 5.210555980514513, - "grad_norm": 0.0009811024647206068, - "learning_rate": 0.00019998661163523801, - "loss": 46.0, - "step": 32355 - }, - { - "epoch": 5.210717017593301, - "grad_norm": 0.02592357061803341, - "learning_rate": 0.00019998661080739612, - "loss": 46.0, - "step": 32356 - }, - { - "epoch": 5.210878054672088, - "grad_norm": 0.023151839151978493, - "learning_rate": 0.00019998660997952867, - "loss": 46.0, - "step": 32357 - }, - { - "epoch": 5.211039091750876, - "grad_norm": 0.003335629589855671, - "learning_rate": 0.00019998660915163558, - "loss": 46.0, - "step": 32358 - }, - { - "epoch": 5.211200128829663, - "grad_norm": 0.007951409555971622, - "learning_rate": 0.00019998660832371693, - "loss": 46.0, - "step": 32359 - }, - { - "epoch": 5.211361165908451, - "grad_norm": 0.008815682493150234, - "learning_rate": 0.00019998660749577266, - "loss": 46.0, - "step": 32360 - }, - { - "epoch": 5.211522202987238, - "grad_norm": 0.0033207014203071594, - "learning_rate": 0.00019998660666780283, - "loss": 46.0, - "step": 32361 - }, - { - "epoch": 5.211683240066026, - "grad_norm": 0.004477128852158785, - "learning_rate": 0.0001999866058398074, - "loss": 46.0, - "step": 32362 - }, - { - "epoch": 5.211844277144813, - "grad_norm": 0.0022156641352921724, - "learning_rate": 0.00019998660501178636, - "loss": 46.0, - "step": 32363 - }, - { - "epoch": 5.2120053142236, - "grad_norm": 0.00433557340875268, - "learning_rate": 0.00019998660418373975, - "loss": 46.0, - "step": 32364 - }, - { - "epoch": 5.212166351302387, - "grad_norm": 0.0011965023586526513, - "learning_rate": 0.00019998660335566752, - "loss": 46.0, - "step": 32365 - }, - { - "epoch": 5.2123273883811745, - "grad_norm": 0.011845373548567295, - "learning_rate": 0.00019998660252756973, - "loss": 46.0, - "step": 32366 - }, - { - "epoch": 5.212488425459962, - "grad_norm": 0.00622065644711256, - "learning_rate": 0.00019998660169944632, - "loss": 46.0, - "step": 32367 - }, - { - "epoch": 5.212649462538749, - "grad_norm": 0.004135714378207922, - "learning_rate": 0.00019998660087129736, - "loss": 46.0, - "step": 32368 - }, - { - "epoch": 5.212810499617537, - "grad_norm": 0.00586656853556633, - "learning_rate": 0.00019998660004312278, - "loss": 46.0, - "step": 32369 - }, - { - "epoch": 5.212971536696324, - "grad_norm": 0.0019765174947679043, - "learning_rate": 0.0001999865992149226, - "loss": 46.0, - "step": 32370 - }, - { - "epoch": 5.213132573775112, - "grad_norm": 0.005308425985276699, - "learning_rate": 0.00019998659838669683, - "loss": 46.0, - "step": 32371 - }, - { - "epoch": 5.213293610853899, - "grad_norm": 0.0031161182560026646, - "learning_rate": 0.0001999865975584455, - "loss": 46.0, - "step": 32372 - }, - { - "epoch": 5.213454647932687, - "grad_norm": 0.0043140072375535965, - "learning_rate": 0.00019998659673016854, - "loss": 46.0, - "step": 32373 - }, - { - "epoch": 5.213615685011474, - "grad_norm": 0.0022940579801797867, - "learning_rate": 0.00019998659590186603, - "loss": 46.0, - "step": 32374 - }, - { - "epoch": 5.2137767220902616, - "grad_norm": 0.0057122004218399525, - "learning_rate": 0.00019998659507353787, - "loss": 46.0, - "step": 32375 - }, - { - "epoch": 5.213937759169049, - "grad_norm": 0.010407740250229836, - "learning_rate": 0.00019998659424518418, - "loss": 46.0, - "step": 32376 - }, - { - "epoch": 5.2140987962478365, - "grad_norm": 0.0037147626280784607, - "learning_rate": 0.00019998659341680485, - "loss": 46.0, - "step": 32377 - }, - { - "epoch": 5.214259833326624, - "grad_norm": 0.0026124482974410057, - "learning_rate": 0.00019998659258839996, - "loss": 46.0, - "step": 32378 - }, - { - "epoch": 5.2144208704054105, - "grad_norm": 0.0011078146053478122, - "learning_rate": 0.00019998659175996948, - "loss": 46.0, - "step": 32379 - }, - { - "epoch": 5.214581907484198, - "grad_norm": 0.0126641308888793, - "learning_rate": 0.00019998659093151336, - "loss": 46.0, - "step": 32380 - }, - { - "epoch": 5.214742944562985, - "grad_norm": 0.014025966636836529, - "learning_rate": 0.0001999865901030317, - "loss": 46.0, - "step": 32381 - }, - { - "epoch": 5.214903981641773, - "grad_norm": 0.005641443654894829, - "learning_rate": 0.00019998658927452442, - "loss": 46.0, - "step": 32382 - }, - { - "epoch": 5.21506501872056, - "grad_norm": 0.0034278188832104206, - "learning_rate": 0.0001999865884459916, - "loss": 46.0, - "step": 32383 - }, - { - "epoch": 5.215226055799348, - "grad_norm": 0.00983754824846983, - "learning_rate": 0.00019998658761743315, - "loss": 46.0, - "step": 32384 - }, - { - "epoch": 5.215387092878135, - "grad_norm": 0.010542117990553379, - "learning_rate": 0.0001999865867888491, - "loss": 46.0, - "step": 32385 - }, - { - "epoch": 5.215548129956923, - "grad_norm": 0.0038853143341839314, - "learning_rate": 0.00019998658596023946, - "loss": 46.0, - "step": 32386 - }, - { - "epoch": 5.21570916703571, - "grad_norm": 0.0027490260545164347, - "learning_rate": 0.00019998658513160425, - "loss": 46.0, - "step": 32387 - }, - { - "epoch": 5.2158702041144975, - "grad_norm": 0.006599657703191042, - "learning_rate": 0.0001999865843029434, - "loss": 46.0, - "step": 32388 - }, - { - "epoch": 5.216031241193285, - "grad_norm": 0.010191729292273521, - "learning_rate": 0.000199986583474257, - "loss": 46.0, - "step": 32389 - }, - { - "epoch": 5.216192278272072, - "grad_norm": 0.0063103921711444855, - "learning_rate": 0.00019998658264554502, - "loss": 46.0, - "step": 32390 - }, - { - "epoch": 5.21635331535086, - "grad_norm": 0.0011862819083034992, - "learning_rate": 0.00019998658181680744, - "loss": 46.0, - "step": 32391 - }, - { - "epoch": 5.216514352429647, - "grad_norm": 0.005997427739202976, - "learning_rate": 0.00019998658098804425, - "loss": 46.0, - "step": 32392 - }, - { - "epoch": 5.216675389508434, - "grad_norm": 0.004797309171408415, - "learning_rate": 0.0001999865801592555, - "loss": 46.0, - "step": 32393 - }, - { - "epoch": 5.216836426587221, - "grad_norm": 0.01907212659716606, - "learning_rate": 0.0001999865793304411, - "loss": 46.0, - "step": 32394 - }, - { - "epoch": 5.216997463666009, - "grad_norm": 0.031080951914191246, - "learning_rate": 0.00019998657850160118, - "loss": 46.0, - "step": 32395 - }, - { - "epoch": 5.217158500744796, - "grad_norm": 0.0020203404128551483, - "learning_rate": 0.0001999865776727356, - "loss": 46.0, - "step": 32396 - }, - { - "epoch": 5.217319537823584, - "grad_norm": 0.013640389777719975, - "learning_rate": 0.00019998657684384448, - "loss": 46.0, - "step": 32397 - }, - { - "epoch": 5.217480574902371, - "grad_norm": 0.0028299863915890455, - "learning_rate": 0.00019998657601492776, - "loss": 46.0, - "step": 32398 - }, - { - "epoch": 5.217641611981159, - "grad_norm": 0.007689146790653467, - "learning_rate": 0.00019998657518598544, - "loss": 46.0, - "step": 32399 - }, - { - "epoch": 5.217802649059946, - "grad_norm": 0.003342656884342432, - "learning_rate": 0.00019998657435701752, - "loss": 46.0, - "step": 32400 - }, - { - "epoch": 5.2179636861387335, - "grad_norm": 0.0022177011705935, - "learning_rate": 0.00019998657352802402, - "loss": 46.0, - "step": 32401 - }, - { - "epoch": 5.218124723217521, - "grad_norm": 0.00589415431022644, - "learning_rate": 0.00019998657269900492, - "loss": 46.0, - "step": 32402 - }, - { - "epoch": 5.218285760296308, - "grad_norm": 0.016138875856995583, - "learning_rate": 0.00019998657186996025, - "loss": 46.0, - "step": 32403 - }, - { - "epoch": 5.218446797375096, - "grad_norm": 0.005968274548649788, - "learning_rate": 0.00019998657104088995, - "loss": 46.0, - "step": 32404 - }, - { - "epoch": 5.218607834453883, - "grad_norm": 0.006998554803431034, - "learning_rate": 0.0001999865702117941, - "loss": 46.0, - "step": 32405 - }, - { - "epoch": 5.218768871532671, - "grad_norm": 0.002928377129137516, - "learning_rate": 0.0001999865693826726, - "loss": 46.0, - "step": 32406 - }, - { - "epoch": 5.218929908611457, - "grad_norm": 0.0024839399848133326, - "learning_rate": 0.00019998656855352558, - "loss": 46.0, - "step": 32407 - }, - { - "epoch": 5.219090945690245, - "grad_norm": 0.0053073749877512455, - "learning_rate": 0.00019998656772435291, - "loss": 46.0, - "step": 32408 - }, - { - "epoch": 5.219251982769032, - "grad_norm": 0.004488674458116293, - "learning_rate": 0.00019998656689515469, - "loss": 46.0, - "step": 32409 - }, - { - "epoch": 5.21941301984782, - "grad_norm": 0.00266914046369493, - "learning_rate": 0.00019998656606593087, - "loss": 46.0, - "step": 32410 - }, - { - "epoch": 5.219574056926607, - "grad_norm": 0.00374097959138453, - "learning_rate": 0.00019998656523668147, - "loss": 46.0, - "step": 32411 - }, - { - "epoch": 5.219735094005395, - "grad_norm": 0.0014494635397568345, - "learning_rate": 0.00019998656440740645, - "loss": 46.0, - "step": 32412 - }, - { - "epoch": 5.219896131084182, - "grad_norm": 0.0036898301914334297, - "learning_rate": 0.00019998656357810585, - "loss": 46.0, - "step": 32413 - }, - { - "epoch": 5.2200571681629695, - "grad_norm": 0.002010650932788849, - "learning_rate": 0.00019998656274877966, - "loss": 46.0, - "step": 32414 - }, - { - "epoch": 5.220218205241757, - "grad_norm": 0.006113533396273851, - "learning_rate": 0.00019998656191942785, - "loss": 46.0, - "step": 32415 - }, - { - "epoch": 5.220379242320544, - "grad_norm": 0.004528458695858717, - "learning_rate": 0.00019998656109005049, - "loss": 46.0, - "step": 32416 - }, - { - "epoch": 5.220540279399332, - "grad_norm": 0.0020464423578232527, - "learning_rate": 0.00019998656026064753, - "loss": 46.0, - "step": 32417 - }, - { - "epoch": 5.220701316478119, - "grad_norm": 0.003225229447707534, - "learning_rate": 0.00019998655943121897, - "loss": 46.0, - "step": 32418 - }, - { - "epoch": 5.220862353556907, - "grad_norm": 0.0021926143672317266, - "learning_rate": 0.0001999865586017648, - "loss": 46.0, - "step": 32419 - }, - { - "epoch": 5.221023390635694, - "grad_norm": 0.006882375106215477, - "learning_rate": 0.00019998655777228507, - "loss": 46.0, - "step": 32420 - }, - { - "epoch": 5.221184427714482, - "grad_norm": 0.0009305743151344359, - "learning_rate": 0.00019998655694277974, - "loss": 46.0, - "step": 32421 - }, - { - "epoch": 5.221345464793268, - "grad_norm": 0.002799306297674775, - "learning_rate": 0.00019998655611324883, - "loss": 46.0, - "step": 32422 - }, - { - "epoch": 5.221506501872056, - "grad_norm": 0.0060835410840809345, - "learning_rate": 0.0001999865552836923, - "loss": 46.0, - "step": 32423 - }, - { - "epoch": 5.221667538950843, - "grad_norm": 0.0019642033148556948, - "learning_rate": 0.00019998655445411018, - "loss": 46.0, - "step": 32424 - }, - { - "epoch": 5.221828576029631, - "grad_norm": 0.00123660359531641, - "learning_rate": 0.0001999865536245025, - "loss": 46.0, - "step": 32425 - }, - { - "epoch": 5.221989613108418, - "grad_norm": 0.015515579842031002, - "learning_rate": 0.0001999865527948692, - "loss": 46.0, - "step": 32426 - }, - { - "epoch": 5.2221506501872055, - "grad_norm": 0.006554416846483946, - "learning_rate": 0.00019998655196521033, - "loss": 46.0, - "step": 32427 - }, - { - "epoch": 5.222311687265993, - "grad_norm": 0.0029202012810856104, - "learning_rate": 0.00019998655113552584, - "loss": 46.0, - "step": 32428 - }, - { - "epoch": 5.22247272434478, - "grad_norm": 0.0010103315580636263, - "learning_rate": 0.00019998655030581581, - "loss": 46.0, - "step": 32429 - }, - { - "epoch": 5.222633761423568, - "grad_norm": 0.024140065535902977, - "learning_rate": 0.00019998654947608015, - "loss": 46.0, - "step": 32430 - }, - { - "epoch": 5.222794798502355, - "grad_norm": 0.00398417329415679, - "learning_rate": 0.0001999865486463189, - "loss": 46.0, - "step": 32431 - }, - { - "epoch": 5.222955835581143, - "grad_norm": 0.006070380099117756, - "learning_rate": 0.00019998654781653205, - "loss": 46.0, - "step": 32432 - }, - { - "epoch": 5.22311687265993, - "grad_norm": 0.0076614501886069775, - "learning_rate": 0.00019998654698671962, - "loss": 46.0, - "step": 32433 - }, - { - "epoch": 5.223277909738718, - "grad_norm": 0.004210289567708969, - "learning_rate": 0.00019998654615688158, - "loss": 46.0, - "step": 32434 - }, - { - "epoch": 5.223438946817505, - "grad_norm": 0.0008750613196752965, - "learning_rate": 0.000199986545327018, - "loss": 46.0, - "step": 32435 - }, - { - "epoch": 5.2235999838962925, - "grad_norm": 0.0032051270827651024, - "learning_rate": 0.0001999865444971288, - "loss": 46.0, - "step": 32436 - }, - { - "epoch": 5.223761020975079, - "grad_norm": 0.004105807282030582, - "learning_rate": 0.000199986543667214, - "loss": 46.0, - "step": 32437 - }, - { - "epoch": 5.223922058053867, - "grad_norm": 0.02621007151901722, - "learning_rate": 0.00019998654283727362, - "loss": 46.0, - "step": 32438 - }, - { - "epoch": 5.224083095132654, - "grad_norm": 0.0033084868919104338, - "learning_rate": 0.0001999865420073076, - "loss": 46.0, - "step": 32439 - }, - { - "epoch": 5.2242441322114415, - "grad_norm": 0.007322421297430992, - "learning_rate": 0.00019998654117731607, - "loss": 46.0, - "step": 32440 - }, - { - "epoch": 5.224405169290229, - "grad_norm": 0.006035187281668186, - "learning_rate": 0.00019998654034729892, - "loss": 46.0, - "step": 32441 - }, - { - "epoch": 5.224566206369016, - "grad_norm": 0.0019522428046911955, - "learning_rate": 0.00019998653951725615, - "loss": 46.0, - "step": 32442 - }, - { - "epoch": 5.224727243447804, - "grad_norm": 0.004291558638215065, - "learning_rate": 0.0001999865386871878, - "loss": 46.0, - "step": 32443 - }, - { - "epoch": 5.224888280526591, - "grad_norm": 0.007751860190182924, - "learning_rate": 0.00019998653785709386, - "loss": 46.0, - "step": 32444 - }, - { - "epoch": 5.225049317605379, - "grad_norm": 0.015379723161458969, - "learning_rate": 0.00019998653702697436, - "loss": 46.0, - "step": 32445 - }, - { - "epoch": 5.225210354684166, - "grad_norm": 0.003933741711080074, - "learning_rate": 0.00019998653619682924, - "loss": 46.0, - "step": 32446 - }, - { - "epoch": 5.225371391762954, - "grad_norm": 0.007469188887625933, - "learning_rate": 0.00019998653536665854, - "loss": 46.0, - "step": 32447 - }, - { - "epoch": 5.225532428841741, - "grad_norm": 0.005155201535671949, - "learning_rate": 0.00019998653453646222, - "loss": 46.0, - "step": 32448 - }, - { - "epoch": 5.2256934659205285, - "grad_norm": 0.0011988560436293483, - "learning_rate": 0.00019998653370624031, - "loss": 46.0, - "step": 32449 - }, - { - "epoch": 5.225854502999316, - "grad_norm": 0.0026310391258448362, - "learning_rate": 0.00019998653287599285, - "loss": 46.0, - "step": 32450 - }, - { - "epoch": 5.226015540078103, - "grad_norm": 0.008084907196462154, - "learning_rate": 0.00019998653204571974, - "loss": 46.0, - "step": 32451 - }, - { - "epoch": 5.22617657715689, - "grad_norm": 0.004009178373962641, - "learning_rate": 0.0001999865312154211, - "loss": 46.0, - "step": 32452 - }, - { - "epoch": 5.226337614235677, - "grad_norm": 0.013323145918548107, - "learning_rate": 0.00019998653038509685, - "loss": 46.0, - "step": 32453 - }, - { - "epoch": 5.226498651314465, - "grad_norm": 0.002848051954060793, - "learning_rate": 0.00019998652955474698, - "loss": 46.0, - "step": 32454 - }, - { - "epoch": 5.226659688393252, - "grad_norm": 0.002332792617380619, - "learning_rate": 0.00019998652872437156, - "loss": 46.0, - "step": 32455 - }, - { - "epoch": 5.22682072547204, - "grad_norm": 0.021011171862483025, - "learning_rate": 0.0001999865278939705, - "loss": 46.0, - "step": 32456 - }, - { - "epoch": 5.226981762550827, - "grad_norm": 0.0024412842467427254, - "learning_rate": 0.0001999865270635439, - "loss": 46.0, - "step": 32457 - }, - { - "epoch": 5.227142799629615, - "grad_norm": 0.011419717222452164, - "learning_rate": 0.0001999865262330917, - "loss": 46.0, - "step": 32458 - }, - { - "epoch": 5.227303836708402, - "grad_norm": 0.011326770298182964, - "learning_rate": 0.00019998652540261386, - "loss": 46.0, - "step": 32459 - }, - { - "epoch": 5.22746487378719, - "grad_norm": 0.0008496668888255954, - "learning_rate": 0.00019998652457211047, - "loss": 46.0, - "step": 32460 - }, - { - "epoch": 5.227625910865977, - "grad_norm": 0.002399976132437587, - "learning_rate": 0.0001999865237415815, - "loss": 46.0, - "step": 32461 - }, - { - "epoch": 5.2277869479447645, - "grad_norm": 0.0010485263774171472, - "learning_rate": 0.00019998652291102693, - "loss": 46.0, - "step": 32462 - }, - { - "epoch": 5.227947985023552, - "grad_norm": 0.006310475990176201, - "learning_rate": 0.00019998652208044675, - "loss": 46.0, - "step": 32463 - }, - { - "epoch": 5.228109022102339, - "grad_norm": 0.005123792216181755, - "learning_rate": 0.00019998652124984098, - "loss": 46.0, - "step": 32464 - }, - { - "epoch": 5.228270059181127, - "grad_norm": 0.02080768719315529, - "learning_rate": 0.00019998652041920963, - "loss": 46.0, - "step": 32465 - }, - { - "epoch": 5.228431096259913, - "grad_norm": 0.0018601218471303582, - "learning_rate": 0.0001999865195885527, - "loss": 46.0, - "step": 32466 - }, - { - "epoch": 5.228592133338701, - "grad_norm": 0.0029565643053501844, - "learning_rate": 0.00019998651875787014, - "loss": 46.0, - "step": 32467 - }, - { - "epoch": 5.228753170417488, - "grad_norm": 0.0011048543965443969, - "learning_rate": 0.00019998651792716202, - "loss": 46.0, - "step": 32468 - }, - { - "epoch": 5.228914207496276, - "grad_norm": 0.004862792789936066, - "learning_rate": 0.00019998651709642832, - "loss": 46.0, - "step": 32469 - }, - { - "epoch": 5.229075244575063, - "grad_norm": 0.007608181331306696, - "learning_rate": 0.00019998651626566897, - "loss": 46.0, - "step": 32470 - }, - { - "epoch": 5.229236281653851, - "grad_norm": 0.0020849204156547785, - "learning_rate": 0.00019998651543488407, - "loss": 46.0, - "step": 32471 - }, - { - "epoch": 5.229397318732638, - "grad_norm": 0.009699077345430851, - "learning_rate": 0.00019998651460407358, - "loss": 46.0, - "step": 32472 - }, - { - "epoch": 5.229558355811426, - "grad_norm": 0.0067525506019592285, - "learning_rate": 0.0001999865137732375, - "loss": 46.0, - "step": 32473 - }, - { - "epoch": 5.229719392890213, - "grad_norm": 0.002224602969363332, - "learning_rate": 0.00019998651294237584, - "loss": 46.0, - "step": 32474 - }, - { - "epoch": 5.2298804299690005, - "grad_norm": 0.009725421667098999, - "learning_rate": 0.00019998651211148856, - "loss": 46.0, - "step": 32475 - }, - { - "epoch": 5.230041467047788, - "grad_norm": 0.004606904461979866, - "learning_rate": 0.0001999865112805757, - "loss": 46.0, - "step": 32476 - }, - { - "epoch": 5.230202504126575, - "grad_norm": 0.004499922040849924, - "learning_rate": 0.00019998651044963724, - "loss": 46.0, - "step": 32477 - }, - { - "epoch": 5.230363541205363, - "grad_norm": 0.0031274249777197838, - "learning_rate": 0.00019998650961867317, - "loss": 46.0, - "step": 32478 - }, - { - "epoch": 5.23052457828415, - "grad_norm": 0.0032461306545883417, - "learning_rate": 0.00019998650878768354, - "loss": 46.0, - "step": 32479 - }, - { - "epoch": 5.230685615362938, - "grad_norm": 0.0018505065236240625, - "learning_rate": 0.00019998650795666832, - "loss": 46.0, - "step": 32480 - }, - { - "epoch": 5.230846652441724, - "grad_norm": 0.0035431047435849905, - "learning_rate": 0.00019998650712562752, - "loss": 46.0, - "step": 32481 - }, - { - "epoch": 5.231007689520512, - "grad_norm": 0.0033981550950556993, - "learning_rate": 0.0001999865062945611, - "loss": 46.0, - "step": 32482 - }, - { - "epoch": 5.231168726599299, - "grad_norm": 0.003470989875495434, - "learning_rate": 0.0001999865054634691, - "loss": 46.0, - "step": 32483 - }, - { - "epoch": 5.231329763678087, - "grad_norm": 0.008644778281450272, - "learning_rate": 0.0001999865046323515, - "loss": 46.0, - "step": 32484 - }, - { - "epoch": 5.231490800756874, - "grad_norm": 0.0020351686980575323, - "learning_rate": 0.00019998650380120833, - "loss": 46.0, - "step": 32485 - }, - { - "epoch": 5.231651837835662, - "grad_norm": 0.002281755208969116, - "learning_rate": 0.00019998650297003957, - "loss": 46.0, - "step": 32486 - }, - { - "epoch": 5.231812874914449, - "grad_norm": 0.0020727887749671936, - "learning_rate": 0.00019998650213884519, - "loss": 46.0, - "step": 32487 - }, - { - "epoch": 5.2319739119932365, - "grad_norm": 0.0014243468176573515, - "learning_rate": 0.00019998650130762525, - "loss": 46.0, - "step": 32488 - }, - { - "epoch": 5.232134949072024, - "grad_norm": 0.0031394653487950563, - "learning_rate": 0.0001999865004763797, - "loss": 46.0, - "step": 32489 - }, - { - "epoch": 5.232295986150811, - "grad_norm": 0.0024528398644179106, - "learning_rate": 0.00019998649964510855, - "loss": 46.0, - "step": 32490 - }, - { - "epoch": 5.232457023229599, - "grad_norm": 0.006793811451643705, - "learning_rate": 0.00019998649881381182, - "loss": 46.0, - "step": 32491 - }, - { - "epoch": 5.232618060308386, - "grad_norm": 0.0043118358589708805, - "learning_rate": 0.0001999864979824895, - "loss": 46.0, - "step": 32492 - }, - { - "epoch": 5.232779097387174, - "grad_norm": 0.007221119478344917, - "learning_rate": 0.0001999864971511416, - "loss": 46.0, - "step": 32493 - }, - { - "epoch": 5.232940134465961, - "grad_norm": 0.012481052428483963, - "learning_rate": 0.00019998649631976806, - "loss": 46.0, - "step": 32494 - }, - { - "epoch": 5.233101171544748, - "grad_norm": 0.0038691589143127203, - "learning_rate": 0.00019998649548836895, - "loss": 46.0, - "step": 32495 - }, - { - "epoch": 5.233262208623535, - "grad_norm": 0.002644366817548871, - "learning_rate": 0.0001999864946569443, - "loss": 46.0, - "step": 32496 - }, - { - "epoch": 5.233423245702323, - "grad_norm": 0.005674370098859072, - "learning_rate": 0.000199986493825494, - "loss": 46.0, - "step": 32497 - }, - { - "epoch": 5.23358428278111, - "grad_norm": 0.008699550293385983, - "learning_rate": 0.00019998649299401815, - "loss": 46.0, - "step": 32498 - }, - { - "epoch": 5.2337453198598975, - "grad_norm": 0.007826943881809711, - "learning_rate": 0.00019998649216251667, - "loss": 46.0, - "step": 32499 - }, - { - "epoch": 5.233906356938685, - "grad_norm": 0.007207693997770548, - "learning_rate": 0.00019998649133098963, - "loss": 46.0, - "step": 32500 - }, - { - "epoch": 5.2340673940174725, - "grad_norm": 0.0035860768985003233, - "learning_rate": 0.00019998649049943697, - "loss": 46.0, - "step": 32501 - }, - { - "epoch": 5.23422843109626, - "grad_norm": 0.0014584943419322371, - "learning_rate": 0.00019998648966785873, - "loss": 46.0, - "step": 32502 - }, - { - "epoch": 5.234389468175047, - "grad_norm": 0.0023908785078674555, - "learning_rate": 0.0001999864888362549, - "loss": 46.0, - "step": 32503 - }, - { - "epoch": 5.234550505253835, - "grad_norm": 0.013223525136709213, - "learning_rate": 0.00019998648800462549, - "loss": 46.0, - "step": 32504 - }, - { - "epoch": 5.234711542332622, - "grad_norm": 0.002594653284177184, - "learning_rate": 0.00019998648717297045, - "loss": 46.0, - "step": 32505 - }, - { - "epoch": 5.23487257941141, - "grad_norm": 0.0025989434216171503, - "learning_rate": 0.00019998648634128986, - "loss": 46.0, - "step": 32506 - }, - { - "epoch": 5.235033616490197, - "grad_norm": 0.004163894336670637, - "learning_rate": 0.0001999864855095837, - "loss": 46.0, - "step": 32507 - }, - { - "epoch": 5.235194653568985, - "grad_norm": 0.006227575708180666, - "learning_rate": 0.0001999864846778519, - "loss": 46.0, - "step": 32508 - }, - { - "epoch": 5.235355690647772, - "grad_norm": 0.0022246032021939754, - "learning_rate": 0.00019998648384609452, - "loss": 46.0, - "step": 32509 - }, - { - "epoch": 5.235516727726559, - "grad_norm": 0.0065980893559753895, - "learning_rate": 0.00019998648301431152, - "loss": 46.0, - "step": 32510 - }, - { - "epoch": 5.235677764805346, - "grad_norm": 0.0042998818680644035, - "learning_rate": 0.000199986482182503, - "loss": 46.0, - "step": 32511 - }, - { - "epoch": 5.2358388018841335, - "grad_norm": 0.009482955560088158, - "learning_rate": 0.00019998648135066883, - "loss": 46.0, - "step": 32512 - }, - { - "epoch": 5.235999838962921, - "grad_norm": 0.006263263523578644, - "learning_rate": 0.0001999864805188091, - "loss": 46.0, - "step": 32513 - }, - { - "epoch": 5.236160876041708, - "grad_norm": 0.009389905259013176, - "learning_rate": 0.00019998647968692373, - "loss": 46.0, - "step": 32514 - }, - { - "epoch": 5.236321913120496, - "grad_norm": 0.0022588225547224283, - "learning_rate": 0.00019998647885501286, - "loss": 46.0, - "step": 32515 - }, - { - "epoch": 5.236482950199283, - "grad_norm": 0.001993166981264949, - "learning_rate": 0.00019998647802307634, - "loss": 46.0, - "step": 32516 - }, - { - "epoch": 5.236643987278071, - "grad_norm": 0.010135490447282791, - "learning_rate": 0.0001999864771911142, - "loss": 46.0, - "step": 32517 - }, - { - "epoch": 5.236805024356858, - "grad_norm": 0.001166346250101924, - "learning_rate": 0.0001999864763591265, - "loss": 46.0, - "step": 32518 - }, - { - "epoch": 5.236966061435646, - "grad_norm": 0.005321672651916742, - "learning_rate": 0.00019998647552711322, - "loss": 46.0, - "step": 32519 - }, - { - "epoch": 5.237127098514433, - "grad_norm": 0.006660795770585537, - "learning_rate": 0.00019998647469507432, - "loss": 46.0, - "step": 32520 - }, - { - "epoch": 5.237288135593221, - "grad_norm": 0.004549749661237001, - "learning_rate": 0.00019998647386300987, - "loss": 46.0, - "step": 32521 - }, - { - "epoch": 5.237449172672008, - "grad_norm": 0.020205644890666008, - "learning_rate": 0.0001999864730309198, - "loss": 46.0, - "step": 32522 - }, - { - "epoch": 5.2376102097507955, - "grad_norm": 0.018811682239174843, - "learning_rate": 0.00019998647219880415, - "loss": 46.0, - "step": 32523 - }, - { - "epoch": 5.237771246829583, - "grad_norm": 0.005193459801375866, - "learning_rate": 0.0001999864713666629, - "loss": 46.0, - "step": 32524 - }, - { - "epoch": 5.2379322839083695, - "grad_norm": 0.011831403709948063, - "learning_rate": 0.00019998647053449603, - "loss": 46.0, - "step": 32525 - }, - { - "epoch": 5.238093320987157, - "grad_norm": 0.002070151036605239, - "learning_rate": 0.00019998646970230364, - "loss": 46.0, - "step": 32526 - }, - { - "epoch": 5.238254358065944, - "grad_norm": 0.0035476593766361475, - "learning_rate": 0.0001999864688700856, - "loss": 46.0, - "step": 32527 - }, - { - "epoch": 5.238415395144732, - "grad_norm": 0.0023394327145069838, - "learning_rate": 0.000199986468037842, - "loss": 46.0, - "step": 32528 - }, - { - "epoch": 5.238576432223519, - "grad_norm": 0.0028536287136375904, - "learning_rate": 0.00019998646720557277, - "loss": 46.0, - "step": 32529 - }, - { - "epoch": 5.238737469302307, - "grad_norm": 0.004566666204482317, - "learning_rate": 0.00019998646637327797, - "loss": 46.0, - "step": 32530 - }, - { - "epoch": 5.238898506381094, - "grad_norm": 0.008364316076040268, - "learning_rate": 0.00019998646554095762, - "loss": 46.0, - "step": 32531 - }, - { - "epoch": 5.239059543459882, - "grad_norm": 0.008646366186439991, - "learning_rate": 0.00019998646470861163, - "loss": 46.0, - "step": 32532 - }, - { - "epoch": 5.239220580538669, - "grad_norm": 0.0030348661821335554, - "learning_rate": 0.00019998646387624005, - "loss": 46.0, - "step": 32533 - }, - { - "epoch": 5.239381617617457, - "grad_norm": 0.0020744376815855503, - "learning_rate": 0.00019998646304384289, - "loss": 46.0, - "step": 32534 - }, - { - "epoch": 5.239542654696244, - "grad_norm": 0.012931537814438343, - "learning_rate": 0.00019998646221142013, - "loss": 46.0, - "step": 32535 - }, - { - "epoch": 5.2397036917750315, - "grad_norm": 0.0035900769289582968, - "learning_rate": 0.0001999864613789718, - "loss": 46.0, - "step": 32536 - }, - { - "epoch": 5.239864728853819, - "grad_norm": 0.003176985075697303, - "learning_rate": 0.00019998646054649786, - "loss": 46.0, - "step": 32537 - }, - { - "epoch": 5.240025765932606, - "grad_norm": 0.003611267078667879, - "learning_rate": 0.00019998645971399832, - "loss": 46.0, - "step": 32538 - }, - { - "epoch": 5.240186803011393, - "grad_norm": 0.008464580401778221, - "learning_rate": 0.00019998645888147322, - "loss": 46.0, - "step": 32539 - }, - { - "epoch": 5.24034784009018, - "grad_norm": 0.009467690251767635, - "learning_rate": 0.00019998645804892247, - "loss": 46.0, - "step": 32540 - }, - { - "epoch": 5.240508877168968, - "grad_norm": 0.008480178192257881, - "learning_rate": 0.0001999864572163462, - "loss": 46.0, - "step": 32541 - }, - { - "epoch": 5.240669914247755, - "grad_norm": 0.0017373107839375734, - "learning_rate": 0.0001999864563837443, - "loss": 46.0, - "step": 32542 - }, - { - "epoch": 5.240830951326543, - "grad_norm": 0.0016843137564137578, - "learning_rate": 0.0001999864555511168, - "loss": 46.0, - "step": 32543 - }, - { - "epoch": 5.24099198840533, - "grad_norm": 0.02414952963590622, - "learning_rate": 0.00019998645471846374, - "loss": 46.0, - "step": 32544 - }, - { - "epoch": 5.241153025484118, - "grad_norm": 0.0015188314719125628, - "learning_rate": 0.00019998645388578508, - "loss": 46.0, - "step": 32545 - }, - { - "epoch": 5.241314062562905, - "grad_norm": 0.0023643230088055134, - "learning_rate": 0.00019998645305308082, - "loss": 46.0, - "step": 32546 - }, - { - "epoch": 5.241475099641693, - "grad_norm": 0.004094649571925402, - "learning_rate": 0.00019998645222035096, - "loss": 46.0, - "step": 32547 - }, - { - "epoch": 5.24163613672048, - "grad_norm": 0.018110547214746475, - "learning_rate": 0.00019998645138759552, - "loss": 46.0, - "step": 32548 - }, - { - "epoch": 5.2417971737992675, - "grad_norm": 0.010099576786160469, - "learning_rate": 0.0001999864505548145, - "loss": 46.0, - "step": 32549 - }, - { - "epoch": 5.241958210878055, - "grad_norm": 0.00549853453412652, - "learning_rate": 0.00019998644972200785, - "loss": 46.0, - "step": 32550 - }, - { - "epoch": 5.242119247956842, - "grad_norm": 0.002000484149903059, - "learning_rate": 0.00019998644888917565, - "loss": 46.0, - "step": 32551 - }, - { - "epoch": 5.24228028503563, - "grad_norm": 0.008779608644545078, - "learning_rate": 0.00019998644805631783, - "loss": 46.0, - "step": 32552 - }, - { - "epoch": 5.242441322114417, - "grad_norm": 0.004706146195530891, - "learning_rate": 0.00019998644722343443, - "loss": 46.0, - "step": 32553 - }, - { - "epoch": 5.242602359193204, - "grad_norm": 0.00858042761683464, - "learning_rate": 0.00019998644639052543, - "loss": 46.0, - "step": 32554 - }, - { - "epoch": 5.242763396271991, - "grad_norm": 0.006062607280910015, - "learning_rate": 0.00019998644555759088, - "loss": 46.0, - "step": 32555 - }, - { - "epoch": 5.242924433350779, - "grad_norm": 0.0026606328319758177, - "learning_rate": 0.0001999864447246307, - "loss": 46.0, - "step": 32556 - }, - { - "epoch": 5.243085470429566, - "grad_norm": 0.002813649130985141, - "learning_rate": 0.0001999864438916449, - "loss": 46.0, - "step": 32557 - }, - { - "epoch": 5.243246507508354, - "grad_norm": 0.005954826716333628, - "learning_rate": 0.00019998644305863357, - "loss": 46.0, - "step": 32558 - }, - { - "epoch": 5.243407544587141, - "grad_norm": 0.0022507337853312492, - "learning_rate": 0.00019998644222559664, - "loss": 46.0, - "step": 32559 - }, - { - "epoch": 5.2435685816659285, - "grad_norm": 0.007777234073728323, - "learning_rate": 0.00019998644139253407, - "loss": 46.0, - "step": 32560 - }, - { - "epoch": 5.243729618744716, - "grad_norm": 0.010295560583472252, - "learning_rate": 0.00019998644055944595, - "loss": 46.0, - "step": 32561 - }, - { - "epoch": 5.243890655823503, - "grad_norm": 0.005532537121325731, - "learning_rate": 0.00019998643972633223, - "loss": 46.0, - "step": 32562 - }, - { - "epoch": 5.244051692902291, - "grad_norm": 0.0031475473660975695, - "learning_rate": 0.00019998643889319293, - "loss": 46.0, - "step": 32563 - }, - { - "epoch": 5.244212729981078, - "grad_norm": 0.0032074186019599438, - "learning_rate": 0.00019998643806002804, - "loss": 46.0, - "step": 32564 - }, - { - "epoch": 5.244373767059866, - "grad_norm": 0.02486119233071804, - "learning_rate": 0.00019998643722683753, - "loss": 46.0, - "step": 32565 - }, - { - "epoch": 5.244534804138653, - "grad_norm": 0.010372549295425415, - "learning_rate": 0.0001999864363936214, - "loss": 46.0, - "step": 32566 - }, - { - "epoch": 5.244695841217441, - "grad_norm": 0.0032242003362625837, - "learning_rate": 0.00019998643556037976, - "loss": 46.0, - "step": 32567 - }, - { - "epoch": 5.244856878296227, - "grad_norm": 0.0028462721966207027, - "learning_rate": 0.0001999864347271125, - "loss": 46.0, - "step": 32568 - }, - { - "epoch": 5.245017915375015, - "grad_norm": 0.005855824798345566, - "learning_rate": 0.00019998643389381964, - "loss": 46.0, - "step": 32569 - }, - { - "epoch": 5.245178952453802, - "grad_norm": 0.0014157752739265561, - "learning_rate": 0.00019998643306050118, - "loss": 46.0, - "step": 32570 - }, - { - "epoch": 5.24533998953259, - "grad_norm": 0.001425734139047563, - "learning_rate": 0.00019998643222715715, - "loss": 46.0, - "step": 32571 - }, - { - "epoch": 5.245501026611377, - "grad_norm": 0.002818556036800146, - "learning_rate": 0.0001999864313937875, - "loss": 46.0, - "step": 32572 - }, - { - "epoch": 5.2456620636901645, - "grad_norm": 0.009476062841713428, - "learning_rate": 0.00019998643056039228, - "loss": 46.0, - "step": 32573 - }, - { - "epoch": 5.245823100768952, - "grad_norm": 0.002413427224382758, - "learning_rate": 0.00019998642972697146, - "loss": 46.0, - "step": 32574 - }, - { - "epoch": 5.245984137847739, - "grad_norm": 0.00271716364659369, - "learning_rate": 0.00019998642889352506, - "loss": 46.0, - "step": 32575 - }, - { - "epoch": 5.246145174926527, - "grad_norm": 0.00868601631373167, - "learning_rate": 0.00019998642806005307, - "loss": 46.0, - "step": 32576 - }, - { - "epoch": 5.246306212005314, - "grad_norm": 0.014675287529826164, - "learning_rate": 0.00019998642722655546, - "loss": 46.0, - "step": 32577 - }, - { - "epoch": 5.246467249084102, - "grad_norm": 0.01291351392865181, - "learning_rate": 0.00019998642639303227, - "loss": 46.0, - "step": 32578 - }, - { - "epoch": 5.246628286162889, - "grad_norm": 0.006505636032670736, - "learning_rate": 0.0001999864255594835, - "loss": 46.0, - "step": 32579 - }, - { - "epoch": 5.246789323241677, - "grad_norm": 0.003626520512625575, - "learning_rate": 0.00019998642472590913, - "loss": 46.0, - "step": 32580 - }, - { - "epoch": 5.246950360320464, - "grad_norm": 0.00314791570417583, - "learning_rate": 0.00019998642389230917, - "loss": 46.0, - "step": 32581 - }, - { - "epoch": 5.247111397399252, - "grad_norm": 0.0026092049665749073, - "learning_rate": 0.0001999864230586836, - "loss": 46.0, - "step": 32582 - }, - { - "epoch": 5.247272434478038, - "grad_norm": 0.003367555793374777, - "learning_rate": 0.00019998642222503248, - "loss": 46.0, - "step": 32583 - }, - { - "epoch": 5.247433471556826, - "grad_norm": 0.0025751274079084396, - "learning_rate": 0.00019998642139135576, - "loss": 46.0, - "step": 32584 - }, - { - "epoch": 5.247594508635613, - "grad_norm": 0.007634031120687723, - "learning_rate": 0.00019998642055765343, - "loss": 46.0, - "step": 32585 - }, - { - "epoch": 5.2477555457144005, - "grad_norm": 0.004878594074398279, - "learning_rate": 0.00019998641972392552, - "loss": 46.0, - "step": 32586 - }, - { - "epoch": 5.247916582793188, - "grad_norm": 0.01824226602911949, - "learning_rate": 0.000199986418890172, - "loss": 46.0, - "step": 32587 - }, - { - "epoch": 5.248077619871975, - "grad_norm": 0.005828591994941235, - "learning_rate": 0.0001999864180563929, - "loss": 46.0, - "step": 32588 - }, - { - "epoch": 5.248238656950763, - "grad_norm": 0.005131609737873077, - "learning_rate": 0.00019998641722258822, - "loss": 46.0, - "step": 32589 - }, - { - "epoch": 5.24839969402955, - "grad_norm": 0.005229932721704245, - "learning_rate": 0.00019998641638875792, - "loss": 46.0, - "step": 32590 - }, - { - "epoch": 5.248560731108338, - "grad_norm": 0.002291000448167324, - "learning_rate": 0.00019998641555490205, - "loss": 46.0, - "step": 32591 - }, - { - "epoch": 5.248721768187125, - "grad_norm": 0.02242285944521427, - "learning_rate": 0.0001999864147210206, - "loss": 46.0, - "step": 32592 - }, - { - "epoch": 5.248882805265913, - "grad_norm": 0.011595788411796093, - "learning_rate": 0.00019998641388711355, - "loss": 46.0, - "step": 32593 - }, - { - "epoch": 5.2490438423447, - "grad_norm": 0.003237377852201462, - "learning_rate": 0.0001999864130531809, - "loss": 46.0, - "step": 32594 - }, - { - "epoch": 5.249204879423488, - "grad_norm": 0.0025449118111282587, - "learning_rate": 0.00019998641221922268, - "loss": 46.0, - "step": 32595 - }, - { - "epoch": 5.249365916502275, - "grad_norm": 0.011958865448832512, - "learning_rate": 0.00019998641138523884, - "loss": 46.0, - "step": 32596 - }, - { - "epoch": 5.2495269535810625, - "grad_norm": 0.00785353034734726, - "learning_rate": 0.0001999864105512294, - "loss": 46.0, - "step": 32597 - }, - { - "epoch": 5.249687990659849, - "grad_norm": 0.005678877700120211, - "learning_rate": 0.0001999864097171944, - "loss": 46.0, - "step": 32598 - }, - { - "epoch": 5.2498490277386365, - "grad_norm": 0.0008249414386227727, - "learning_rate": 0.0001999864088831338, - "loss": 46.0, - "step": 32599 - }, - { - "epoch": 5.250010064817424, - "grad_norm": 0.005462795961648226, - "learning_rate": 0.0001999864080490476, - "loss": 46.0, - "step": 32600 - }, - { - "epoch": 5.250171101896211, - "grad_norm": 0.010967371053993702, - "learning_rate": 0.00019998640721493582, - "loss": 46.0, - "step": 32601 - }, - { - "epoch": 5.250332138974999, - "grad_norm": 0.033050745725631714, - "learning_rate": 0.00019998640638079843, - "loss": 46.0, - "step": 32602 - }, - { - "epoch": 5.250493176053786, - "grad_norm": 0.019316645339131355, - "learning_rate": 0.00019998640554663548, - "loss": 46.0, - "step": 32603 - }, - { - "epoch": 5.250654213132574, - "grad_norm": 0.004457741975784302, - "learning_rate": 0.0001999864047124469, - "loss": 46.0, - "step": 32604 - }, - { - "epoch": 5.250815250211361, - "grad_norm": 0.0012783827260136604, - "learning_rate": 0.00019998640387823278, - "loss": 46.0, - "step": 32605 - }, - { - "epoch": 5.250976287290149, - "grad_norm": 0.010230284184217453, - "learning_rate": 0.00019998640304399304, - "loss": 46.0, - "step": 32606 - }, - { - "epoch": 5.251137324368936, - "grad_norm": 0.006025716662406921, - "learning_rate": 0.0001999864022097277, - "loss": 46.0, - "step": 32607 - }, - { - "epoch": 5.2512983614477236, - "grad_norm": 0.008783235214650631, - "learning_rate": 0.00019998640137543677, - "loss": 46.0, - "step": 32608 - }, - { - "epoch": 5.251459398526511, - "grad_norm": 0.001808101311326027, - "learning_rate": 0.00019998640054112024, - "loss": 46.0, - "step": 32609 - }, - { - "epoch": 5.2516204356052985, - "grad_norm": 0.0016007708618417382, - "learning_rate": 0.00019998639970677813, - "loss": 46.0, - "step": 32610 - }, - { - "epoch": 5.251781472684086, - "grad_norm": 0.01337799709290266, - "learning_rate": 0.00019998639887241045, - "loss": 46.0, - "step": 32611 - }, - { - "epoch": 5.251942509762873, - "grad_norm": 0.002692866139113903, - "learning_rate": 0.00019998639803801713, - "loss": 46.0, - "step": 32612 - }, - { - "epoch": 5.25210354684166, - "grad_norm": 0.004638901446014643, - "learning_rate": 0.00019998639720359825, - "loss": 46.0, - "step": 32613 - }, - { - "epoch": 5.252264583920447, - "grad_norm": 0.008056741207838058, - "learning_rate": 0.00019998639636915376, - "loss": 46.0, - "step": 32614 - }, - { - "epoch": 5.252425620999235, - "grad_norm": 0.006555595900863409, - "learning_rate": 0.00019998639553468373, - "loss": 46.0, - "step": 32615 - }, - { - "epoch": 5.252586658078022, - "grad_norm": 0.005346778780221939, - "learning_rate": 0.00019998639470018804, - "loss": 46.0, - "step": 32616 - }, - { - "epoch": 5.25274769515681, - "grad_norm": 0.0016766526969149709, - "learning_rate": 0.0001999863938656668, - "loss": 46.0, - "step": 32617 - }, - { - "epoch": 5.252908732235597, - "grad_norm": 0.0030667639803141356, - "learning_rate": 0.00019998639303111997, - "loss": 46.0, - "step": 32618 - }, - { - "epoch": 5.253069769314385, - "grad_norm": 0.009963980875909328, - "learning_rate": 0.00019998639219654757, - "loss": 46.0, - "step": 32619 - }, - { - "epoch": 5.253230806393172, - "grad_norm": 0.007708756718784571, - "learning_rate": 0.00019998639136194953, - "loss": 46.0, - "step": 32620 - }, - { - "epoch": 5.2533918434719595, - "grad_norm": 0.002629245398566127, - "learning_rate": 0.0001999863905273259, - "loss": 46.0, - "step": 32621 - }, - { - "epoch": 5.253552880550747, - "grad_norm": 0.005356887821108103, - "learning_rate": 0.0001999863896926767, - "loss": 46.0, - "step": 32622 - }, - { - "epoch": 5.253713917629534, - "grad_norm": 0.0031015139538794756, - "learning_rate": 0.00019998638885800193, - "loss": 46.0, - "step": 32623 - }, - { - "epoch": 5.253874954708322, - "grad_norm": 0.0010287582408636808, - "learning_rate": 0.0001999863880233015, - "loss": 46.0, - "step": 32624 - }, - { - "epoch": 5.254035991787109, - "grad_norm": 0.015437331981956959, - "learning_rate": 0.00019998638718857553, - "loss": 46.0, - "step": 32625 - }, - { - "epoch": 5.254197028865896, - "grad_norm": 0.007384417578577995, - "learning_rate": 0.00019998638635382397, - "loss": 46.0, - "step": 32626 - }, - { - "epoch": 5.254358065944683, - "grad_norm": 0.012291919440031052, - "learning_rate": 0.00019998638551904681, - "loss": 46.0, - "step": 32627 - }, - { - "epoch": 5.254519103023471, - "grad_norm": 0.008701611310243607, - "learning_rate": 0.00019998638468424407, - "loss": 46.0, - "step": 32628 - }, - { - "epoch": 5.254680140102258, - "grad_norm": 0.002207002369686961, - "learning_rate": 0.00019998638384941572, - "loss": 46.0, - "step": 32629 - }, - { - "epoch": 5.254841177181046, - "grad_norm": 0.001916029374115169, - "learning_rate": 0.00019998638301456178, - "loss": 46.0, - "step": 32630 - }, - { - "epoch": 5.255002214259833, - "grad_norm": 0.013337378390133381, - "learning_rate": 0.00019998638217968228, - "loss": 46.0, - "step": 32631 - }, - { - "epoch": 5.255163251338621, - "grad_norm": 0.0057775224559009075, - "learning_rate": 0.00019998638134477713, - "loss": 46.0, - "step": 32632 - }, - { - "epoch": 5.255324288417408, - "grad_norm": 0.00446449825540185, - "learning_rate": 0.00019998638050984643, - "loss": 46.0, - "step": 32633 - }, - { - "epoch": 5.2554853254961955, - "grad_norm": 0.0013590480666607618, - "learning_rate": 0.0001999863796748901, - "loss": 46.0, - "step": 32634 - }, - { - "epoch": 5.255646362574983, - "grad_norm": 0.007231721188873053, - "learning_rate": 0.00019998637883990823, - "loss": 46.0, - "step": 32635 - }, - { - "epoch": 5.25580739965377, - "grad_norm": 0.008585846982896328, - "learning_rate": 0.00019998637800490074, - "loss": 46.0, - "step": 32636 - }, - { - "epoch": 5.255968436732558, - "grad_norm": 0.0019316069083288312, - "learning_rate": 0.00019998637716986766, - "loss": 46.0, - "step": 32637 - }, - { - "epoch": 5.256129473811345, - "grad_norm": 0.0039500887505710125, - "learning_rate": 0.00019998637633480902, - "loss": 46.0, - "step": 32638 - }, - { - "epoch": 5.256290510890133, - "grad_norm": 0.004407184664160013, - "learning_rate": 0.00019998637549972474, - "loss": 46.0, - "step": 32639 - }, - { - "epoch": 5.25645154796892, - "grad_norm": 0.0025003000628203154, - "learning_rate": 0.0001999863746646149, - "loss": 46.0, - "step": 32640 - }, - { - "epoch": 5.256612585047707, - "grad_norm": 0.016539763659238815, - "learning_rate": 0.00019998637382947948, - "loss": 46.0, - "step": 32641 - }, - { - "epoch": 5.256773622126494, - "grad_norm": 0.002038324251770973, - "learning_rate": 0.0001999863729943184, - "loss": 46.0, - "step": 32642 - }, - { - "epoch": 5.256934659205282, - "grad_norm": 0.011387698352336884, - "learning_rate": 0.00019998637215913178, - "loss": 46.0, - "step": 32643 - }, - { - "epoch": 5.257095696284069, - "grad_norm": 0.001516856369562447, - "learning_rate": 0.00019998637132391956, - "loss": 46.0, - "step": 32644 - }, - { - "epoch": 5.257256733362857, - "grad_norm": 0.010770903900265694, - "learning_rate": 0.00019998637048868179, - "loss": 46.0, - "step": 32645 - }, - { - "epoch": 5.257417770441644, - "grad_norm": 0.015435745939612389, - "learning_rate": 0.00019998636965341837, - "loss": 46.0, - "step": 32646 - }, - { - "epoch": 5.2575788075204315, - "grad_norm": 0.0016320951981469989, - "learning_rate": 0.0001999863688181294, - "loss": 46.0, - "step": 32647 - }, - { - "epoch": 5.257739844599219, - "grad_norm": 0.0033576814457774162, - "learning_rate": 0.0001999863679828148, - "loss": 46.0, - "step": 32648 - }, - { - "epoch": 5.257900881678006, - "grad_norm": 0.01032302901148796, - "learning_rate": 0.00019998636714747462, - "loss": 46.0, - "step": 32649 - }, - { - "epoch": 5.258061918756794, - "grad_norm": 0.00802572537213564, - "learning_rate": 0.00019998636631210888, - "loss": 46.0, - "step": 32650 - }, - { - "epoch": 5.258222955835581, - "grad_norm": 0.00871746614575386, - "learning_rate": 0.00019998636547671753, - "loss": 46.0, - "step": 32651 - }, - { - "epoch": 5.258383992914369, - "grad_norm": 0.007459382060915232, - "learning_rate": 0.00019998636464130059, - "loss": 46.0, - "step": 32652 - }, - { - "epoch": 5.258545029993156, - "grad_norm": 0.004042063374072313, - "learning_rate": 0.00019998636380585803, - "loss": 46.0, - "step": 32653 - }, - { - "epoch": 5.258706067071944, - "grad_norm": 0.0008590960642322898, - "learning_rate": 0.00019998636297038992, - "loss": 46.0, - "step": 32654 - }, - { - "epoch": 5.258867104150731, - "grad_norm": 0.003839415730908513, - "learning_rate": 0.00019998636213489616, - "loss": 46.0, - "step": 32655 - }, - { - "epoch": 5.259028141229518, - "grad_norm": 0.004814943298697472, - "learning_rate": 0.00019998636129937687, - "loss": 46.0, - "step": 32656 - }, - { - "epoch": 5.259189178308305, - "grad_norm": 0.0045902058482170105, - "learning_rate": 0.00019998636046383197, - "loss": 46.0, - "step": 32657 - }, - { - "epoch": 5.259350215387093, - "grad_norm": 0.001437442610040307, - "learning_rate": 0.00019998635962826148, - "loss": 46.0, - "step": 32658 - }, - { - "epoch": 5.25951125246588, - "grad_norm": 0.0025768957566469908, - "learning_rate": 0.0001999863587926654, - "loss": 46.0, - "step": 32659 - }, - { - "epoch": 5.2596722895446675, - "grad_norm": 0.003445117501541972, - "learning_rate": 0.0001999863579570437, - "loss": 46.0, - "step": 32660 - }, - { - "epoch": 5.259833326623455, - "grad_norm": 0.010923337191343307, - "learning_rate": 0.00019998635712139645, - "loss": 46.0, - "step": 32661 - }, - { - "epoch": 5.259994363702242, - "grad_norm": 0.006247815676033497, - "learning_rate": 0.00019998635628572359, - "loss": 46.0, - "step": 32662 - }, - { - "epoch": 5.26015540078103, - "grad_norm": 0.00402148487046361, - "learning_rate": 0.00019998635545002513, - "loss": 46.0, - "step": 32663 - }, - { - "epoch": 5.260316437859817, - "grad_norm": 0.004145434591919184, - "learning_rate": 0.0001999863546143011, - "loss": 46.0, - "step": 32664 - }, - { - "epoch": 5.260477474938605, - "grad_norm": 0.012223700992763042, - "learning_rate": 0.00019998635377855144, - "loss": 46.0, - "step": 32665 - }, - { - "epoch": 5.260638512017392, - "grad_norm": 0.007371906656771898, - "learning_rate": 0.00019998635294277622, - "loss": 46.0, - "step": 32666 - }, - { - "epoch": 5.26079954909618, - "grad_norm": 0.005936674773693085, - "learning_rate": 0.00019998635210697542, - "loss": 46.0, - "step": 32667 - }, - { - "epoch": 5.260960586174967, - "grad_norm": 0.0018563737394288182, - "learning_rate": 0.000199986351271149, - "loss": 46.0, - "step": 32668 - }, - { - "epoch": 5.2611216232537545, - "grad_norm": 0.003600636962801218, - "learning_rate": 0.000199986350435297, - "loss": 46.0, - "step": 32669 - }, - { - "epoch": 5.261282660332542, - "grad_norm": 0.004085914231836796, - "learning_rate": 0.00019998634959941943, - "loss": 46.0, - "step": 32670 - }, - { - "epoch": 5.261443697411329, - "grad_norm": 0.004710397217422724, - "learning_rate": 0.00019998634876351623, - "loss": 46.0, - "step": 32671 - }, - { - "epoch": 5.261604734490116, - "grad_norm": 0.0035617873072624207, - "learning_rate": 0.00019998634792758746, - "loss": 46.0, - "step": 32672 - }, - { - "epoch": 5.2617657715689035, - "grad_norm": 0.0027501003351062536, - "learning_rate": 0.00019998634709163308, - "loss": 46.0, - "step": 32673 - }, - { - "epoch": 5.261926808647691, - "grad_norm": 0.0017287508817389607, - "learning_rate": 0.00019998634625565314, - "loss": 46.0, - "step": 32674 - }, - { - "epoch": 5.262087845726478, - "grad_norm": 0.004268570337444544, - "learning_rate": 0.0001999863454196476, - "loss": 46.0, - "step": 32675 - }, - { - "epoch": 5.262248882805266, - "grad_norm": 0.014256948605179787, - "learning_rate": 0.00019998634458361645, - "loss": 46.0, - "step": 32676 - }, - { - "epoch": 5.262409919884053, - "grad_norm": 0.0046277130022645, - "learning_rate": 0.00019998634374755972, - "loss": 46.0, - "step": 32677 - }, - { - "epoch": 5.262570956962841, - "grad_norm": 0.009656930342316628, - "learning_rate": 0.0001999863429114774, - "loss": 46.0, - "step": 32678 - }, - { - "epoch": 5.262731994041628, - "grad_norm": 0.008416050113737583, - "learning_rate": 0.00019998634207536947, - "loss": 46.0, - "step": 32679 - }, - { - "epoch": 5.262893031120416, - "grad_norm": 0.016932500526309013, - "learning_rate": 0.00019998634123923598, - "loss": 46.0, - "step": 32680 - }, - { - "epoch": 5.263054068199203, - "grad_norm": 0.005687073338776827, - "learning_rate": 0.00019998634040307688, - "loss": 46.0, - "step": 32681 - }, - { - "epoch": 5.2632151052779905, - "grad_norm": 0.0048994203098118305, - "learning_rate": 0.0001999863395668922, - "loss": 46.0, - "step": 32682 - }, - { - "epoch": 5.263376142356778, - "grad_norm": 0.005206476431339979, - "learning_rate": 0.0001999863387306819, - "loss": 46.0, - "step": 32683 - }, - { - "epoch": 5.263537179435565, - "grad_norm": 0.0017812447622418404, - "learning_rate": 0.00019998633789444602, - "loss": 46.0, - "step": 32684 - }, - { - "epoch": 5.263698216514353, - "grad_norm": 0.02232889086008072, - "learning_rate": 0.00019998633705818456, - "loss": 46.0, - "step": 32685 - }, - { - "epoch": 5.263859253593139, - "grad_norm": 0.019658392295241356, - "learning_rate": 0.00019998633622189752, - "loss": 46.0, - "step": 32686 - }, - { - "epoch": 5.264020290671927, - "grad_norm": 0.003187777241691947, - "learning_rate": 0.00019998633538558487, - "loss": 46.0, - "step": 32687 - }, - { - "epoch": 5.264181327750714, - "grad_norm": 0.0042627472430467606, - "learning_rate": 0.00019998633454924666, - "loss": 46.0, - "step": 32688 - }, - { - "epoch": 5.264342364829502, - "grad_norm": 0.004050084855407476, - "learning_rate": 0.00019998633371288283, - "loss": 46.0, - "step": 32689 - }, - { - "epoch": 5.264503401908289, - "grad_norm": 0.006821055430918932, - "learning_rate": 0.0001999863328764934, - "loss": 46.0, - "step": 32690 - }, - { - "epoch": 5.264664438987077, - "grad_norm": 0.0021948502399027348, - "learning_rate": 0.00019998633204007838, - "loss": 46.0, - "step": 32691 - }, - { - "epoch": 5.264825476065864, - "grad_norm": 0.0024444195441901684, - "learning_rate": 0.0001999863312036378, - "loss": 46.0, - "step": 32692 - }, - { - "epoch": 5.264986513144652, - "grad_norm": 0.013647926039993763, - "learning_rate": 0.0001999863303671716, - "loss": 46.0, - "step": 32693 - }, - { - "epoch": 5.265147550223439, - "grad_norm": 0.0042956252582371235, - "learning_rate": 0.00019998632953067982, - "loss": 46.0, - "step": 32694 - }, - { - "epoch": 5.2653085873022265, - "grad_norm": 0.0008356008911505342, - "learning_rate": 0.00019998632869416242, - "loss": 46.0, - "step": 32695 - }, - { - "epoch": 5.265469624381014, - "grad_norm": 0.005223304498940706, - "learning_rate": 0.00019998632785761945, - "loss": 46.0, - "step": 32696 - }, - { - "epoch": 5.265630661459801, - "grad_norm": 0.005672079510986805, - "learning_rate": 0.00019998632702105092, - "loss": 46.0, - "step": 32697 - }, - { - "epoch": 5.265791698538589, - "grad_norm": 0.005248211324214935, - "learning_rate": 0.00019998632618445676, - "loss": 46.0, - "step": 32698 - }, - { - "epoch": 5.265952735617376, - "grad_norm": 0.002135050715878606, - "learning_rate": 0.00019998632534783703, - "loss": 46.0, - "step": 32699 - }, - { - "epoch": 5.266113772696163, - "grad_norm": 0.0070891049690544605, - "learning_rate": 0.00019998632451119171, - "loss": 46.0, - "step": 32700 - }, - { - "epoch": 5.26627480977495, - "grad_norm": 0.009737805463373661, - "learning_rate": 0.00019998632367452076, - "loss": 46.0, - "step": 32701 - }, - { - "epoch": 5.266435846853738, - "grad_norm": 0.002563453745096922, - "learning_rate": 0.00019998632283782427, - "loss": 46.0, - "step": 32702 - }, - { - "epoch": 5.266596883932525, - "grad_norm": 0.0024845018051564693, - "learning_rate": 0.00019998632200110214, - "loss": 46.0, - "step": 32703 - }, - { - "epoch": 5.266757921011313, - "grad_norm": 0.002924696309491992, - "learning_rate": 0.00019998632116435445, - "loss": 46.0, - "step": 32704 - }, - { - "epoch": 5.2669189580901, - "grad_norm": 0.002489969599992037, - "learning_rate": 0.00019998632032758114, - "loss": 46.0, - "step": 32705 - }, - { - "epoch": 5.267079995168888, - "grad_norm": 0.004896989557892084, - "learning_rate": 0.00019998631949078228, - "loss": 46.0, - "step": 32706 - }, - { - "epoch": 5.267241032247675, - "grad_norm": 0.007058939430862665, - "learning_rate": 0.0001999863186539578, - "loss": 46.0, - "step": 32707 - }, - { - "epoch": 5.2674020693264625, - "grad_norm": 0.007980616763234138, - "learning_rate": 0.00019998631781710776, - "loss": 46.0, - "step": 32708 - }, - { - "epoch": 5.26756310640525, - "grad_norm": 0.002323438646271825, - "learning_rate": 0.00019998631698023208, - "loss": 46.0, - "step": 32709 - }, - { - "epoch": 5.267724143484037, - "grad_norm": 0.017195073887705803, - "learning_rate": 0.00019998631614333087, - "loss": 46.0, - "step": 32710 - }, - { - "epoch": 5.267885180562825, - "grad_norm": 0.014264289289712906, - "learning_rate": 0.000199986315306404, - "loss": 46.0, - "step": 32711 - }, - { - "epoch": 5.268046217641612, - "grad_norm": 0.002280234592035413, - "learning_rate": 0.00019998631446945157, - "loss": 46.0, - "step": 32712 - }, - { - "epoch": 5.2682072547204, - "grad_norm": 0.004719855729490519, - "learning_rate": 0.00019998631363247354, - "loss": 46.0, - "step": 32713 - }, - { - "epoch": 5.268368291799186, - "grad_norm": 0.001047990401275456, - "learning_rate": 0.00019998631279546992, - "loss": 46.0, - "step": 32714 - }, - { - "epoch": 5.268529328877974, - "grad_norm": 0.008425416424870491, - "learning_rate": 0.00019998631195844072, - "loss": 46.0, - "step": 32715 - }, - { - "epoch": 5.268690365956761, - "grad_norm": 0.005264493636786938, - "learning_rate": 0.00019998631112138596, - "loss": 46.0, - "step": 32716 - }, - { - "epoch": 5.268851403035549, - "grad_norm": 0.005269521847367287, - "learning_rate": 0.00019998631028430555, - "loss": 46.0, - "step": 32717 - }, - { - "epoch": 5.269012440114336, - "grad_norm": 0.005105150863528252, - "learning_rate": 0.0001999863094471996, - "loss": 46.0, - "step": 32718 - }, - { - "epoch": 5.269173477193124, - "grad_norm": 0.008111500181257725, - "learning_rate": 0.000199986308610068, - "loss": 46.0, - "step": 32719 - }, - { - "epoch": 5.269334514271911, - "grad_norm": 0.002483420306816697, - "learning_rate": 0.00019998630777291084, - "loss": 46.0, - "step": 32720 - }, - { - "epoch": 5.2694955513506985, - "grad_norm": 0.009813596494495869, - "learning_rate": 0.00019998630693572811, - "loss": 46.0, - "step": 32721 - }, - { - "epoch": 5.269656588429486, - "grad_norm": 0.01022042240947485, - "learning_rate": 0.00019998630609851975, - "loss": 46.0, - "step": 32722 - }, - { - "epoch": 5.269817625508273, - "grad_norm": 0.008669614791870117, - "learning_rate": 0.00019998630526128585, - "loss": 46.0, - "step": 32723 - }, - { - "epoch": 5.269978662587061, - "grad_norm": 0.008839394897222519, - "learning_rate": 0.0001999863044240263, - "loss": 46.0, - "step": 32724 - }, - { - "epoch": 5.270139699665848, - "grad_norm": 0.00277986703440547, - "learning_rate": 0.00019998630358674117, - "loss": 46.0, - "step": 32725 - }, - { - "epoch": 5.270300736744636, - "grad_norm": 0.0019506255630403757, - "learning_rate": 0.00019998630274943046, - "loss": 46.0, - "step": 32726 - }, - { - "epoch": 5.270461773823423, - "grad_norm": 0.006148990243673325, - "learning_rate": 0.00019998630191209418, - "loss": 46.0, - "step": 32727 - }, - { - "epoch": 5.270622810902211, - "grad_norm": 0.007476849015802145, - "learning_rate": 0.0001999863010747323, - "loss": 46.0, - "step": 32728 - }, - { - "epoch": 5.270783847980997, - "grad_norm": 0.0024931083898991346, - "learning_rate": 0.0001999863002373448, - "loss": 46.0, - "step": 32729 - }, - { - "epoch": 5.270944885059785, - "grad_norm": 0.003910596948117018, - "learning_rate": 0.00019998629939993175, - "loss": 46.0, - "step": 32730 - }, - { - "epoch": 5.271105922138572, - "grad_norm": 0.010569185949862003, - "learning_rate": 0.00019998629856249307, - "loss": 46.0, - "step": 32731 - }, - { - "epoch": 5.2712669592173595, - "grad_norm": 0.0035121862310916185, - "learning_rate": 0.00019998629772502883, - "loss": 46.0, - "step": 32732 - }, - { - "epoch": 5.271427996296147, - "grad_norm": 0.010562081821262836, - "learning_rate": 0.00019998629688753897, - "loss": 46.0, - "step": 32733 - }, - { - "epoch": 5.2715890333749345, - "grad_norm": 0.0009269286529161036, - "learning_rate": 0.00019998629605002353, - "loss": 46.0, - "step": 32734 - }, - { - "epoch": 5.271750070453722, - "grad_norm": 0.0015503076137974858, - "learning_rate": 0.0001999862952124825, - "loss": 46.0, - "step": 32735 - }, - { - "epoch": 5.271911107532509, - "grad_norm": 0.006434526760131121, - "learning_rate": 0.00019998629437491586, - "loss": 46.0, - "step": 32736 - }, - { - "epoch": 5.272072144611297, - "grad_norm": 0.003989649936556816, - "learning_rate": 0.00019998629353732366, - "loss": 46.0, - "step": 32737 - }, - { - "epoch": 5.272233181690084, - "grad_norm": 0.011900383979082108, - "learning_rate": 0.00019998629269970587, - "loss": 46.0, - "step": 32738 - }, - { - "epoch": 5.272394218768872, - "grad_norm": 0.004666908178478479, - "learning_rate": 0.00019998629186206246, - "loss": 46.0, - "step": 32739 - }, - { - "epoch": 5.272555255847659, - "grad_norm": 0.0014495952054858208, - "learning_rate": 0.00019998629102439347, - "loss": 46.0, - "step": 32740 - }, - { - "epoch": 5.272716292926447, - "grad_norm": 0.002782973228022456, - "learning_rate": 0.0001999862901866989, - "loss": 46.0, - "step": 32741 - }, - { - "epoch": 5.272877330005234, - "grad_norm": 0.003483750391751528, - "learning_rate": 0.00019998628934897873, - "loss": 46.0, - "step": 32742 - }, - { - "epoch": 5.2730383670840215, - "grad_norm": 0.0021493907552212477, - "learning_rate": 0.00019998628851123295, - "loss": 46.0, - "step": 32743 - }, - { - "epoch": 5.273199404162808, - "grad_norm": 0.0012872567167505622, - "learning_rate": 0.0001999862876734616, - "loss": 46.0, - "step": 32744 - }, - { - "epoch": 5.2733604412415955, - "grad_norm": 0.00925049651414156, - "learning_rate": 0.00019998628683566468, - "loss": 46.0, - "step": 32745 - }, - { - "epoch": 5.273521478320383, - "grad_norm": 0.0046015591360628605, - "learning_rate": 0.00019998628599784211, - "loss": 46.0, - "step": 32746 - }, - { - "epoch": 5.27368251539917, - "grad_norm": 0.0036442552227526903, - "learning_rate": 0.000199986285159994, - "loss": 46.0, - "step": 32747 - }, - { - "epoch": 5.273843552477958, - "grad_norm": 0.0017966092564165592, - "learning_rate": 0.0001999862843221203, - "loss": 46.0, - "step": 32748 - }, - { - "epoch": 5.274004589556745, - "grad_norm": 0.00784636102616787, - "learning_rate": 0.000199986283484221, - "loss": 46.0, - "step": 32749 - }, - { - "epoch": 5.274165626635533, - "grad_norm": 0.0034893376287072897, - "learning_rate": 0.00019998628264629608, - "loss": 46.0, - "step": 32750 - }, - { - "epoch": 5.27432666371432, - "grad_norm": 0.0022212862968444824, - "learning_rate": 0.0001999862818083456, - "loss": 46.0, - "step": 32751 - }, - { - "epoch": 5.274487700793108, - "grad_norm": 0.015247160568833351, - "learning_rate": 0.0001999862809703695, - "loss": 46.0, - "step": 32752 - }, - { - "epoch": 5.274648737871895, - "grad_norm": 0.004423925653100014, - "learning_rate": 0.00019998628013236783, - "loss": 46.0, - "step": 32753 - }, - { - "epoch": 5.274809774950683, - "grad_norm": 0.0013157791690900922, - "learning_rate": 0.00019998627929434056, - "loss": 46.0, - "step": 32754 - }, - { - "epoch": 5.27497081202947, - "grad_norm": 0.003976835869252682, - "learning_rate": 0.0001999862784562877, - "loss": 46.0, - "step": 32755 - }, - { - "epoch": 5.2751318491082575, - "grad_norm": 0.0010143291437998414, - "learning_rate": 0.00019998627761820927, - "loss": 46.0, - "step": 32756 - }, - { - "epoch": 5.275292886187045, - "grad_norm": 0.00988712627440691, - "learning_rate": 0.00019998627678010522, - "loss": 46.0, - "step": 32757 - }, - { - "epoch": 5.275453923265832, - "grad_norm": 0.0009437710978090763, - "learning_rate": 0.00019998627594197557, - "loss": 46.0, - "step": 32758 - }, - { - "epoch": 5.275614960344619, - "grad_norm": 0.004082816652953625, - "learning_rate": 0.00019998627510382035, - "loss": 46.0, - "step": 32759 - }, - { - "epoch": 5.275775997423406, - "grad_norm": 0.011374269612133503, - "learning_rate": 0.00019998627426563953, - "loss": 46.0, - "step": 32760 - }, - { - "epoch": 5.275937034502194, - "grad_norm": 0.001038078567944467, - "learning_rate": 0.00019998627342743315, - "loss": 46.0, - "step": 32761 - }, - { - "epoch": 5.276098071580981, - "grad_norm": 0.009498587809503078, - "learning_rate": 0.00019998627258920116, - "loss": 46.0, - "step": 32762 - }, - { - "epoch": 5.276259108659769, - "grad_norm": 0.007078665774315596, - "learning_rate": 0.00019998627175094359, - "loss": 46.0, - "step": 32763 - }, - { - "epoch": 5.276420145738556, - "grad_norm": 0.0020795254968106747, - "learning_rate": 0.0001999862709126604, - "loss": 46.0, - "step": 32764 - }, - { - "epoch": 5.276581182817344, - "grad_norm": 0.0035221728030592203, - "learning_rate": 0.0001999862700743516, - "loss": 46.0, - "step": 32765 - }, - { - "epoch": 5.276742219896131, - "grad_norm": 0.0013312288792803884, - "learning_rate": 0.00019998626923601728, - "loss": 46.0, - "step": 32766 - }, - { - "epoch": 5.276903256974919, - "grad_norm": 0.004713039379566908, - "learning_rate": 0.0001999862683976573, - "loss": 46.0, - "step": 32767 - }, - { - "epoch": 5.277064294053706, - "grad_norm": 0.0017045966815203428, - "learning_rate": 0.00019998626755927175, - "loss": 46.0, - "step": 32768 - }, - { - "epoch": 5.2772253311324935, - "grad_norm": 0.004850388970226049, - "learning_rate": 0.0001999862667208606, - "loss": 46.0, - "step": 32769 - }, - { - "epoch": 5.277386368211281, - "grad_norm": 0.011895795352756977, - "learning_rate": 0.00019998626588242389, - "loss": 46.0, - "step": 32770 - }, - { - "epoch": 5.277547405290068, - "grad_norm": 0.0101715587079525, - "learning_rate": 0.00019998626504396158, - "loss": 46.0, - "step": 32771 - }, - { - "epoch": 5.277708442368856, - "grad_norm": 0.006214440334588289, - "learning_rate": 0.00019998626420547367, - "loss": 46.0, - "step": 32772 - }, - { - "epoch": 5.277869479447643, - "grad_norm": 0.0024516754783689976, - "learning_rate": 0.00019998626336696016, - "loss": 46.0, - "step": 32773 - }, - { - "epoch": 5.27803051652643, - "grad_norm": 0.0020385959651321173, - "learning_rate": 0.00019998626252842105, - "loss": 46.0, - "step": 32774 - }, - { - "epoch": 5.278191553605217, - "grad_norm": 0.024326873943209648, - "learning_rate": 0.00019998626168985637, - "loss": 46.0, - "step": 32775 - }, - { - "epoch": 5.278352590684005, - "grad_norm": 0.0008937088423408568, - "learning_rate": 0.0001999862608512661, - "loss": 46.0, - "step": 32776 - }, - { - "epoch": 5.278513627762792, - "grad_norm": 0.0014647595817223191, - "learning_rate": 0.00019998626001265025, - "loss": 46.0, - "step": 32777 - }, - { - "epoch": 5.27867466484158, - "grad_norm": 0.0017795716412365437, - "learning_rate": 0.00019998625917400878, - "loss": 46.0, - "step": 32778 - }, - { - "epoch": 5.278835701920367, - "grad_norm": 0.003916000947356224, - "learning_rate": 0.00019998625833534173, - "loss": 46.0, - "step": 32779 - }, - { - "epoch": 5.278996738999155, - "grad_norm": 0.005517369601875544, - "learning_rate": 0.00019998625749664906, - "loss": 46.0, - "step": 32780 - }, - { - "epoch": 5.279157776077942, - "grad_norm": 0.0024814626667648554, - "learning_rate": 0.00019998625665793086, - "loss": 46.0, - "step": 32781 - }, - { - "epoch": 5.2793188131567295, - "grad_norm": 0.0020449808798730373, - "learning_rate": 0.00019998625581918705, - "loss": 46.0, - "step": 32782 - }, - { - "epoch": 5.279479850235517, - "grad_norm": 0.005443553905934095, - "learning_rate": 0.00019998625498041762, - "loss": 46.0, - "step": 32783 - }, - { - "epoch": 5.279640887314304, - "grad_norm": 0.002270561410114169, - "learning_rate": 0.0001999862541416226, - "loss": 46.0, - "step": 32784 - }, - { - "epoch": 5.279801924393092, - "grad_norm": 0.003994714003056288, - "learning_rate": 0.00019998625330280203, - "loss": 46.0, - "step": 32785 - }, - { - "epoch": 5.279962961471879, - "grad_norm": 0.003503819927573204, - "learning_rate": 0.00019998625246395583, - "loss": 46.0, - "step": 32786 - }, - { - "epoch": 5.280123998550666, - "grad_norm": 0.0017188319470733404, - "learning_rate": 0.00019998625162508403, - "loss": 46.0, - "step": 32787 - }, - { - "epoch": 5.280285035629453, - "grad_norm": 0.0017572222277522087, - "learning_rate": 0.00019998625078618666, - "loss": 46.0, - "step": 32788 - }, - { - "epoch": 5.280446072708241, - "grad_norm": 0.0015798885142430663, - "learning_rate": 0.00019998624994726368, - "loss": 46.0, - "step": 32789 - }, - { - "epoch": 5.280607109787028, - "grad_norm": 0.020196184515953064, - "learning_rate": 0.00019998624910831514, - "loss": 46.0, - "step": 32790 - }, - { - "epoch": 5.280768146865816, - "grad_norm": 0.009554862044751644, - "learning_rate": 0.000199986248269341, - "loss": 46.0, - "step": 32791 - }, - { - "epoch": 5.280929183944603, - "grad_norm": 0.011458268389105797, - "learning_rate": 0.00019998624743034128, - "loss": 46.0, - "step": 32792 - }, - { - "epoch": 5.2810902210233905, - "grad_norm": 0.006587871816009283, - "learning_rate": 0.00019998624659131595, - "loss": 46.0, - "step": 32793 - }, - { - "epoch": 5.281251258102178, - "grad_norm": 0.004437935538589954, - "learning_rate": 0.00019998624575226503, - "loss": 46.0, - "step": 32794 - }, - { - "epoch": 5.281412295180965, - "grad_norm": 0.012687429785728455, - "learning_rate": 0.0001999862449131885, - "loss": 46.0, - "step": 32795 - }, - { - "epoch": 5.281573332259753, - "grad_norm": 0.009609921835362911, - "learning_rate": 0.00019998624407408641, - "loss": 46.0, - "step": 32796 - }, - { - "epoch": 5.28173436933854, - "grad_norm": 0.004458840936422348, - "learning_rate": 0.0001999862432349587, - "loss": 46.0, - "step": 32797 - }, - { - "epoch": 5.281895406417328, - "grad_norm": 0.010326903313398361, - "learning_rate": 0.00019998624239580545, - "loss": 46.0, - "step": 32798 - }, - { - "epoch": 5.282056443496115, - "grad_norm": 0.027878884226083755, - "learning_rate": 0.00019998624155662654, - "loss": 46.0, - "step": 32799 - }, - { - "epoch": 5.282217480574903, - "grad_norm": 0.014449832029640675, - "learning_rate": 0.0001999862407174221, - "loss": 46.0, - "step": 32800 - }, - { - "epoch": 5.28237851765369, - "grad_norm": 0.0024302948731929064, - "learning_rate": 0.00019998623987819202, - "loss": 46.0, - "step": 32801 - }, - { - "epoch": 5.282539554732477, - "grad_norm": 0.005840963684022427, - "learning_rate": 0.00019998623903893638, - "loss": 46.0, - "step": 32802 - }, - { - "epoch": 5.282700591811264, - "grad_norm": 0.015816377475857735, - "learning_rate": 0.00019998623819965513, - "loss": 46.0, - "step": 32803 - }, - { - "epoch": 5.282861628890052, - "grad_norm": 0.0071578435599803925, - "learning_rate": 0.0001999862373603483, - "loss": 46.0, - "step": 32804 - }, - { - "epoch": 5.283022665968839, - "grad_norm": 0.006389778107404709, - "learning_rate": 0.00019998623652101586, - "loss": 46.0, - "step": 32805 - }, - { - "epoch": 5.2831837030476265, - "grad_norm": 0.011697894893586636, - "learning_rate": 0.00019998623568165784, - "loss": 46.0, - "step": 32806 - }, - { - "epoch": 5.283344740126414, - "grad_norm": 0.004841446410864592, - "learning_rate": 0.00019998623484227427, - "loss": 46.0, - "step": 32807 - }, - { - "epoch": 5.283505777205201, - "grad_norm": 0.012552822008728981, - "learning_rate": 0.00019998623400286505, - "loss": 46.0, - "step": 32808 - }, - { - "epoch": 5.283666814283989, - "grad_norm": 0.014014051295816898, - "learning_rate": 0.00019998623316343025, - "loss": 46.0, - "step": 32809 - }, - { - "epoch": 5.283827851362776, - "grad_norm": 0.002752182772383094, - "learning_rate": 0.00019998623232396988, - "loss": 46.0, - "step": 32810 - }, - { - "epoch": 5.283988888441564, - "grad_norm": 0.011237689293920994, - "learning_rate": 0.0001999862314844839, - "loss": 46.0, - "step": 32811 - }, - { - "epoch": 5.284149925520351, - "grad_norm": 0.00717261852696538, - "learning_rate": 0.00019998623064497234, - "loss": 46.0, - "step": 32812 - }, - { - "epoch": 5.284310962599139, - "grad_norm": 0.0019925220403820276, - "learning_rate": 0.00019998622980543519, - "loss": 46.0, - "step": 32813 - }, - { - "epoch": 5.284471999677926, - "grad_norm": 0.0072485958226025105, - "learning_rate": 0.00019998622896587245, - "loss": 46.0, - "step": 32814 - }, - { - "epoch": 5.284633036756714, - "grad_norm": 0.0047632320784032345, - "learning_rate": 0.0001999862281262841, - "loss": 46.0, - "step": 32815 - }, - { - "epoch": 5.284794073835501, - "grad_norm": 0.004829026758670807, - "learning_rate": 0.00019998622728667015, - "loss": 46.0, - "step": 32816 - }, - { - "epoch": 5.284955110914288, - "grad_norm": 0.015597792342305183, - "learning_rate": 0.00019998622644703065, - "loss": 46.0, - "step": 32817 - }, - { - "epoch": 5.285116147993075, - "grad_norm": 0.0007577510550618172, - "learning_rate": 0.00019998622560736556, - "loss": 46.0, - "step": 32818 - }, - { - "epoch": 5.2852771850718625, - "grad_norm": 0.012728163041174412, - "learning_rate": 0.00019998622476767483, - "loss": 46.0, - "step": 32819 - }, - { - "epoch": 5.28543822215065, - "grad_norm": 0.002703931648284197, - "learning_rate": 0.00019998622392795854, - "loss": 46.0, - "step": 32820 - }, - { - "epoch": 5.285599259229437, - "grad_norm": 0.008138729259371758, - "learning_rate": 0.00019998622308821666, - "loss": 46.0, - "step": 32821 - }, - { - "epoch": 5.285760296308225, - "grad_norm": 0.0023652147501707077, - "learning_rate": 0.00019998622224844917, - "loss": 46.0, - "step": 32822 - }, - { - "epoch": 5.285921333387012, - "grad_norm": 0.004464826080948114, - "learning_rate": 0.00019998622140865612, - "loss": 46.0, - "step": 32823 - }, - { - "epoch": 5.2860823704658, - "grad_norm": 0.0026193042285740376, - "learning_rate": 0.00019998622056883745, - "loss": 46.0, - "step": 32824 - }, - { - "epoch": 5.286243407544587, - "grad_norm": 0.0034537853207439184, - "learning_rate": 0.00019998621972899317, - "loss": 46.0, - "step": 32825 - }, - { - "epoch": 5.286404444623375, - "grad_norm": 0.0020162579603493214, - "learning_rate": 0.00019998621888912336, - "loss": 46.0, - "step": 32826 - }, - { - "epoch": 5.286565481702162, - "grad_norm": 0.005515541881322861, - "learning_rate": 0.00019998621804922793, - "loss": 46.0, - "step": 32827 - }, - { - "epoch": 5.28672651878095, - "grad_norm": 0.012862896546721458, - "learning_rate": 0.00019998621720930692, - "loss": 46.0, - "step": 32828 - }, - { - "epoch": 5.286887555859737, - "grad_norm": 0.00212840992026031, - "learning_rate": 0.00019998621636936026, - "loss": 46.0, - "step": 32829 - }, - { - "epoch": 5.2870485929385245, - "grad_norm": 0.002766086021438241, - "learning_rate": 0.00019998621552938807, - "loss": 46.0, - "step": 32830 - }, - { - "epoch": 5.287209630017312, - "grad_norm": 0.009885932318866253, - "learning_rate": 0.00019998621468939027, - "loss": 46.0, - "step": 32831 - }, - { - "epoch": 5.2873706670960985, - "grad_norm": 0.0011209732620045543, - "learning_rate": 0.00019998621384936688, - "loss": 46.0, - "step": 32832 - }, - { - "epoch": 5.287531704174886, - "grad_norm": 0.006951188202947378, - "learning_rate": 0.00019998621300931787, - "loss": 46.0, - "step": 32833 - }, - { - "epoch": 5.287692741253673, - "grad_norm": 0.00442025950178504, - "learning_rate": 0.0001999862121692433, - "loss": 46.0, - "step": 32834 - }, - { - "epoch": 5.287853778332461, - "grad_norm": 0.0016486956737935543, - "learning_rate": 0.00019998621132914318, - "loss": 46.0, - "step": 32835 - }, - { - "epoch": 5.288014815411248, - "grad_norm": 0.0021979613229632378, - "learning_rate": 0.0001999862104890174, - "loss": 46.0, - "step": 32836 - }, - { - "epoch": 5.288175852490036, - "grad_norm": 0.011926851235330105, - "learning_rate": 0.00019998620964886606, - "loss": 46.0, - "step": 32837 - }, - { - "epoch": 5.288336889568823, - "grad_norm": 0.010933454148471355, - "learning_rate": 0.0001999862088086891, - "loss": 46.0, - "step": 32838 - }, - { - "epoch": 5.288497926647611, - "grad_norm": 0.00704097468405962, - "learning_rate": 0.00019998620796848657, - "loss": 46.0, - "step": 32839 - }, - { - "epoch": 5.288658963726398, - "grad_norm": 0.00409117853268981, - "learning_rate": 0.00019998620712825845, - "loss": 46.0, - "step": 32840 - }, - { - "epoch": 5.2888200008051856, - "grad_norm": 0.003234849777072668, - "learning_rate": 0.00019998620628800472, - "loss": 46.0, - "step": 32841 - }, - { - "epoch": 5.288981037883973, - "grad_norm": 0.007692200597375631, - "learning_rate": 0.00019998620544772543, - "loss": 46.0, - "step": 32842 - }, - { - "epoch": 5.2891420749627605, - "grad_norm": 0.006310525815933943, - "learning_rate": 0.00019998620460742056, - "loss": 46.0, - "step": 32843 - }, - { - "epoch": 5.289303112041548, - "grad_norm": 0.010792076587677002, - "learning_rate": 0.00019998620376709004, - "loss": 46.0, - "step": 32844 - }, - { - "epoch": 5.289464149120335, - "grad_norm": 0.0019435528665781021, - "learning_rate": 0.000199986202926734, - "loss": 46.0, - "step": 32845 - }, - { - "epoch": 5.289625186199123, - "grad_norm": 0.0055960011668503284, - "learning_rate": 0.0001999862020863523, - "loss": 46.0, - "step": 32846 - }, - { - "epoch": 5.289786223277909, - "grad_norm": 0.006108923349529505, - "learning_rate": 0.00019998620124594504, - "loss": 46.0, - "step": 32847 - }, - { - "epoch": 5.289947260356697, - "grad_norm": 0.003643910400569439, - "learning_rate": 0.0001999862004055122, - "loss": 46.0, - "step": 32848 - }, - { - "epoch": 5.290108297435484, - "grad_norm": 0.0015204141382128, - "learning_rate": 0.00019998619956505375, - "loss": 46.0, - "step": 32849 - }, - { - "epoch": 5.290269334514272, - "grad_norm": 0.006978069432079792, - "learning_rate": 0.0001999861987245697, - "loss": 46.0, - "step": 32850 - }, - { - "epoch": 5.290430371593059, - "grad_norm": 0.005346689838916063, - "learning_rate": 0.00019998619788406008, - "loss": 46.0, - "step": 32851 - }, - { - "epoch": 5.290591408671847, - "grad_norm": 0.0023616200778633356, - "learning_rate": 0.0001999861970435249, - "loss": 46.0, - "step": 32852 - }, - { - "epoch": 5.290752445750634, - "grad_norm": 0.002466344041749835, - "learning_rate": 0.00019998619620296407, - "loss": 46.0, - "step": 32853 - }, - { - "epoch": 5.2909134828294215, - "grad_norm": 0.005166475661098957, - "learning_rate": 0.00019998619536237765, - "loss": 46.0, - "step": 32854 - }, - { - "epoch": 5.291074519908209, - "grad_norm": 0.006704852916300297, - "learning_rate": 0.00019998619452176567, - "loss": 46.0, - "step": 32855 - }, - { - "epoch": 5.291235556986996, - "grad_norm": 0.0020694448612630367, - "learning_rate": 0.00019998619368112808, - "loss": 46.0, - "step": 32856 - }, - { - "epoch": 5.291396594065784, - "grad_norm": 0.009188308380544186, - "learning_rate": 0.0001999861928404649, - "loss": 46.0, - "step": 32857 - }, - { - "epoch": 5.291557631144571, - "grad_norm": 0.017101731151342392, - "learning_rate": 0.00019998619199977616, - "loss": 46.0, - "step": 32858 - }, - { - "epoch": 5.291718668223359, - "grad_norm": 0.0055967941880226135, - "learning_rate": 0.00019998619115906178, - "loss": 46.0, - "step": 32859 - }, - { - "epoch": 5.291879705302145, - "grad_norm": 0.005138530861586332, - "learning_rate": 0.00019998619031832184, - "loss": 46.0, - "step": 32860 - }, - { - "epoch": 5.292040742380933, - "grad_norm": 0.006875460501760244, - "learning_rate": 0.0001999861894775563, - "loss": 46.0, - "step": 32861 - }, - { - "epoch": 5.29220177945972, - "grad_norm": 0.0011748223332688212, - "learning_rate": 0.00019998618863676518, - "loss": 46.0, - "step": 32862 - }, - { - "epoch": 5.292362816538508, - "grad_norm": 0.007890964858233929, - "learning_rate": 0.00019998618779594845, - "loss": 46.0, - "step": 32863 - }, - { - "epoch": 5.292523853617295, - "grad_norm": 0.0018381194677203894, - "learning_rate": 0.00019998618695510613, - "loss": 46.0, - "step": 32864 - }, - { - "epoch": 5.292684890696083, - "grad_norm": 0.0024217022582888603, - "learning_rate": 0.00019998618611423823, - "loss": 46.0, - "step": 32865 - }, - { - "epoch": 5.29284592777487, - "grad_norm": 0.002413748065009713, - "learning_rate": 0.0001999861852733447, - "loss": 46.0, - "step": 32866 - }, - { - "epoch": 5.2930069648536575, - "grad_norm": 0.0053440299816429615, - "learning_rate": 0.00019998618443242564, - "loss": 46.0, - "step": 32867 - }, - { - "epoch": 5.293168001932445, - "grad_norm": 0.001821198151446879, - "learning_rate": 0.00019998618359148095, - "loss": 46.0, - "step": 32868 - }, - { - "epoch": 5.293329039011232, - "grad_norm": 0.002211799379438162, - "learning_rate": 0.0001999861827505107, - "loss": 46.0, - "step": 32869 - }, - { - "epoch": 5.29349007609002, - "grad_norm": 0.008944440633058548, - "learning_rate": 0.00019998618190951483, - "loss": 46.0, - "step": 32870 - }, - { - "epoch": 5.293651113168807, - "grad_norm": 0.004079732578247786, - "learning_rate": 0.00019998618106849338, - "loss": 46.0, - "step": 32871 - }, - { - "epoch": 5.293812150247595, - "grad_norm": 0.01905720867216587, - "learning_rate": 0.00019998618022744634, - "loss": 46.0, - "step": 32872 - }, - { - "epoch": 5.293973187326382, - "grad_norm": 0.0019842679612338543, - "learning_rate": 0.0001999861793863737, - "loss": 46.0, - "step": 32873 - }, - { - "epoch": 5.29413422440517, - "grad_norm": 0.005579811055213213, - "learning_rate": 0.00019998617854527547, - "loss": 46.0, - "step": 32874 - }, - { - "epoch": 5.294295261483956, - "grad_norm": 0.007021198980510235, - "learning_rate": 0.00019998617770415167, - "loss": 46.0, - "step": 32875 - }, - { - "epoch": 5.294456298562744, - "grad_norm": 0.0051645333878695965, - "learning_rate": 0.00019998617686300225, - "loss": 46.0, - "step": 32876 - }, - { - "epoch": 5.294617335641531, - "grad_norm": 0.011732934974133968, - "learning_rate": 0.00019998617602182725, - "loss": 46.0, - "step": 32877 - }, - { - "epoch": 5.294778372720319, - "grad_norm": 0.007981189526617527, - "learning_rate": 0.00019998617518062663, - "loss": 46.0, - "step": 32878 - }, - { - "epoch": 5.294939409799106, - "grad_norm": 0.0014728050446137786, - "learning_rate": 0.00019998617433940048, - "loss": 46.0, - "step": 32879 - }, - { - "epoch": 5.2951004468778935, - "grad_norm": 0.023625358939170837, - "learning_rate": 0.0001999861734981487, - "loss": 46.0, - "step": 32880 - }, - { - "epoch": 5.295261483956681, - "grad_norm": 0.003997601568698883, - "learning_rate": 0.0001999861726568713, - "loss": 46.0, - "step": 32881 - }, - { - "epoch": 5.295422521035468, - "grad_norm": 0.006560850888490677, - "learning_rate": 0.00019998617181556837, - "loss": 46.0, - "step": 32882 - }, - { - "epoch": 5.295583558114256, - "grad_norm": 0.0015688257990404963, - "learning_rate": 0.00019998617097423982, - "loss": 46.0, - "step": 32883 - }, - { - "epoch": 5.295744595193043, - "grad_norm": 0.010468300431966782, - "learning_rate": 0.00019998617013288568, - "loss": 46.0, - "step": 32884 - }, - { - "epoch": 5.295905632271831, - "grad_norm": 0.003485786961391568, - "learning_rate": 0.00019998616929150592, - "loss": 46.0, - "step": 32885 - }, - { - "epoch": 5.296066669350618, - "grad_norm": 0.005726935341954231, - "learning_rate": 0.00019998616845010064, - "loss": 46.0, - "step": 32886 - }, - { - "epoch": 5.296227706429406, - "grad_norm": 0.0030403868295252323, - "learning_rate": 0.0001999861676086697, - "loss": 46.0, - "step": 32887 - }, - { - "epoch": 5.296388743508193, - "grad_norm": 0.011329368688166142, - "learning_rate": 0.0001999861667672132, - "loss": 46.0, - "step": 32888 - }, - { - "epoch": 5.296549780586981, - "grad_norm": 0.013742944225668907, - "learning_rate": 0.00019998616592573111, - "loss": 46.0, - "step": 32889 - }, - { - "epoch": 5.296710817665767, - "grad_norm": 0.004545212723314762, - "learning_rate": 0.0001999861650842234, - "loss": 46.0, - "step": 32890 - }, - { - "epoch": 5.296871854744555, - "grad_norm": 0.006464786361902952, - "learning_rate": 0.00019998616424269012, - "loss": 46.0, - "step": 32891 - }, - { - "epoch": 5.297032891823342, - "grad_norm": 0.01642397791147232, - "learning_rate": 0.00019998616340113125, - "loss": 46.0, - "step": 32892 - }, - { - "epoch": 5.2971939289021295, - "grad_norm": 0.002626928733661771, - "learning_rate": 0.0001999861625595468, - "loss": 46.0, - "step": 32893 - }, - { - "epoch": 5.297354965980917, - "grad_norm": 0.003786102868616581, - "learning_rate": 0.00019998616171793674, - "loss": 46.0, - "step": 32894 - }, - { - "epoch": 5.297516003059704, - "grad_norm": 0.017067410051822662, - "learning_rate": 0.00019998616087630108, - "loss": 46.0, - "step": 32895 - }, - { - "epoch": 5.297677040138492, - "grad_norm": 0.0018080538138747215, - "learning_rate": 0.00019998616003463987, - "loss": 46.0, - "step": 32896 - }, - { - "epoch": 5.297838077217279, - "grad_norm": 0.017856232821941376, - "learning_rate": 0.00019998615919295304, - "loss": 46.0, - "step": 32897 - }, - { - "epoch": 5.297999114296067, - "grad_norm": 0.008416845463216305, - "learning_rate": 0.00019998615835124063, - "loss": 46.0, - "step": 32898 - }, - { - "epoch": 5.298160151374854, - "grad_norm": 0.0028227707371115685, - "learning_rate": 0.0001999861575095026, - "loss": 46.0, - "step": 32899 - }, - { - "epoch": 5.298321188453642, - "grad_norm": 0.0019845531787723303, - "learning_rate": 0.000199986156667739, - "loss": 46.0, - "step": 32900 - }, - { - "epoch": 5.298482225532429, - "grad_norm": 0.003557221032679081, - "learning_rate": 0.0001999861558259498, - "loss": 46.0, - "step": 32901 - }, - { - "epoch": 5.2986432626112165, - "grad_norm": 0.013394337147474289, - "learning_rate": 0.00019998615498413504, - "loss": 46.0, - "step": 32902 - }, - { - "epoch": 5.298804299690004, - "grad_norm": 0.011069035157561302, - "learning_rate": 0.00019998615414229466, - "loss": 46.0, - "step": 32903 - }, - { - "epoch": 5.2989653367687914, - "grad_norm": 0.006162405479699373, - "learning_rate": 0.00019998615330042867, - "loss": 46.0, - "step": 32904 - }, - { - "epoch": 5.299126373847578, - "grad_norm": 0.0018614601576700807, - "learning_rate": 0.00019998615245853715, - "loss": 46.0, - "step": 32905 - }, - { - "epoch": 5.2992874109263655, - "grad_norm": 0.024059470742940903, - "learning_rate": 0.00019998615161661998, - "loss": 46.0, - "step": 32906 - }, - { - "epoch": 5.299448448005153, - "grad_norm": 0.005213485099375248, - "learning_rate": 0.00019998615077467723, - "loss": 46.0, - "step": 32907 - }, - { - "epoch": 5.29960948508394, - "grad_norm": 0.0058577093295753, - "learning_rate": 0.0001999861499327089, - "loss": 46.0, - "step": 32908 - }, - { - "epoch": 5.299770522162728, - "grad_norm": 0.003455518279224634, - "learning_rate": 0.00019998614909071499, - "loss": 46.0, - "step": 32909 - }, - { - "epoch": 5.299931559241515, - "grad_norm": 0.0068109589628875256, - "learning_rate": 0.00019998614824869547, - "loss": 46.0, - "step": 32910 - }, - { - "epoch": 5.300092596320303, - "grad_norm": 0.012743260711431503, - "learning_rate": 0.00019998614740665037, - "loss": 46.0, - "step": 32911 - }, - { - "epoch": 5.30025363339909, - "grad_norm": 0.012862837873399258, - "learning_rate": 0.00019998614656457965, - "loss": 46.0, - "step": 32912 - }, - { - "epoch": 5.300414670477878, - "grad_norm": 0.005507150199264288, - "learning_rate": 0.00019998614572248338, - "loss": 46.0, - "step": 32913 - }, - { - "epoch": 5.300575707556665, - "grad_norm": 0.00125038786791265, - "learning_rate": 0.00019998614488036148, - "loss": 46.0, - "step": 32914 - }, - { - "epoch": 5.3007367446354525, - "grad_norm": 0.005946934223175049, - "learning_rate": 0.00019998614403821403, - "loss": 46.0, - "step": 32915 - }, - { - "epoch": 5.30089778171424, - "grad_norm": 0.007258351892232895, - "learning_rate": 0.00019998614319604094, - "loss": 46.0, - "step": 32916 - }, - { - "epoch": 5.301058818793027, - "grad_norm": 0.0035813457798212767, - "learning_rate": 0.0001999861423538423, - "loss": 46.0, - "step": 32917 - }, - { - "epoch": 5.301219855871815, - "grad_norm": 0.011239600367844105, - "learning_rate": 0.00019998614151161805, - "loss": 46.0, - "step": 32918 - }, - { - "epoch": 5.301380892950602, - "grad_norm": 0.002644438063725829, - "learning_rate": 0.0001999861406693682, - "loss": 46.0, - "step": 32919 - }, - { - "epoch": 5.301541930029389, - "grad_norm": 0.006767032667994499, - "learning_rate": 0.0001999861398270928, - "loss": 46.0, - "step": 32920 - }, - { - "epoch": 5.301702967108176, - "grad_norm": 0.0018406646559014916, - "learning_rate": 0.00019998613898479178, - "loss": 46.0, - "step": 32921 - }, - { - "epoch": 5.301864004186964, - "grad_norm": 0.002352233277633786, - "learning_rate": 0.00019998613814246517, - "loss": 46.0, - "step": 32922 - }, - { - "epoch": 5.302025041265751, - "grad_norm": 0.005606109742075205, - "learning_rate": 0.00019998613730011296, - "loss": 46.0, - "step": 32923 - }, - { - "epoch": 5.302186078344539, - "grad_norm": 0.0032901305239647627, - "learning_rate": 0.00019998613645773517, - "loss": 46.0, - "step": 32924 - }, - { - "epoch": 5.302347115423326, - "grad_norm": 0.005535468924790621, - "learning_rate": 0.00019998613561533177, - "loss": 46.0, - "step": 32925 - }, - { - "epoch": 5.302508152502114, - "grad_norm": 0.005546476226300001, - "learning_rate": 0.0001999861347729028, - "loss": 46.0, - "step": 32926 - }, - { - "epoch": 5.302669189580901, - "grad_norm": 0.009324170649051666, - "learning_rate": 0.00019998613393044825, - "loss": 46.0, - "step": 32927 - }, - { - "epoch": 5.3028302266596885, - "grad_norm": 0.0022420398890972137, - "learning_rate": 0.0001999861330879681, - "loss": 46.0, - "step": 32928 - }, - { - "epoch": 5.302991263738476, - "grad_norm": 0.013722854666411877, - "learning_rate": 0.0001999861322454623, - "loss": 46.0, - "step": 32929 - }, - { - "epoch": 5.303152300817263, - "grad_norm": 0.00519983097910881, - "learning_rate": 0.000199986131402931, - "loss": 46.0, - "step": 32930 - }, - { - "epoch": 5.303313337896051, - "grad_norm": 0.001633224543184042, - "learning_rate": 0.00019998613056037404, - "loss": 46.0, - "step": 32931 - }, - { - "epoch": 5.303474374974838, - "grad_norm": 0.004387850873172283, - "learning_rate": 0.00019998612971779153, - "loss": 46.0, - "step": 32932 - }, - { - "epoch": 5.303635412053625, - "grad_norm": 0.007314561866223812, - "learning_rate": 0.00019998612887518343, - "loss": 46.0, - "step": 32933 - }, - { - "epoch": 5.303796449132412, - "grad_norm": 0.021742820739746094, - "learning_rate": 0.00019998612803254974, - "loss": 46.0, - "step": 32934 - }, - { - "epoch": 5.3039574862112, - "grad_norm": 0.004345561843365431, - "learning_rate": 0.0001999861271898904, - "loss": 46.0, - "step": 32935 - }, - { - "epoch": 5.304118523289987, - "grad_norm": 0.012240979820489883, - "learning_rate": 0.00019998612634720552, - "loss": 46.0, - "step": 32936 - }, - { - "epoch": 5.304279560368775, - "grad_norm": 0.0026070650201290846, - "learning_rate": 0.000199986125504495, - "loss": 46.0, - "step": 32937 - }, - { - "epoch": 5.304440597447562, - "grad_norm": 0.01934000663459301, - "learning_rate": 0.00019998612466175898, - "loss": 46.0, - "step": 32938 - }, - { - "epoch": 5.30460163452635, - "grad_norm": 0.018276270478963852, - "learning_rate": 0.0001999861238189973, - "loss": 46.0, - "step": 32939 - }, - { - "epoch": 5.304762671605137, - "grad_norm": 0.010701451450586319, - "learning_rate": 0.00019998612297621006, - "loss": 46.0, - "step": 32940 - }, - { - "epoch": 5.3049237086839245, - "grad_norm": 0.002259456319734454, - "learning_rate": 0.0001999861221333972, - "loss": 46.0, - "step": 32941 - }, - { - "epoch": 5.305084745762712, - "grad_norm": 0.0013941318029537797, - "learning_rate": 0.00019998612129055877, - "loss": 46.0, - "step": 32942 - }, - { - "epoch": 5.305245782841499, - "grad_norm": 0.011542130261659622, - "learning_rate": 0.00019998612044769474, - "loss": 46.0, - "step": 32943 - }, - { - "epoch": 5.305406819920287, - "grad_norm": 0.005293803755193949, - "learning_rate": 0.00019998611960480512, - "loss": 46.0, - "step": 32944 - }, - { - "epoch": 5.305567856999074, - "grad_norm": 0.0014259928138926625, - "learning_rate": 0.0001999861187618899, - "loss": 46.0, - "step": 32945 - }, - { - "epoch": 5.305728894077862, - "grad_norm": 0.0016877994639798999, - "learning_rate": 0.0001999861179189491, - "loss": 46.0, - "step": 32946 - }, - { - "epoch": 5.305889931156649, - "grad_norm": 0.0017205652548000216, - "learning_rate": 0.0001999861170759827, - "loss": 46.0, - "step": 32947 - }, - { - "epoch": 5.306050968235436, - "grad_norm": 0.0020924003329128027, - "learning_rate": 0.00019998611623299074, - "loss": 46.0, - "step": 32948 - }, - { - "epoch": 5.306212005314223, - "grad_norm": 0.003963875584304333, - "learning_rate": 0.00019998611538997314, - "loss": 46.0, - "step": 32949 - }, - { - "epoch": 5.306373042393011, - "grad_norm": 0.0038015092723071575, - "learning_rate": 0.00019998611454693, - "loss": 46.0, - "step": 32950 - }, - { - "epoch": 5.306534079471798, - "grad_norm": 0.0036802717950195074, - "learning_rate": 0.00019998611370386122, - "loss": 46.0, - "step": 32951 - }, - { - "epoch": 5.306695116550586, - "grad_norm": 0.006565919611603022, - "learning_rate": 0.00019998611286076688, - "loss": 46.0, - "step": 32952 - }, - { - "epoch": 5.306856153629373, - "grad_norm": 0.003474402241408825, - "learning_rate": 0.00019998611201764695, - "loss": 46.0, - "step": 32953 - }, - { - "epoch": 5.3070171907081605, - "grad_norm": 0.006863272748887539, - "learning_rate": 0.0001999861111745014, - "loss": 46.0, - "step": 32954 - }, - { - "epoch": 5.307178227786948, - "grad_norm": 0.0032033929601311684, - "learning_rate": 0.00019998611033133026, - "loss": 46.0, - "step": 32955 - }, - { - "epoch": 5.307339264865735, - "grad_norm": 0.004401786718517542, - "learning_rate": 0.00019998610948813354, - "loss": 46.0, - "step": 32956 - }, - { - "epoch": 5.307500301944523, - "grad_norm": 0.002599339233711362, - "learning_rate": 0.00019998610864491124, - "loss": 46.0, - "step": 32957 - }, - { - "epoch": 5.30766133902331, - "grad_norm": 0.0025066251400858164, - "learning_rate": 0.00019998610780166333, - "loss": 46.0, - "step": 32958 - }, - { - "epoch": 5.307822376102098, - "grad_norm": 0.007181468885391951, - "learning_rate": 0.00019998610695838985, - "loss": 46.0, - "step": 32959 - }, - { - "epoch": 5.307983413180885, - "grad_norm": 0.0015719872899353504, - "learning_rate": 0.00019998610611509076, - "loss": 46.0, - "step": 32960 - }, - { - "epoch": 5.308144450259673, - "grad_norm": 0.003023274941369891, - "learning_rate": 0.0001999861052717661, - "loss": 46.0, - "step": 32961 - }, - { - "epoch": 5.30830548733846, - "grad_norm": 0.004649336915463209, - "learning_rate": 0.00019998610442841584, - "loss": 46.0, - "step": 32962 - }, - { - "epoch": 5.308466524417247, - "grad_norm": 0.007514502853155136, - "learning_rate": 0.00019998610358503997, - "loss": 46.0, - "step": 32963 - }, - { - "epoch": 5.308627561496034, - "grad_norm": 0.0021884969901293516, - "learning_rate": 0.00019998610274163853, - "loss": 46.0, - "step": 32964 - }, - { - "epoch": 5.3087885985748215, - "grad_norm": 0.0021241442300379276, - "learning_rate": 0.00019998610189821153, - "loss": 46.0, - "step": 32965 - }, - { - "epoch": 5.308949635653609, - "grad_norm": 0.021784642711281776, - "learning_rate": 0.0001999861010547589, - "loss": 46.0, - "step": 32966 - }, - { - "epoch": 5.3091106727323965, - "grad_norm": 0.0021011612843722105, - "learning_rate": 0.00019998610021128066, - "loss": 46.0, - "step": 32967 - }, - { - "epoch": 5.309271709811184, - "grad_norm": 0.005242939572781324, - "learning_rate": 0.00019998609936777687, - "loss": 46.0, - "step": 32968 - }, - { - "epoch": 5.309432746889971, - "grad_norm": 0.013063615188002586, - "learning_rate": 0.00019998609852424744, - "loss": 46.0, - "step": 32969 - }, - { - "epoch": 5.309593783968759, - "grad_norm": 0.005171630997210741, - "learning_rate": 0.00019998609768069245, - "loss": 46.0, - "step": 32970 - }, - { - "epoch": 5.309754821047546, - "grad_norm": 0.001569027896039188, - "learning_rate": 0.00019998609683711188, - "loss": 46.0, - "step": 32971 - }, - { - "epoch": 5.309915858126334, - "grad_norm": 0.010841806419193745, - "learning_rate": 0.00019998609599350569, - "loss": 46.0, - "step": 32972 - }, - { - "epoch": 5.310076895205121, - "grad_norm": 0.0023207219783216715, - "learning_rate": 0.0001999860951498739, - "loss": 46.0, - "step": 32973 - }, - { - "epoch": 5.310237932283909, - "grad_norm": 0.002600132254883647, - "learning_rate": 0.00019998609430621657, - "loss": 46.0, - "step": 32974 - }, - { - "epoch": 5.310398969362696, - "grad_norm": 0.007160381879657507, - "learning_rate": 0.00019998609346253362, - "loss": 46.0, - "step": 32975 - }, - { - "epoch": 5.3105600064414835, - "grad_norm": 0.0028567477129399776, - "learning_rate": 0.00019998609261882505, - "loss": 46.0, - "step": 32976 - }, - { - "epoch": 5.310721043520271, - "grad_norm": 0.005168521776795387, - "learning_rate": 0.00019998609177509092, - "loss": 46.0, - "step": 32977 - }, - { - "epoch": 5.3108820805990575, - "grad_norm": 0.006747628096491098, - "learning_rate": 0.0001999860909313312, - "loss": 46.0, - "step": 32978 - }, - { - "epoch": 5.311043117677845, - "grad_norm": 0.021432820707559586, - "learning_rate": 0.0001999860900875459, - "loss": 46.0, - "step": 32979 - }, - { - "epoch": 5.311204154756632, - "grad_norm": 0.0060675800777971745, - "learning_rate": 0.000199986089243735, - "loss": 46.0, - "step": 32980 - }, - { - "epoch": 5.31136519183542, - "grad_norm": 0.001670889905653894, - "learning_rate": 0.00019998608839989846, - "loss": 46.0, - "step": 32981 - }, - { - "epoch": 5.311526228914207, - "grad_norm": 0.0027938117273151875, - "learning_rate": 0.00019998608755603637, - "loss": 46.0, - "step": 32982 - }, - { - "epoch": 5.311687265992995, - "grad_norm": 0.002398216864094138, - "learning_rate": 0.00019998608671214872, - "loss": 46.0, - "step": 32983 - }, - { - "epoch": 5.311848303071782, - "grad_norm": 0.0028288995381444693, - "learning_rate": 0.00019998608586823543, - "loss": 46.0, - "step": 32984 - }, - { - "epoch": 5.31200934015057, - "grad_norm": 0.008504693396389484, - "learning_rate": 0.00019998608502429658, - "loss": 46.0, - "step": 32985 - }, - { - "epoch": 5.312170377229357, - "grad_norm": 0.0044979010708630085, - "learning_rate": 0.00019998608418033214, - "loss": 46.0, - "step": 32986 - }, - { - "epoch": 5.312331414308145, - "grad_norm": 0.0017711810069158673, - "learning_rate": 0.00019998608333634206, - "loss": 46.0, - "step": 32987 - }, - { - "epoch": 5.312492451386932, - "grad_norm": 0.0012784923892468214, - "learning_rate": 0.00019998608249232645, - "loss": 46.0, - "step": 32988 - }, - { - "epoch": 5.3126534884657195, - "grad_norm": 0.002647585002705455, - "learning_rate": 0.0001999860816482852, - "loss": 46.0, - "step": 32989 - }, - { - "epoch": 5.312814525544507, - "grad_norm": 0.0034546919632703066, - "learning_rate": 0.00019998608080421838, - "loss": 46.0, - "step": 32990 - }, - { - "epoch": 5.312975562623294, - "grad_norm": 0.002908692928031087, - "learning_rate": 0.00019998607996012598, - "loss": 46.0, - "step": 32991 - }, - { - "epoch": 5.313136599702082, - "grad_norm": 0.0047160726971924305, - "learning_rate": 0.00019998607911600797, - "loss": 46.0, - "step": 32992 - }, - { - "epoch": 5.313297636780868, - "grad_norm": 0.003456257516518235, - "learning_rate": 0.00019998607827186436, - "loss": 46.0, - "step": 32993 - }, - { - "epoch": 5.313458673859656, - "grad_norm": 0.0018826433224603534, - "learning_rate": 0.0001999860774276952, - "loss": 46.0, - "step": 32994 - }, - { - "epoch": 5.313619710938443, - "grad_norm": 0.0027917560655623674, - "learning_rate": 0.00019998607658350043, - "loss": 46.0, - "step": 32995 - }, - { - "epoch": 5.313780748017231, - "grad_norm": 0.0027280922513455153, - "learning_rate": 0.00019998607573928006, - "loss": 46.0, - "step": 32996 - }, - { - "epoch": 5.313941785096018, - "grad_norm": 0.004896465688943863, - "learning_rate": 0.00019998607489503408, - "loss": 46.0, - "step": 32997 - }, - { - "epoch": 5.314102822174806, - "grad_norm": 0.003834158182144165, - "learning_rate": 0.00019998607405076254, - "loss": 46.0, - "step": 32998 - }, - { - "epoch": 5.314263859253593, - "grad_norm": 0.0033229454420506954, - "learning_rate": 0.00019998607320646542, - "loss": 46.0, - "step": 32999 - }, - { - "epoch": 5.314424896332381, - "grad_norm": 0.002223525894805789, - "learning_rate": 0.00019998607236214268, - "loss": 46.0, - "step": 33000 - }, - { - "epoch": 5.314585933411168, - "grad_norm": 0.0014318267349153757, - "learning_rate": 0.00019998607151779435, - "loss": 46.0, - "step": 33001 - }, - { - "epoch": 5.3147469704899555, - "grad_norm": 0.002659878693521023, - "learning_rate": 0.00019998607067342044, - "loss": 46.0, - "step": 33002 - }, - { - "epoch": 5.314908007568743, - "grad_norm": 0.008306640200316906, - "learning_rate": 0.00019998606982902094, - "loss": 46.0, - "step": 33003 - }, - { - "epoch": 5.31506904464753, - "grad_norm": 0.0023147037718445063, - "learning_rate": 0.00019998606898459582, - "loss": 46.0, - "step": 33004 - }, - { - "epoch": 5.315230081726318, - "grad_norm": 0.006360264029353857, - "learning_rate": 0.00019998606814014514, - "loss": 46.0, - "step": 33005 - }, - { - "epoch": 5.315391118805105, - "grad_norm": 0.00375958695076406, - "learning_rate": 0.00019998606729566885, - "loss": 46.0, - "step": 33006 - }, - { - "epoch": 5.315552155883893, - "grad_norm": 0.008646082133054733, - "learning_rate": 0.00019998606645116698, - "loss": 46.0, - "step": 33007 - }, - { - "epoch": 5.315713192962679, - "grad_norm": 0.007140029687434435, - "learning_rate": 0.0001999860656066395, - "loss": 46.0, - "step": 33008 - }, - { - "epoch": 5.315874230041467, - "grad_norm": 0.0014294757274910808, - "learning_rate": 0.00019998606476208646, - "loss": 46.0, - "step": 33009 - }, - { - "epoch": 5.316035267120254, - "grad_norm": 0.009339352138340473, - "learning_rate": 0.00019998606391750782, - "loss": 46.0, - "step": 33010 - }, - { - "epoch": 5.316196304199042, - "grad_norm": 0.005555547308176756, - "learning_rate": 0.00019998606307290357, - "loss": 46.0, - "step": 33011 - }, - { - "epoch": 5.316357341277829, - "grad_norm": 0.0038346245419234037, - "learning_rate": 0.00019998606222827373, - "loss": 46.0, - "step": 33012 - }, - { - "epoch": 5.316518378356617, - "grad_norm": 0.00588664785027504, - "learning_rate": 0.00019998606138361833, - "loss": 46.0, - "step": 33013 - }, - { - "epoch": 5.316679415435404, - "grad_norm": 0.001739988918416202, - "learning_rate": 0.0001999860605389373, - "loss": 46.0, - "step": 33014 - }, - { - "epoch": 5.3168404525141915, - "grad_norm": 0.001739312196150422, - "learning_rate": 0.0001999860596942307, - "loss": 46.0, - "step": 33015 - }, - { - "epoch": 5.317001489592979, - "grad_norm": 0.008644703775644302, - "learning_rate": 0.00019998605884949852, - "loss": 46.0, - "step": 33016 - }, - { - "epoch": 5.317162526671766, - "grad_norm": 0.006621269043534994, - "learning_rate": 0.00019998605800474075, - "loss": 46.0, - "step": 33017 - }, - { - "epoch": 5.317323563750554, - "grad_norm": 0.0023716334253549576, - "learning_rate": 0.00019998605715995736, - "loss": 46.0, - "step": 33018 - }, - { - "epoch": 5.317484600829341, - "grad_norm": 0.007661569397896528, - "learning_rate": 0.00019998605631514838, - "loss": 46.0, - "step": 33019 - }, - { - "epoch": 5.317645637908129, - "grad_norm": 0.00661576958373189, - "learning_rate": 0.00019998605547031381, - "loss": 46.0, - "step": 33020 - }, - { - "epoch": 5.317806674986915, - "grad_norm": 0.007498806342482567, - "learning_rate": 0.0001999860546254537, - "loss": 46.0, - "step": 33021 - }, - { - "epoch": 5.317967712065703, - "grad_norm": 0.013822809793055058, - "learning_rate": 0.00019998605378056795, - "loss": 46.0, - "step": 33022 - }, - { - "epoch": 5.31812874914449, - "grad_norm": 0.00567164970561862, - "learning_rate": 0.00019998605293565662, - "loss": 46.0, - "step": 33023 - }, - { - "epoch": 5.318289786223278, - "grad_norm": 0.0007239525439217687, - "learning_rate": 0.00019998605209071968, - "loss": 46.0, - "step": 33024 - }, - { - "epoch": 5.318450823302065, - "grad_norm": 0.0006108058732934296, - "learning_rate": 0.00019998605124575716, - "loss": 46.0, - "step": 33025 - }, - { - "epoch": 5.3186118603808525, - "grad_norm": 0.007186190225183964, - "learning_rate": 0.00019998605040076904, - "loss": 46.0, - "step": 33026 - }, - { - "epoch": 5.31877289745964, - "grad_norm": 0.003068361897021532, - "learning_rate": 0.00019998604955575537, - "loss": 46.0, - "step": 33027 - }, - { - "epoch": 5.318933934538427, - "grad_norm": 0.0020151156932115555, - "learning_rate": 0.00019998604871071608, - "loss": 46.0, - "step": 33028 - }, - { - "epoch": 5.319094971617215, - "grad_norm": 0.0015516963321715593, - "learning_rate": 0.00019998604786565117, - "loss": 46.0, - "step": 33029 - }, - { - "epoch": 5.319256008696002, - "grad_norm": 0.004806454759091139, - "learning_rate": 0.0001999860470205607, - "loss": 46.0, - "step": 33030 - }, - { - "epoch": 5.31941704577479, - "grad_norm": 0.012646634131669998, - "learning_rate": 0.00019998604617544466, - "loss": 46.0, - "step": 33031 - }, - { - "epoch": 5.319578082853577, - "grad_norm": 0.004600579384714365, - "learning_rate": 0.000199986045330303, - "loss": 46.0, - "step": 33032 - }, - { - "epoch": 5.319739119932365, - "grad_norm": 0.0011752629652619362, - "learning_rate": 0.00019998604448513577, - "loss": 46.0, - "step": 33033 - }, - { - "epoch": 5.319900157011152, - "grad_norm": 0.013503649272024632, - "learning_rate": 0.0001999860436399429, - "loss": 46.0, - "step": 33034 - }, - { - "epoch": 5.32006119408994, - "grad_norm": 0.0034524749498814344, - "learning_rate": 0.00019998604279472448, - "loss": 46.0, - "step": 33035 - }, - { - "epoch": 5.320222231168726, - "grad_norm": 0.006999840494245291, - "learning_rate": 0.00019998604194948046, - "loss": 46.0, - "step": 33036 - }, - { - "epoch": 5.320383268247514, - "grad_norm": 0.002707788487896323, - "learning_rate": 0.00019998604110421083, - "loss": 46.0, - "step": 33037 - }, - { - "epoch": 5.320544305326301, - "grad_norm": 0.019953666254878044, - "learning_rate": 0.00019998604025891565, - "loss": 46.0, - "step": 33038 - }, - { - "epoch": 5.3207053424050885, - "grad_norm": 0.001057117129676044, - "learning_rate": 0.00019998603941359487, - "loss": 46.0, - "step": 33039 - }, - { - "epoch": 5.320866379483876, - "grad_norm": 0.0037325371522456408, - "learning_rate": 0.00019998603856824848, - "loss": 46.0, - "step": 33040 - }, - { - "epoch": 5.321027416562663, - "grad_norm": 0.004492900334298611, - "learning_rate": 0.0001999860377228765, - "loss": 46.0, - "step": 33041 - }, - { - "epoch": 5.321188453641451, - "grad_norm": 0.0019144448451697826, - "learning_rate": 0.00019998603687747891, - "loss": 46.0, - "step": 33042 - }, - { - "epoch": 5.321349490720238, - "grad_norm": 0.0017227960051968694, - "learning_rate": 0.00019998603603205576, - "loss": 46.0, - "step": 33043 - }, - { - "epoch": 5.321510527799026, - "grad_norm": 0.0015465219039469957, - "learning_rate": 0.00019998603518660702, - "loss": 46.0, - "step": 33044 - }, - { - "epoch": 5.321671564877813, - "grad_norm": 0.003507799468934536, - "learning_rate": 0.00019998603434113267, - "loss": 46.0, - "step": 33045 - }, - { - "epoch": 5.321832601956601, - "grad_norm": 0.0031565867830067873, - "learning_rate": 0.00019998603349563273, - "loss": 46.0, - "step": 33046 - }, - { - "epoch": 5.321993639035388, - "grad_norm": 0.014228135347366333, - "learning_rate": 0.0001999860326501072, - "loss": 46.0, - "step": 33047 - }, - { - "epoch": 5.322154676114176, - "grad_norm": 0.003176738740876317, - "learning_rate": 0.00019998603180455612, - "loss": 46.0, - "step": 33048 - }, - { - "epoch": 5.322315713192963, - "grad_norm": 0.007489599287509918, - "learning_rate": 0.0001999860309589794, - "loss": 46.0, - "step": 33049 - }, - { - "epoch": 5.3224767502717505, - "grad_norm": 0.006652691401541233, - "learning_rate": 0.00019998603011337708, - "loss": 46.0, - "step": 33050 - }, - { - "epoch": 5.322637787350537, - "grad_norm": 0.003964683040976524, - "learning_rate": 0.0001999860292677492, - "loss": 46.0, - "step": 33051 - }, - { - "epoch": 5.3227988244293245, - "grad_norm": 0.011925505474209785, - "learning_rate": 0.00019998602842209574, - "loss": 46.0, - "step": 33052 - }, - { - "epoch": 5.322959861508112, - "grad_norm": 0.0011837847996503115, - "learning_rate": 0.00019998602757641666, - "loss": 46.0, - "step": 33053 - }, - { - "epoch": 5.323120898586899, - "grad_norm": 0.005415219347923994, - "learning_rate": 0.000199986026730712, - "loss": 46.0, - "step": 33054 - }, - { - "epoch": 5.323281935665687, - "grad_norm": 0.00490936916321516, - "learning_rate": 0.00019998602588498172, - "loss": 46.0, - "step": 33055 - }, - { - "epoch": 5.323442972744474, - "grad_norm": 0.0021279919892549515, - "learning_rate": 0.0001999860250392259, - "loss": 46.0, - "step": 33056 - }, - { - "epoch": 5.323604009823262, - "grad_norm": 0.0008878706721588969, - "learning_rate": 0.00019998602419344445, - "loss": 46.0, - "step": 33057 - }, - { - "epoch": 5.323765046902049, - "grad_norm": 0.005314687266945839, - "learning_rate": 0.00019998602334763744, - "loss": 46.0, - "step": 33058 - }, - { - "epoch": 5.323926083980837, - "grad_norm": 0.0029676822014153004, - "learning_rate": 0.00019998602250180481, - "loss": 46.0, - "step": 33059 - }, - { - "epoch": 5.324087121059624, - "grad_norm": 0.003534886287525296, - "learning_rate": 0.0001999860216559466, - "loss": 46.0, - "step": 33060 - }, - { - "epoch": 5.324248158138412, - "grad_norm": 0.0021425921004265547, - "learning_rate": 0.0001999860208100628, - "loss": 46.0, - "step": 33061 - }, - { - "epoch": 5.324409195217199, - "grad_norm": 0.006580045446753502, - "learning_rate": 0.0001999860199641534, - "loss": 46.0, - "step": 33062 - }, - { - "epoch": 5.3245702322959865, - "grad_norm": 0.005584504920989275, - "learning_rate": 0.0001999860191182184, - "loss": 46.0, - "step": 33063 - }, - { - "epoch": 5.324731269374774, - "grad_norm": 0.0016489874105900526, - "learning_rate": 0.00019998601827225784, - "loss": 46.0, - "step": 33064 - }, - { - "epoch": 5.324892306453561, - "grad_norm": 0.007520326413214207, - "learning_rate": 0.00019998601742627167, - "loss": 46.0, - "step": 33065 - }, - { - "epoch": 5.325053343532348, - "grad_norm": 0.001698884880170226, - "learning_rate": 0.0001999860165802599, - "loss": 46.0, - "step": 33066 - }, - { - "epoch": 5.325214380611135, - "grad_norm": 0.007266270462423563, - "learning_rate": 0.00019998601573422255, - "loss": 46.0, - "step": 33067 - }, - { - "epoch": 5.325375417689923, - "grad_norm": 0.001486351597122848, - "learning_rate": 0.0001999860148881596, - "loss": 46.0, - "step": 33068 - }, - { - "epoch": 5.32553645476871, - "grad_norm": 0.010560727678239346, - "learning_rate": 0.00019998601404207106, - "loss": 46.0, - "step": 33069 - }, - { - "epoch": 5.325697491847498, - "grad_norm": 0.0055545661598443985, - "learning_rate": 0.00019998601319595695, - "loss": 46.0, - "step": 33070 - }, - { - "epoch": 5.325858528926285, - "grad_norm": 0.010793630965054035, - "learning_rate": 0.00019998601234981725, - "loss": 46.0, - "step": 33071 - }, - { - "epoch": 5.326019566005073, - "grad_norm": 0.003891882486641407, - "learning_rate": 0.00019998601150365196, - "loss": 46.0, - "step": 33072 - }, - { - "epoch": 5.32618060308386, - "grad_norm": 0.002866546157747507, - "learning_rate": 0.00019998601065746106, - "loss": 46.0, - "step": 33073 - }, - { - "epoch": 5.3263416401626476, - "grad_norm": 0.002724231919273734, - "learning_rate": 0.00019998600981124457, - "loss": 46.0, - "step": 33074 - }, - { - "epoch": 5.326502677241435, - "grad_norm": 0.015910878777503967, - "learning_rate": 0.0001999860089650025, - "loss": 46.0, - "step": 33075 - }, - { - "epoch": 5.3266637143202225, - "grad_norm": 0.00824980903416872, - "learning_rate": 0.0001999860081187348, - "loss": 46.0, - "step": 33076 - }, - { - "epoch": 5.32682475139901, - "grad_norm": 0.0013140970841050148, - "learning_rate": 0.00019998600727244155, - "loss": 46.0, - "step": 33077 - }, - { - "epoch": 5.326985788477797, - "grad_norm": 0.0077692368067801, - "learning_rate": 0.0001999860064261227, - "loss": 46.0, - "step": 33078 - }, - { - "epoch": 5.327146825556585, - "grad_norm": 0.008483261801302433, - "learning_rate": 0.00019998600557977824, - "loss": 46.0, - "step": 33079 - }, - { - "epoch": 5.327307862635372, - "grad_norm": 0.003456692909821868, - "learning_rate": 0.0001999860047334082, - "loss": 46.0, - "step": 33080 - }, - { - "epoch": 5.327468899714159, - "grad_norm": 0.004828238859772682, - "learning_rate": 0.0001999860038870126, - "loss": 46.0, - "step": 33081 - }, - { - "epoch": 5.327629936792946, - "grad_norm": 0.003765688044950366, - "learning_rate": 0.00019998600304059133, - "loss": 46.0, - "step": 33082 - }, - { - "epoch": 5.327790973871734, - "grad_norm": 0.012493998743593693, - "learning_rate": 0.00019998600219414453, - "loss": 46.0, - "step": 33083 - }, - { - "epoch": 5.327952010950521, - "grad_norm": 0.0010309262434020638, - "learning_rate": 0.00019998600134767214, - "loss": 46.0, - "step": 33084 - }, - { - "epoch": 5.328113048029309, - "grad_norm": 0.022467877715826035, - "learning_rate": 0.00019998600050117414, - "loss": 46.0, - "step": 33085 - }, - { - "epoch": 5.328274085108096, - "grad_norm": 0.0011528560426086187, - "learning_rate": 0.00019998599965465058, - "loss": 46.0, - "step": 33086 - }, - { - "epoch": 5.3284351221868835, - "grad_norm": 0.005552551243454218, - "learning_rate": 0.00019998599880810138, - "loss": 46.0, - "step": 33087 - }, - { - "epoch": 5.328596159265671, - "grad_norm": 0.006712593603879213, - "learning_rate": 0.0001999859979615266, - "loss": 46.0, - "step": 33088 - }, - { - "epoch": 5.328757196344458, - "grad_norm": 0.003801204962655902, - "learning_rate": 0.00019998599711492626, - "loss": 46.0, - "step": 33089 - }, - { - "epoch": 5.328918233423246, - "grad_norm": 0.016428135335445404, - "learning_rate": 0.0001999859962683003, - "loss": 46.0, - "step": 33090 - }, - { - "epoch": 5.329079270502033, - "grad_norm": 0.006544114556163549, - "learning_rate": 0.00019998599542164874, - "loss": 46.0, - "step": 33091 - }, - { - "epoch": 5.329240307580821, - "grad_norm": 0.004577435087412596, - "learning_rate": 0.00019998599457497163, - "loss": 46.0, - "step": 33092 - }, - { - "epoch": 5.329401344659608, - "grad_norm": 0.004981053061783314, - "learning_rate": 0.0001999859937282689, - "loss": 46.0, - "step": 33093 - }, - { - "epoch": 5.329562381738395, - "grad_norm": 0.0037103891372680664, - "learning_rate": 0.0001999859928815406, - "loss": 46.0, - "step": 33094 - }, - { - "epoch": 5.329723418817182, - "grad_norm": 0.014178406447172165, - "learning_rate": 0.0001999859920347867, - "loss": 46.0, - "step": 33095 - }, - { - "epoch": 5.32988445589597, - "grad_norm": 0.0023808954283595085, - "learning_rate": 0.00019998599118800718, - "loss": 46.0, - "step": 33096 - }, - { - "epoch": 5.330045492974757, - "grad_norm": 0.0042678555473685265, - "learning_rate": 0.0001999859903412021, - "loss": 46.0, - "step": 33097 - }, - { - "epoch": 5.330206530053545, - "grad_norm": 0.009074622765183449, - "learning_rate": 0.0001999859894943714, - "loss": 46.0, - "step": 33098 - }, - { - "epoch": 5.330367567132332, - "grad_norm": 0.0025333582889288664, - "learning_rate": 0.00019998598864751514, - "loss": 46.0, - "step": 33099 - }, - { - "epoch": 5.3305286042111195, - "grad_norm": 0.008307427167892456, - "learning_rate": 0.0001999859878006333, - "loss": 46.0, - "step": 33100 - }, - { - "epoch": 5.330689641289907, - "grad_norm": 0.015622492879629135, - "learning_rate": 0.00019998598695372582, - "loss": 46.0, - "step": 33101 - }, - { - "epoch": 5.330850678368694, - "grad_norm": 0.0032494382467120886, - "learning_rate": 0.0001999859861067928, - "loss": 46.0, - "step": 33102 - }, - { - "epoch": 5.331011715447482, - "grad_norm": 0.00774772185832262, - "learning_rate": 0.00019998598525983416, - "loss": 46.0, - "step": 33103 - }, - { - "epoch": 5.331172752526269, - "grad_norm": 0.009939687326550484, - "learning_rate": 0.00019998598441284992, - "loss": 46.0, - "step": 33104 - }, - { - "epoch": 5.331333789605057, - "grad_norm": 0.0057042501866817474, - "learning_rate": 0.00019998598356584012, - "loss": 46.0, - "step": 33105 - }, - { - "epoch": 5.331494826683844, - "grad_norm": 0.010075083002448082, - "learning_rate": 0.00019998598271880468, - "loss": 46.0, - "step": 33106 - }, - { - "epoch": 5.331655863762632, - "grad_norm": 0.0013022555503994226, - "learning_rate": 0.00019998598187174368, - "loss": 46.0, - "step": 33107 - }, - { - "epoch": 5.331816900841419, - "grad_norm": 0.003510661656036973, - "learning_rate": 0.00019998598102465712, - "loss": 46.0, - "step": 33108 - }, - { - "epoch": 5.331977937920206, - "grad_norm": 0.002145532751455903, - "learning_rate": 0.00019998598017754491, - "loss": 46.0, - "step": 33109 - }, - { - "epoch": 5.332138974998993, - "grad_norm": 0.0016985288821160793, - "learning_rate": 0.00019998597933040715, - "loss": 46.0, - "step": 33110 - }, - { - "epoch": 5.332300012077781, - "grad_norm": 0.003935207147151232, - "learning_rate": 0.00019998597848324377, - "loss": 46.0, - "step": 33111 - }, - { - "epoch": 5.332461049156568, - "grad_norm": 0.005572963505983353, - "learning_rate": 0.0001999859776360548, - "loss": 46.0, - "step": 33112 - }, - { - "epoch": 5.3326220862353555, - "grad_norm": 0.006349101662635803, - "learning_rate": 0.00019998597678884026, - "loss": 46.0, - "step": 33113 - }, - { - "epoch": 5.332783123314143, - "grad_norm": 0.006011417135596275, - "learning_rate": 0.00019998597594160012, - "loss": 46.0, - "step": 33114 - }, - { - "epoch": 5.33294416039293, - "grad_norm": 0.004160599783062935, - "learning_rate": 0.0001999859750943344, - "loss": 46.0, - "step": 33115 - }, - { - "epoch": 5.333105197471718, - "grad_norm": 0.007534564007073641, - "learning_rate": 0.00019998597424704308, - "loss": 46.0, - "step": 33116 - }, - { - "epoch": 5.333266234550505, - "grad_norm": 0.010425425134599209, - "learning_rate": 0.00019998597339972616, - "loss": 46.0, - "step": 33117 - }, - { - "epoch": 5.333427271629293, - "grad_norm": 0.01217585802078247, - "learning_rate": 0.00019998597255238364, - "loss": 46.0, - "step": 33118 - }, - { - "epoch": 5.33358830870808, - "grad_norm": 0.006327691487967968, - "learning_rate": 0.00019998597170501557, - "loss": 46.0, - "step": 33119 - }, - { - "epoch": 5.333749345786868, - "grad_norm": 0.012276839464902878, - "learning_rate": 0.00019998597085762185, - "loss": 46.0, - "step": 33120 - }, - { - "epoch": 5.333910382865655, - "grad_norm": 0.0010991720482707024, - "learning_rate": 0.00019998597001020258, - "loss": 46.0, - "step": 33121 - }, - { - "epoch": 5.334071419944443, - "grad_norm": 0.0010855909204110503, - "learning_rate": 0.00019998596916275771, - "loss": 46.0, - "step": 33122 - }, - { - "epoch": 5.33423245702323, - "grad_norm": 0.0035561115946620703, - "learning_rate": 0.00019998596831528724, - "loss": 46.0, - "step": 33123 - }, - { - "epoch": 5.334393494102017, - "grad_norm": 0.0018690096912905574, - "learning_rate": 0.0001999859674677912, - "loss": 46.0, - "step": 33124 - }, - { - "epoch": 5.334554531180804, - "grad_norm": 0.001971221761777997, - "learning_rate": 0.00019998596662026955, - "loss": 46.0, - "step": 33125 - }, - { - "epoch": 5.3347155682595915, - "grad_norm": 0.0030504302121698856, - "learning_rate": 0.00019998596577272234, - "loss": 46.0, - "step": 33126 - }, - { - "epoch": 5.334876605338379, - "grad_norm": 0.002618497470393777, - "learning_rate": 0.00019998596492514948, - "loss": 46.0, - "step": 33127 - }, - { - "epoch": 5.335037642417166, - "grad_norm": 0.0011711036786437035, - "learning_rate": 0.00019998596407755107, - "loss": 46.0, - "step": 33128 - }, - { - "epoch": 5.335198679495954, - "grad_norm": 0.02571200393140316, - "learning_rate": 0.00019998596322992707, - "loss": 46.0, - "step": 33129 - }, - { - "epoch": 5.335359716574741, - "grad_norm": 0.00836899597197771, - "learning_rate": 0.00019998596238227746, - "loss": 46.0, - "step": 33130 - }, - { - "epoch": 5.335520753653529, - "grad_norm": 0.0061898063868284225, - "learning_rate": 0.00019998596153460228, - "loss": 46.0, - "step": 33131 - }, - { - "epoch": 5.335681790732316, - "grad_norm": 0.006214721594005823, - "learning_rate": 0.0001999859606869015, - "loss": 46.0, - "step": 33132 - }, - { - "epoch": 5.335842827811104, - "grad_norm": 0.003894099500030279, - "learning_rate": 0.00019998595983917512, - "loss": 46.0, - "step": 33133 - }, - { - "epoch": 5.336003864889891, - "grad_norm": 0.0030999884475022554, - "learning_rate": 0.00019998595899142315, - "loss": 46.0, - "step": 33134 - }, - { - "epoch": 5.3361649019686785, - "grad_norm": 0.008983098901808262, - "learning_rate": 0.00019998595814364558, - "loss": 46.0, - "step": 33135 - }, - { - "epoch": 5.336325939047466, - "grad_norm": 0.015418984927237034, - "learning_rate": 0.00019998595729584244, - "loss": 46.0, - "step": 33136 - }, - { - "epoch": 5.3364869761262534, - "grad_norm": 0.0032537744846194983, - "learning_rate": 0.0001999859564480137, - "loss": 46.0, - "step": 33137 - }, - { - "epoch": 5.336648013205041, - "grad_norm": 0.001825700979679823, - "learning_rate": 0.00019998595560015935, - "loss": 46.0, - "step": 33138 - }, - { - "epoch": 5.3368090502838275, - "grad_norm": 0.0013571919407695532, - "learning_rate": 0.00019998595475227945, - "loss": 46.0, - "step": 33139 - }, - { - "epoch": 5.336970087362615, - "grad_norm": 0.00876959040760994, - "learning_rate": 0.00019998595390437394, - "loss": 46.0, - "step": 33140 - }, - { - "epoch": 5.337131124441402, - "grad_norm": 0.002108845626935363, - "learning_rate": 0.00019998595305644284, - "loss": 46.0, - "step": 33141 - }, - { - "epoch": 5.33729216152019, - "grad_norm": 0.01445204671472311, - "learning_rate": 0.00019998595220848615, - "loss": 46.0, - "step": 33142 - }, - { - "epoch": 5.337453198598977, - "grad_norm": 0.0016119669890031219, - "learning_rate": 0.00019998595136050385, - "loss": 46.0, - "step": 33143 - }, - { - "epoch": 5.337614235677765, - "grad_norm": 0.00953258853405714, - "learning_rate": 0.00019998595051249596, - "loss": 46.0, - "step": 33144 - }, - { - "epoch": 5.337775272756552, - "grad_norm": 0.001916867564432323, - "learning_rate": 0.00019998594966446248, - "loss": 46.0, - "step": 33145 - }, - { - "epoch": 5.33793630983534, - "grad_norm": 0.0016668703174218535, - "learning_rate": 0.00019998594881640345, - "loss": 46.0, - "step": 33146 - }, - { - "epoch": 5.338097346914127, - "grad_norm": 0.0018223979277536273, - "learning_rate": 0.00019998594796831877, - "loss": 46.0, - "step": 33147 - }, - { - "epoch": 5.3382583839929145, - "grad_norm": 0.0010866760276257992, - "learning_rate": 0.00019998594712020853, - "loss": 46.0, - "step": 33148 - }, - { - "epoch": 5.338419421071702, - "grad_norm": 0.005003968719393015, - "learning_rate": 0.0001999859462720727, - "loss": 46.0, - "step": 33149 - }, - { - "epoch": 5.338580458150489, - "grad_norm": 0.0035697175189852715, - "learning_rate": 0.0001999859454239113, - "loss": 46.0, - "step": 33150 - }, - { - "epoch": 5.338741495229277, - "grad_norm": 0.004712163005024195, - "learning_rate": 0.00019998594457572427, - "loss": 46.0, - "step": 33151 - }, - { - "epoch": 5.338902532308064, - "grad_norm": 0.0019653046038001776, - "learning_rate": 0.00019998594372751166, - "loss": 46.0, - "step": 33152 - }, - { - "epoch": 5.339063569386852, - "grad_norm": 0.0026867883279919624, - "learning_rate": 0.00019998594287927345, - "loss": 46.0, - "step": 33153 - }, - { - "epoch": 5.339224606465638, - "grad_norm": 0.00278069986961782, - "learning_rate": 0.00019998594203100964, - "loss": 46.0, - "step": 33154 - }, - { - "epoch": 5.339385643544426, - "grad_norm": 0.005466041620820761, - "learning_rate": 0.00019998594118272026, - "loss": 46.0, - "step": 33155 - }, - { - "epoch": 5.339546680623213, - "grad_norm": 0.0047329203225672245, - "learning_rate": 0.0001999859403344053, - "loss": 46.0, - "step": 33156 - }, - { - "epoch": 5.339707717702001, - "grad_norm": 0.002004955429583788, - "learning_rate": 0.00019998593948606473, - "loss": 46.0, - "step": 33157 - }, - { - "epoch": 5.339868754780788, - "grad_norm": 0.011458859778940678, - "learning_rate": 0.0001999859386376986, - "loss": 46.0, - "step": 33158 - }, - { - "epoch": 5.340029791859576, - "grad_norm": 0.0017322632484138012, - "learning_rate": 0.00019998593778930684, - "loss": 46.0, - "step": 33159 - }, - { - "epoch": 5.340190828938363, - "grad_norm": 0.0011169033823534846, - "learning_rate": 0.0001999859369408895, - "loss": 46.0, - "step": 33160 - }, - { - "epoch": 5.3403518660171505, - "grad_norm": 0.0022553205490112305, - "learning_rate": 0.00019998593609244655, - "loss": 46.0, - "step": 33161 - }, - { - "epoch": 5.340512903095938, - "grad_norm": 0.0006720060482621193, - "learning_rate": 0.00019998593524397804, - "loss": 46.0, - "step": 33162 - }, - { - "epoch": 5.340673940174725, - "grad_norm": 0.0028036385774612427, - "learning_rate": 0.0001999859343954839, - "loss": 46.0, - "step": 33163 - }, - { - "epoch": 5.340834977253513, - "grad_norm": 0.002368019660934806, - "learning_rate": 0.00019998593354696422, - "loss": 46.0, - "step": 33164 - }, - { - "epoch": 5.3409960143323, - "grad_norm": 0.0032712381798774004, - "learning_rate": 0.00019998593269841895, - "loss": 46.0, - "step": 33165 - }, - { - "epoch": 5.341157051411088, - "grad_norm": 0.0012793343048542738, - "learning_rate": 0.00019998593184984803, - "loss": 46.0, - "step": 33166 - }, - { - "epoch": 5.341318088489874, - "grad_norm": 0.003984276670962572, - "learning_rate": 0.00019998593100125156, - "loss": 46.0, - "step": 33167 - }, - { - "epoch": 5.341479125568662, - "grad_norm": 0.002494280692189932, - "learning_rate": 0.0001999859301526295, - "loss": 46.0, - "step": 33168 - }, - { - "epoch": 5.341640162647449, - "grad_norm": 0.0025377566926181316, - "learning_rate": 0.00019998592930398182, - "loss": 46.0, - "step": 33169 - }, - { - "epoch": 5.341801199726237, - "grad_norm": 0.002192853018641472, - "learning_rate": 0.00019998592845530858, - "loss": 46.0, - "step": 33170 - }, - { - "epoch": 5.341962236805024, - "grad_norm": 0.004914535209536552, - "learning_rate": 0.00019998592760660976, - "loss": 46.0, - "step": 33171 - }, - { - "epoch": 5.342123273883812, - "grad_norm": 0.00907733105123043, - "learning_rate": 0.0001999859267578853, - "loss": 46.0, - "step": 33172 - }, - { - "epoch": 5.342284310962599, - "grad_norm": 0.006224296521395445, - "learning_rate": 0.00019998592590913527, - "loss": 46.0, - "step": 33173 - }, - { - "epoch": 5.3424453480413865, - "grad_norm": 0.0022465987130999565, - "learning_rate": 0.00019998592506035966, - "loss": 46.0, - "step": 33174 - }, - { - "epoch": 5.342606385120174, - "grad_norm": 0.001446399255655706, - "learning_rate": 0.00019998592421155843, - "loss": 46.0, - "step": 33175 - }, - { - "epoch": 5.342767422198961, - "grad_norm": 0.008680148050189018, - "learning_rate": 0.00019998592336273164, - "loss": 46.0, - "step": 33176 - }, - { - "epoch": 5.342928459277749, - "grad_norm": 0.0034825035836547613, - "learning_rate": 0.00019998592251387927, - "loss": 46.0, - "step": 33177 - }, - { - "epoch": 5.343089496356536, - "grad_norm": 0.0027706236578524113, - "learning_rate": 0.00019998592166500128, - "loss": 46.0, - "step": 33178 - }, - { - "epoch": 5.343250533435324, - "grad_norm": 0.0012397044338285923, - "learning_rate": 0.0001999859208160977, - "loss": 46.0, - "step": 33179 - }, - { - "epoch": 5.343411570514111, - "grad_norm": 0.019820643588900566, - "learning_rate": 0.00019998591996716854, - "loss": 46.0, - "step": 33180 - }, - { - "epoch": 5.343572607592899, - "grad_norm": 0.0028394998516887426, - "learning_rate": 0.0001999859191182138, - "loss": 46.0, - "step": 33181 - }, - { - "epoch": 5.343733644671685, - "grad_norm": 0.01724991388618946, - "learning_rate": 0.00019998591826923346, - "loss": 46.0, - "step": 33182 - }, - { - "epoch": 5.343894681750473, - "grad_norm": 0.010076344013214111, - "learning_rate": 0.0001999859174202275, - "loss": 46.0, - "step": 33183 - }, - { - "epoch": 5.34405571882926, - "grad_norm": 0.006137934513390064, - "learning_rate": 0.00019998591657119597, - "loss": 46.0, - "step": 33184 - }, - { - "epoch": 5.344216755908048, - "grad_norm": 0.0028756221290677786, - "learning_rate": 0.00019998591572213884, - "loss": 46.0, - "step": 33185 - }, - { - "epoch": 5.344377792986835, - "grad_norm": 0.002660792088136077, - "learning_rate": 0.00019998591487305613, - "loss": 46.0, - "step": 33186 - }, - { - "epoch": 5.3445388300656225, - "grad_norm": 0.012917597778141499, - "learning_rate": 0.00019998591402394783, - "loss": 46.0, - "step": 33187 - }, - { - "epoch": 5.34469986714441, - "grad_norm": 0.003583114128559828, - "learning_rate": 0.00019998591317481394, - "loss": 46.0, - "step": 33188 - }, - { - "epoch": 5.344860904223197, - "grad_norm": 0.027246685698628426, - "learning_rate": 0.00019998591232565444, - "loss": 46.0, - "step": 33189 - }, - { - "epoch": 5.345021941301985, - "grad_norm": 0.005287294276058674, - "learning_rate": 0.00019998591147646938, - "loss": 46.0, - "step": 33190 - }, - { - "epoch": 5.345182978380772, - "grad_norm": 0.0030744103714823723, - "learning_rate": 0.0001999859106272587, - "loss": 46.0, - "step": 33191 - }, - { - "epoch": 5.34534401545956, - "grad_norm": 0.0018977083964273334, - "learning_rate": 0.00019998590977802244, - "loss": 46.0, - "step": 33192 - }, - { - "epoch": 5.345505052538347, - "grad_norm": 0.0036831132601946592, - "learning_rate": 0.00019998590892876062, - "loss": 46.0, - "step": 33193 - }, - { - "epoch": 5.345666089617135, - "grad_norm": 0.0054842764511704445, - "learning_rate": 0.00019998590807947315, - "loss": 46.0, - "step": 33194 - }, - { - "epoch": 5.345827126695922, - "grad_norm": 0.013415954075753689, - "learning_rate": 0.0001999859072301601, - "loss": 46.0, - "step": 33195 - }, - { - "epoch": 5.3459881637747095, - "grad_norm": 0.005169915035367012, - "learning_rate": 0.00019998590638082152, - "loss": 46.0, - "step": 33196 - }, - { - "epoch": 5.346149200853496, - "grad_norm": 0.0014741383492946625, - "learning_rate": 0.0001999859055314573, - "loss": 46.0, - "step": 33197 - }, - { - "epoch": 5.3463102379322835, - "grad_norm": 0.010834702290594578, - "learning_rate": 0.00019998590468206748, - "loss": 46.0, - "step": 33198 - }, - { - "epoch": 5.346471275011071, - "grad_norm": 0.0033167554065585136, - "learning_rate": 0.00019998590383265208, - "loss": 46.0, - "step": 33199 - }, - { - "epoch": 5.3466323120898585, - "grad_norm": 0.003050136147066951, - "learning_rate": 0.0001999859029832111, - "loss": 46.0, - "step": 33200 - }, - { - "epoch": 5.346793349168646, - "grad_norm": 0.005842990707606077, - "learning_rate": 0.00019998590213374451, - "loss": 46.0, - "step": 33201 - }, - { - "epoch": 5.346954386247433, - "grad_norm": 0.004670327063649893, - "learning_rate": 0.00019998590128425235, - "loss": 46.0, - "step": 33202 - }, - { - "epoch": 5.347115423326221, - "grad_norm": 0.009079623967409134, - "learning_rate": 0.00019998590043473455, - "loss": 46.0, - "step": 33203 - }, - { - "epoch": 5.347276460405008, - "grad_norm": 0.012257524766027927, - "learning_rate": 0.00019998589958519124, - "loss": 46.0, - "step": 33204 - }, - { - "epoch": 5.347437497483796, - "grad_norm": 0.003308637300506234, - "learning_rate": 0.0001999858987356223, - "loss": 46.0, - "step": 33205 - }, - { - "epoch": 5.347598534562583, - "grad_norm": 0.0025279151741415262, - "learning_rate": 0.00019998589788602773, - "loss": 46.0, - "step": 33206 - }, - { - "epoch": 5.347759571641371, - "grad_norm": 0.01778295822441578, - "learning_rate": 0.0001999858970364076, - "loss": 46.0, - "step": 33207 - }, - { - "epoch": 5.347920608720158, - "grad_norm": 0.001226358930580318, - "learning_rate": 0.00019998589618676186, - "loss": 46.0, - "step": 33208 - }, - { - "epoch": 5.3480816457989455, - "grad_norm": 0.002565372036769986, - "learning_rate": 0.00019998589533709056, - "loss": 46.0, - "step": 33209 - }, - { - "epoch": 5.348242682877733, - "grad_norm": 0.009788377210497856, - "learning_rate": 0.00019998589448739368, - "loss": 46.0, - "step": 33210 - }, - { - "epoch": 5.34840371995652, - "grad_norm": 0.003157887374982238, - "learning_rate": 0.00019998589363767118, - "loss": 46.0, - "step": 33211 - }, - { - "epoch": 5.348564757035307, - "grad_norm": 0.017140492796897888, - "learning_rate": 0.0001999858927879231, - "loss": 46.0, - "step": 33212 - }, - { - "epoch": 5.348725794114094, - "grad_norm": 0.008377748541533947, - "learning_rate": 0.00019998589193814944, - "loss": 46.0, - "step": 33213 - }, - { - "epoch": 5.348886831192882, - "grad_norm": 0.0018164287321269512, - "learning_rate": 0.00019998589108835013, - "loss": 46.0, - "step": 33214 - }, - { - "epoch": 5.349047868271669, - "grad_norm": 0.0025338446721434593, - "learning_rate": 0.0001999858902385253, - "loss": 46.0, - "step": 33215 - }, - { - "epoch": 5.349208905350457, - "grad_norm": 0.0017539168475195765, - "learning_rate": 0.00019998588938867484, - "loss": 46.0, - "step": 33216 - }, - { - "epoch": 5.349369942429244, - "grad_norm": 0.0034128446131944656, - "learning_rate": 0.00019998588853879882, - "loss": 46.0, - "step": 33217 - }, - { - "epoch": 5.349530979508032, - "grad_norm": 0.009461546316742897, - "learning_rate": 0.00019998588768889718, - "loss": 46.0, - "step": 33218 - }, - { - "epoch": 5.349692016586819, - "grad_norm": 0.0018438647966831923, - "learning_rate": 0.00019998588683896995, - "loss": 46.0, - "step": 33219 - }, - { - "epoch": 5.349853053665607, - "grad_norm": 0.005322962999343872, - "learning_rate": 0.00019998588598901714, - "loss": 46.0, - "step": 33220 - }, - { - "epoch": 5.350014090744394, - "grad_norm": 0.028859008103609085, - "learning_rate": 0.00019998588513903874, - "loss": 46.0, - "step": 33221 - }, - { - "epoch": 5.3501751278231815, - "grad_norm": 0.0017195470863953233, - "learning_rate": 0.0001999858842890347, - "loss": 46.0, - "step": 33222 - }, - { - "epoch": 5.350336164901969, - "grad_norm": 0.01269600447267294, - "learning_rate": 0.00019998588343900513, - "loss": 46.0, - "step": 33223 - }, - { - "epoch": 5.350497201980756, - "grad_norm": 0.009484685957431793, - "learning_rate": 0.00019998588258894994, - "loss": 46.0, - "step": 33224 - }, - { - "epoch": 5.350658239059544, - "grad_norm": 0.00613419059664011, - "learning_rate": 0.00019998588173886916, - "loss": 46.0, - "step": 33225 - }, - { - "epoch": 5.350819276138331, - "grad_norm": 0.01434311643242836, - "learning_rate": 0.00019998588088876283, - "loss": 46.0, - "step": 33226 - }, - { - "epoch": 5.350980313217118, - "grad_norm": 0.01462017185986042, - "learning_rate": 0.00019998588003863085, - "loss": 46.0, - "step": 33227 - }, - { - "epoch": 5.351141350295905, - "grad_norm": 0.009560765698552132, - "learning_rate": 0.00019998587918847332, - "loss": 46.0, - "step": 33228 - }, - { - "epoch": 5.351302387374693, - "grad_norm": 0.0227262731641531, - "learning_rate": 0.0001999858783382902, - "loss": 46.0, - "step": 33229 - }, - { - "epoch": 5.35146342445348, - "grad_norm": 0.001176685094833374, - "learning_rate": 0.00019998587748808145, - "loss": 46.0, - "step": 33230 - }, - { - "epoch": 5.351624461532268, - "grad_norm": 0.002229700330644846, - "learning_rate": 0.00019998587663784713, - "loss": 46.0, - "step": 33231 - }, - { - "epoch": 5.351785498611055, - "grad_norm": 0.009022231213748455, - "learning_rate": 0.00019998587578758722, - "loss": 46.0, - "step": 33232 - }, - { - "epoch": 5.351946535689843, - "grad_norm": 0.0059882174246013165, - "learning_rate": 0.00019998587493730172, - "loss": 46.0, - "step": 33233 - }, - { - "epoch": 5.35210757276863, - "grad_norm": 0.001762591302394867, - "learning_rate": 0.00019998587408699063, - "loss": 46.0, - "step": 33234 - }, - { - "epoch": 5.3522686098474175, - "grad_norm": 0.0027219848707318306, - "learning_rate": 0.00019998587323665393, - "loss": 46.0, - "step": 33235 - }, - { - "epoch": 5.352429646926205, - "grad_norm": 0.0027197706513106823, - "learning_rate": 0.00019998587238629167, - "loss": 46.0, - "step": 33236 - }, - { - "epoch": 5.352590684004992, - "grad_norm": 0.011602818965911865, - "learning_rate": 0.0001999858715359038, - "loss": 46.0, - "step": 33237 - }, - { - "epoch": 5.35275172108378, - "grad_norm": 0.0031015598215162754, - "learning_rate": 0.00019998587068549036, - "loss": 46.0, - "step": 33238 - }, - { - "epoch": 5.352912758162567, - "grad_norm": 0.006244780495762825, - "learning_rate": 0.0001999858698350513, - "loss": 46.0, - "step": 33239 - }, - { - "epoch": 5.353073795241355, - "grad_norm": 0.005492097232490778, - "learning_rate": 0.00019998586898458664, - "loss": 46.0, - "step": 33240 - }, - { - "epoch": 5.353234832320141, - "grad_norm": 0.01124728936702013, - "learning_rate": 0.00019998586813409642, - "loss": 46.0, - "step": 33241 - }, - { - "epoch": 5.353395869398929, - "grad_norm": 0.005954296328127384, - "learning_rate": 0.0001999858672835806, - "loss": 46.0, - "step": 33242 - }, - { - "epoch": 5.353556906477716, - "grad_norm": 0.030257176607847214, - "learning_rate": 0.00019998586643303918, - "loss": 46.0, - "step": 33243 - }, - { - "epoch": 5.353717943556504, - "grad_norm": 0.0056516434997320175, - "learning_rate": 0.0001999858655824722, - "loss": 46.0, - "step": 33244 - }, - { - "epoch": 5.353878980635291, - "grad_norm": 0.021140802651643753, - "learning_rate": 0.0001999858647318796, - "loss": 46.0, - "step": 33245 - }, - { - "epoch": 5.354040017714079, - "grad_norm": 0.006468849256634712, - "learning_rate": 0.0001999858638812614, - "loss": 46.0, - "step": 33246 - }, - { - "epoch": 5.354201054792866, - "grad_norm": 0.011477414518594742, - "learning_rate": 0.00019998586303061764, - "loss": 46.0, - "step": 33247 - }, - { - "epoch": 5.3543620918716535, - "grad_norm": 0.008933735080063343, - "learning_rate": 0.00019998586217994825, - "loss": 46.0, - "step": 33248 - }, - { - "epoch": 5.354523128950441, - "grad_norm": 0.0035963142290711403, - "learning_rate": 0.0001999858613292533, - "loss": 46.0, - "step": 33249 - }, - { - "epoch": 5.354684166029228, - "grad_norm": 0.002323634224012494, - "learning_rate": 0.00019998586047853274, - "loss": 46.0, - "step": 33250 - }, - { - "epoch": 5.354845203108016, - "grad_norm": 0.003420127322897315, - "learning_rate": 0.0001999858596277866, - "loss": 46.0, - "step": 33251 - }, - { - "epoch": 5.355006240186803, - "grad_norm": 0.0044273692183196545, - "learning_rate": 0.00019998585877701485, - "loss": 46.0, - "step": 33252 - }, - { - "epoch": 5.355167277265591, - "grad_norm": 0.01521384809166193, - "learning_rate": 0.00019998585792621753, - "loss": 46.0, - "step": 33253 - }, - { - "epoch": 5.355328314344378, - "grad_norm": 0.005429886281490326, - "learning_rate": 0.00019998585707539461, - "loss": 46.0, - "step": 33254 - }, - { - "epoch": 5.355489351423165, - "grad_norm": 0.008967528119683266, - "learning_rate": 0.00019998585622454612, - "loss": 46.0, - "step": 33255 - }, - { - "epoch": 5.355650388501952, - "grad_norm": 0.002316574566066265, - "learning_rate": 0.000199985855373672, - "loss": 46.0, - "step": 33256 - }, - { - "epoch": 5.35581142558074, - "grad_norm": 0.002046039793640375, - "learning_rate": 0.00019998585452277233, - "loss": 46.0, - "step": 33257 - }, - { - "epoch": 5.355972462659527, - "grad_norm": 0.013033757917582989, - "learning_rate": 0.00019998585367184704, - "loss": 46.0, - "step": 33258 - }, - { - "epoch": 5.3561334997383145, - "grad_norm": 0.00303908484056592, - "learning_rate": 0.00019998585282089617, - "loss": 46.0, - "step": 33259 - }, - { - "epoch": 5.356294536817102, - "grad_norm": 0.007970873266458511, - "learning_rate": 0.00019998585196991968, - "loss": 46.0, - "step": 33260 - }, - { - "epoch": 5.356455573895889, - "grad_norm": 0.02804405987262726, - "learning_rate": 0.00019998585111891763, - "loss": 46.0, - "step": 33261 - }, - { - "epoch": 5.356616610974677, - "grad_norm": 0.0022014323621988297, - "learning_rate": 0.00019998585026789, - "loss": 46.0, - "step": 33262 - }, - { - "epoch": 5.356777648053464, - "grad_norm": 0.006103958934545517, - "learning_rate": 0.00019998584941683674, - "loss": 46.0, - "step": 33263 - }, - { - "epoch": 5.356938685132252, - "grad_norm": 0.001840987242758274, - "learning_rate": 0.00019998584856575793, - "loss": 46.0, - "step": 33264 - }, - { - "epoch": 5.357099722211039, - "grad_norm": 0.009221870452165604, - "learning_rate": 0.0001999858477146535, - "loss": 46.0, - "step": 33265 - }, - { - "epoch": 5.357260759289827, - "grad_norm": 0.007611990440636873, - "learning_rate": 0.0001999858468635235, - "loss": 46.0, - "step": 33266 - }, - { - "epoch": 5.357421796368614, - "grad_norm": 0.018108505755662918, - "learning_rate": 0.0001999858460123679, - "loss": 46.0, - "step": 33267 - }, - { - "epoch": 5.357582833447402, - "grad_norm": 0.0021959838923066854, - "learning_rate": 0.00019998584516118669, - "loss": 46.0, - "step": 33268 - }, - { - "epoch": 5.357743870526189, - "grad_norm": 0.007607196923345327, - "learning_rate": 0.00019998584430997989, - "loss": 46.0, - "step": 33269 - }, - { - "epoch": 5.357904907604976, - "grad_norm": 0.0022318274714052677, - "learning_rate": 0.00019998584345874752, - "loss": 46.0, - "step": 33270 - }, - { - "epoch": 5.358065944683763, - "grad_norm": 0.0009972254047170281, - "learning_rate": 0.00019998584260748955, - "loss": 46.0, - "step": 33271 - }, - { - "epoch": 5.3582269817625505, - "grad_norm": 0.0031633200123906136, - "learning_rate": 0.000199985841756206, - "loss": 46.0, - "step": 33272 - }, - { - "epoch": 5.358388018841338, - "grad_norm": 0.0016663161804899573, - "learning_rate": 0.00019998584090489684, - "loss": 46.0, - "step": 33273 - }, - { - "epoch": 5.358549055920125, - "grad_norm": 0.004439766984432936, - "learning_rate": 0.0001999858400535621, - "loss": 46.0, - "step": 33274 - }, - { - "epoch": 5.358710092998913, - "grad_norm": 0.003509029047563672, - "learning_rate": 0.00019998583920220178, - "loss": 46.0, - "step": 33275 - }, - { - "epoch": 5.3588711300777, - "grad_norm": 0.006376531440764666, - "learning_rate": 0.00019998583835081584, - "loss": 46.0, - "step": 33276 - }, - { - "epoch": 5.359032167156488, - "grad_norm": 0.021703248843550682, - "learning_rate": 0.00019998583749940432, - "loss": 46.0, - "step": 33277 - }, - { - "epoch": 5.359193204235275, - "grad_norm": 0.0021995895076543093, - "learning_rate": 0.0001999858366479672, - "loss": 46.0, - "step": 33278 - }, - { - "epoch": 5.359354241314063, - "grad_norm": 0.0010908916592597961, - "learning_rate": 0.0001999858357965045, - "loss": 46.0, - "step": 33279 - }, - { - "epoch": 5.35951527839285, - "grad_norm": 0.004063006024807692, - "learning_rate": 0.00019998583494501625, - "loss": 46.0, - "step": 33280 - }, - { - "epoch": 5.359676315471638, - "grad_norm": 0.003869170555844903, - "learning_rate": 0.00019998583409350235, - "loss": 46.0, - "step": 33281 - }, - { - "epoch": 5.359837352550425, - "grad_norm": 0.00443886686116457, - "learning_rate": 0.00019998583324196289, - "loss": 46.0, - "step": 33282 - }, - { - "epoch": 5.3599983896292125, - "grad_norm": 0.0015142090851441026, - "learning_rate": 0.0001999858323903978, - "loss": 46.0, - "step": 33283 - }, - { - "epoch": 5.360159426708, - "grad_norm": 0.0017443251563236117, - "learning_rate": 0.00019998583153880715, - "loss": 46.0, - "step": 33284 - }, - { - "epoch": 5.3603204637867865, - "grad_norm": 0.003215186530724168, - "learning_rate": 0.0001999858306871909, - "loss": 46.0, - "step": 33285 - }, - { - "epoch": 5.360481500865574, - "grad_norm": 0.004337142687290907, - "learning_rate": 0.0001999858298355491, - "loss": 46.0, - "step": 33286 - }, - { - "epoch": 5.360642537944361, - "grad_norm": 0.003072574269026518, - "learning_rate": 0.00019998582898388164, - "loss": 46.0, - "step": 33287 - }, - { - "epoch": 5.360803575023149, - "grad_norm": 0.014539993368089199, - "learning_rate": 0.00019998582813218863, - "loss": 46.0, - "step": 33288 - }, - { - "epoch": 5.360964612101936, - "grad_norm": 0.004221667069941759, - "learning_rate": 0.00019998582728047003, - "loss": 46.0, - "step": 33289 - }, - { - "epoch": 5.361125649180724, - "grad_norm": 0.01120102871209383, - "learning_rate": 0.00019998582642872582, - "loss": 46.0, - "step": 33290 - }, - { - "epoch": 5.361286686259511, - "grad_norm": 0.0028298741672188044, - "learning_rate": 0.00019998582557695602, - "loss": 46.0, - "step": 33291 - }, - { - "epoch": 5.361447723338299, - "grad_norm": 0.0013431668048724532, - "learning_rate": 0.00019998582472516063, - "loss": 46.0, - "step": 33292 - }, - { - "epoch": 5.361608760417086, - "grad_norm": 0.008113016374409199, - "learning_rate": 0.00019998582387333966, - "loss": 46.0, - "step": 33293 - }, - { - "epoch": 5.361769797495874, - "grad_norm": 0.0029859375208616257, - "learning_rate": 0.0001999858230214931, - "loss": 46.0, - "step": 33294 - }, - { - "epoch": 5.361930834574661, - "grad_norm": 0.01254460122436285, - "learning_rate": 0.00019998582216962092, - "loss": 46.0, - "step": 33295 - }, - { - "epoch": 5.3620918716534485, - "grad_norm": 0.0012836339883506298, - "learning_rate": 0.00019998582131772316, - "loss": 46.0, - "step": 33296 - }, - { - "epoch": 5.362252908732236, - "grad_norm": 0.0015273225726559758, - "learning_rate": 0.00019998582046579984, - "loss": 46.0, - "step": 33297 - }, - { - "epoch": 5.362413945811023, - "grad_norm": 0.005086224526166916, - "learning_rate": 0.00019998581961385093, - "loss": 46.0, - "step": 33298 - }, - { - "epoch": 5.362574982889811, - "grad_norm": 0.006983438041061163, - "learning_rate": 0.00019998581876187638, - "loss": 46.0, - "step": 33299 - }, - { - "epoch": 5.362736019968597, - "grad_norm": 0.006965779233723879, - "learning_rate": 0.00019998581790987627, - "loss": 46.0, - "step": 33300 - }, - { - "epoch": 5.362897057047385, - "grad_norm": 0.008995898999273777, - "learning_rate": 0.00019998581705785057, - "loss": 46.0, - "step": 33301 - }, - { - "epoch": 5.363058094126172, - "grad_norm": 0.0008115133969113231, - "learning_rate": 0.00019998581620579925, - "loss": 46.0, - "step": 33302 - }, - { - "epoch": 5.36321913120496, - "grad_norm": 0.004873207304626703, - "learning_rate": 0.00019998581535372235, - "loss": 46.0, - "step": 33303 - }, - { - "epoch": 5.363380168283747, - "grad_norm": 0.003568889107555151, - "learning_rate": 0.0001999858145016199, - "loss": 46.0, - "step": 33304 - }, - { - "epoch": 5.363541205362535, - "grad_norm": 0.0034354953095316887, - "learning_rate": 0.00019998581364949182, - "loss": 46.0, - "step": 33305 - }, - { - "epoch": 5.363702242441322, - "grad_norm": 0.006653422024101019, - "learning_rate": 0.00019998581279733816, - "loss": 46.0, - "step": 33306 - }, - { - "epoch": 5.3638632795201096, - "grad_norm": 0.002001228742301464, - "learning_rate": 0.0001999858119451589, - "loss": 46.0, - "step": 33307 - }, - { - "epoch": 5.364024316598897, - "grad_norm": 0.004664026200771332, - "learning_rate": 0.00019998581109295408, - "loss": 46.0, - "step": 33308 - }, - { - "epoch": 5.3641853536776845, - "grad_norm": 0.0033457849640399218, - "learning_rate": 0.00019998581024072363, - "loss": 46.0, - "step": 33309 - }, - { - "epoch": 5.364346390756472, - "grad_norm": 0.0062686987221241, - "learning_rate": 0.00019998580938846762, - "loss": 46.0, - "step": 33310 - }, - { - "epoch": 5.364507427835259, - "grad_norm": 0.004319933243095875, - "learning_rate": 0.000199985808536186, - "loss": 46.0, - "step": 33311 - }, - { - "epoch": 5.364668464914047, - "grad_norm": 0.013152024708688259, - "learning_rate": 0.00019998580768387878, - "loss": 46.0, - "step": 33312 - }, - { - "epoch": 5.364829501992834, - "grad_norm": 0.004020859021693468, - "learning_rate": 0.00019998580683154598, - "loss": 46.0, - "step": 33313 - }, - { - "epoch": 5.364990539071622, - "grad_norm": 0.0056895967572927475, - "learning_rate": 0.00019998580597918757, - "loss": 46.0, - "step": 33314 - }, - { - "epoch": 5.365151576150408, - "grad_norm": 0.0030085165053606033, - "learning_rate": 0.0001999858051268036, - "loss": 46.0, - "step": 33315 - }, - { - "epoch": 5.365312613229196, - "grad_norm": 0.0010852243285626173, - "learning_rate": 0.000199985804274394, - "loss": 46.0, - "step": 33316 - }, - { - "epoch": 5.365473650307983, - "grad_norm": 0.002301767934113741, - "learning_rate": 0.00019998580342195886, - "loss": 46.0, - "step": 33317 - }, - { - "epoch": 5.365634687386771, - "grad_norm": 0.0026175789535045624, - "learning_rate": 0.0001999858025694981, - "loss": 46.0, - "step": 33318 - }, - { - "epoch": 5.365795724465558, - "grad_norm": 0.007056757807731628, - "learning_rate": 0.00019998580171701175, - "loss": 46.0, - "step": 33319 - }, - { - "epoch": 5.3659567615443455, - "grad_norm": 0.0018180046463385224, - "learning_rate": 0.00019998580086449982, - "loss": 46.0, - "step": 33320 - }, - { - "epoch": 5.366117798623133, - "grad_norm": 0.002870079828426242, - "learning_rate": 0.00019998580001196227, - "loss": 46.0, - "step": 33321 - }, - { - "epoch": 5.36627883570192, - "grad_norm": 0.003968583419919014, - "learning_rate": 0.00019998579915939916, - "loss": 46.0, - "step": 33322 - }, - { - "epoch": 5.366439872780708, - "grad_norm": 0.0013545351102948189, - "learning_rate": 0.00019998579830681043, - "loss": 46.0, - "step": 33323 - }, - { - "epoch": 5.366600909859495, - "grad_norm": 0.005956271663308144, - "learning_rate": 0.00019998579745419615, - "loss": 46.0, - "step": 33324 - }, - { - "epoch": 5.366761946938283, - "grad_norm": 0.003099706256762147, - "learning_rate": 0.00019998579660155622, - "loss": 46.0, - "step": 33325 - }, - { - "epoch": 5.36692298401707, - "grad_norm": 0.0016058270120993257, - "learning_rate": 0.00019998579574889076, - "loss": 46.0, - "step": 33326 - }, - { - "epoch": 5.367084021095858, - "grad_norm": 0.013404106721282005, - "learning_rate": 0.0001999857948961997, - "loss": 46.0, - "step": 33327 - }, - { - "epoch": 5.367245058174644, - "grad_norm": 0.003390738507732749, - "learning_rate": 0.000199985794043483, - "loss": 46.0, - "step": 33328 - }, - { - "epoch": 5.367406095253432, - "grad_norm": 0.002164378296583891, - "learning_rate": 0.00019998579319074075, - "loss": 46.0, - "step": 33329 - }, - { - "epoch": 5.367567132332219, - "grad_norm": 0.004207930061966181, - "learning_rate": 0.0001999857923379729, - "loss": 46.0, - "step": 33330 - }, - { - "epoch": 5.367728169411007, - "grad_norm": 0.004162882920354605, - "learning_rate": 0.00019998579148517944, - "loss": 46.0, - "step": 33331 - }, - { - "epoch": 5.367889206489794, - "grad_norm": 0.0037815652322024107, - "learning_rate": 0.00019998579063236043, - "loss": 46.0, - "step": 33332 - }, - { - "epoch": 5.3680502435685815, - "grad_norm": 0.008563963696360588, - "learning_rate": 0.00019998578977951578, - "loss": 46.0, - "step": 33333 - }, - { - "epoch": 5.368211280647369, - "grad_norm": 0.004082670900970697, - "learning_rate": 0.00019998578892664557, - "loss": 46.0, - "step": 33334 - }, - { - "epoch": 5.368372317726156, - "grad_norm": 0.002040287246927619, - "learning_rate": 0.00019998578807374978, - "loss": 46.0, - "step": 33335 - }, - { - "epoch": 5.368533354804944, - "grad_norm": 0.007208664435893297, - "learning_rate": 0.00019998578722082836, - "loss": 46.0, - "step": 33336 - }, - { - "epoch": 5.368694391883731, - "grad_norm": 0.011896511539816856, - "learning_rate": 0.0001999857863678814, - "loss": 46.0, - "step": 33337 - }, - { - "epoch": 5.368855428962519, - "grad_norm": 0.006968928035348654, - "learning_rate": 0.0001999857855149088, - "loss": 46.0, - "step": 33338 - }, - { - "epoch": 5.369016466041306, - "grad_norm": 0.010746875777840614, - "learning_rate": 0.00019998578466191063, - "loss": 46.0, - "step": 33339 - }, - { - "epoch": 5.369177503120094, - "grad_norm": 0.011602395214140415, - "learning_rate": 0.00019998578380888687, - "loss": 46.0, - "step": 33340 - }, - { - "epoch": 5.369338540198881, - "grad_norm": 0.0012585223885253072, - "learning_rate": 0.0001999857829558375, - "loss": 46.0, - "step": 33341 - }, - { - "epoch": 5.369499577277669, - "grad_norm": 0.004034592770040035, - "learning_rate": 0.00019998578210276256, - "loss": 46.0, - "step": 33342 - }, - { - "epoch": 5.369660614356455, - "grad_norm": 0.005447559989988804, - "learning_rate": 0.00019998578124966204, - "loss": 46.0, - "step": 33343 - }, - { - "epoch": 5.369821651435243, - "grad_norm": 0.01061705220490694, - "learning_rate": 0.00019998578039653588, - "loss": 46.0, - "step": 33344 - }, - { - "epoch": 5.36998268851403, - "grad_norm": 0.001415864797309041, - "learning_rate": 0.00019998577954338418, - "loss": 46.0, - "step": 33345 - }, - { - "epoch": 5.3701437255928175, - "grad_norm": 0.002299806335940957, - "learning_rate": 0.00019998577869020684, - "loss": 46.0, - "step": 33346 - }, - { - "epoch": 5.370304762671605, - "grad_norm": 0.005004958249628544, - "learning_rate": 0.00019998577783700397, - "loss": 46.0, - "step": 33347 - }, - { - "epoch": 5.370465799750392, - "grad_norm": 0.008812904357910156, - "learning_rate": 0.00019998577698377546, - "loss": 46.0, - "step": 33348 - }, - { - "epoch": 5.37062683682918, - "grad_norm": 0.0014168878551572561, - "learning_rate": 0.00019998577613052136, - "loss": 46.0, - "step": 33349 - }, - { - "epoch": 5.370787873907967, - "grad_norm": 0.0023052701726555824, - "learning_rate": 0.0001999857752772417, - "loss": 46.0, - "step": 33350 - }, - { - "epoch": 5.370948910986755, - "grad_norm": 0.004098802339285612, - "learning_rate": 0.00019998577442393645, - "loss": 46.0, - "step": 33351 - }, - { - "epoch": 5.371109948065542, - "grad_norm": 0.0010477675823494792, - "learning_rate": 0.00019998577357060557, - "loss": 46.0, - "step": 33352 - }, - { - "epoch": 5.37127098514433, - "grad_norm": 0.00528474198654294, - "learning_rate": 0.00019998577271724912, - "loss": 46.0, - "step": 33353 - }, - { - "epoch": 5.371432022223117, - "grad_norm": 0.0011248798109591007, - "learning_rate": 0.00019998577186386708, - "loss": 46.0, - "step": 33354 - }, - { - "epoch": 5.371593059301905, - "grad_norm": 0.004878604784607887, - "learning_rate": 0.00019998577101045946, - "loss": 46.0, - "step": 33355 - }, - { - "epoch": 5.371754096380692, - "grad_norm": 0.004384479019790888, - "learning_rate": 0.00019998577015702625, - "loss": 46.0, - "step": 33356 - }, - { - "epoch": 5.3719151334594795, - "grad_norm": 0.006826380733400583, - "learning_rate": 0.00019998576930356743, - "loss": 46.0, - "step": 33357 - }, - { - "epoch": 5.372076170538266, - "grad_norm": 0.004660832230001688, - "learning_rate": 0.00019998576845008302, - "loss": 46.0, - "step": 33358 - }, - { - "epoch": 5.3722372076170535, - "grad_norm": 0.01064602192491293, - "learning_rate": 0.00019998576759657302, - "loss": 46.0, - "step": 33359 - }, - { - "epoch": 5.372398244695841, - "grad_norm": 0.010420400649309158, - "learning_rate": 0.0001999857667430374, - "loss": 46.0, - "step": 33360 - }, - { - "epoch": 5.372559281774628, - "grad_norm": 0.002950671361759305, - "learning_rate": 0.00019998576588947623, - "loss": 46.0, - "step": 33361 - }, - { - "epoch": 5.372720318853416, - "grad_norm": 0.011195690371096134, - "learning_rate": 0.00019998576503588947, - "loss": 46.0, - "step": 33362 - }, - { - "epoch": 5.372881355932203, - "grad_norm": 0.00164573744405061, - "learning_rate": 0.0001999857641822771, - "loss": 46.0, - "step": 33363 - }, - { - "epoch": 5.373042393010991, - "grad_norm": 0.003164286958053708, - "learning_rate": 0.00019998576332863914, - "loss": 46.0, - "step": 33364 - }, - { - "epoch": 5.373203430089778, - "grad_norm": 0.0027676664758473635, - "learning_rate": 0.0001999857624749756, - "loss": 46.0, - "step": 33365 - }, - { - "epoch": 5.373364467168566, - "grad_norm": 0.004352774005383253, - "learning_rate": 0.00019998576162128648, - "loss": 46.0, - "step": 33366 - }, - { - "epoch": 5.373525504247353, - "grad_norm": 0.0033374002669006586, - "learning_rate": 0.00019998576076757173, - "loss": 46.0, - "step": 33367 - }, - { - "epoch": 5.3736865413261405, - "grad_norm": 0.005186352413147688, - "learning_rate": 0.00019998575991383142, - "loss": 46.0, - "step": 33368 - }, - { - "epoch": 5.373847578404928, - "grad_norm": 0.012580971233546734, - "learning_rate": 0.00019998575906006553, - "loss": 46.0, - "step": 33369 - }, - { - "epoch": 5.3740086154837154, - "grad_norm": 0.0006617176695726812, - "learning_rate": 0.00019998575820627401, - "loss": 46.0, - "step": 33370 - }, - { - "epoch": 5.374169652562503, - "grad_norm": 0.00987592339515686, - "learning_rate": 0.00019998575735245692, - "loss": 46.0, - "step": 33371 - }, - { - "epoch": 5.37433068964129, - "grad_norm": 0.008651567623019218, - "learning_rate": 0.00019998575649861423, - "loss": 46.0, - "step": 33372 - }, - { - "epoch": 5.374491726720077, - "grad_norm": 0.0022212076000869274, - "learning_rate": 0.00019998575564474598, - "loss": 46.0, - "step": 33373 - }, - { - "epoch": 5.374652763798864, - "grad_norm": 0.016480112448334694, - "learning_rate": 0.0001999857547908521, - "loss": 46.0, - "step": 33374 - }, - { - "epoch": 5.374813800877652, - "grad_norm": 0.0024351885076612234, - "learning_rate": 0.00019998575393693265, - "loss": 46.0, - "step": 33375 - }, - { - "epoch": 5.374974837956439, - "grad_norm": 0.004483151249587536, - "learning_rate": 0.0001999857530829876, - "loss": 46.0, - "step": 33376 - }, - { - "epoch": 5.375135875035227, - "grad_norm": 0.005428415257483721, - "learning_rate": 0.00019998575222901694, - "loss": 46.0, - "step": 33377 - }, - { - "epoch": 5.375296912114014, - "grad_norm": 0.0006876824190840125, - "learning_rate": 0.00019998575137502073, - "loss": 46.0, - "step": 33378 - }, - { - "epoch": 5.375457949192802, - "grad_norm": 0.011283421888947487, - "learning_rate": 0.0001999857505209989, - "loss": 46.0, - "step": 33379 - }, - { - "epoch": 5.375618986271589, - "grad_norm": 0.007189368829131126, - "learning_rate": 0.0001999857496669515, - "loss": 46.0, - "step": 33380 - }, - { - "epoch": 5.3757800233503765, - "grad_norm": 0.00192391371820122, - "learning_rate": 0.00019998574881287847, - "loss": 46.0, - "step": 33381 - }, - { - "epoch": 5.375941060429164, - "grad_norm": 0.002130111213773489, - "learning_rate": 0.0001999857479587799, - "loss": 46.0, - "step": 33382 - }, - { - "epoch": 5.376102097507951, - "grad_norm": 0.004121072124689817, - "learning_rate": 0.0001999857471046557, - "loss": 46.0, - "step": 33383 - }, - { - "epoch": 5.376263134586739, - "grad_norm": 0.01183114293962717, - "learning_rate": 0.00019998574625050593, - "loss": 46.0, - "step": 33384 - }, - { - "epoch": 5.376424171665526, - "grad_norm": 0.0017487617442384362, - "learning_rate": 0.00019998574539633056, - "loss": 46.0, - "step": 33385 - }, - { - "epoch": 5.376585208744314, - "grad_norm": 0.00510413059964776, - "learning_rate": 0.0001999857445421296, - "loss": 46.0, - "step": 33386 - }, - { - "epoch": 5.376746245823101, - "grad_norm": 0.0030885094311088324, - "learning_rate": 0.00019998574368790303, - "loss": 46.0, - "step": 33387 - }, - { - "epoch": 5.376907282901888, - "grad_norm": 0.0019977055490016937, - "learning_rate": 0.0001999857428336509, - "loss": 46.0, - "step": 33388 - }, - { - "epoch": 5.377068319980675, - "grad_norm": 0.0033095483668148518, - "learning_rate": 0.00019998574197937317, - "loss": 46.0, - "step": 33389 - }, - { - "epoch": 5.377229357059463, - "grad_norm": 0.009518763981759548, - "learning_rate": 0.00019998574112506986, - "loss": 46.0, - "step": 33390 - }, - { - "epoch": 5.37739039413825, - "grad_norm": 0.0059511540457606316, - "learning_rate": 0.00019998574027074094, - "loss": 46.0, - "step": 33391 - }, - { - "epoch": 5.377551431217038, - "grad_norm": 0.004025678150355816, - "learning_rate": 0.0001999857394163864, - "loss": 46.0, - "step": 33392 - }, - { - "epoch": 5.377712468295825, - "grad_norm": 0.007552312221378088, - "learning_rate": 0.0001999857385620063, - "loss": 46.0, - "step": 33393 - }, - { - "epoch": 5.3778735053746125, - "grad_norm": 0.0043281069956719875, - "learning_rate": 0.00019998573770760062, - "loss": 46.0, - "step": 33394 - }, - { - "epoch": 5.3780345424534, - "grad_norm": 0.009891233406960964, - "learning_rate": 0.00019998573685316932, - "loss": 46.0, - "step": 33395 - }, - { - "epoch": 5.378195579532187, - "grad_norm": 0.006546927150338888, - "learning_rate": 0.00019998573599871246, - "loss": 46.0, - "step": 33396 - }, - { - "epoch": 5.378356616610975, - "grad_norm": 0.0027265669777989388, - "learning_rate": 0.00019998573514423, - "loss": 46.0, - "step": 33397 - }, - { - "epoch": 5.378517653689762, - "grad_norm": 0.007394163403660059, - "learning_rate": 0.00019998573428972195, - "loss": 46.0, - "step": 33398 - }, - { - "epoch": 5.37867869076855, - "grad_norm": 0.014024234376847744, - "learning_rate": 0.0001999857334351883, - "loss": 46.0, - "step": 33399 - }, - { - "epoch": 5.378839727847337, - "grad_norm": 0.0036724803503602743, - "learning_rate": 0.00019998573258062904, - "loss": 46.0, - "step": 33400 - }, - { - "epoch": 5.379000764926124, - "grad_norm": 0.0011330376146361232, - "learning_rate": 0.00019998573172604425, - "loss": 46.0, - "step": 33401 - }, - { - "epoch": 5.379161802004911, - "grad_norm": 0.0036879756953567266, - "learning_rate": 0.0001999857308714338, - "loss": 46.0, - "step": 33402 - }, - { - "epoch": 5.379322839083699, - "grad_norm": 0.002011275850236416, - "learning_rate": 0.00019998573001679782, - "loss": 46.0, - "step": 33403 - }, - { - "epoch": 5.379483876162486, - "grad_norm": 0.010497014038264751, - "learning_rate": 0.0001999857291621362, - "loss": 46.0, - "step": 33404 - }, - { - "epoch": 5.379644913241274, - "grad_norm": 0.008969152346253395, - "learning_rate": 0.000199985728307449, - "loss": 46.0, - "step": 33405 - }, - { - "epoch": 5.379805950320061, - "grad_norm": 0.003393877064809203, - "learning_rate": 0.00019998572745273622, - "loss": 46.0, - "step": 33406 - }, - { - "epoch": 5.3799669873988485, - "grad_norm": 0.01113277580589056, - "learning_rate": 0.00019998572659799785, - "loss": 46.0, - "step": 33407 - }, - { - "epoch": 5.380128024477636, - "grad_norm": 0.001524369465187192, - "learning_rate": 0.00019998572574323386, - "loss": 46.0, - "step": 33408 - }, - { - "epoch": 5.380289061556423, - "grad_norm": 0.0029023520182818174, - "learning_rate": 0.00019998572488844432, - "loss": 46.0, - "step": 33409 - }, - { - "epoch": 5.380450098635211, - "grad_norm": 0.004046233836561441, - "learning_rate": 0.00019998572403362918, - "loss": 46.0, - "step": 33410 - }, - { - "epoch": 5.380611135713998, - "grad_norm": 0.0036837707739323378, - "learning_rate": 0.00019998572317878844, - "loss": 46.0, - "step": 33411 - }, - { - "epoch": 5.380772172792786, - "grad_norm": 0.003365278011187911, - "learning_rate": 0.0001999857223239221, - "loss": 46.0, - "step": 33412 - }, - { - "epoch": 5.380933209871573, - "grad_norm": 0.0011758033651858568, - "learning_rate": 0.00019998572146903015, - "loss": 46.0, - "step": 33413 - }, - { - "epoch": 5.381094246950361, - "grad_norm": 0.0019845296628773212, - "learning_rate": 0.00019998572061411264, - "loss": 46.0, - "step": 33414 - }, - { - "epoch": 5.381255284029148, - "grad_norm": 0.0005921475240029395, - "learning_rate": 0.00019998571975916955, - "loss": 46.0, - "step": 33415 - }, - { - "epoch": 5.381416321107935, - "grad_norm": 0.012319914065301418, - "learning_rate": 0.00019998571890420086, - "loss": 46.0, - "step": 33416 - }, - { - "epoch": 5.381577358186722, - "grad_norm": 0.005228742491453886, - "learning_rate": 0.0001999857180492066, - "loss": 46.0, - "step": 33417 - }, - { - "epoch": 5.38173839526551, - "grad_norm": 0.0036351035814732313, - "learning_rate": 0.00019998571719418668, - "loss": 46.0, - "step": 33418 - }, - { - "epoch": 5.381899432344297, - "grad_norm": 0.001702353940345347, - "learning_rate": 0.0001999857163391412, - "loss": 46.0, - "step": 33419 - }, - { - "epoch": 5.3820604694230845, - "grad_norm": 0.003618385409936309, - "learning_rate": 0.00019998571548407017, - "loss": 46.0, - "step": 33420 - }, - { - "epoch": 5.382221506501872, - "grad_norm": 0.0018738234648481011, - "learning_rate": 0.0001999857146289735, - "loss": 46.0, - "step": 33421 - }, - { - "epoch": 5.382382543580659, - "grad_norm": 0.0019335250835865736, - "learning_rate": 0.00019998571377385127, - "loss": 46.0, - "step": 33422 - }, - { - "epoch": 5.382543580659447, - "grad_norm": 0.0033097444102168083, - "learning_rate": 0.00019998571291870342, - "loss": 46.0, - "step": 33423 - }, - { - "epoch": 5.382704617738234, - "grad_norm": 0.00042688887333497405, - "learning_rate": 0.00019998571206353, - "loss": 46.0, - "step": 33424 - }, - { - "epoch": 5.382865654817022, - "grad_norm": 0.0016118204221129417, - "learning_rate": 0.000199985711208331, - "loss": 46.0, - "step": 33425 - }, - { - "epoch": 5.383026691895809, - "grad_norm": 0.0003310071479063481, - "learning_rate": 0.00019998571035310638, - "loss": 46.0, - "step": 33426 - }, - { - "epoch": 5.383187728974597, - "grad_norm": 0.0009101869654841721, - "learning_rate": 0.00019998570949785615, - "loss": 46.0, - "step": 33427 - }, - { - "epoch": 5.383348766053384, - "grad_norm": 0.0040672714821994305, - "learning_rate": 0.00019998570864258037, - "loss": 46.0, - "step": 33428 - }, - { - "epoch": 5.3835098031321715, - "grad_norm": 0.008558794856071472, - "learning_rate": 0.000199985707787279, - "loss": 46.0, - "step": 33429 - }, - { - "epoch": 5.383670840210959, - "grad_norm": 0.005282717291265726, - "learning_rate": 0.000199985706931952, - "loss": 46.0, - "step": 33430 - }, - { - "epoch": 5.3838318772897455, - "grad_norm": 0.01839902438223362, - "learning_rate": 0.00019998570607659947, - "loss": 46.0, - "step": 33431 - }, - { - "epoch": 5.383992914368533, - "grad_norm": 0.0026638892013579607, - "learning_rate": 0.00019998570522122128, - "loss": 46.0, - "step": 33432 - }, - { - "epoch": 5.3841539514473205, - "grad_norm": 0.002414526417851448, - "learning_rate": 0.00019998570436581753, - "loss": 46.0, - "step": 33433 - }, - { - "epoch": 5.384314988526108, - "grad_norm": 0.0034217049833387136, - "learning_rate": 0.0001999857035103882, - "loss": 46.0, - "step": 33434 - }, - { - "epoch": 5.384476025604895, - "grad_norm": 0.0033208851236850023, - "learning_rate": 0.00019998570265493328, - "loss": 46.0, - "step": 33435 - }, - { - "epoch": 5.384637062683683, - "grad_norm": 0.0019141914090141654, - "learning_rate": 0.00019998570179945277, - "loss": 46.0, - "step": 33436 - }, - { - "epoch": 5.38479809976247, - "grad_norm": 0.0010673184879124165, - "learning_rate": 0.00019998570094394665, - "loss": 46.0, - "step": 33437 - }, - { - "epoch": 5.384959136841258, - "grad_norm": 0.0030328824650496244, - "learning_rate": 0.00019998570008841494, - "loss": 46.0, - "step": 33438 - }, - { - "epoch": 5.385120173920045, - "grad_norm": 0.002872882643714547, - "learning_rate": 0.00019998569923285767, - "loss": 46.0, - "step": 33439 - }, - { - "epoch": 5.385281210998833, - "grad_norm": 0.005898167844861746, - "learning_rate": 0.00019998569837727476, - "loss": 46.0, - "step": 33440 - }, - { - "epoch": 5.38544224807762, - "grad_norm": 0.004220498725771904, - "learning_rate": 0.00019998569752166628, - "loss": 46.0, - "step": 33441 - }, - { - "epoch": 5.3856032851564075, - "grad_norm": 0.008398385718464851, - "learning_rate": 0.00019998569666603223, - "loss": 46.0, - "step": 33442 - }, - { - "epoch": 5.385764322235195, - "grad_norm": 0.0015079734148457646, - "learning_rate": 0.00019998569581037255, - "loss": 46.0, - "step": 33443 - }, - { - "epoch": 5.385925359313982, - "grad_norm": 0.00890716165304184, - "learning_rate": 0.00019998569495468732, - "loss": 46.0, - "step": 33444 - }, - { - "epoch": 5.38608639639277, - "grad_norm": 0.01060525979846716, - "learning_rate": 0.00019998569409897647, - "loss": 46.0, - "step": 33445 - }, - { - "epoch": 5.386247433471556, - "grad_norm": 0.0012454123934730887, - "learning_rate": 0.00019998569324324, - "loss": 46.0, - "step": 33446 - }, - { - "epoch": 5.386408470550344, - "grad_norm": 0.0034388345666229725, - "learning_rate": 0.00019998569238747802, - "loss": 46.0, - "step": 33447 - }, - { - "epoch": 5.386569507629131, - "grad_norm": 0.005685487762093544, - "learning_rate": 0.0001999856915316904, - "loss": 46.0, - "step": 33448 - }, - { - "epoch": 5.386730544707919, - "grad_norm": 0.004930159077048302, - "learning_rate": 0.00019998569067587719, - "loss": 46.0, - "step": 33449 - }, - { - "epoch": 5.386891581786706, - "grad_norm": 0.0255642831325531, - "learning_rate": 0.00019998568982003837, - "loss": 46.0, - "step": 33450 - }, - { - "epoch": 5.387052618865494, - "grad_norm": 0.003627521451562643, - "learning_rate": 0.00019998568896417398, - "loss": 46.0, - "step": 33451 - }, - { - "epoch": 5.387213655944281, - "grad_norm": 0.005542671773582697, - "learning_rate": 0.000199985688108284, - "loss": 46.0, - "step": 33452 - }, - { - "epoch": 5.387374693023069, - "grad_norm": 0.0017212245147675276, - "learning_rate": 0.00019998568725236845, - "loss": 46.0, - "step": 33453 - }, - { - "epoch": 5.387535730101856, - "grad_norm": 0.0063540819101035595, - "learning_rate": 0.00019998568639642726, - "loss": 46.0, - "step": 33454 - }, - { - "epoch": 5.3876967671806435, - "grad_norm": 0.004496101755648851, - "learning_rate": 0.00019998568554046054, - "loss": 46.0, - "step": 33455 - }, - { - "epoch": 5.387857804259431, - "grad_norm": 0.004551996476948261, - "learning_rate": 0.00019998568468446818, - "loss": 46.0, - "step": 33456 - }, - { - "epoch": 5.388018841338218, - "grad_norm": 0.0015107227955013514, - "learning_rate": 0.00019998568382845024, - "loss": 46.0, - "step": 33457 - }, - { - "epoch": 5.388179878417006, - "grad_norm": 0.004374251700937748, - "learning_rate": 0.00019998568297240673, - "loss": 46.0, - "step": 33458 - }, - { - "epoch": 5.388340915495793, - "grad_norm": 0.007724551949650049, - "learning_rate": 0.00019998568211633758, - "loss": 46.0, - "step": 33459 - }, - { - "epoch": 5.388501952574581, - "grad_norm": 0.008130398578941822, - "learning_rate": 0.0001999856812602429, - "loss": 46.0, - "step": 33460 - }, - { - "epoch": 5.388662989653367, - "grad_norm": 0.0009826701134443283, - "learning_rate": 0.00019998568040412257, - "loss": 46.0, - "step": 33461 - }, - { - "epoch": 5.388824026732155, - "grad_norm": 0.00276808044873178, - "learning_rate": 0.0001999856795479767, - "loss": 46.0, - "step": 33462 - }, - { - "epoch": 5.388985063810942, - "grad_norm": 0.007066166959702969, - "learning_rate": 0.0001999856786918052, - "loss": 46.0, - "step": 33463 - }, - { - "epoch": 5.38914610088973, - "grad_norm": 0.002974366070702672, - "learning_rate": 0.00019998567783560814, - "loss": 46.0, - "step": 33464 - }, - { - "epoch": 5.389307137968517, - "grad_norm": 0.0017695093993097544, - "learning_rate": 0.00019998567697938546, - "loss": 46.0, - "step": 33465 - }, - { - "epoch": 5.389468175047305, - "grad_norm": 0.0032727045472711325, - "learning_rate": 0.0001999856761231372, - "loss": 46.0, - "step": 33466 - }, - { - "epoch": 5.389629212126092, - "grad_norm": 0.002224060008302331, - "learning_rate": 0.00019998567526686336, - "loss": 46.0, - "step": 33467 - }, - { - "epoch": 5.3897902492048795, - "grad_norm": 0.009895727969706059, - "learning_rate": 0.00019998567441056392, - "loss": 46.0, - "step": 33468 - }, - { - "epoch": 5.389951286283667, - "grad_norm": 0.0024988381192088127, - "learning_rate": 0.00019998567355423888, - "loss": 46.0, - "step": 33469 - }, - { - "epoch": 5.390112323362454, - "grad_norm": 0.0025691697373986244, - "learning_rate": 0.00019998567269788827, - "loss": 46.0, - "step": 33470 - }, - { - "epoch": 5.390273360441242, - "grad_norm": 0.0020229837391525507, - "learning_rate": 0.00019998567184151205, - "loss": 46.0, - "step": 33471 - }, - { - "epoch": 5.390434397520029, - "grad_norm": 0.009341203607618809, - "learning_rate": 0.00019998567098511024, - "loss": 46.0, - "step": 33472 - }, - { - "epoch": 5.390595434598817, - "grad_norm": 0.021939948201179504, - "learning_rate": 0.00019998567012868287, - "loss": 46.0, - "step": 33473 - }, - { - "epoch": 5.390756471677603, - "grad_norm": 0.009470638819038868, - "learning_rate": 0.00019998566927222986, - "loss": 46.0, - "step": 33474 - }, - { - "epoch": 5.390917508756391, - "grad_norm": 0.007434885948896408, - "learning_rate": 0.0001999856684157513, - "loss": 46.0, - "step": 33475 - }, - { - "epoch": 5.391078545835178, - "grad_norm": 0.002830767072737217, - "learning_rate": 0.00019998566755924713, - "loss": 46.0, - "step": 33476 - }, - { - "epoch": 5.391239582913966, - "grad_norm": 0.0023723661433905363, - "learning_rate": 0.00019998566670271736, - "loss": 46.0, - "step": 33477 - }, - { - "epoch": 5.391400619992753, - "grad_norm": 0.0024492102675139904, - "learning_rate": 0.000199985665846162, - "loss": 46.0, - "step": 33478 - }, - { - "epoch": 5.391561657071541, - "grad_norm": 0.0027372061740607023, - "learning_rate": 0.00019998566498958105, - "loss": 46.0, - "step": 33479 - }, - { - "epoch": 5.391722694150328, - "grad_norm": 0.008980734273791313, - "learning_rate": 0.00019998566413297452, - "loss": 46.0, - "step": 33480 - }, - { - "epoch": 5.3918837312291155, - "grad_norm": 0.0062769996002316475, - "learning_rate": 0.0001999856632763424, - "loss": 46.0, - "step": 33481 - }, - { - "epoch": 5.392044768307903, - "grad_norm": 0.0015832630451768637, - "learning_rate": 0.0001999856624196847, - "loss": 46.0, - "step": 33482 - }, - { - "epoch": 5.39220580538669, - "grad_norm": 0.01112749520689249, - "learning_rate": 0.00019998566156300137, - "loss": 46.0, - "step": 33483 - }, - { - "epoch": 5.392366842465478, - "grad_norm": 0.0016363660106435418, - "learning_rate": 0.00019998566070629248, - "loss": 46.0, - "step": 33484 - }, - { - "epoch": 5.392527879544265, - "grad_norm": 0.0028556224424391985, - "learning_rate": 0.00019998565984955796, - "loss": 46.0, - "step": 33485 - }, - { - "epoch": 5.392688916623053, - "grad_norm": 0.0037885401397943497, - "learning_rate": 0.00019998565899279788, - "loss": 46.0, - "step": 33486 - }, - { - "epoch": 5.39284995370184, - "grad_norm": 0.012513256631791592, - "learning_rate": 0.00019998565813601223, - "loss": 46.0, - "step": 33487 - }, - { - "epoch": 5.393010990780628, - "grad_norm": 0.005354071035981178, - "learning_rate": 0.00019998565727920097, - "loss": 46.0, - "step": 33488 - }, - { - "epoch": 5.393172027859414, - "grad_norm": 0.00628530140966177, - "learning_rate": 0.0001999856564223641, - "loss": 46.0, - "step": 33489 - }, - { - "epoch": 5.393333064938202, - "grad_norm": 0.007867788895964622, - "learning_rate": 0.00019998565556550167, - "loss": 46.0, - "step": 33490 - }, - { - "epoch": 5.393494102016989, - "grad_norm": 0.002536516636610031, - "learning_rate": 0.00019998565470861362, - "loss": 46.0, - "step": 33491 - }, - { - "epoch": 5.3936551390957765, - "grad_norm": 0.0021895705722272396, - "learning_rate": 0.0001999856538517, - "loss": 46.0, - "step": 33492 - }, - { - "epoch": 5.393816176174564, - "grad_norm": 0.008843330666422844, - "learning_rate": 0.00019998565299476077, - "loss": 46.0, - "step": 33493 - }, - { - "epoch": 5.393977213253351, - "grad_norm": 0.001473539276048541, - "learning_rate": 0.00019998565213779596, - "loss": 46.0, - "step": 33494 - }, - { - "epoch": 5.394138250332139, - "grad_norm": 0.0046297828666865826, - "learning_rate": 0.00019998565128080556, - "loss": 46.0, - "step": 33495 - }, - { - "epoch": 5.394299287410926, - "grad_norm": 0.003924149554222822, - "learning_rate": 0.00019998565042378958, - "loss": 46.0, - "step": 33496 - }, - { - "epoch": 5.394460324489714, - "grad_norm": 0.01227219682186842, - "learning_rate": 0.00019998564956674798, - "loss": 46.0, - "step": 33497 - }, - { - "epoch": 5.394621361568501, - "grad_norm": 0.004981328267604113, - "learning_rate": 0.00019998564870968083, - "loss": 46.0, - "step": 33498 - }, - { - "epoch": 5.394782398647289, - "grad_norm": 0.0041697281412780285, - "learning_rate": 0.00019998564785258805, - "loss": 46.0, - "step": 33499 - }, - { - "epoch": 5.394943435726076, - "grad_norm": 0.0061863684095442295, - "learning_rate": 0.0001999856469954697, - "loss": 46.0, - "step": 33500 - }, - { - "epoch": 5.395104472804864, - "grad_norm": 0.007273610215634108, - "learning_rate": 0.00019998564613832572, - "loss": 46.0, - "step": 33501 - }, - { - "epoch": 5.395265509883651, - "grad_norm": 0.006181139498949051, - "learning_rate": 0.0001999856452811562, - "loss": 46.0, - "step": 33502 - }, - { - "epoch": 5.3954265469624385, - "grad_norm": 0.0028360106516629457, - "learning_rate": 0.00019998564442396107, - "loss": 46.0, - "step": 33503 - }, - { - "epoch": 5.395587584041225, - "grad_norm": 0.010671830736100674, - "learning_rate": 0.00019998564356674034, - "loss": 46.0, - "step": 33504 - }, - { - "epoch": 5.3957486211200125, - "grad_norm": 0.007207934278994799, - "learning_rate": 0.00019998564270949401, - "loss": 46.0, - "step": 33505 - }, - { - "epoch": 5.3959096581988, - "grad_norm": 0.00286873709410429, - "learning_rate": 0.00019998564185222213, - "loss": 46.0, - "step": 33506 - }, - { - "epoch": 5.396070695277587, - "grad_norm": 0.00712064141407609, - "learning_rate": 0.0001999856409949246, - "loss": 46.0, - "step": 33507 - }, - { - "epoch": 5.396231732356375, - "grad_norm": 0.009728697128593922, - "learning_rate": 0.00019998564013760155, - "loss": 46.0, - "step": 33508 - }, - { - "epoch": 5.396392769435162, - "grad_norm": 0.001330304890871048, - "learning_rate": 0.00019998563928025286, - "loss": 46.0, - "step": 33509 - }, - { - "epoch": 5.39655380651395, - "grad_norm": 0.0044160704128444195, - "learning_rate": 0.00019998563842287857, - "loss": 46.0, - "step": 33510 - }, - { - "epoch": 5.396714843592737, - "grad_norm": 0.006356305442750454, - "learning_rate": 0.00019998563756547873, - "loss": 46.0, - "step": 33511 - }, - { - "epoch": 5.396875880671525, - "grad_norm": 0.0037580919452011585, - "learning_rate": 0.00019998563670805327, - "loss": 46.0, - "step": 33512 - }, - { - "epoch": 5.397036917750312, - "grad_norm": 0.016473636031150818, - "learning_rate": 0.00019998563585060222, - "loss": 46.0, - "step": 33513 - }, - { - "epoch": 5.3971979548291, - "grad_norm": 0.001658030552789569, - "learning_rate": 0.0001999856349931256, - "loss": 46.0, - "step": 33514 - }, - { - "epoch": 5.397358991907887, - "grad_norm": 0.005189374089241028, - "learning_rate": 0.00019998563413562337, - "loss": 46.0, - "step": 33515 - }, - { - "epoch": 5.3975200289866745, - "grad_norm": 0.005965104792267084, - "learning_rate": 0.00019998563327809556, - "loss": 46.0, - "step": 33516 - }, - { - "epoch": 5.397681066065462, - "grad_norm": 0.0006654943572357297, - "learning_rate": 0.00019998563242054214, - "loss": 46.0, - "step": 33517 - }, - { - "epoch": 5.397842103144249, - "grad_norm": 0.004483413882553577, - "learning_rate": 0.00019998563156296313, - "loss": 46.0, - "step": 33518 - }, - { - "epoch": 5.398003140223036, - "grad_norm": 0.0023640641011297703, - "learning_rate": 0.00019998563070535856, - "loss": 46.0, - "step": 33519 - }, - { - "epoch": 5.398164177301823, - "grad_norm": 0.007390114013105631, - "learning_rate": 0.00019998562984772838, - "loss": 46.0, - "step": 33520 - }, - { - "epoch": 5.398325214380611, - "grad_norm": 0.0042753578163683414, - "learning_rate": 0.0001999856289900726, - "loss": 46.0, - "step": 33521 - }, - { - "epoch": 5.398486251459398, - "grad_norm": 0.005977501627057791, - "learning_rate": 0.00019998562813239122, - "loss": 46.0, - "step": 33522 - }, - { - "epoch": 5.398647288538186, - "grad_norm": 0.00928088091313839, - "learning_rate": 0.00019998562727468428, - "loss": 46.0, - "step": 33523 - }, - { - "epoch": 5.398808325616973, - "grad_norm": 0.01390144880861044, - "learning_rate": 0.00019998562641695172, - "loss": 46.0, - "step": 33524 - }, - { - "epoch": 5.398969362695761, - "grad_norm": 0.005800340324640274, - "learning_rate": 0.0001999856255591936, - "loss": 46.0, - "step": 33525 - }, - { - "epoch": 5.399130399774548, - "grad_norm": 0.012404757551848888, - "learning_rate": 0.00019998562470140984, - "loss": 46.0, - "step": 33526 - }, - { - "epoch": 5.399291436853336, - "grad_norm": 0.002577253384515643, - "learning_rate": 0.00019998562384360052, - "loss": 46.0, - "step": 33527 - }, - { - "epoch": 5.399452473932123, - "grad_norm": 0.002280876040458679, - "learning_rate": 0.00019998562298576564, - "loss": 46.0, - "step": 33528 - }, - { - "epoch": 5.3996135110109105, - "grad_norm": 0.003409678814932704, - "learning_rate": 0.00019998562212790512, - "loss": 46.0, - "step": 33529 - }, - { - "epoch": 5.399774548089698, - "grad_norm": 0.0028816857375204563, - "learning_rate": 0.00019998562127001904, - "loss": 46.0, - "step": 33530 - }, - { - "epoch": 5.399935585168485, - "grad_norm": 0.014008548110723495, - "learning_rate": 0.00019998562041210734, - "loss": 46.0, - "step": 33531 - }, - { - "epoch": 5.400096622247273, - "grad_norm": 0.008499814197421074, - "learning_rate": 0.00019998561955417006, - "loss": 46.0, - "step": 33532 - }, - { - "epoch": 5.40025765932606, - "grad_norm": 0.002058165380731225, - "learning_rate": 0.0001999856186962072, - "loss": 46.0, - "step": 33533 - }, - { - "epoch": 5.400418696404847, - "grad_norm": 0.0042491573840379715, - "learning_rate": 0.00019998561783821875, - "loss": 46.0, - "step": 33534 - }, - { - "epoch": 5.400579733483634, - "grad_norm": 0.005727290641516447, - "learning_rate": 0.0001999856169802047, - "loss": 46.0, - "step": 33535 - }, - { - "epoch": 5.400740770562422, - "grad_norm": 0.001044887350872159, - "learning_rate": 0.00019998561612216508, - "loss": 46.0, - "step": 33536 - }, - { - "epoch": 5.400901807641209, - "grad_norm": 0.00909384060651064, - "learning_rate": 0.0001999856152640998, - "loss": 46.0, - "step": 33537 - }, - { - "epoch": 5.401062844719997, - "grad_norm": 0.013593494892120361, - "learning_rate": 0.000199985614406009, - "loss": 46.0, - "step": 33538 - }, - { - "epoch": 5.401223881798784, - "grad_norm": 0.002498663729056716, - "learning_rate": 0.00019998561354789258, - "loss": 46.0, - "step": 33539 - }, - { - "epoch": 5.4013849188775716, - "grad_norm": 0.005663787480443716, - "learning_rate": 0.00019998561268975057, - "loss": 46.0, - "step": 33540 - }, - { - "epoch": 5.401545955956359, - "grad_norm": 0.0018130455864593387, - "learning_rate": 0.00019998561183158297, - "loss": 46.0, - "step": 33541 - }, - { - "epoch": 5.4017069930351465, - "grad_norm": 0.010311313904821873, - "learning_rate": 0.0001999856109733898, - "loss": 46.0, - "step": 33542 - }, - { - "epoch": 5.401868030113934, - "grad_norm": 0.0009938409784808755, - "learning_rate": 0.000199985610115171, - "loss": 46.0, - "step": 33543 - }, - { - "epoch": 5.402029067192721, - "grad_norm": 0.009713970124721527, - "learning_rate": 0.00019998560925692664, - "loss": 46.0, - "step": 33544 - }, - { - "epoch": 5.402190104271509, - "grad_norm": 0.01077380683273077, - "learning_rate": 0.00019998560839865667, - "loss": 46.0, - "step": 33545 - }, - { - "epoch": 5.402351141350296, - "grad_norm": 0.020101845264434814, - "learning_rate": 0.00019998560754036114, - "loss": 46.0, - "step": 33546 - }, - { - "epoch": 5.402512178429084, - "grad_norm": 0.008321388624608517, - "learning_rate": 0.00019998560668204, - "loss": 46.0, - "step": 33547 - }, - { - "epoch": 5.40267321550787, - "grad_norm": 0.0015650109853595495, - "learning_rate": 0.00019998560582369326, - "loss": 46.0, - "step": 33548 - }, - { - "epoch": 5.402834252586658, - "grad_norm": 0.0008906018920242786, - "learning_rate": 0.0001999856049653209, - "loss": 46.0, - "step": 33549 - }, - { - "epoch": 5.402995289665445, - "grad_norm": 0.0027846964076161385, - "learning_rate": 0.000199985604106923, - "loss": 46.0, - "step": 33550 - }, - { - "epoch": 5.403156326744233, - "grad_norm": 0.003964104223996401, - "learning_rate": 0.00019998560324849948, - "loss": 46.0, - "step": 33551 - }, - { - "epoch": 5.40331736382302, - "grad_norm": 0.005119128152728081, - "learning_rate": 0.00019998560239005038, - "loss": 46.0, - "step": 33552 - }, - { - "epoch": 5.4034784009018075, - "grad_norm": 0.0012736073695123196, - "learning_rate": 0.0001999856015315757, - "loss": 46.0, - "step": 33553 - }, - { - "epoch": 5.403639437980595, - "grad_norm": 0.020718751475214958, - "learning_rate": 0.00019998560067307542, - "loss": 46.0, - "step": 33554 - }, - { - "epoch": 5.403800475059382, - "grad_norm": 0.014644449576735497, - "learning_rate": 0.00019998559981454953, - "loss": 46.0, - "step": 33555 - }, - { - "epoch": 5.40396151213817, - "grad_norm": 0.023552147671580315, - "learning_rate": 0.0001999855989559981, - "loss": 46.0, - "step": 33556 - }, - { - "epoch": 5.404122549216957, - "grad_norm": 0.0076517341658473015, - "learning_rate": 0.000199985598097421, - "loss": 46.0, - "step": 33557 - }, - { - "epoch": 5.404283586295745, - "grad_norm": 0.0034961772616952658, - "learning_rate": 0.00019998559723881837, - "loss": 46.0, - "step": 33558 - }, - { - "epoch": 5.404444623374532, - "grad_norm": 0.0019426201470196247, - "learning_rate": 0.00019998559638019012, - "loss": 46.0, - "step": 33559 - }, - { - "epoch": 5.40460566045332, - "grad_norm": 0.003406703472137451, - "learning_rate": 0.00019998559552153632, - "loss": 46.0, - "step": 33560 - }, - { - "epoch": 5.404766697532107, - "grad_norm": 0.0010419911704957485, - "learning_rate": 0.00019998559466285687, - "loss": 46.0, - "step": 33561 - }, - { - "epoch": 5.404927734610894, - "grad_norm": 0.00535344984382391, - "learning_rate": 0.00019998559380415186, - "loss": 46.0, - "step": 33562 - }, - { - "epoch": 5.405088771689681, - "grad_norm": 0.0015867863548919559, - "learning_rate": 0.00019998559294542124, - "loss": 46.0, - "step": 33563 - }, - { - "epoch": 5.405249808768469, - "grad_norm": 0.0038380767218768597, - "learning_rate": 0.0001999855920866651, - "loss": 46.0, - "step": 33564 - }, - { - "epoch": 5.405410845847256, - "grad_norm": 0.004812820348888636, - "learning_rate": 0.00019998559122788326, - "loss": 46.0, - "step": 33565 - }, - { - "epoch": 5.4055718829260435, - "grad_norm": 0.0038598033133894205, - "learning_rate": 0.0001999855903690759, - "loss": 46.0, - "step": 33566 - }, - { - "epoch": 5.405732920004831, - "grad_norm": 0.010577513836324215, - "learning_rate": 0.00019998558951024294, - "loss": 46.0, - "step": 33567 - }, - { - "epoch": 5.405893957083618, - "grad_norm": 0.006066112779080868, - "learning_rate": 0.00019998558865138438, - "loss": 46.0, - "step": 33568 - }, - { - "epoch": 5.406054994162406, - "grad_norm": 0.006049091927707195, - "learning_rate": 0.00019998558779250024, - "loss": 46.0, - "step": 33569 - }, - { - "epoch": 5.406216031241193, - "grad_norm": 0.009319797158241272, - "learning_rate": 0.00019998558693359048, - "loss": 46.0, - "step": 33570 - }, - { - "epoch": 5.406377068319981, - "grad_norm": 0.004520068876445293, - "learning_rate": 0.00019998558607465516, - "loss": 46.0, - "step": 33571 - }, - { - "epoch": 5.406538105398768, - "grad_norm": 0.014994814060628414, - "learning_rate": 0.00019998558521569423, - "loss": 46.0, - "step": 33572 - }, - { - "epoch": 5.406699142477556, - "grad_norm": 0.008603583090007305, - "learning_rate": 0.00019998558435670773, - "loss": 46.0, - "step": 33573 - }, - { - "epoch": 5.406860179556343, - "grad_norm": 0.004327674861997366, - "learning_rate": 0.00019998558349769563, - "loss": 46.0, - "step": 33574 - }, - { - "epoch": 5.407021216635131, - "grad_norm": 0.010475815273821354, - "learning_rate": 0.00019998558263865793, - "loss": 46.0, - "step": 33575 - }, - { - "epoch": 5.407182253713918, - "grad_norm": 0.002505118492990732, - "learning_rate": 0.00019998558177959462, - "loss": 46.0, - "step": 33576 - }, - { - "epoch": 5.407343290792705, - "grad_norm": 0.006551137659698725, - "learning_rate": 0.00019998558092050575, - "loss": 46.0, - "step": 33577 - }, - { - "epoch": 5.407504327871492, - "grad_norm": 0.012180093675851822, - "learning_rate": 0.00019998558006139127, - "loss": 46.0, - "step": 33578 - }, - { - "epoch": 5.4076653649502795, - "grad_norm": 0.003010115586221218, - "learning_rate": 0.00019998557920225123, - "loss": 46.0, - "step": 33579 - }, - { - "epoch": 5.407826402029067, - "grad_norm": 0.011785740032792091, - "learning_rate": 0.00019998557834308554, - "loss": 46.0, - "step": 33580 - }, - { - "epoch": 5.407987439107854, - "grad_norm": 0.014025033451616764, - "learning_rate": 0.0001999855774838943, - "loss": 46.0, - "step": 33581 - }, - { - "epoch": 5.408148476186642, - "grad_norm": 0.002184830605983734, - "learning_rate": 0.00019998557662467747, - "loss": 46.0, - "step": 33582 - }, - { - "epoch": 5.408309513265429, - "grad_norm": 0.00212856475263834, - "learning_rate": 0.00019998557576543508, - "loss": 46.0, - "step": 33583 - }, - { - "epoch": 5.408470550344217, - "grad_norm": 0.005818785633891821, - "learning_rate": 0.00019998557490616701, - "loss": 46.0, - "step": 33584 - }, - { - "epoch": 5.408631587423004, - "grad_norm": 0.008384395390748978, - "learning_rate": 0.00019998557404687342, - "loss": 46.0, - "step": 33585 - }, - { - "epoch": 5.408792624501792, - "grad_norm": 0.004590184893459082, - "learning_rate": 0.00019998557318755421, - "loss": 46.0, - "step": 33586 - }, - { - "epoch": 5.408953661580579, - "grad_norm": 0.0016751501243561506, - "learning_rate": 0.00019998557232820945, - "loss": 46.0, - "step": 33587 - }, - { - "epoch": 5.409114698659367, - "grad_norm": 0.002438553376123309, - "learning_rate": 0.00019998557146883904, - "loss": 46.0, - "step": 33588 - }, - { - "epoch": 5.409275735738154, - "grad_norm": 0.0023840717040002346, - "learning_rate": 0.00019998557060944307, - "loss": 46.0, - "step": 33589 - }, - { - "epoch": 5.4094367728169415, - "grad_norm": 0.0013377969153225422, - "learning_rate": 0.0001999855697500215, - "loss": 46.0, - "step": 33590 - }, - { - "epoch": 5.409597809895729, - "grad_norm": 0.0029756140429526567, - "learning_rate": 0.00019998556889057437, - "loss": 46.0, - "step": 33591 - }, - { - "epoch": 5.4097588469745155, - "grad_norm": 0.0028415340930223465, - "learning_rate": 0.0001999855680311016, - "loss": 46.0, - "step": 33592 - }, - { - "epoch": 5.409919884053303, - "grad_norm": 0.005608447827398777, - "learning_rate": 0.0001999855671716033, - "loss": 46.0, - "step": 33593 - }, - { - "epoch": 5.41008092113209, - "grad_norm": 0.005532634910196066, - "learning_rate": 0.00019998556631207933, - "loss": 46.0, - "step": 33594 - }, - { - "epoch": 5.410241958210878, - "grad_norm": 0.0065854270942509174, - "learning_rate": 0.00019998556545252981, - "loss": 46.0, - "step": 33595 - }, - { - "epoch": 5.410402995289665, - "grad_norm": 0.00596965104341507, - "learning_rate": 0.0001999855645929547, - "loss": 46.0, - "step": 33596 - }, - { - "epoch": 5.410564032368453, - "grad_norm": 0.01035352237522602, - "learning_rate": 0.000199985563733354, - "loss": 46.0, - "step": 33597 - }, - { - "epoch": 5.41072506944724, - "grad_norm": 0.006125032436102629, - "learning_rate": 0.0001999855628737277, - "loss": 46.0, - "step": 33598 - }, - { - "epoch": 5.410886106526028, - "grad_norm": 0.0020886987913399935, - "learning_rate": 0.0001999855620140758, - "loss": 46.0, - "step": 33599 - }, - { - "epoch": 5.411047143604815, - "grad_norm": 0.012737304903566837, - "learning_rate": 0.00019998556115439836, - "loss": 46.0, - "step": 33600 - }, - { - "epoch": 5.4112081806836025, - "grad_norm": 0.004250716883689165, - "learning_rate": 0.0001999855602946953, - "loss": 46.0, - "step": 33601 - }, - { - "epoch": 5.41136921776239, - "grad_norm": 0.006766407750546932, - "learning_rate": 0.00019998555943496663, - "loss": 46.0, - "step": 33602 - }, - { - "epoch": 5.4115302548411774, - "grad_norm": 0.024289848282933235, - "learning_rate": 0.00019998555857521236, - "loss": 46.0, - "step": 33603 - }, - { - "epoch": 5.411691291919965, - "grad_norm": 0.0014451007591560483, - "learning_rate": 0.00019998555771543256, - "loss": 46.0, - "step": 33604 - }, - { - "epoch": 5.411852328998752, - "grad_norm": 0.008498722687363625, - "learning_rate": 0.0001999855568556271, - "loss": 46.0, - "step": 33605 - }, - { - "epoch": 5.41201336607754, - "grad_norm": 0.006658232305198908, - "learning_rate": 0.00019998555599579608, - "loss": 46.0, - "step": 33606 - }, - { - "epoch": 5.412174403156326, - "grad_norm": 0.010507320985198021, - "learning_rate": 0.00019998555513593946, - "loss": 46.0, - "step": 33607 - }, - { - "epoch": 5.412335440235114, - "grad_norm": 0.0025305356830358505, - "learning_rate": 0.00019998555427605725, - "loss": 46.0, - "step": 33608 - }, - { - "epoch": 5.412496477313901, - "grad_norm": 0.002887978684157133, - "learning_rate": 0.00019998555341614943, - "loss": 46.0, - "step": 33609 - }, - { - "epoch": 5.412657514392689, - "grad_norm": 0.0008898144005797803, - "learning_rate": 0.00019998555255621608, - "loss": 46.0, - "step": 33610 - }, - { - "epoch": 5.412818551471476, - "grad_norm": 0.007634243927896023, - "learning_rate": 0.00019998555169625708, - "loss": 46.0, - "step": 33611 - }, - { - "epoch": 5.412979588550264, - "grad_norm": 0.011333703994750977, - "learning_rate": 0.0001999855508362725, - "loss": 46.0, - "step": 33612 - }, - { - "epoch": 5.413140625629051, - "grad_norm": 0.011937910690903664, - "learning_rate": 0.00019998554997626236, - "loss": 46.0, - "step": 33613 - }, - { - "epoch": 5.4133016627078385, - "grad_norm": 0.005609564483165741, - "learning_rate": 0.0001999855491162266, - "loss": 46.0, - "step": 33614 - }, - { - "epoch": 5.413462699786626, - "grad_norm": 0.008082227781414986, - "learning_rate": 0.00019998554825616526, - "loss": 46.0, - "step": 33615 - }, - { - "epoch": 5.413623736865413, - "grad_norm": 0.0019346611807122827, - "learning_rate": 0.00019998554739607833, - "loss": 46.0, - "step": 33616 - }, - { - "epoch": 5.413784773944201, - "grad_norm": 0.009286635555326939, - "learning_rate": 0.0001999855465359658, - "loss": 46.0, - "step": 33617 - }, - { - "epoch": 5.413945811022988, - "grad_norm": 0.017126353457570076, - "learning_rate": 0.00019998554567582767, - "loss": 46.0, - "step": 33618 - }, - { - "epoch": 5.414106848101776, - "grad_norm": 0.004990117158740759, - "learning_rate": 0.00019998554481566395, - "loss": 46.0, - "step": 33619 - }, - { - "epoch": 5.414267885180563, - "grad_norm": 0.0036235779989510775, - "learning_rate": 0.00019998554395547467, - "loss": 46.0, - "step": 33620 - }, - { - "epoch": 5.414428922259351, - "grad_norm": 0.0021115224808454514, - "learning_rate": 0.00019998554309525978, - "loss": 46.0, - "step": 33621 - }, - { - "epoch": 5.414589959338137, - "grad_norm": 0.005655521061271429, - "learning_rate": 0.00019998554223501927, - "loss": 46.0, - "step": 33622 - }, - { - "epoch": 5.414750996416925, - "grad_norm": 0.017471928149461746, - "learning_rate": 0.0001999855413747532, - "loss": 46.0, - "step": 33623 - }, - { - "epoch": 5.414912033495712, - "grad_norm": 0.005181267857551575, - "learning_rate": 0.00019998554051446155, - "loss": 46.0, - "step": 33624 - }, - { - "epoch": 5.4150730705745, - "grad_norm": 0.0059936451725661755, - "learning_rate": 0.0001999855396541443, - "loss": 46.0, - "step": 33625 - }, - { - "epoch": 5.415234107653287, - "grad_norm": 0.005156735423952341, - "learning_rate": 0.00019998553879380145, - "loss": 46.0, - "step": 33626 - }, - { - "epoch": 5.4153951447320745, - "grad_norm": 0.006914381403476, - "learning_rate": 0.000199985537933433, - "loss": 46.0, - "step": 33627 - }, - { - "epoch": 5.415556181810862, - "grad_norm": 0.0009555213036946952, - "learning_rate": 0.00019998553707303897, - "loss": 46.0, - "step": 33628 - }, - { - "epoch": 5.415717218889649, - "grad_norm": 0.0022707798052579165, - "learning_rate": 0.00019998553621261938, - "loss": 46.0, - "step": 33629 - }, - { - "epoch": 5.415878255968437, - "grad_norm": 0.005456534679979086, - "learning_rate": 0.00019998553535217415, - "loss": 46.0, - "step": 33630 - }, - { - "epoch": 5.416039293047224, - "grad_norm": 0.005757391918450594, - "learning_rate": 0.00019998553449170333, - "loss": 46.0, - "step": 33631 - }, - { - "epoch": 5.416200330126012, - "grad_norm": 0.003124311799183488, - "learning_rate": 0.00019998553363120695, - "loss": 46.0, - "step": 33632 - }, - { - "epoch": 5.416361367204799, - "grad_norm": 0.005804627668112516, - "learning_rate": 0.00019998553277068498, - "loss": 46.0, - "step": 33633 - }, - { - "epoch": 5.416522404283587, - "grad_norm": 0.017441198229789734, - "learning_rate": 0.0001999855319101374, - "loss": 46.0, - "step": 33634 - }, - { - "epoch": 5.416683441362373, - "grad_norm": 0.0024849544279277325, - "learning_rate": 0.00019998553104956423, - "loss": 46.0, - "step": 33635 - }, - { - "epoch": 5.416844478441161, - "grad_norm": 0.00952769722789526, - "learning_rate": 0.00019998553018896547, - "loss": 46.0, - "step": 33636 - }, - { - "epoch": 5.417005515519948, - "grad_norm": 0.0021035310346633196, - "learning_rate": 0.0001999855293283411, - "loss": 46.0, - "step": 33637 - }, - { - "epoch": 5.417166552598736, - "grad_norm": 0.006762698292732239, - "learning_rate": 0.00019998552846769117, - "loss": 46.0, - "step": 33638 - }, - { - "epoch": 5.417327589677523, - "grad_norm": 0.002041331259533763, - "learning_rate": 0.00019998552760701563, - "loss": 46.0, - "step": 33639 - }, - { - "epoch": 5.4174886267563105, - "grad_norm": 0.00481139263138175, - "learning_rate": 0.00019998552674631453, - "loss": 46.0, - "step": 33640 - }, - { - "epoch": 5.417649663835098, - "grad_norm": 0.011447397992014885, - "learning_rate": 0.00019998552588558778, - "loss": 46.0, - "step": 33641 - }, - { - "epoch": 5.417810700913885, - "grad_norm": 0.0020422530360519886, - "learning_rate": 0.0001999855250248355, - "loss": 46.0, - "step": 33642 - }, - { - "epoch": 5.417971737992673, - "grad_norm": 0.013513357378542423, - "learning_rate": 0.0001999855241640576, - "loss": 46.0, - "step": 33643 - }, - { - "epoch": 5.41813277507146, - "grad_norm": 0.0046821534633636475, - "learning_rate": 0.0001999855233032541, - "loss": 46.0, - "step": 33644 - }, - { - "epoch": 5.418293812150248, - "grad_norm": 0.01117374375462532, - "learning_rate": 0.00019998552244242503, - "loss": 46.0, - "step": 33645 - }, - { - "epoch": 5.418454849229035, - "grad_norm": 0.008008531294763088, - "learning_rate": 0.00019998552158157038, - "loss": 46.0, - "step": 33646 - }, - { - "epoch": 5.418615886307823, - "grad_norm": 0.003920272924005985, - "learning_rate": 0.0001999855207206901, - "loss": 46.0, - "step": 33647 - }, - { - "epoch": 5.41877692338661, - "grad_norm": 0.019243530929088593, - "learning_rate": 0.00019998551985978425, - "loss": 46.0, - "step": 33648 - }, - { - "epoch": 5.4189379604653976, - "grad_norm": 0.006622105371206999, - "learning_rate": 0.0001999855189988528, - "loss": 46.0, - "step": 33649 - }, - { - "epoch": 5.419098997544184, - "grad_norm": 0.0025609456934034824, - "learning_rate": 0.00019998551813789578, - "loss": 46.0, - "step": 33650 - }, - { - "epoch": 5.419260034622972, - "grad_norm": 0.012442146427929401, - "learning_rate": 0.00019998551727691316, - "loss": 46.0, - "step": 33651 - }, - { - "epoch": 5.419421071701759, - "grad_norm": 0.01483453344553709, - "learning_rate": 0.00019998551641590493, - "loss": 46.0, - "step": 33652 - }, - { - "epoch": 5.4195821087805465, - "grad_norm": 0.0077059767208993435, - "learning_rate": 0.0001999855155548711, - "loss": 46.0, - "step": 33653 - }, - { - "epoch": 5.419743145859334, - "grad_norm": 0.0020480400417000055, - "learning_rate": 0.0001999855146938117, - "loss": 46.0, - "step": 33654 - }, - { - "epoch": 5.419904182938121, - "grad_norm": 0.004197259899228811, - "learning_rate": 0.0001999855138327267, - "loss": 46.0, - "step": 33655 - }, - { - "epoch": 5.420065220016909, - "grad_norm": 0.009529270231723785, - "learning_rate": 0.00019998551297161613, - "loss": 46.0, - "step": 33656 - }, - { - "epoch": 5.420226257095696, - "grad_norm": 0.007029429078102112, - "learning_rate": 0.00019998551211048, - "loss": 46.0, - "step": 33657 - }, - { - "epoch": 5.420387294174484, - "grad_norm": 0.0033611590042710304, - "learning_rate": 0.0001999855112493182, - "loss": 46.0, - "step": 33658 - }, - { - "epoch": 5.420548331253271, - "grad_norm": 0.0013261355925351381, - "learning_rate": 0.00019998551038813084, - "loss": 46.0, - "step": 33659 - }, - { - "epoch": 5.420709368332059, - "grad_norm": 0.005337535869330168, - "learning_rate": 0.0001999855095269179, - "loss": 46.0, - "step": 33660 - }, - { - "epoch": 5.420870405410846, - "grad_norm": 0.00729743717238307, - "learning_rate": 0.00019998550866567936, - "loss": 46.0, - "step": 33661 - }, - { - "epoch": 5.4210314424896335, - "grad_norm": 0.0034848027862608433, - "learning_rate": 0.00019998550780441523, - "loss": 46.0, - "step": 33662 - }, - { - "epoch": 5.421192479568421, - "grad_norm": 0.012427321635186672, - "learning_rate": 0.00019998550694312552, - "loss": 46.0, - "step": 33663 - }, - { - "epoch": 5.421353516647208, - "grad_norm": 0.011609414592385292, - "learning_rate": 0.00019998550608181018, - "loss": 46.0, - "step": 33664 - }, - { - "epoch": 5.421514553725995, - "grad_norm": 0.001503894105553627, - "learning_rate": 0.0001999855052204693, - "loss": 46.0, - "step": 33665 - }, - { - "epoch": 5.4216755908047825, - "grad_norm": 0.007099046837538481, - "learning_rate": 0.00019998550435910278, - "loss": 46.0, - "step": 33666 - }, - { - "epoch": 5.42183662788357, - "grad_norm": 0.001849538297392428, - "learning_rate": 0.00019998550349771072, - "loss": 46.0, - "step": 33667 - }, - { - "epoch": 5.421997664962357, - "grad_norm": 0.0016216576332226396, - "learning_rate": 0.00019998550263629304, - "loss": 46.0, - "step": 33668 - }, - { - "epoch": 5.422158702041145, - "grad_norm": 0.0057668848894536495, - "learning_rate": 0.00019998550177484977, - "loss": 46.0, - "step": 33669 - }, - { - "epoch": 5.422319739119932, - "grad_norm": 0.0019296335522085428, - "learning_rate": 0.00019998550091338091, - "loss": 46.0, - "step": 33670 - }, - { - "epoch": 5.42248077619872, - "grad_norm": 0.001681244932115078, - "learning_rate": 0.00019998550005188647, - "loss": 46.0, - "step": 33671 - }, - { - "epoch": 5.422641813277507, - "grad_norm": 0.0051642353646457195, - "learning_rate": 0.00019998549919036642, - "loss": 46.0, - "step": 33672 - }, - { - "epoch": 5.422802850356295, - "grad_norm": 0.0020802582148462534, - "learning_rate": 0.0001999854983288208, - "loss": 46.0, - "step": 33673 - }, - { - "epoch": 5.422963887435082, - "grad_norm": 0.0008842212264426053, - "learning_rate": 0.00019998549746724957, - "loss": 46.0, - "step": 33674 - }, - { - "epoch": 5.4231249245138695, - "grad_norm": 0.01572221890091896, - "learning_rate": 0.00019998549660565278, - "loss": 46.0, - "step": 33675 - }, - { - "epoch": 5.423285961592657, - "grad_norm": 0.0037692878395318985, - "learning_rate": 0.00019998549574403037, - "loss": 46.0, - "step": 33676 - }, - { - "epoch": 5.423446998671444, - "grad_norm": 0.0034856435377150774, - "learning_rate": 0.00019998549488238235, - "loss": 46.0, - "step": 33677 - }, - { - "epoch": 5.423608035750232, - "grad_norm": 0.013210601173341274, - "learning_rate": 0.0001999854940207088, - "loss": 46.0, - "step": 33678 - }, - { - "epoch": 5.423769072829019, - "grad_norm": 0.0023444397374987602, - "learning_rate": 0.00019998549315900958, - "loss": 46.0, - "step": 33679 - }, - { - "epoch": 5.423930109907806, - "grad_norm": 0.014169315807521343, - "learning_rate": 0.00019998549229728482, - "loss": 46.0, - "step": 33680 - }, - { - "epoch": 5.424091146986593, - "grad_norm": 0.007368167862296104, - "learning_rate": 0.00019998549143553446, - "loss": 46.0, - "step": 33681 - }, - { - "epoch": 5.424252184065381, - "grad_norm": 0.002794538391754031, - "learning_rate": 0.0001999854905737585, - "loss": 46.0, - "step": 33682 - }, - { - "epoch": 5.424413221144168, - "grad_norm": 0.011421543546020985, - "learning_rate": 0.00019998548971195696, - "loss": 46.0, - "step": 33683 - }, - { - "epoch": 5.424574258222956, - "grad_norm": 0.009053487330675125, - "learning_rate": 0.00019998548885012985, - "loss": 46.0, - "step": 33684 - }, - { - "epoch": 5.424735295301743, - "grad_norm": 0.002092099515721202, - "learning_rate": 0.0001999854879882771, - "loss": 46.0, - "step": 33685 - }, - { - "epoch": 5.424896332380531, - "grad_norm": 0.0028156281914561987, - "learning_rate": 0.00019998548712639878, - "loss": 46.0, - "step": 33686 - }, - { - "epoch": 5.425057369459318, - "grad_norm": 0.001339444424957037, - "learning_rate": 0.0001999854862644949, - "loss": 46.0, - "step": 33687 - }, - { - "epoch": 5.4252184065381055, - "grad_norm": 0.004757916089147329, - "learning_rate": 0.00019998548540256538, - "loss": 46.0, - "step": 33688 - }, - { - "epoch": 5.425379443616893, - "grad_norm": 0.005150027573108673, - "learning_rate": 0.0001999854845406103, - "loss": 46.0, - "step": 33689 - }, - { - "epoch": 5.42554048069568, - "grad_norm": 0.008297018706798553, - "learning_rate": 0.0001999854836786296, - "loss": 46.0, - "step": 33690 - }, - { - "epoch": 5.425701517774468, - "grad_norm": 0.0023368827532976866, - "learning_rate": 0.00019998548281662334, - "loss": 46.0, - "step": 33691 - }, - { - "epoch": 5.425862554853255, - "grad_norm": 0.0016465721419081092, - "learning_rate": 0.0001999854819545915, - "loss": 46.0, - "step": 33692 - }, - { - "epoch": 5.426023591932043, - "grad_norm": 0.004651474766433239, - "learning_rate": 0.00019998548109253405, - "loss": 46.0, - "step": 33693 - }, - { - "epoch": 5.42618462901083, - "grad_norm": 0.0022437949664890766, - "learning_rate": 0.00019998548023045096, - "loss": 46.0, - "step": 33694 - }, - { - "epoch": 5.426345666089617, - "grad_norm": 0.0022637040819972754, - "learning_rate": 0.00019998547936834232, - "loss": 46.0, - "step": 33695 - }, - { - "epoch": 5.426506703168404, - "grad_norm": 0.004307035356760025, - "learning_rate": 0.00019998547850620812, - "loss": 46.0, - "step": 33696 - }, - { - "epoch": 5.426667740247192, - "grad_norm": 0.0020192808005958796, - "learning_rate": 0.0001999854776440483, - "loss": 46.0, - "step": 33697 - }, - { - "epoch": 5.426828777325979, - "grad_norm": 0.0075362008064985275, - "learning_rate": 0.00019998547678186287, - "loss": 46.0, - "step": 33698 - }, - { - "epoch": 5.426989814404767, - "grad_norm": 0.0031508186366409063, - "learning_rate": 0.00019998547591965188, - "loss": 46.0, - "step": 33699 - }, - { - "epoch": 5.427150851483554, - "grad_norm": 0.014034881256520748, - "learning_rate": 0.00019998547505741527, - "loss": 46.0, - "step": 33700 - }, - { - "epoch": 5.4273118885623415, - "grad_norm": 0.003862165380269289, - "learning_rate": 0.0001999854741951531, - "loss": 46.0, - "step": 33701 - }, - { - "epoch": 5.427472925641129, - "grad_norm": 0.0027518896386027336, - "learning_rate": 0.00019998547333286532, - "loss": 46.0, - "step": 33702 - }, - { - "epoch": 5.427633962719916, - "grad_norm": 0.0015293208416551352, - "learning_rate": 0.00019998547247055198, - "loss": 46.0, - "step": 33703 - }, - { - "epoch": 5.427794999798704, - "grad_norm": 0.008325648494064808, - "learning_rate": 0.000199985471608213, - "loss": 46.0, - "step": 33704 - }, - { - "epoch": 5.427956036877491, - "grad_norm": 0.011661135591566563, - "learning_rate": 0.00019998547074584843, - "loss": 46.0, - "step": 33705 - }, - { - "epoch": 5.428117073956279, - "grad_norm": 0.01024138554930687, - "learning_rate": 0.00019998546988345833, - "loss": 46.0, - "step": 33706 - }, - { - "epoch": 5.428278111035066, - "grad_norm": 0.0030228830873966217, - "learning_rate": 0.0001999854690210426, - "loss": 46.0, - "step": 33707 - }, - { - "epoch": 5.428439148113853, - "grad_norm": 0.0019812029786407948, - "learning_rate": 0.00019998546815860126, - "loss": 46.0, - "step": 33708 - }, - { - "epoch": 5.42860018519264, - "grad_norm": 0.0019485143711790442, - "learning_rate": 0.00019998546729613434, - "loss": 46.0, - "step": 33709 - }, - { - "epoch": 5.428761222271428, - "grad_norm": 0.0027567846700549126, - "learning_rate": 0.00019998546643364184, - "loss": 46.0, - "step": 33710 - }, - { - "epoch": 5.428922259350215, - "grad_norm": 0.017840752378106117, - "learning_rate": 0.00019998546557112374, - "loss": 46.0, - "step": 33711 - }, - { - "epoch": 5.429083296429003, - "grad_norm": 0.017581719905138016, - "learning_rate": 0.00019998546470858007, - "loss": 46.0, - "step": 33712 - }, - { - "epoch": 5.42924433350779, - "grad_norm": 0.000688565953169018, - "learning_rate": 0.00019998546384601077, - "loss": 46.0, - "step": 33713 - }, - { - "epoch": 5.4294053705865775, - "grad_norm": 0.008608333766460419, - "learning_rate": 0.00019998546298341592, - "loss": 46.0, - "step": 33714 - }, - { - "epoch": 5.429566407665365, - "grad_norm": 0.002623354783281684, - "learning_rate": 0.00019998546212079545, - "loss": 46.0, - "step": 33715 - }, - { - "epoch": 5.429727444744152, - "grad_norm": 0.004323564935475588, - "learning_rate": 0.0001999854612581494, - "loss": 46.0, - "step": 33716 - }, - { - "epoch": 5.42988848182294, - "grad_norm": 0.002725014230236411, - "learning_rate": 0.00019998546039547775, - "loss": 46.0, - "step": 33717 - }, - { - "epoch": 5.430049518901727, - "grad_norm": 0.005183933302760124, - "learning_rate": 0.00019998545953278052, - "loss": 46.0, - "step": 33718 - }, - { - "epoch": 5.430210555980515, - "grad_norm": 0.01034574769437313, - "learning_rate": 0.0001999854586700577, - "loss": 46.0, - "step": 33719 - }, - { - "epoch": 5.430371593059302, - "grad_norm": 0.005405900999903679, - "learning_rate": 0.00019998545780730928, - "loss": 46.0, - "step": 33720 - }, - { - "epoch": 5.43053263013809, - "grad_norm": 0.008120122365653515, - "learning_rate": 0.0001999854569445353, - "loss": 46.0, - "step": 33721 - }, - { - "epoch": 5.430693667216877, - "grad_norm": 0.003856923431158066, - "learning_rate": 0.00019998545608173565, - "loss": 46.0, - "step": 33722 - }, - { - "epoch": 5.430854704295664, - "grad_norm": 0.006800223141908646, - "learning_rate": 0.0001999854552189105, - "loss": 46.0, - "step": 33723 - }, - { - "epoch": 5.431015741374451, - "grad_norm": 0.0028839276637881994, - "learning_rate": 0.0001999854543560597, - "loss": 46.0, - "step": 33724 - }, - { - "epoch": 5.4311767784532385, - "grad_norm": 0.01973271183669567, - "learning_rate": 0.00019998545349318332, - "loss": 46.0, - "step": 33725 - }, - { - "epoch": 5.431337815532026, - "grad_norm": 0.004148564301431179, - "learning_rate": 0.00019998545263028136, - "loss": 46.0, - "step": 33726 - }, - { - "epoch": 5.431498852610813, - "grad_norm": 0.004408187232911587, - "learning_rate": 0.00019998545176735382, - "loss": 46.0, - "step": 33727 - }, - { - "epoch": 5.431659889689601, - "grad_norm": 0.0015528157819062471, - "learning_rate": 0.00019998545090440067, - "loss": 46.0, - "step": 33728 - }, - { - "epoch": 5.431820926768388, - "grad_norm": 0.008553935214877129, - "learning_rate": 0.00019998545004142193, - "loss": 46.0, - "step": 33729 - }, - { - "epoch": 5.431981963847176, - "grad_norm": 0.006922516506165266, - "learning_rate": 0.00019998544917841762, - "loss": 46.0, - "step": 33730 - }, - { - "epoch": 5.432143000925963, - "grad_norm": 0.0024310853332281113, - "learning_rate": 0.0001999854483153877, - "loss": 46.0, - "step": 33731 - }, - { - "epoch": 5.432304038004751, - "grad_norm": 0.0049537308514118195, - "learning_rate": 0.00019998544745233218, - "loss": 46.0, - "step": 33732 - }, - { - "epoch": 5.432465075083538, - "grad_norm": 0.010817292146384716, - "learning_rate": 0.00019998544658925108, - "loss": 46.0, - "step": 33733 - }, - { - "epoch": 5.432626112162326, - "grad_norm": 0.016791680827736855, - "learning_rate": 0.0001999854457261444, - "loss": 46.0, - "step": 33734 - }, - { - "epoch": 5.432787149241113, - "grad_norm": 0.0012591202976182103, - "learning_rate": 0.00019998544486301211, - "loss": 46.0, - "step": 33735 - }, - { - "epoch": 5.4329481863199005, - "grad_norm": 0.003230729606002569, - "learning_rate": 0.0001999854439998542, - "loss": 46.0, - "step": 33736 - }, - { - "epoch": 5.433109223398688, - "grad_norm": 0.001971533754840493, - "learning_rate": 0.00019998544313667077, - "loss": 46.0, - "step": 33737 - }, - { - "epoch": 5.4332702604774745, - "grad_norm": 0.009771594777703285, - "learning_rate": 0.0001999854422734617, - "loss": 46.0, - "step": 33738 - }, - { - "epoch": 5.433431297556262, - "grad_norm": 0.007710725534707308, - "learning_rate": 0.00019998544141022707, - "loss": 46.0, - "step": 33739 - }, - { - "epoch": 5.433592334635049, - "grad_norm": 0.004870411939918995, - "learning_rate": 0.00019998544054696682, - "loss": 46.0, - "step": 33740 - }, - { - "epoch": 5.433753371713837, - "grad_norm": 0.006178237032145262, - "learning_rate": 0.00019998543968368097, - "loss": 46.0, - "step": 33741 - }, - { - "epoch": 5.433914408792624, - "grad_norm": 0.004385080188512802, - "learning_rate": 0.00019998543882036954, - "loss": 46.0, - "step": 33742 - }, - { - "epoch": 5.434075445871412, - "grad_norm": 0.0019855606369674206, - "learning_rate": 0.00019998543795703255, - "loss": 46.0, - "step": 33743 - }, - { - "epoch": 5.434236482950199, - "grad_norm": 0.01655840501189232, - "learning_rate": 0.00019998543709366995, - "loss": 46.0, - "step": 33744 - }, - { - "epoch": 5.434397520028987, - "grad_norm": 0.0004960960941389203, - "learning_rate": 0.00019998543623028176, - "loss": 46.0, - "step": 33745 - }, - { - "epoch": 5.434558557107774, - "grad_norm": 0.0033130559604614973, - "learning_rate": 0.00019998543536686798, - "loss": 46.0, - "step": 33746 - }, - { - "epoch": 5.434719594186562, - "grad_norm": 0.009820588864386082, - "learning_rate": 0.0001999854345034286, - "loss": 46.0, - "step": 33747 - }, - { - "epoch": 5.434880631265349, - "grad_norm": 0.0012613895814865828, - "learning_rate": 0.00019998543363996364, - "loss": 46.0, - "step": 33748 - }, - { - "epoch": 5.4350416683441365, - "grad_norm": 0.007635081652551889, - "learning_rate": 0.00019998543277647307, - "loss": 46.0, - "step": 33749 - }, - { - "epoch": 5.435202705422924, - "grad_norm": 0.0116058848798275, - "learning_rate": 0.00019998543191295691, - "loss": 46.0, - "step": 33750 - }, - { - "epoch": 5.435363742501711, - "grad_norm": 0.007447515148669481, - "learning_rate": 0.00019998543104941517, - "loss": 46.0, - "step": 33751 - }, - { - "epoch": 5.435524779580499, - "grad_norm": 0.004526468459516764, - "learning_rate": 0.00019998543018584784, - "loss": 46.0, - "step": 33752 - }, - { - "epoch": 5.435685816659285, - "grad_norm": 0.002396880416199565, - "learning_rate": 0.0001999854293222549, - "loss": 46.0, - "step": 33753 - }, - { - "epoch": 5.435846853738073, - "grad_norm": 0.010567798279225826, - "learning_rate": 0.0001999854284586364, - "loss": 46.0, - "step": 33754 - }, - { - "epoch": 5.43600789081686, - "grad_norm": 0.0034893634729087353, - "learning_rate": 0.0001999854275949923, - "loss": 46.0, - "step": 33755 - }, - { - "epoch": 5.436168927895648, - "grad_norm": 0.0030591634567826986, - "learning_rate": 0.00019998542673132258, - "loss": 46.0, - "step": 33756 - }, - { - "epoch": 5.436329964974435, - "grad_norm": 0.005990249570459127, - "learning_rate": 0.00019998542586762729, - "loss": 46.0, - "step": 33757 - }, - { - "epoch": 5.436491002053223, - "grad_norm": 0.01983432099223137, - "learning_rate": 0.00019998542500390643, - "loss": 46.0, - "step": 33758 - }, - { - "epoch": 5.43665203913201, - "grad_norm": 0.0014307056553661823, - "learning_rate": 0.00019998542414015997, - "loss": 46.0, - "step": 33759 - }, - { - "epoch": 5.436813076210798, - "grad_norm": 0.0015668855048716068, - "learning_rate": 0.0001999854232763879, - "loss": 46.0, - "step": 33760 - }, - { - "epoch": 5.436974113289585, - "grad_norm": 0.0019072664435952902, - "learning_rate": 0.00019998542241259025, - "loss": 46.0, - "step": 33761 - }, - { - "epoch": 5.4371351503683725, - "grad_norm": 0.005499883554875851, - "learning_rate": 0.000199985421548767, - "loss": 46.0, - "step": 33762 - }, - { - "epoch": 5.43729618744716, - "grad_norm": 0.0028436563443392515, - "learning_rate": 0.00019998542068491818, - "loss": 46.0, - "step": 33763 - }, - { - "epoch": 5.437457224525947, - "grad_norm": 0.00255585671402514, - "learning_rate": 0.00019998541982104375, - "loss": 46.0, - "step": 33764 - }, - { - "epoch": 5.437618261604735, - "grad_norm": 0.004707845393568277, - "learning_rate": 0.00019998541895714373, - "loss": 46.0, - "step": 33765 - }, - { - "epoch": 5.437779298683522, - "grad_norm": 0.017210451886057854, - "learning_rate": 0.0001999854180932181, - "loss": 46.0, - "step": 33766 - }, - { - "epoch": 5.43794033576231, - "grad_norm": 0.0029135614167898893, - "learning_rate": 0.00019998541722926691, - "loss": 46.0, - "step": 33767 - }, - { - "epoch": 5.438101372841096, - "grad_norm": 0.0022394165862351656, - "learning_rate": 0.0001999854163652901, - "loss": 46.0, - "step": 33768 - }, - { - "epoch": 5.438262409919884, - "grad_norm": 0.00235547567717731, - "learning_rate": 0.00019998541550128774, - "loss": 46.0, - "step": 33769 - }, - { - "epoch": 5.438423446998671, - "grad_norm": 0.0018850163323804736, - "learning_rate": 0.00019998541463725977, - "loss": 46.0, - "step": 33770 - }, - { - "epoch": 5.438584484077459, - "grad_norm": 0.0011621728772297502, - "learning_rate": 0.0001999854137732062, - "loss": 46.0, - "step": 33771 - }, - { - "epoch": 5.438745521156246, - "grad_norm": 0.003751841140910983, - "learning_rate": 0.00019998541290912707, - "loss": 46.0, - "step": 33772 - }, - { - "epoch": 5.4389065582350336, - "grad_norm": 0.01840587519109249, - "learning_rate": 0.0001999854120450223, - "loss": 46.0, - "step": 33773 - }, - { - "epoch": 5.439067595313821, - "grad_norm": 0.0015066039049997926, - "learning_rate": 0.00019998541118089195, - "loss": 46.0, - "step": 33774 - }, - { - "epoch": 5.4392286323926085, - "grad_norm": 0.004510980565100908, - "learning_rate": 0.00019998541031673606, - "loss": 46.0, - "step": 33775 - }, - { - "epoch": 5.439389669471396, - "grad_norm": 0.011435563676059246, - "learning_rate": 0.00019998540945255453, - "loss": 46.0, - "step": 33776 - }, - { - "epoch": 5.439550706550183, - "grad_norm": 0.013452005572617054, - "learning_rate": 0.00019998540858834742, - "loss": 46.0, - "step": 33777 - }, - { - "epoch": 5.439711743628971, - "grad_norm": 0.0035934315528720617, - "learning_rate": 0.0001999854077241147, - "loss": 46.0, - "step": 33778 - }, - { - "epoch": 5.439872780707758, - "grad_norm": 0.007829852402210236, - "learning_rate": 0.00019998540685985642, - "loss": 46.0, - "step": 33779 - }, - { - "epoch": 5.440033817786546, - "grad_norm": 0.006716438569128513, - "learning_rate": 0.00019998540599557255, - "loss": 46.0, - "step": 33780 - }, - { - "epoch": 5.440194854865332, - "grad_norm": 0.005890178959816694, - "learning_rate": 0.00019998540513126305, - "loss": 46.0, - "step": 33781 - }, - { - "epoch": 5.44035589194412, - "grad_norm": 0.0033761782106012106, - "learning_rate": 0.00019998540426692798, - "loss": 46.0, - "step": 33782 - }, - { - "epoch": 5.440516929022907, - "grad_norm": 0.00318462448194623, - "learning_rate": 0.00019998540340256734, - "loss": 46.0, - "step": 33783 - }, - { - "epoch": 5.440677966101695, - "grad_norm": 0.018436431884765625, - "learning_rate": 0.00019998540253818108, - "loss": 46.0, - "step": 33784 - }, - { - "epoch": 5.440839003180482, - "grad_norm": 0.005125865805894136, - "learning_rate": 0.00019998540167376921, - "loss": 46.0, - "step": 33785 - }, - { - "epoch": 5.4410000402592695, - "grad_norm": 0.00320666516199708, - "learning_rate": 0.00019998540080933181, - "loss": 46.0, - "step": 33786 - }, - { - "epoch": 5.441161077338057, - "grad_norm": 0.003564119804650545, - "learning_rate": 0.00019998539994486877, - "loss": 46.0, - "step": 33787 - }, - { - "epoch": 5.441322114416844, - "grad_norm": 0.007742627523839474, - "learning_rate": 0.00019998539908038017, - "loss": 46.0, - "step": 33788 - }, - { - "epoch": 5.441483151495632, - "grad_norm": 0.004321575164794922, - "learning_rate": 0.00019998539821586598, - "loss": 46.0, - "step": 33789 - }, - { - "epoch": 5.441644188574419, - "grad_norm": 0.001923218253068626, - "learning_rate": 0.00019998539735132618, - "loss": 46.0, - "step": 33790 - }, - { - "epoch": 5.441805225653207, - "grad_norm": 0.000759912480134517, - "learning_rate": 0.00019998539648676079, - "loss": 46.0, - "step": 33791 - }, - { - "epoch": 5.441966262731994, - "grad_norm": 0.0036928998306393623, - "learning_rate": 0.0001999853956221698, - "loss": 46.0, - "step": 33792 - }, - { - "epoch": 5.442127299810782, - "grad_norm": 0.0011880164965987206, - "learning_rate": 0.00019998539475755321, - "loss": 46.0, - "step": 33793 - }, - { - "epoch": 5.442288336889569, - "grad_norm": 0.0013276466634124517, - "learning_rate": 0.0001999853938929111, - "loss": 46.0, - "step": 33794 - }, - { - "epoch": 5.442449373968357, - "grad_norm": 0.0013939006021246314, - "learning_rate": 0.00019998539302824332, - "loss": 46.0, - "step": 33795 - }, - { - "epoch": 5.442610411047143, - "grad_norm": 0.012781567871570587, - "learning_rate": 0.00019998539216355, - "loss": 46.0, - "step": 33796 - }, - { - "epoch": 5.442771448125931, - "grad_norm": 0.0018839980475604534, - "learning_rate": 0.00019998539129883106, - "loss": 46.0, - "step": 33797 - }, - { - "epoch": 5.442932485204718, - "grad_norm": 0.0020724984351545572, - "learning_rate": 0.00019998539043408653, - "loss": 46.0, - "step": 33798 - }, - { - "epoch": 5.4430935222835055, - "grad_norm": 0.007911965250968933, - "learning_rate": 0.0001999853895693164, - "loss": 46.0, - "step": 33799 - }, - { - "epoch": 5.443254559362293, - "grad_norm": 0.0013278990518301725, - "learning_rate": 0.0001999853887045207, - "loss": 46.0, - "step": 33800 - }, - { - "epoch": 5.44341559644108, - "grad_norm": 0.007801434490829706, - "learning_rate": 0.0001999853878396994, - "loss": 46.0, - "step": 33801 - }, - { - "epoch": 5.443576633519868, - "grad_norm": 0.006084978114813566, - "learning_rate": 0.00019998538697485252, - "loss": 46.0, - "step": 33802 - }, - { - "epoch": 5.443737670598655, - "grad_norm": 0.006249023601412773, - "learning_rate": 0.00019998538610998002, - "loss": 46.0, - "step": 33803 - }, - { - "epoch": 5.443898707677443, - "grad_norm": 0.008042052388191223, - "learning_rate": 0.00019998538524508197, - "loss": 46.0, - "step": 33804 - }, - { - "epoch": 5.44405974475623, - "grad_norm": 0.00547713553532958, - "learning_rate": 0.0001999853843801583, - "loss": 46.0, - "step": 33805 - }, - { - "epoch": 5.444220781835018, - "grad_norm": 0.010139192454516888, - "learning_rate": 0.00019998538351520906, - "loss": 46.0, - "step": 33806 - }, - { - "epoch": 5.444381818913805, - "grad_norm": 0.0010297633707523346, - "learning_rate": 0.00019998538265023422, - "loss": 46.0, - "step": 33807 - }, - { - "epoch": 5.444542855992593, - "grad_norm": 0.01330942939966917, - "learning_rate": 0.00019998538178523376, - "loss": 46.0, - "step": 33808 - }, - { - "epoch": 5.44470389307138, - "grad_norm": 0.00419691763818264, - "learning_rate": 0.00019998538092020772, - "loss": 46.0, - "step": 33809 - }, - { - "epoch": 5.4448649301501675, - "grad_norm": 0.0010829964885488153, - "learning_rate": 0.0001999853800551561, - "loss": 46.0, - "step": 33810 - }, - { - "epoch": 5.445025967228954, - "grad_norm": 0.001953398808836937, - "learning_rate": 0.0001999853791900789, - "loss": 46.0, - "step": 33811 - }, - { - "epoch": 5.4451870043077415, - "grad_norm": 0.007270127534866333, - "learning_rate": 0.0001999853783249761, - "loss": 46.0, - "step": 33812 - }, - { - "epoch": 5.445348041386529, - "grad_norm": 0.003097108332440257, - "learning_rate": 0.0001999853774598477, - "loss": 46.0, - "step": 33813 - }, - { - "epoch": 5.445509078465316, - "grad_norm": 0.0017479805974289775, - "learning_rate": 0.00019998537659469373, - "loss": 46.0, - "step": 33814 - }, - { - "epoch": 5.445670115544104, - "grad_norm": 0.0027759966906160116, - "learning_rate": 0.00019998537572951417, - "loss": 46.0, - "step": 33815 - }, - { - "epoch": 5.445831152622891, - "grad_norm": 0.00909650232642889, - "learning_rate": 0.000199985374864309, - "loss": 46.0, - "step": 33816 - }, - { - "epoch": 5.445992189701679, - "grad_norm": 0.001967012882232666, - "learning_rate": 0.00019998537399907825, - "loss": 46.0, - "step": 33817 - }, - { - "epoch": 5.446153226780466, - "grad_norm": 0.0024580832105129957, - "learning_rate": 0.00019998537313382187, - "loss": 46.0, - "step": 33818 - }, - { - "epoch": 5.446314263859254, - "grad_norm": 0.006300802808254957, - "learning_rate": 0.00019998537226853996, - "loss": 46.0, - "step": 33819 - }, - { - "epoch": 5.446475300938041, - "grad_norm": 0.004440753720700741, - "learning_rate": 0.00019998537140323243, - "loss": 46.0, - "step": 33820 - }, - { - "epoch": 5.446636338016829, - "grad_norm": 0.00453588692471385, - "learning_rate": 0.0001999853705378993, - "loss": 46.0, - "step": 33821 - }, - { - "epoch": 5.446797375095616, - "grad_norm": 0.0036536843981593847, - "learning_rate": 0.0001999853696725406, - "loss": 46.0, - "step": 33822 - }, - { - "epoch": 5.4469584121744035, - "grad_norm": 0.012010179460048676, - "learning_rate": 0.00019998536880715627, - "loss": 46.0, - "step": 33823 - }, - { - "epoch": 5.447119449253191, - "grad_norm": 0.005867932457476854, - "learning_rate": 0.0001999853679417464, - "loss": 46.0, - "step": 33824 - }, - { - "epoch": 5.447280486331978, - "grad_norm": 0.005730391480028629, - "learning_rate": 0.0001999853670763109, - "loss": 46.0, - "step": 33825 - }, - { - "epoch": 5.447441523410765, - "grad_norm": 0.00212597637437284, - "learning_rate": 0.00019998536621084983, - "loss": 46.0, - "step": 33826 - }, - { - "epoch": 5.447602560489552, - "grad_norm": 0.0027515387628227472, - "learning_rate": 0.00019998536534536316, - "loss": 46.0, - "step": 33827 - }, - { - "epoch": 5.44776359756834, - "grad_norm": 0.008567631244659424, - "learning_rate": 0.0001999853644798509, - "loss": 46.0, - "step": 33828 - }, - { - "epoch": 5.447924634647127, - "grad_norm": 0.002860043430700898, - "learning_rate": 0.00019998536361431305, - "loss": 46.0, - "step": 33829 - }, - { - "epoch": 5.448085671725915, - "grad_norm": 0.01580500416457653, - "learning_rate": 0.00019998536274874962, - "loss": 46.0, - "step": 33830 - }, - { - "epoch": 5.448246708804702, - "grad_norm": 0.00769634498283267, - "learning_rate": 0.00019998536188316058, - "loss": 46.0, - "step": 33831 - }, - { - "epoch": 5.44840774588349, - "grad_norm": 0.0036289342679083347, - "learning_rate": 0.00019998536101754595, - "loss": 46.0, - "step": 33832 - }, - { - "epoch": 5.448568782962277, - "grad_norm": 0.014895744621753693, - "learning_rate": 0.00019998536015190574, - "loss": 46.0, - "step": 33833 - }, - { - "epoch": 5.4487298200410645, - "grad_norm": 0.016011957079172134, - "learning_rate": 0.00019998535928623994, - "loss": 46.0, - "step": 33834 - }, - { - "epoch": 5.448890857119852, - "grad_norm": 0.006428055465221405, - "learning_rate": 0.00019998535842054852, - "loss": 46.0, - "step": 33835 - }, - { - "epoch": 5.4490518941986394, - "grad_norm": 0.0018947329372167587, - "learning_rate": 0.00019998535755483157, - "loss": 46.0, - "step": 33836 - }, - { - "epoch": 5.449212931277427, - "grad_norm": 0.005177473183721304, - "learning_rate": 0.00019998535668908898, - "loss": 46.0, - "step": 33837 - }, - { - "epoch": 5.449373968356214, - "grad_norm": 0.013433219864964485, - "learning_rate": 0.0001999853558233208, - "loss": 46.0, - "step": 33838 - }, - { - "epoch": 5.449535005435002, - "grad_norm": 0.0031552989967167377, - "learning_rate": 0.00019998535495752707, - "loss": 46.0, - "step": 33839 - }, - { - "epoch": 5.449696042513789, - "grad_norm": 0.006568028125911951, - "learning_rate": 0.0001999853540917077, - "loss": 46.0, - "step": 33840 - }, - { - "epoch": 5.449857079592576, - "grad_norm": 0.020712271332740784, - "learning_rate": 0.00019998535322586275, - "loss": 46.0, - "step": 33841 - }, - { - "epoch": 5.450018116671363, - "grad_norm": 0.0020591234788298607, - "learning_rate": 0.0001999853523599922, - "loss": 46.0, - "step": 33842 - }, - { - "epoch": 5.450179153750151, - "grad_norm": 0.01104546058923006, - "learning_rate": 0.0001999853514940961, - "loss": 46.0, - "step": 33843 - }, - { - "epoch": 5.450340190828938, - "grad_norm": 0.00718472246080637, - "learning_rate": 0.00019998535062817435, - "loss": 46.0, - "step": 33844 - }, - { - "epoch": 5.450501227907726, - "grad_norm": 0.003475119825452566, - "learning_rate": 0.00019998534976222707, - "loss": 46.0, - "step": 33845 - }, - { - "epoch": 5.450662264986513, - "grad_norm": 0.010517102666199207, - "learning_rate": 0.00019998534889625416, - "loss": 46.0, - "step": 33846 - }, - { - "epoch": 5.4508233020653005, - "grad_norm": 0.004338243510574102, - "learning_rate": 0.00019998534803025567, - "loss": 46.0, - "step": 33847 - }, - { - "epoch": 5.450984339144088, - "grad_norm": 0.01050893496721983, - "learning_rate": 0.0001999853471642316, - "loss": 46.0, - "step": 33848 - }, - { - "epoch": 5.451145376222875, - "grad_norm": 0.0058682686649262905, - "learning_rate": 0.0001999853462981819, - "loss": 46.0, - "step": 33849 - }, - { - "epoch": 5.451306413301663, - "grad_norm": 0.02847227267920971, - "learning_rate": 0.00019998534543210666, - "loss": 46.0, - "step": 33850 - }, - { - "epoch": 5.45146745038045, - "grad_norm": 0.0028384150937199593, - "learning_rate": 0.00019998534456600582, - "loss": 46.0, - "step": 33851 - }, - { - "epoch": 5.451628487459238, - "grad_norm": 0.0020966320298612118, - "learning_rate": 0.00019998534369987937, - "loss": 46.0, - "step": 33852 - }, - { - "epoch": 5.451789524538025, - "grad_norm": 0.007990136742591858, - "learning_rate": 0.00019998534283372733, - "loss": 46.0, - "step": 33853 - }, - { - "epoch": 5.451950561616813, - "grad_norm": 0.020022640004754066, - "learning_rate": 0.0001999853419675497, - "loss": 46.0, - "step": 33854 - }, - { - "epoch": 5.452111598695599, - "grad_norm": 0.010539705865085125, - "learning_rate": 0.00019998534110134649, - "loss": 46.0, - "step": 33855 - }, - { - "epoch": 5.452272635774387, - "grad_norm": 0.019215842708945274, - "learning_rate": 0.00019998534023511768, - "loss": 46.0, - "step": 33856 - }, - { - "epoch": 5.452433672853174, - "grad_norm": 0.0017711084801703691, - "learning_rate": 0.00019998533936886327, - "loss": 46.0, - "step": 33857 - }, - { - "epoch": 5.452594709931962, - "grad_norm": 0.003719313768669963, - "learning_rate": 0.00019998533850258327, - "loss": 46.0, - "step": 33858 - }, - { - "epoch": 5.452755747010749, - "grad_norm": 0.006358031183481216, - "learning_rate": 0.0001999853376362777, - "loss": 46.0, - "step": 33859 - }, - { - "epoch": 5.4529167840895365, - "grad_norm": 0.0026636661496013403, - "learning_rate": 0.0001999853367699465, - "loss": 46.0, - "step": 33860 - }, - { - "epoch": 5.453077821168324, - "grad_norm": 0.00944479089230299, - "learning_rate": 0.00019998533590358974, - "loss": 46.0, - "step": 33861 - }, - { - "epoch": 5.453238858247111, - "grad_norm": 0.0041820695623755455, - "learning_rate": 0.0001999853350372074, - "loss": 46.0, - "step": 33862 - }, - { - "epoch": 5.453399895325899, - "grad_norm": 0.0037767766043543816, - "learning_rate": 0.00019998533417079942, - "loss": 46.0, - "step": 33863 - }, - { - "epoch": 5.453560932404686, - "grad_norm": 0.012879302725195885, - "learning_rate": 0.0001999853333043659, - "loss": 46.0, - "step": 33864 - }, - { - "epoch": 5.453721969483474, - "grad_norm": 0.005034186411648989, - "learning_rate": 0.00019998533243790678, - "loss": 46.0, - "step": 33865 - }, - { - "epoch": 5.453883006562261, - "grad_norm": 0.0017000120133161545, - "learning_rate": 0.00019998533157142206, - "loss": 46.0, - "step": 33866 - }, - { - "epoch": 5.454044043641049, - "grad_norm": 0.015321878716349602, - "learning_rate": 0.00019998533070491174, - "loss": 46.0, - "step": 33867 - }, - { - "epoch": 5.454205080719836, - "grad_norm": 0.004623544402420521, - "learning_rate": 0.00019998532983837587, - "loss": 46.0, - "step": 33868 - }, - { - "epoch": 5.454366117798623, - "grad_norm": 0.004284393507987261, - "learning_rate": 0.00019998532897181435, - "loss": 46.0, - "step": 33869 - }, - { - "epoch": 5.45452715487741, - "grad_norm": 0.007372124586254358, - "learning_rate": 0.00019998532810522725, - "loss": 46.0, - "step": 33870 - }, - { - "epoch": 5.454688191956198, - "grad_norm": 0.0013585810083895922, - "learning_rate": 0.0001999853272386146, - "loss": 46.0, - "step": 33871 - }, - { - "epoch": 5.454849229034985, - "grad_norm": 0.011057652533054352, - "learning_rate": 0.0001999853263719763, - "loss": 46.0, - "step": 33872 - }, - { - "epoch": 5.4550102661137725, - "grad_norm": 0.017047543078660965, - "learning_rate": 0.00019998532550531247, - "loss": 46.0, - "step": 33873 - }, - { - "epoch": 5.45517130319256, - "grad_norm": 0.007600443437695503, - "learning_rate": 0.000199985324638623, - "loss": 46.0, - "step": 33874 - }, - { - "epoch": 5.455332340271347, - "grad_norm": 0.0013674166984856129, - "learning_rate": 0.00019998532377190796, - "loss": 46.0, - "step": 33875 - }, - { - "epoch": 5.455493377350135, - "grad_norm": 0.0027655805461108685, - "learning_rate": 0.00019998532290516733, - "loss": 46.0, - "step": 33876 - }, - { - "epoch": 5.455654414428922, - "grad_norm": 0.002250152174383402, - "learning_rate": 0.0001999853220384011, - "loss": 46.0, - "step": 33877 - }, - { - "epoch": 5.45581545150771, - "grad_norm": 0.010605317540466785, - "learning_rate": 0.00019998532117160932, - "loss": 46.0, - "step": 33878 - }, - { - "epoch": 5.455976488586497, - "grad_norm": 0.0034706478472799063, - "learning_rate": 0.0001999853203047919, - "loss": 46.0, - "step": 33879 - }, - { - "epoch": 5.456137525665285, - "grad_norm": 0.009512580931186676, - "learning_rate": 0.0001999853194379489, - "loss": 46.0, - "step": 33880 - }, - { - "epoch": 5.456298562744072, - "grad_norm": 0.006329284515231848, - "learning_rate": 0.0001999853185710803, - "loss": 46.0, - "step": 33881 - }, - { - "epoch": 5.4564595998228596, - "grad_norm": 0.001741435145959258, - "learning_rate": 0.0001999853177041861, - "loss": 46.0, - "step": 33882 - }, - { - "epoch": 5.456620636901647, - "grad_norm": 0.000949910085182637, - "learning_rate": 0.00019998531683726635, - "loss": 46.0, - "step": 33883 - }, - { - "epoch": 5.456781673980434, - "grad_norm": 0.004449079278856516, - "learning_rate": 0.000199985315970321, - "loss": 46.0, - "step": 33884 - }, - { - "epoch": 5.456942711059221, - "grad_norm": 0.004924300126731396, - "learning_rate": 0.00019998531510335003, - "loss": 46.0, - "step": 33885 - }, - { - "epoch": 5.4571037481380085, - "grad_norm": 0.0030333097092807293, - "learning_rate": 0.0001999853142363535, - "loss": 46.0, - "step": 33886 - }, - { - "epoch": 5.457264785216796, - "grad_norm": 0.005165104288607836, - "learning_rate": 0.00019998531336933137, - "loss": 46.0, - "step": 33887 - }, - { - "epoch": 5.457425822295583, - "grad_norm": 0.0022031229455024004, - "learning_rate": 0.00019998531250228364, - "loss": 46.0, - "step": 33888 - }, - { - "epoch": 5.457586859374371, - "grad_norm": 0.00294051063247025, - "learning_rate": 0.00019998531163521033, - "loss": 46.0, - "step": 33889 - }, - { - "epoch": 5.457747896453158, - "grad_norm": 0.0017148663755506277, - "learning_rate": 0.00019998531076811143, - "loss": 46.0, - "step": 33890 - }, - { - "epoch": 5.457908933531946, - "grad_norm": 0.0173055287450552, - "learning_rate": 0.00019998530990098692, - "loss": 46.0, - "step": 33891 - }, - { - "epoch": 5.458069970610733, - "grad_norm": 0.002718935953453183, - "learning_rate": 0.00019998530903383684, - "loss": 46.0, - "step": 33892 - }, - { - "epoch": 5.458231007689521, - "grad_norm": 0.007467819377779961, - "learning_rate": 0.00019998530816666115, - "loss": 46.0, - "step": 33893 - }, - { - "epoch": 5.458392044768308, - "grad_norm": 0.0011812165612354875, - "learning_rate": 0.00019998530729945988, - "loss": 46.0, - "step": 33894 - }, - { - "epoch": 5.4585530818470955, - "grad_norm": 0.00619635172188282, - "learning_rate": 0.000199985306432233, - "loss": 46.0, - "step": 33895 - }, - { - "epoch": 5.458714118925883, - "grad_norm": 0.004404815845191479, - "learning_rate": 0.00019998530556498056, - "loss": 46.0, - "step": 33896 - }, - { - "epoch": 5.45887515600467, - "grad_norm": 0.0033379755914211273, - "learning_rate": 0.00019998530469770252, - "loss": 46.0, - "step": 33897 - }, - { - "epoch": 5.459036193083458, - "grad_norm": 0.002729051746428013, - "learning_rate": 0.00019998530383039887, - "loss": 46.0, - "step": 33898 - }, - { - "epoch": 5.4591972301622445, - "grad_norm": 0.0036032467614859343, - "learning_rate": 0.00019998530296306966, - "loss": 46.0, - "step": 33899 - }, - { - "epoch": 5.459358267241032, - "grad_norm": 0.0013720316346734762, - "learning_rate": 0.00019998530209571483, - "loss": 46.0, - "step": 33900 - }, - { - "epoch": 5.459519304319819, - "grad_norm": 0.004136795178055763, - "learning_rate": 0.00019998530122833442, - "loss": 46.0, - "step": 33901 - }, - { - "epoch": 5.459680341398607, - "grad_norm": 0.002127335174009204, - "learning_rate": 0.00019998530036092842, - "loss": 46.0, - "step": 33902 - }, - { - "epoch": 5.459841378477394, - "grad_norm": 0.003875572932884097, - "learning_rate": 0.00019998529949349683, - "loss": 46.0, - "step": 33903 - }, - { - "epoch": 5.460002415556182, - "grad_norm": 0.0017143464647233486, - "learning_rate": 0.00019998529862603963, - "loss": 46.0, - "step": 33904 - }, - { - "epoch": 5.460163452634969, - "grad_norm": 0.015358680859208107, - "learning_rate": 0.00019998529775855687, - "loss": 46.0, - "step": 33905 - }, - { - "epoch": 5.460324489713757, - "grad_norm": 0.00425465265288949, - "learning_rate": 0.0001999852968910485, - "loss": 46.0, - "step": 33906 - }, - { - "epoch": 5.460485526792544, - "grad_norm": 0.00239172694273293, - "learning_rate": 0.00019998529602351456, - "loss": 46.0, - "step": 33907 - }, - { - "epoch": 5.4606465638713315, - "grad_norm": 0.0067107947543263435, - "learning_rate": 0.000199985295155955, - "loss": 46.0, - "step": 33908 - }, - { - "epoch": 5.460807600950119, - "grad_norm": 0.001255196868441999, - "learning_rate": 0.00019998529428836984, - "loss": 46.0, - "step": 33909 - }, - { - "epoch": 5.460968638028906, - "grad_norm": 0.0015801206463947892, - "learning_rate": 0.00019998529342075912, - "loss": 46.0, - "step": 33910 - }, - { - "epoch": 5.461129675107694, - "grad_norm": 0.014534699730575085, - "learning_rate": 0.0001999852925531228, - "loss": 46.0, - "step": 33911 - }, - { - "epoch": 5.461290712186481, - "grad_norm": 0.0017297585727646947, - "learning_rate": 0.0001999852916854609, - "loss": 46.0, - "step": 33912 - }, - { - "epoch": 5.461451749265269, - "grad_norm": 0.013643634505569935, - "learning_rate": 0.00019998529081777336, - "loss": 46.0, - "step": 33913 - }, - { - "epoch": 5.461612786344055, - "grad_norm": 0.003494001692160964, - "learning_rate": 0.0001999852899500603, - "loss": 46.0, - "step": 33914 - }, - { - "epoch": 5.461773823422843, - "grad_norm": 0.0027977884747087955, - "learning_rate": 0.0001999852890823216, - "loss": 46.0, - "step": 33915 - }, - { - "epoch": 5.46193486050163, - "grad_norm": 0.002195589942857623, - "learning_rate": 0.00019998528821455733, - "loss": 46.0, - "step": 33916 - }, - { - "epoch": 5.462095897580418, - "grad_norm": 0.00214962475001812, - "learning_rate": 0.00019998528734676746, - "loss": 46.0, - "step": 33917 - }, - { - "epoch": 5.462256934659205, - "grad_norm": 0.009241893887519836, - "learning_rate": 0.00019998528647895201, - "loss": 46.0, - "step": 33918 - }, - { - "epoch": 5.462417971737993, - "grad_norm": 0.004307929426431656, - "learning_rate": 0.00019998528561111095, - "loss": 46.0, - "step": 33919 - }, - { - "epoch": 5.46257900881678, - "grad_norm": 0.006896978244185448, - "learning_rate": 0.00019998528474324433, - "loss": 46.0, - "step": 33920 - }, - { - "epoch": 5.4627400458955675, - "grad_norm": 0.0018596044974401593, - "learning_rate": 0.00019998528387535206, - "loss": 46.0, - "step": 33921 - }, - { - "epoch": 5.462901082974355, - "grad_norm": 0.012784102000296116, - "learning_rate": 0.00019998528300743424, - "loss": 46.0, - "step": 33922 - }, - { - "epoch": 5.463062120053142, - "grad_norm": 0.0023449386935681105, - "learning_rate": 0.00019998528213949082, - "loss": 46.0, - "step": 33923 - }, - { - "epoch": 5.46322315713193, - "grad_norm": 0.0060007041320204735, - "learning_rate": 0.00019998528127152185, - "loss": 46.0, - "step": 33924 - }, - { - "epoch": 5.463384194210717, - "grad_norm": 0.01186884194612503, - "learning_rate": 0.00019998528040352724, - "loss": 46.0, - "step": 33925 - }, - { - "epoch": 5.463545231289505, - "grad_norm": 0.00402445113286376, - "learning_rate": 0.00019998527953550706, - "loss": 46.0, - "step": 33926 - }, - { - "epoch": 5.463706268368292, - "grad_norm": 0.001791347865946591, - "learning_rate": 0.00019998527866746128, - "loss": 46.0, - "step": 33927 - }, - { - "epoch": 5.46386730544708, - "grad_norm": 0.00810669269412756, - "learning_rate": 0.0001999852777993899, - "loss": 46.0, - "step": 33928 - }, - { - "epoch": 5.464028342525866, - "grad_norm": 0.0035595502704381943, - "learning_rate": 0.00019998527693129294, - "loss": 46.0, - "step": 33929 - }, - { - "epoch": 5.464189379604654, - "grad_norm": 0.007522111292928457, - "learning_rate": 0.0001999852760631704, - "loss": 46.0, - "step": 33930 - }, - { - "epoch": 5.464350416683441, - "grad_norm": 0.006966003682464361, - "learning_rate": 0.00019998527519502222, - "loss": 46.0, - "step": 33931 - }, - { - "epoch": 5.464511453762229, - "grad_norm": 0.0027638860046863556, - "learning_rate": 0.00019998527432684853, - "loss": 46.0, - "step": 33932 - }, - { - "epoch": 5.464672490841016, - "grad_norm": 0.018949005752801895, - "learning_rate": 0.0001999852734586492, - "loss": 46.0, - "step": 33933 - }, - { - "epoch": 5.4648335279198035, - "grad_norm": 0.0020665456540882587, - "learning_rate": 0.0001999852725904243, - "loss": 46.0, - "step": 33934 - }, - { - "epoch": 5.464994564998591, - "grad_norm": 0.0008323253132402897, - "learning_rate": 0.00019998527172217378, - "loss": 46.0, - "step": 33935 - }, - { - "epoch": 5.465155602077378, - "grad_norm": 0.005588851403445005, - "learning_rate": 0.00019998527085389768, - "loss": 46.0, - "step": 33936 - }, - { - "epoch": 5.465316639156166, - "grad_norm": 0.0023333595599979162, - "learning_rate": 0.00019998526998559596, - "loss": 46.0, - "step": 33937 - }, - { - "epoch": 5.465477676234953, - "grad_norm": 0.0014875035267323256, - "learning_rate": 0.0001999852691172687, - "loss": 46.0, - "step": 33938 - }, - { - "epoch": 5.465638713313741, - "grad_norm": 0.03501440957188606, - "learning_rate": 0.00019998526824891583, - "loss": 46.0, - "step": 33939 - }, - { - "epoch": 5.465799750392528, - "grad_norm": 0.005613334476947784, - "learning_rate": 0.00019998526738053735, - "loss": 46.0, - "step": 33940 - }, - { - "epoch": 5.465960787471316, - "grad_norm": 0.004374946001917124, - "learning_rate": 0.0001999852665121333, - "loss": 46.0, - "step": 33941 - }, - { - "epoch": 5.466121824550102, - "grad_norm": 0.0014744155341759324, - "learning_rate": 0.00019998526564370367, - "loss": 46.0, - "step": 33942 - }, - { - "epoch": 5.46628286162889, - "grad_norm": 0.0032316846773028374, - "learning_rate": 0.00019998526477524843, - "loss": 46.0, - "step": 33943 - }, - { - "epoch": 5.466443898707677, - "grad_norm": 0.009230783209204674, - "learning_rate": 0.0001999852639067676, - "loss": 46.0, - "step": 33944 - }, - { - "epoch": 5.466604935786465, - "grad_norm": 0.003151049604639411, - "learning_rate": 0.00019998526303826117, - "loss": 46.0, - "step": 33945 - }, - { - "epoch": 5.466765972865252, - "grad_norm": 0.0030997558496892452, - "learning_rate": 0.00019998526216972917, - "loss": 46.0, - "step": 33946 - }, - { - "epoch": 5.4669270099440395, - "grad_norm": 0.006346467416733503, - "learning_rate": 0.00019998526130117155, - "loss": 46.0, - "step": 33947 - }, - { - "epoch": 5.467088047022827, - "grad_norm": 0.0038231804501265287, - "learning_rate": 0.00019998526043258838, - "loss": 46.0, - "step": 33948 - }, - { - "epoch": 5.467249084101614, - "grad_norm": 0.013598942197859287, - "learning_rate": 0.0001999852595639796, - "loss": 46.0, - "step": 33949 - }, - { - "epoch": 5.467410121180402, - "grad_norm": 0.0029380833730101585, - "learning_rate": 0.00019998525869534525, - "loss": 46.0, - "step": 33950 - }, - { - "epoch": 5.467571158259189, - "grad_norm": 0.01583113521337509, - "learning_rate": 0.00019998525782668526, - "loss": 46.0, - "step": 33951 - }, - { - "epoch": 5.467732195337977, - "grad_norm": 0.0023203128948807716, - "learning_rate": 0.0001999852569579997, - "loss": 46.0, - "step": 33952 - }, - { - "epoch": 5.467893232416764, - "grad_norm": 0.018242256715893745, - "learning_rate": 0.00019998525608928855, - "loss": 46.0, - "step": 33953 - }, - { - "epoch": 5.468054269495552, - "grad_norm": 0.009462394751608372, - "learning_rate": 0.0001999852552205518, - "loss": 46.0, - "step": 33954 - }, - { - "epoch": 5.468215306574339, - "grad_norm": 0.0072496808134019375, - "learning_rate": 0.00019998525435178949, - "loss": 46.0, - "step": 33955 - }, - { - "epoch": 5.4683763436531265, - "grad_norm": 0.004082279745489359, - "learning_rate": 0.00019998525348300156, - "loss": 46.0, - "step": 33956 - }, - { - "epoch": 5.468537380731913, - "grad_norm": 0.009565218351781368, - "learning_rate": 0.00019998525261418805, - "loss": 46.0, - "step": 33957 - }, - { - "epoch": 5.4686984178107005, - "grad_norm": 0.0022864476777613163, - "learning_rate": 0.00019998525174534898, - "loss": 46.0, - "step": 33958 - }, - { - "epoch": 5.468859454889488, - "grad_norm": 0.002576803555712104, - "learning_rate": 0.00019998525087648426, - "loss": 46.0, - "step": 33959 - }, - { - "epoch": 5.469020491968275, - "grad_norm": 0.004243740811944008, - "learning_rate": 0.00019998525000759396, - "loss": 46.0, - "step": 33960 - }, - { - "epoch": 5.469181529047063, - "grad_norm": 0.001701556844636798, - "learning_rate": 0.00019998524913867808, - "loss": 46.0, - "step": 33961 - }, - { - "epoch": 5.46934256612585, - "grad_norm": 0.0012116024736315012, - "learning_rate": 0.00019998524826973663, - "loss": 46.0, - "step": 33962 - }, - { - "epoch": 5.469503603204638, - "grad_norm": 0.0028388125356286764, - "learning_rate": 0.00019998524740076957, - "loss": 46.0, - "step": 33963 - }, - { - "epoch": 5.469664640283425, - "grad_norm": 0.006310044787824154, - "learning_rate": 0.00019998524653177692, - "loss": 46.0, - "step": 33964 - }, - { - "epoch": 5.469825677362213, - "grad_norm": 0.001129554002545774, - "learning_rate": 0.00019998524566275868, - "loss": 46.0, - "step": 33965 - }, - { - "epoch": 5.469986714441, - "grad_norm": 0.004442868288606405, - "learning_rate": 0.00019998524479371486, - "loss": 46.0, - "step": 33966 - }, - { - "epoch": 5.470147751519788, - "grad_norm": 0.002943875268101692, - "learning_rate": 0.00019998524392464542, - "loss": 46.0, - "step": 33967 - }, - { - "epoch": 5.470308788598575, - "grad_norm": 0.012347236275672913, - "learning_rate": 0.0001999852430555504, - "loss": 46.0, - "step": 33968 - }, - { - "epoch": 5.4704698256773625, - "grad_norm": 0.006594682112336159, - "learning_rate": 0.0001999852421864298, - "loss": 46.0, - "step": 33969 - }, - { - "epoch": 5.47063086275615, - "grad_norm": 0.010824074037373066, - "learning_rate": 0.0001999852413172836, - "loss": 46.0, - "step": 33970 - }, - { - "epoch": 5.470791899834937, - "grad_norm": 0.013155831955373287, - "learning_rate": 0.00019998524044811182, - "loss": 46.0, - "step": 33971 - }, - { - "epoch": 5.470952936913724, - "grad_norm": 0.004117035306990147, - "learning_rate": 0.00019998523957891445, - "loss": 46.0, - "step": 33972 - }, - { - "epoch": 5.471113973992511, - "grad_norm": 0.01561188604682684, - "learning_rate": 0.0001999852387096915, - "loss": 46.0, - "step": 33973 - }, - { - "epoch": 5.471275011071299, - "grad_norm": 0.0023450301960110664, - "learning_rate": 0.00019998523784044291, - "loss": 46.0, - "step": 33974 - }, - { - "epoch": 5.471436048150086, - "grad_norm": 0.013035872019827366, - "learning_rate": 0.00019998523697116878, - "loss": 46.0, - "step": 33975 - }, - { - "epoch": 5.471597085228874, - "grad_norm": 0.007042041979730129, - "learning_rate": 0.00019998523610186903, - "loss": 46.0, - "step": 33976 - }, - { - "epoch": 5.471758122307661, - "grad_norm": 0.004770055413246155, - "learning_rate": 0.0001999852352325437, - "loss": 46.0, - "step": 33977 - }, - { - "epoch": 5.471919159386449, - "grad_norm": 0.0034993013832718134, - "learning_rate": 0.00019998523436319277, - "loss": 46.0, - "step": 33978 - }, - { - "epoch": 5.472080196465236, - "grad_norm": 0.0034382238518446684, - "learning_rate": 0.00019998523349381623, - "loss": 46.0, - "step": 33979 - }, - { - "epoch": 5.472241233544024, - "grad_norm": 0.0015040583675727248, - "learning_rate": 0.00019998523262441413, - "loss": 46.0, - "step": 33980 - }, - { - "epoch": 5.472402270622811, - "grad_norm": 0.010898053646087646, - "learning_rate": 0.00019998523175498645, - "loss": 46.0, - "step": 33981 - }, - { - "epoch": 5.4725633077015985, - "grad_norm": 0.0020360820926725864, - "learning_rate": 0.00019998523088553315, - "loss": 46.0, - "step": 33982 - }, - { - "epoch": 5.472724344780386, - "grad_norm": 0.0016121412627398968, - "learning_rate": 0.0001999852300160543, - "loss": 46.0, - "step": 33983 - }, - { - "epoch": 5.472885381859173, - "grad_norm": 0.0034480844624340534, - "learning_rate": 0.0001999852291465498, - "loss": 46.0, - "step": 33984 - }, - { - "epoch": 5.473046418937961, - "grad_norm": 0.00642159441486001, - "learning_rate": 0.00019998522827701973, - "loss": 46.0, - "step": 33985 - }, - { - "epoch": 5.473207456016748, - "grad_norm": 0.004151217173784971, - "learning_rate": 0.0001999852274074641, - "loss": 46.0, - "step": 33986 - }, - { - "epoch": 5.473368493095535, - "grad_norm": 0.005182954482734203, - "learning_rate": 0.00019998522653788284, - "loss": 46.0, - "step": 33987 - }, - { - "epoch": 5.473529530174322, - "grad_norm": 0.0009014858515001833, - "learning_rate": 0.00019998522566827602, - "loss": 46.0, - "step": 33988 - }, - { - "epoch": 5.47369056725311, - "grad_norm": 0.014669469557702541, - "learning_rate": 0.00019998522479864358, - "loss": 46.0, - "step": 33989 - }, - { - "epoch": 5.473851604331897, - "grad_norm": 0.011654900386929512, - "learning_rate": 0.0001999852239289856, - "loss": 46.0, - "step": 33990 - }, - { - "epoch": 5.474012641410685, - "grad_norm": 0.002652890747413039, - "learning_rate": 0.00019998522305930195, - "loss": 46.0, - "step": 33991 - }, - { - "epoch": 5.474173678489472, - "grad_norm": 0.0024331207387149334, - "learning_rate": 0.00019998522218959278, - "loss": 46.0, - "step": 33992 - }, - { - "epoch": 5.47433471556826, - "grad_norm": 0.0052316877990961075, - "learning_rate": 0.00019998522131985794, - "loss": 46.0, - "step": 33993 - }, - { - "epoch": 5.474495752647047, - "grad_norm": 0.003056933870539069, - "learning_rate": 0.0001999852204500976, - "loss": 46.0, - "step": 33994 - }, - { - "epoch": 5.4746567897258345, - "grad_norm": 0.0026589741464704275, - "learning_rate": 0.00019998521958031163, - "loss": 46.0, - "step": 33995 - }, - { - "epoch": 5.474817826804622, - "grad_norm": 0.0057871281169354916, - "learning_rate": 0.00019998521871050003, - "loss": 46.0, - "step": 33996 - }, - { - "epoch": 5.474978863883409, - "grad_norm": 0.00477339792996645, - "learning_rate": 0.0001999852178406629, - "loss": 46.0, - "step": 33997 - }, - { - "epoch": 5.475139900962197, - "grad_norm": 0.005761266686022282, - "learning_rate": 0.00019998521697080015, - "loss": 46.0, - "step": 33998 - }, - { - "epoch": 5.475300938040984, - "grad_norm": 0.005985742434859276, - "learning_rate": 0.00019998521610091181, - "loss": 46.0, - "step": 33999 - }, - { - "epoch": 5.475461975119772, - "grad_norm": 0.003352626459673047, - "learning_rate": 0.00019998521523099786, - "loss": 46.0, - "step": 34000 - }, - { - "epoch": 5.475623012198559, - "grad_norm": 0.0029440601356327534, - "learning_rate": 0.00019998521436105838, - "loss": 46.0, - "step": 34001 - }, - { - "epoch": 5.475784049277346, - "grad_norm": 0.0032684379257261753, - "learning_rate": 0.00019998521349109326, - "loss": 46.0, - "step": 34002 - }, - { - "epoch": 5.475945086356133, - "grad_norm": 0.0021878937259316444, - "learning_rate": 0.00019998521262110255, - "loss": 46.0, - "step": 34003 - }, - { - "epoch": 5.476106123434921, - "grad_norm": 0.0016625227872282267, - "learning_rate": 0.00019998521175108627, - "loss": 46.0, - "step": 34004 - }, - { - "epoch": 5.476267160513708, - "grad_norm": 0.007223007269203663, - "learning_rate": 0.00019998521088104436, - "loss": 46.0, - "step": 34005 - }, - { - "epoch": 5.4764281975924956, - "grad_norm": 0.008779977448284626, - "learning_rate": 0.0001999852100109769, - "loss": 46.0, - "step": 34006 - }, - { - "epoch": 5.476589234671283, - "grad_norm": 0.004818316549062729, - "learning_rate": 0.00019998520914088383, - "loss": 46.0, - "step": 34007 - }, - { - "epoch": 5.4767502717500705, - "grad_norm": 0.0036331687588244677, - "learning_rate": 0.00019998520827076518, - "loss": 46.0, - "step": 34008 - }, - { - "epoch": 5.476911308828858, - "grad_norm": 0.007848354056477547, - "learning_rate": 0.00019998520740062092, - "loss": 46.0, - "step": 34009 - }, - { - "epoch": 5.477072345907645, - "grad_norm": 0.0012876796536147594, - "learning_rate": 0.0001999852065304511, - "loss": 46.0, - "step": 34010 - }, - { - "epoch": 5.477233382986433, - "grad_norm": 0.0017034618649631739, - "learning_rate": 0.00019998520566025566, - "loss": 46.0, - "step": 34011 - }, - { - "epoch": 5.47739442006522, - "grad_norm": 0.008430598303675652, - "learning_rate": 0.0001999852047900346, - "loss": 46.0, - "step": 34012 - }, - { - "epoch": 5.477555457144008, - "grad_norm": 0.0037775845266878605, - "learning_rate": 0.00019998520391978803, - "loss": 46.0, - "step": 34013 - }, - { - "epoch": 5.477716494222795, - "grad_norm": 0.0053141117095947266, - "learning_rate": 0.00019998520304951583, - "loss": 46.0, - "step": 34014 - }, - { - "epoch": 5.477877531301582, - "grad_norm": 0.016840746626257896, - "learning_rate": 0.00019998520217921805, - "loss": 46.0, - "step": 34015 - }, - { - "epoch": 5.478038568380369, - "grad_norm": 0.0034186423290520906, - "learning_rate": 0.00019998520130889462, - "loss": 46.0, - "step": 34016 - }, - { - "epoch": 5.478199605459157, - "grad_norm": 0.0020906454883515835, - "learning_rate": 0.00019998520043854566, - "loss": 46.0, - "step": 34017 - }, - { - "epoch": 5.478360642537944, - "grad_norm": 0.011682221665978432, - "learning_rate": 0.0001999851995681711, - "loss": 46.0, - "step": 34018 - }, - { - "epoch": 5.4785216796167315, - "grad_norm": 0.006353188306093216, - "learning_rate": 0.00019998519869777095, - "loss": 46.0, - "step": 34019 - }, - { - "epoch": 5.478682716695519, - "grad_norm": 0.013782745227217674, - "learning_rate": 0.00019998519782734518, - "loss": 46.0, - "step": 34020 - }, - { - "epoch": 5.478843753774306, - "grad_norm": 0.005086518824100494, - "learning_rate": 0.00019998519695689385, - "loss": 46.0, - "step": 34021 - }, - { - "epoch": 5.479004790853094, - "grad_norm": 0.002089346293359995, - "learning_rate": 0.0001999851960864169, - "loss": 46.0, - "step": 34022 - }, - { - "epoch": 5.479165827931881, - "grad_norm": 0.010834196582436562, - "learning_rate": 0.0001999851952159144, - "loss": 46.0, - "step": 34023 - }, - { - "epoch": 5.479326865010669, - "grad_norm": 0.006796369329094887, - "learning_rate": 0.00019998519434538626, - "loss": 46.0, - "step": 34024 - }, - { - "epoch": 5.479487902089456, - "grad_norm": 0.0034908675588667393, - "learning_rate": 0.00019998519347483255, - "loss": 46.0, - "step": 34025 - }, - { - "epoch": 5.479648939168244, - "grad_norm": 0.002998356707394123, - "learning_rate": 0.00019998519260425326, - "loss": 46.0, - "step": 34026 - }, - { - "epoch": 5.479809976247031, - "grad_norm": 0.0012734472984448075, - "learning_rate": 0.00019998519173364837, - "loss": 46.0, - "step": 34027 - }, - { - "epoch": 5.479971013325819, - "grad_norm": 0.016380852088332176, - "learning_rate": 0.0001999851908630179, - "loss": 46.0, - "step": 34028 - }, - { - "epoch": 5.480132050404606, - "grad_norm": 0.0009933009278029203, - "learning_rate": 0.00019998518999236184, - "loss": 46.0, - "step": 34029 - }, - { - "epoch": 5.480293087483393, - "grad_norm": 0.00865671131759882, - "learning_rate": 0.0001999851891216802, - "loss": 46.0, - "step": 34030 - }, - { - "epoch": 5.48045412456218, - "grad_norm": 0.002906026318669319, - "learning_rate": 0.00019998518825097293, - "loss": 46.0, - "step": 34031 - }, - { - "epoch": 5.4806151616409675, - "grad_norm": 0.004925843328237534, - "learning_rate": 0.00019998518738024009, - "loss": 46.0, - "step": 34032 - }, - { - "epoch": 5.480776198719755, - "grad_norm": 0.0007878330652602017, - "learning_rate": 0.00019998518650948165, - "loss": 46.0, - "step": 34033 - }, - { - "epoch": 5.480937235798542, - "grad_norm": 0.009944569319486618, - "learning_rate": 0.0001999851856386976, - "loss": 46.0, - "step": 34034 - }, - { - "epoch": 5.48109827287733, - "grad_norm": 0.0035611798521131277, - "learning_rate": 0.00019998518476788802, - "loss": 46.0, - "step": 34035 - }, - { - "epoch": 5.481259309956117, - "grad_norm": 0.0036387438885867596, - "learning_rate": 0.0001999851838970528, - "loss": 46.0, - "step": 34036 - }, - { - "epoch": 5.481420347034905, - "grad_norm": 0.002845211187377572, - "learning_rate": 0.00019998518302619198, - "loss": 46.0, - "step": 34037 - }, - { - "epoch": 5.481581384113692, - "grad_norm": 0.012023522518575191, - "learning_rate": 0.00019998518215530559, - "loss": 46.0, - "step": 34038 - }, - { - "epoch": 5.48174242119248, - "grad_norm": 0.005608272273093462, - "learning_rate": 0.00019998518128439363, - "loss": 46.0, - "step": 34039 - }, - { - "epoch": 5.481903458271267, - "grad_norm": 0.0010498077608644962, - "learning_rate": 0.00019998518041345603, - "loss": 46.0, - "step": 34040 - }, - { - "epoch": 5.482064495350055, - "grad_norm": 0.0070892926305532455, - "learning_rate": 0.00019998517954249287, - "loss": 46.0, - "step": 34041 - }, - { - "epoch": 5.482225532428842, - "grad_norm": 0.008261364884674549, - "learning_rate": 0.00019998517867150412, - "loss": 46.0, - "step": 34042 - }, - { - "epoch": 5.4823865695076295, - "grad_norm": 0.002518248511478305, - "learning_rate": 0.0001999851778004898, - "loss": 46.0, - "step": 34043 - }, - { - "epoch": 5.482547606586417, - "grad_norm": 0.0017912589246407151, - "learning_rate": 0.00019998517692944984, - "loss": 46.0, - "step": 34044 - }, - { - "epoch": 5.4827086436652035, - "grad_norm": 0.005765694193542004, - "learning_rate": 0.0001999851760583843, - "loss": 46.0, - "step": 34045 - }, - { - "epoch": 5.482869680743991, - "grad_norm": 0.0038748509250581264, - "learning_rate": 0.0001999851751872932, - "loss": 46.0, - "step": 34046 - }, - { - "epoch": 5.483030717822778, - "grad_norm": 0.0016907951794564724, - "learning_rate": 0.0001999851743161765, - "loss": 46.0, - "step": 34047 - }, - { - "epoch": 5.483191754901566, - "grad_norm": 0.0027127834036946297, - "learning_rate": 0.0001999851734450342, - "loss": 46.0, - "step": 34048 - }, - { - "epoch": 5.483352791980353, - "grad_norm": 0.0026182737201452255, - "learning_rate": 0.00019998517257386632, - "loss": 46.0, - "step": 34049 - }, - { - "epoch": 5.483513829059141, - "grad_norm": 0.004523037932813168, - "learning_rate": 0.00019998517170267282, - "loss": 46.0, - "step": 34050 - }, - { - "epoch": 5.483674866137928, - "grad_norm": 0.005244947038590908, - "learning_rate": 0.00019998517083145374, - "loss": 46.0, - "step": 34051 - }, - { - "epoch": 5.483835903216716, - "grad_norm": 0.001922972034662962, - "learning_rate": 0.0001999851699602091, - "loss": 46.0, - "step": 34052 - }, - { - "epoch": 5.483996940295503, - "grad_norm": 0.0017984440783038735, - "learning_rate": 0.00019998516908893883, - "loss": 46.0, - "step": 34053 - }, - { - "epoch": 5.484157977374291, - "grad_norm": 0.0021901780273765326, - "learning_rate": 0.00019998516821764296, - "loss": 46.0, - "step": 34054 - }, - { - "epoch": 5.484319014453078, - "grad_norm": 0.010198758915066719, - "learning_rate": 0.00019998516734632155, - "loss": 46.0, - "step": 34055 - }, - { - "epoch": 5.4844800515318655, - "grad_norm": 0.0030106448102742434, - "learning_rate": 0.00019998516647497453, - "loss": 46.0, - "step": 34056 - }, - { - "epoch": 5.484641088610653, - "grad_norm": 0.00413285568356514, - "learning_rate": 0.0001999851656036019, - "loss": 46.0, - "step": 34057 - }, - { - "epoch": 5.48480212568944, - "grad_norm": 0.003924929071217775, - "learning_rate": 0.0001999851647322037, - "loss": 46.0, - "step": 34058 - }, - { - "epoch": 5.484963162768228, - "grad_norm": 0.006923154927790165, - "learning_rate": 0.0001999851638607799, - "loss": 46.0, - "step": 34059 - }, - { - "epoch": 5.485124199847014, - "grad_norm": 0.0038515881169587374, - "learning_rate": 0.00019998516298933046, - "loss": 46.0, - "step": 34060 - }, - { - "epoch": 5.485285236925802, - "grad_norm": 0.0027746856212615967, - "learning_rate": 0.0001999851621178555, - "loss": 46.0, - "step": 34061 - }, - { - "epoch": 5.485446274004589, - "grad_norm": 0.01583748124539852, - "learning_rate": 0.00019998516124635494, - "loss": 46.0, - "step": 34062 - }, - { - "epoch": 5.485607311083377, - "grad_norm": 0.007230414543300867, - "learning_rate": 0.00019998516037482875, - "loss": 46.0, - "step": 34063 - }, - { - "epoch": 5.485768348162164, - "grad_norm": 0.006358353421092033, - "learning_rate": 0.000199985159503277, - "loss": 46.0, - "step": 34064 - }, - { - "epoch": 5.485929385240952, - "grad_norm": 0.008169732056558132, - "learning_rate": 0.00019998515863169964, - "loss": 46.0, - "step": 34065 - }, - { - "epoch": 5.486090422319739, - "grad_norm": 0.0013715194072574377, - "learning_rate": 0.00019998515776009672, - "loss": 46.0, - "step": 34066 - }, - { - "epoch": 5.4862514593985265, - "grad_norm": 0.001878093578852713, - "learning_rate": 0.00019998515688846816, - "loss": 46.0, - "step": 34067 - }, - { - "epoch": 5.486412496477314, - "grad_norm": 0.0031343852169811726, - "learning_rate": 0.00019998515601681407, - "loss": 46.0, - "step": 34068 - }, - { - "epoch": 5.4865735335561014, - "grad_norm": 0.004168075043708086, - "learning_rate": 0.00019998515514513433, - "loss": 46.0, - "step": 34069 - }, - { - "epoch": 5.486734570634889, - "grad_norm": 0.0060966731980443, - "learning_rate": 0.00019998515427342906, - "loss": 46.0, - "step": 34070 - }, - { - "epoch": 5.486895607713676, - "grad_norm": 0.013217343017458916, - "learning_rate": 0.00019998515340169815, - "loss": 46.0, - "step": 34071 - }, - { - "epoch": 5.487056644792464, - "grad_norm": 0.003502884414047003, - "learning_rate": 0.00019998515252994166, - "loss": 46.0, - "step": 34072 - }, - { - "epoch": 5.487217681871251, - "grad_norm": 0.0014455786440521479, - "learning_rate": 0.0001999851516581596, - "loss": 46.0, - "step": 34073 - }, - { - "epoch": 5.487378718950039, - "grad_norm": 0.00547865591943264, - "learning_rate": 0.00019998515078635193, - "loss": 46.0, - "step": 34074 - }, - { - "epoch": 5.487539756028825, - "grad_norm": 0.014122439548373222, - "learning_rate": 0.00019998514991451867, - "loss": 46.0, - "step": 34075 - }, - { - "epoch": 5.487700793107613, - "grad_norm": 0.0014552170177921653, - "learning_rate": 0.00019998514904265982, - "loss": 46.0, - "step": 34076 - }, - { - "epoch": 5.4878618301864, - "grad_norm": 0.0008031705510802567, - "learning_rate": 0.0001999851481707754, - "loss": 46.0, - "step": 34077 - }, - { - "epoch": 5.488022867265188, - "grad_norm": 0.000917566881980747, - "learning_rate": 0.00019998514729886532, - "loss": 46.0, - "step": 34078 - }, - { - "epoch": 5.488183904343975, - "grad_norm": 0.006410039961338043, - "learning_rate": 0.0001999851464269297, - "loss": 46.0, - "step": 34079 - }, - { - "epoch": 5.4883449414227625, - "grad_norm": 0.012301741167902946, - "learning_rate": 0.0001999851455549685, - "loss": 46.0, - "step": 34080 - }, - { - "epoch": 5.48850597850155, - "grad_norm": 0.0034094585571438074, - "learning_rate": 0.00019998514468298168, - "loss": 46.0, - "step": 34081 - }, - { - "epoch": 5.488667015580337, - "grad_norm": 0.005187683273106813, - "learning_rate": 0.0001999851438109693, - "loss": 46.0, - "step": 34082 - }, - { - "epoch": 5.488828052659125, - "grad_norm": 0.010926825925707817, - "learning_rate": 0.00019998514293893132, - "loss": 46.0, - "step": 34083 - }, - { - "epoch": 5.488989089737912, - "grad_norm": 0.0028332334477454424, - "learning_rate": 0.00019998514206686773, - "loss": 46.0, - "step": 34084 - }, - { - "epoch": 5.4891501268167, - "grad_norm": 0.005750021431595087, - "learning_rate": 0.00019998514119477857, - "loss": 46.0, - "step": 34085 - }, - { - "epoch": 5.489311163895487, - "grad_norm": 0.002164200646802783, - "learning_rate": 0.0001999851403226638, - "loss": 46.0, - "step": 34086 - }, - { - "epoch": 5.489472200974275, - "grad_norm": 0.015085523016750813, - "learning_rate": 0.00019998513945052344, - "loss": 46.0, - "step": 34087 - }, - { - "epoch": 5.489633238053061, - "grad_norm": 0.0060419607907533646, - "learning_rate": 0.0001999851385783575, - "loss": 46.0, - "step": 34088 - }, - { - "epoch": 5.489794275131849, - "grad_norm": 0.0045350040309131145, - "learning_rate": 0.00019998513770616599, - "loss": 46.0, - "step": 34089 - }, - { - "epoch": 5.489955312210636, - "grad_norm": 0.005301353987306356, - "learning_rate": 0.00019998513683394884, - "loss": 46.0, - "step": 34090 - }, - { - "epoch": 5.490116349289424, - "grad_norm": 0.0031123608350753784, - "learning_rate": 0.0001999851359617061, - "loss": 46.0, - "step": 34091 - }, - { - "epoch": 5.490277386368211, - "grad_norm": 0.013791980221867561, - "learning_rate": 0.0001999851350894378, - "loss": 46.0, - "step": 34092 - }, - { - "epoch": 5.4904384234469985, - "grad_norm": 0.0007826744113117456, - "learning_rate": 0.00019998513421714393, - "loss": 46.0, - "step": 34093 - }, - { - "epoch": 5.490599460525786, - "grad_norm": 0.004682152532041073, - "learning_rate": 0.00019998513334482443, - "loss": 46.0, - "step": 34094 - }, - { - "epoch": 5.490760497604573, - "grad_norm": 0.0020901490934193134, - "learning_rate": 0.00019998513247247935, - "loss": 46.0, - "step": 34095 - }, - { - "epoch": 5.490921534683361, - "grad_norm": 0.005975193809717894, - "learning_rate": 0.00019998513160010868, - "loss": 46.0, - "step": 34096 - }, - { - "epoch": 5.491082571762148, - "grad_norm": 0.005556042771786451, - "learning_rate": 0.00019998513072771245, - "loss": 46.0, - "step": 34097 - }, - { - "epoch": 5.491243608840936, - "grad_norm": 0.005330977495759726, - "learning_rate": 0.00019998512985529057, - "loss": 46.0, - "step": 34098 - }, - { - "epoch": 5.491404645919723, - "grad_norm": 0.023547105491161346, - "learning_rate": 0.00019998512898284311, - "loss": 46.0, - "step": 34099 - }, - { - "epoch": 5.491565682998511, - "grad_norm": 0.0007415079162456095, - "learning_rate": 0.0001999851281103701, - "loss": 46.0, - "step": 34100 - }, - { - "epoch": 5.491726720077298, - "grad_norm": 0.007302392739802599, - "learning_rate": 0.00019998512723787146, - "loss": 46.0, - "step": 34101 - }, - { - "epoch": 5.491887757156086, - "grad_norm": 0.001321967109106481, - "learning_rate": 0.00019998512636534724, - "loss": 46.0, - "step": 34102 - }, - { - "epoch": 5.492048794234872, - "grad_norm": 0.005085607059299946, - "learning_rate": 0.00019998512549279743, - "loss": 46.0, - "step": 34103 - }, - { - "epoch": 5.49220983131366, - "grad_norm": 0.0017957368399947882, - "learning_rate": 0.00019998512462022204, - "loss": 46.0, - "step": 34104 - }, - { - "epoch": 5.492370868392447, - "grad_norm": 0.007057764567434788, - "learning_rate": 0.00019998512374762103, - "loss": 46.0, - "step": 34105 - }, - { - "epoch": 5.4925319054712345, - "grad_norm": 0.0038359789177775383, - "learning_rate": 0.00019998512287499446, - "loss": 46.0, - "step": 34106 - }, - { - "epoch": 5.492692942550022, - "grad_norm": 0.015206251293420792, - "learning_rate": 0.00019998512200234227, - "loss": 46.0, - "step": 34107 - }, - { - "epoch": 5.492853979628809, - "grad_norm": 0.0011466221185401082, - "learning_rate": 0.00019998512112966453, - "loss": 46.0, - "step": 34108 - }, - { - "epoch": 5.493015016707597, - "grad_norm": 0.01089741475880146, - "learning_rate": 0.00019998512025696117, - "loss": 46.0, - "step": 34109 - }, - { - "epoch": 5.493176053786384, - "grad_norm": 0.004488585516810417, - "learning_rate": 0.0001999851193842322, - "loss": 46.0, - "step": 34110 - }, - { - "epoch": 5.493337090865172, - "grad_norm": 0.006881280802190304, - "learning_rate": 0.00019998511851147767, - "loss": 46.0, - "step": 34111 - }, - { - "epoch": 5.493498127943959, - "grad_norm": 0.003830853383988142, - "learning_rate": 0.00019998511763869755, - "loss": 46.0, - "step": 34112 - }, - { - "epoch": 5.493659165022747, - "grad_norm": 0.00198124791495502, - "learning_rate": 0.00019998511676589184, - "loss": 46.0, - "step": 34113 - }, - { - "epoch": 5.493820202101534, - "grad_norm": 0.010118413716554642, - "learning_rate": 0.00019998511589306052, - "loss": 46.0, - "step": 34114 - }, - { - "epoch": 5.4939812391803216, - "grad_norm": 0.00990296807140112, - "learning_rate": 0.00019998511502020364, - "loss": 46.0, - "step": 34115 - }, - { - "epoch": 5.494142276259109, - "grad_norm": 0.0018488084897398949, - "learning_rate": 0.00019998511414732112, - "loss": 46.0, - "step": 34116 - }, - { - "epoch": 5.4943033133378965, - "grad_norm": 0.004723068792372942, - "learning_rate": 0.00019998511327441306, - "loss": 46.0, - "step": 34117 - }, - { - "epoch": 5.494464350416683, - "grad_norm": 0.014449436217546463, - "learning_rate": 0.00019998511240147936, - "loss": 46.0, - "step": 34118 - }, - { - "epoch": 5.4946253874954705, - "grad_norm": 0.0053884778171777725, - "learning_rate": 0.0001999851115285201, - "loss": 46.0, - "step": 34119 - }, - { - "epoch": 5.494786424574258, - "grad_norm": 0.006324960384517908, - "learning_rate": 0.00019998511065553524, - "loss": 46.0, - "step": 34120 - }, - { - "epoch": 5.494947461653045, - "grad_norm": 0.009068763814866543, - "learning_rate": 0.0001999851097825248, - "loss": 46.0, - "step": 34121 - }, - { - "epoch": 5.495108498731833, - "grad_norm": 0.002731488784775138, - "learning_rate": 0.00019998510890948876, - "loss": 46.0, - "step": 34122 - }, - { - "epoch": 5.49526953581062, - "grad_norm": 0.004338014405220747, - "learning_rate": 0.00019998510803642713, - "loss": 46.0, - "step": 34123 - }, - { - "epoch": 5.495430572889408, - "grad_norm": 0.003702192334458232, - "learning_rate": 0.00019998510716333988, - "loss": 46.0, - "step": 34124 - }, - { - "epoch": 5.495591609968195, - "grad_norm": 0.004951875191181898, - "learning_rate": 0.0001999851062902271, - "loss": 46.0, - "step": 34125 - }, - { - "epoch": 5.495752647046983, - "grad_norm": 0.006439009215682745, - "learning_rate": 0.0001999851054170887, - "loss": 46.0, - "step": 34126 - }, - { - "epoch": 5.49591368412577, - "grad_norm": 0.0013956889742985368, - "learning_rate": 0.0001999851045439247, - "loss": 46.0, - "step": 34127 - }, - { - "epoch": 5.4960747212045575, - "grad_norm": 0.0022253713104873896, - "learning_rate": 0.0001999851036707351, - "loss": 46.0, - "step": 34128 - }, - { - "epoch": 5.496235758283345, - "grad_norm": 0.0068828389048576355, - "learning_rate": 0.00019998510279751992, - "loss": 46.0, - "step": 34129 - }, - { - "epoch": 5.496396795362132, - "grad_norm": 0.005418810062110424, - "learning_rate": 0.00019998510192427915, - "loss": 46.0, - "step": 34130 - }, - { - "epoch": 5.49655783244092, - "grad_norm": 0.0026387288235127926, - "learning_rate": 0.0001999851010510128, - "loss": 46.0, - "step": 34131 - }, - { - "epoch": 5.496718869519707, - "grad_norm": 0.0049411579966545105, - "learning_rate": 0.00019998510017772085, - "loss": 46.0, - "step": 34132 - }, - { - "epoch": 5.496879906598494, - "grad_norm": 0.005068782716989517, - "learning_rate": 0.00019998509930440331, - "loss": 46.0, - "step": 34133 - }, - { - "epoch": 5.497040943677281, - "grad_norm": 0.002494296757504344, - "learning_rate": 0.0001999850984310602, - "loss": 46.0, - "step": 34134 - }, - { - "epoch": 5.497201980756069, - "grad_norm": 0.01123699452728033, - "learning_rate": 0.00019998509755769146, - "loss": 46.0, - "step": 34135 - }, - { - "epoch": 5.497363017834856, - "grad_norm": 0.0022961741778999567, - "learning_rate": 0.00019998509668429714, - "loss": 46.0, - "step": 34136 - }, - { - "epoch": 5.497524054913644, - "grad_norm": 0.0012729689478874207, - "learning_rate": 0.00019998509581087723, - "loss": 46.0, - "step": 34137 - }, - { - "epoch": 5.497685091992431, - "grad_norm": 0.003151624696329236, - "learning_rate": 0.00019998509493743174, - "loss": 46.0, - "step": 34138 - }, - { - "epoch": 5.497846129071219, - "grad_norm": 0.005862434860318899, - "learning_rate": 0.00019998509406396066, - "loss": 46.0, - "step": 34139 - }, - { - "epoch": 5.498007166150006, - "grad_norm": 0.0068648867309093475, - "learning_rate": 0.00019998509319046396, - "loss": 46.0, - "step": 34140 - }, - { - "epoch": 5.4981682032287935, - "grad_norm": 0.0038339283782988787, - "learning_rate": 0.0001999850923169417, - "loss": 46.0, - "step": 34141 - }, - { - "epoch": 5.498329240307581, - "grad_norm": 0.0020844812970608473, - "learning_rate": 0.00019998509144339384, - "loss": 46.0, - "step": 34142 - }, - { - "epoch": 5.498490277386368, - "grad_norm": 0.0018930728547275066, - "learning_rate": 0.0001999850905698204, - "loss": 46.0, - "step": 34143 - }, - { - "epoch": 5.498651314465156, - "grad_norm": 0.006712261121720076, - "learning_rate": 0.00019998508969622133, - "loss": 46.0, - "step": 34144 - }, - { - "epoch": 5.498812351543943, - "grad_norm": 0.009071686305105686, - "learning_rate": 0.00019998508882259673, - "loss": 46.0, - "step": 34145 - }, - { - "epoch": 5.498973388622731, - "grad_norm": 0.0018597109010443091, - "learning_rate": 0.00019998508794894648, - "loss": 46.0, - "step": 34146 - }, - { - "epoch": 5.499134425701518, - "grad_norm": 0.007114425301551819, - "learning_rate": 0.00019998508707527068, - "loss": 46.0, - "step": 34147 - }, - { - "epoch": 5.499295462780305, - "grad_norm": 0.0013667274033650756, - "learning_rate": 0.00019998508620156926, - "loss": 46.0, - "step": 34148 - }, - { - "epoch": 5.499456499859092, - "grad_norm": 0.01908756047487259, - "learning_rate": 0.00019998508532784228, - "loss": 46.0, - "step": 34149 - }, - { - "epoch": 5.49961753693788, - "grad_norm": 0.005090340040624142, - "learning_rate": 0.00019998508445408968, - "loss": 46.0, - "step": 34150 - }, - { - "epoch": 5.499778574016667, - "grad_norm": 0.026592692360281944, - "learning_rate": 0.00019998508358031153, - "loss": 46.0, - "step": 34151 - }, - { - "epoch": 5.499939611095455, - "grad_norm": 0.002731078304350376, - "learning_rate": 0.00019998508270650773, - "loss": 46.0, - "step": 34152 - }, - { - "epoch": 5.500100648174242, - "grad_norm": 0.0017696896102279425, - "learning_rate": 0.00019998508183267837, - "loss": 46.0, - "step": 34153 - }, - { - "epoch": 5.5002616852530295, - "grad_norm": 0.0016494258306920528, - "learning_rate": 0.00019998508095882343, - "loss": 46.0, - "step": 34154 - }, - { - "epoch": 5.500422722331817, - "grad_norm": 0.005124740768224001, - "learning_rate": 0.00019998508008494287, - "loss": 46.0, - "step": 34155 - }, - { - "epoch": 5.500583759410604, - "grad_norm": 0.002132552908733487, - "learning_rate": 0.00019998507921103675, - "loss": 46.0, - "step": 34156 - }, - { - "epoch": 5.500744796489392, - "grad_norm": 0.010711484588682652, - "learning_rate": 0.00019998507833710502, - "loss": 46.0, - "step": 34157 - }, - { - "epoch": 5.500905833568179, - "grad_norm": 0.0009878241689875722, - "learning_rate": 0.0001999850774631477, - "loss": 46.0, - "step": 34158 - }, - { - "epoch": 5.501066870646967, - "grad_norm": 0.007199808955192566, - "learning_rate": 0.0001999850765891648, - "loss": 46.0, - "step": 34159 - }, - { - "epoch": 5.501227907725754, - "grad_norm": 0.01828848011791706, - "learning_rate": 0.00019998507571515628, - "loss": 46.0, - "step": 34160 - }, - { - "epoch": 5.501388944804541, - "grad_norm": 0.0034937537275254726, - "learning_rate": 0.0001999850748411222, - "loss": 46.0, - "step": 34161 - }, - { - "epoch": 5.501549981883329, - "grad_norm": 0.005627928301692009, - "learning_rate": 0.00019998507396706253, - "loss": 46.0, - "step": 34162 - }, - { - "epoch": 5.501711018962116, - "grad_norm": 0.0056982627138495445, - "learning_rate": 0.00019998507309297724, - "loss": 46.0, - "step": 34163 - }, - { - "epoch": 5.501872056040903, - "grad_norm": 0.006696680095046759, - "learning_rate": 0.0001999850722188664, - "loss": 46.0, - "step": 34164 - }, - { - "epoch": 5.502033093119691, - "grad_norm": 0.0043885246850550175, - "learning_rate": 0.00019998507134472995, - "loss": 46.0, - "step": 34165 - }, - { - "epoch": 5.502194130198478, - "grad_norm": 0.014521142467856407, - "learning_rate": 0.00019998507047056788, - "loss": 46.0, - "step": 34166 - }, - { - "epoch": 5.5023551672772655, - "grad_norm": 0.0067253378219902515, - "learning_rate": 0.00019998506959638027, - "loss": 46.0, - "step": 34167 - }, - { - "epoch": 5.502516204356053, - "grad_norm": 0.0022953529842197895, - "learning_rate": 0.00019998506872216703, - "loss": 46.0, - "step": 34168 - }, - { - "epoch": 5.50267724143484, - "grad_norm": 0.013663801364600658, - "learning_rate": 0.00019998506784792822, - "loss": 46.0, - "step": 34169 - }, - { - "epoch": 5.502838278513628, - "grad_norm": 0.00542009063065052, - "learning_rate": 0.0001999850669736638, - "loss": 46.0, - "step": 34170 - }, - { - "epoch": 5.502999315592415, - "grad_norm": 0.007828470319509506, - "learning_rate": 0.0001999850660993738, - "loss": 46.0, - "step": 34171 - }, - { - "epoch": 5.503160352671203, - "grad_norm": 0.0006252566818147898, - "learning_rate": 0.0001999850652250582, - "loss": 46.0, - "step": 34172 - }, - { - "epoch": 5.50332138974999, - "grad_norm": 0.010983610525727272, - "learning_rate": 0.00019998506435071702, - "loss": 46.0, - "step": 34173 - }, - { - "epoch": 5.503482426828778, - "grad_norm": 0.009040025062859058, - "learning_rate": 0.00019998506347635025, - "loss": 46.0, - "step": 34174 - }, - { - "epoch": 5.503643463907565, - "grad_norm": 0.0075152041390538216, - "learning_rate": 0.0001999850626019579, - "loss": 46.0, - "step": 34175 - }, - { - "epoch": 5.503804500986352, - "grad_norm": 0.0050583938136696815, - "learning_rate": 0.00019998506172753993, - "loss": 46.0, - "step": 34176 - }, - { - "epoch": 5.50396553806514, - "grad_norm": 0.0023125142324715853, - "learning_rate": 0.0001999850608530964, - "loss": 46.0, - "step": 34177 - }, - { - "epoch": 5.504126575143927, - "grad_norm": 0.003778290469199419, - "learning_rate": 0.00019998505997862723, - "loss": 46.0, - "step": 34178 - }, - { - "epoch": 5.504287612222714, - "grad_norm": 0.005179339088499546, - "learning_rate": 0.00019998505910413252, - "loss": 46.0, - "step": 34179 - }, - { - "epoch": 5.5044486493015015, - "grad_norm": 0.0011703349882736802, - "learning_rate": 0.0001999850582296122, - "loss": 46.0, - "step": 34180 - }, - { - "epoch": 5.504609686380289, - "grad_norm": 0.005269202403724194, - "learning_rate": 0.00019998505735506627, - "loss": 46.0, - "step": 34181 - }, - { - "epoch": 5.504770723459076, - "grad_norm": 0.004499434493482113, - "learning_rate": 0.00019998505648049478, - "loss": 46.0, - "step": 34182 - }, - { - "epoch": 5.504931760537864, - "grad_norm": 0.004150930792093277, - "learning_rate": 0.00019998505560589767, - "loss": 46.0, - "step": 34183 - }, - { - "epoch": 5.505092797616651, - "grad_norm": 0.002291294513270259, - "learning_rate": 0.00019998505473127498, - "loss": 46.0, - "step": 34184 - }, - { - "epoch": 5.505253834695439, - "grad_norm": 0.0016701340209692717, - "learning_rate": 0.00019998505385662672, - "loss": 46.0, - "step": 34185 - }, - { - "epoch": 5.505414871774226, - "grad_norm": 0.006673745345324278, - "learning_rate": 0.00019998505298195288, - "loss": 46.0, - "step": 34186 - }, - { - "epoch": 5.505575908853014, - "grad_norm": 0.006532181054353714, - "learning_rate": 0.0001999850521072534, - "loss": 46.0, - "step": 34187 - }, - { - "epoch": 5.505736945931801, - "grad_norm": 0.0020645789336413145, - "learning_rate": 0.00019998505123252836, - "loss": 46.0, - "step": 34188 - }, - { - "epoch": 5.5058979830105885, - "grad_norm": 0.009018348529934883, - "learning_rate": 0.0001999850503577777, - "loss": 46.0, - "step": 34189 - }, - { - "epoch": 5.506059020089376, - "grad_norm": 0.0013276365352794528, - "learning_rate": 0.00019998504948300148, - "loss": 46.0, - "step": 34190 - }, - { - "epoch": 5.5062200571681625, - "grad_norm": 0.005371210630983114, - "learning_rate": 0.00019998504860819965, - "loss": 46.0, - "step": 34191 - }, - { - "epoch": 5.50638109424695, - "grad_norm": 0.012877581641077995, - "learning_rate": 0.00019998504773337223, - "loss": 46.0, - "step": 34192 - }, - { - "epoch": 5.506542131325737, - "grad_norm": 0.0013488840777426958, - "learning_rate": 0.00019998504685851925, - "loss": 46.0, - "step": 34193 - }, - { - "epoch": 5.506703168404525, - "grad_norm": 0.006245528347790241, - "learning_rate": 0.00019998504598364063, - "loss": 46.0, - "step": 34194 - }, - { - "epoch": 5.506864205483312, - "grad_norm": 0.003025773447006941, - "learning_rate": 0.00019998504510873645, - "loss": 46.0, - "step": 34195 - }, - { - "epoch": 5.5070252425621, - "grad_norm": 0.00273637636564672, - "learning_rate": 0.00019998504423380668, - "loss": 46.0, - "step": 34196 - }, - { - "epoch": 5.507186279640887, - "grad_norm": 0.011829034425318241, - "learning_rate": 0.0001999850433588513, - "loss": 46.0, - "step": 34197 - }, - { - "epoch": 5.507347316719675, - "grad_norm": 0.003219072474166751, - "learning_rate": 0.00019998504248387036, - "loss": 46.0, - "step": 34198 - }, - { - "epoch": 5.507508353798462, - "grad_norm": 0.00635336060076952, - "learning_rate": 0.0001999850416088638, - "loss": 46.0, - "step": 34199 - }, - { - "epoch": 5.50766939087725, - "grad_norm": 0.001374226645566523, - "learning_rate": 0.00019998504073383166, - "loss": 46.0, - "step": 34200 - }, - { - "epoch": 5.507830427956037, - "grad_norm": 0.018123339861631393, - "learning_rate": 0.00019998503985877393, - "loss": 46.0, - "step": 34201 - }, - { - "epoch": 5.5079914650348245, - "grad_norm": 0.0018701660446822643, - "learning_rate": 0.00019998503898369061, - "loss": 46.0, - "step": 34202 - }, - { - "epoch": 5.508152502113612, - "grad_norm": 0.002980404533445835, - "learning_rate": 0.00019998503810858168, - "loss": 46.0, - "step": 34203 - }, - { - "epoch": 5.508313539192399, - "grad_norm": 0.002194474684074521, - "learning_rate": 0.0001999850372334472, - "loss": 46.0, - "step": 34204 - }, - { - "epoch": 5.508474576271187, - "grad_norm": 0.012783299200236797, - "learning_rate": 0.00019998503635828709, - "loss": 46.0, - "step": 34205 - }, - { - "epoch": 5.508635613349973, - "grad_norm": 0.002803035778924823, - "learning_rate": 0.0001999850354831014, - "loss": 46.0, - "step": 34206 - }, - { - "epoch": 5.508796650428761, - "grad_norm": 0.0049095768481493, - "learning_rate": 0.00019998503460789014, - "loss": 46.0, - "step": 34207 - }, - { - "epoch": 5.508957687507548, - "grad_norm": 0.0029583917930722237, - "learning_rate": 0.00019998503373265324, - "loss": 46.0, - "step": 34208 - }, - { - "epoch": 5.509118724586336, - "grad_norm": 0.011844675987958908, - "learning_rate": 0.00019998503285739082, - "loss": 46.0, - "step": 34209 - }, - { - "epoch": 5.509279761665123, - "grad_norm": 0.0039554196409881115, - "learning_rate": 0.00019998503198210275, - "loss": 46.0, - "step": 34210 - }, - { - "epoch": 5.509440798743911, - "grad_norm": 0.005298307631164789, - "learning_rate": 0.0001999850311067891, - "loss": 46.0, - "step": 34211 - }, - { - "epoch": 5.509601835822698, - "grad_norm": 0.010015979409217834, - "learning_rate": 0.00019998503023144987, - "loss": 46.0, - "step": 34212 - }, - { - "epoch": 5.509762872901486, - "grad_norm": 0.005829904228448868, - "learning_rate": 0.00019998502935608502, - "loss": 46.0, - "step": 34213 - }, - { - "epoch": 5.509923909980273, - "grad_norm": 0.003104221075773239, - "learning_rate": 0.00019998502848069463, - "loss": 46.0, - "step": 34214 - }, - { - "epoch": 5.5100849470590605, - "grad_norm": 0.0015328072477132082, - "learning_rate": 0.00019998502760527862, - "loss": 46.0, - "step": 34215 - }, - { - "epoch": 5.510245984137848, - "grad_norm": 0.004958477336913347, - "learning_rate": 0.000199985026729837, - "loss": 46.0, - "step": 34216 - }, - { - "epoch": 5.510407021216635, - "grad_norm": 0.007182045374065638, - "learning_rate": 0.00019998502585436985, - "loss": 46.0, - "step": 34217 - }, - { - "epoch": 5.510568058295423, - "grad_norm": 0.002235148102045059, - "learning_rate": 0.00019998502497887706, - "loss": 46.0, - "step": 34218 - }, - { - "epoch": 5.51072909537421, - "grad_norm": 0.0017434393521398306, - "learning_rate": 0.00019998502410335868, - "loss": 46.0, - "step": 34219 - }, - { - "epoch": 5.510890132452998, - "grad_norm": 0.01854533888399601, - "learning_rate": 0.00019998502322781474, - "loss": 46.0, - "step": 34220 - }, - { - "epoch": 5.511051169531784, - "grad_norm": 0.0016888438258320093, - "learning_rate": 0.00019998502235224515, - "loss": 46.0, - "step": 34221 - }, - { - "epoch": 5.511212206610572, - "grad_norm": 0.0008130050264298916, - "learning_rate": 0.00019998502147665, - "loss": 46.0, - "step": 34222 - }, - { - "epoch": 5.511373243689359, - "grad_norm": 0.012573973275721073, - "learning_rate": 0.00019998502060102928, - "loss": 46.0, - "step": 34223 - }, - { - "epoch": 5.511534280768147, - "grad_norm": 0.0010394317796453834, - "learning_rate": 0.00019998501972538297, - "loss": 46.0, - "step": 34224 - }, - { - "epoch": 5.511695317846934, - "grad_norm": 0.005306388717144728, - "learning_rate": 0.00019998501884971103, - "loss": 46.0, - "step": 34225 - }, - { - "epoch": 5.511856354925722, - "grad_norm": 0.002362685278058052, - "learning_rate": 0.00019998501797401354, - "loss": 46.0, - "step": 34226 - }, - { - "epoch": 5.512017392004509, - "grad_norm": 0.0027344832196831703, - "learning_rate": 0.00019998501709829044, - "loss": 46.0, - "step": 34227 - }, - { - "epoch": 5.5121784290832965, - "grad_norm": 0.012368762865662575, - "learning_rate": 0.00019998501622254175, - "loss": 46.0, - "step": 34228 - }, - { - "epoch": 5.512339466162084, - "grad_norm": 0.005386157892644405, - "learning_rate": 0.00019998501534676747, - "loss": 46.0, - "step": 34229 - }, - { - "epoch": 5.512500503240871, - "grad_norm": 0.01438825111836195, - "learning_rate": 0.0001999850144709676, - "loss": 46.0, - "step": 34230 - }, - { - "epoch": 5.512661540319659, - "grad_norm": 0.0038165715523064137, - "learning_rate": 0.00019998501359514212, - "loss": 46.0, - "step": 34231 - }, - { - "epoch": 5.512822577398446, - "grad_norm": 0.002818374428898096, - "learning_rate": 0.00019998501271929108, - "loss": 46.0, - "step": 34232 - }, - { - "epoch": 5.512983614477234, - "grad_norm": 0.0009560149046592414, - "learning_rate": 0.00019998501184341442, - "loss": 46.0, - "step": 34233 - }, - { - "epoch": 5.51314465155602, - "grad_norm": 0.009288566187024117, - "learning_rate": 0.00019998501096751218, - "loss": 46.0, - "step": 34234 - }, - { - "epoch": 5.513305688634809, - "grad_norm": 0.004407213069498539, - "learning_rate": 0.00019998501009158438, - "loss": 46.0, - "step": 34235 - }, - { - "epoch": 5.513466725713595, - "grad_norm": 0.005016713868826628, - "learning_rate": 0.00019998500921563096, - "loss": 46.0, - "step": 34236 - }, - { - "epoch": 5.513627762792383, - "grad_norm": 0.022559037432074547, - "learning_rate": 0.00019998500833965196, - "loss": 46.0, - "step": 34237 - }, - { - "epoch": 5.51378879987117, - "grad_norm": 0.003801714163273573, - "learning_rate": 0.00019998500746364734, - "loss": 46.0, - "step": 34238 - }, - { - "epoch": 5.5139498369499576, - "grad_norm": 0.0038948128931224346, - "learning_rate": 0.00019998500658761716, - "loss": 46.0, - "step": 34239 - }, - { - "epoch": 5.514110874028745, - "grad_norm": 0.0015044279862195253, - "learning_rate": 0.00019998500571156137, - "loss": 46.0, - "step": 34240 - }, - { - "epoch": 5.5142719111075325, - "grad_norm": 0.004923420492559671, - "learning_rate": 0.00019998500483548, - "loss": 46.0, - "step": 34241 - }, - { - "epoch": 5.51443294818632, - "grad_norm": 0.001664804178290069, - "learning_rate": 0.00019998500395937302, - "loss": 46.0, - "step": 34242 - }, - { - "epoch": 5.514593985265107, - "grad_norm": 0.0030872018542140722, - "learning_rate": 0.00019998500308324047, - "loss": 46.0, - "step": 34243 - }, - { - "epoch": 5.514755022343895, - "grad_norm": 0.0006278973305597901, - "learning_rate": 0.00019998500220708233, - "loss": 46.0, - "step": 34244 - }, - { - "epoch": 5.514916059422682, - "grad_norm": 0.008448035456240177, - "learning_rate": 0.0001999850013308986, - "loss": 46.0, - "step": 34245 - }, - { - "epoch": 5.51507709650147, - "grad_norm": 0.0005471177864819765, - "learning_rate": 0.00019998500045468926, - "loss": 46.0, - "step": 34246 - }, - { - "epoch": 5.515238133580257, - "grad_norm": 0.008499611169099808, - "learning_rate": 0.00019998499957845435, - "loss": 46.0, - "step": 34247 - }, - { - "epoch": 5.515399170659045, - "grad_norm": 0.004641486331820488, - "learning_rate": 0.00019998499870219384, - "loss": 46.0, - "step": 34248 - }, - { - "epoch": 5.515560207737831, - "grad_norm": 0.004723050631582737, - "learning_rate": 0.0001999849978259077, - "loss": 46.0, - "step": 34249 - }, - { - "epoch": 5.5157212448166195, - "grad_norm": 0.0019629092421382666, - "learning_rate": 0.00019998499694959604, - "loss": 46.0, - "step": 34250 - }, - { - "epoch": 5.515882281895406, - "grad_norm": 0.00206712051294744, - "learning_rate": 0.00019998499607325876, - "loss": 46.0, - "step": 34251 - }, - { - "epoch": 5.5160433189741935, - "grad_norm": 0.0007701151189394295, - "learning_rate": 0.00019998499519689587, - "loss": 46.0, - "step": 34252 - }, - { - "epoch": 5.516204356052981, - "grad_norm": 0.005060708150267601, - "learning_rate": 0.00019998499432050742, - "loss": 46.0, - "step": 34253 - }, - { - "epoch": 5.516365393131768, - "grad_norm": 0.008270849473774433, - "learning_rate": 0.00019998499344409335, - "loss": 46.0, - "step": 34254 - }, - { - "epoch": 5.516526430210556, - "grad_norm": 0.008350512012839317, - "learning_rate": 0.0001999849925676537, - "loss": 46.0, - "step": 34255 - }, - { - "epoch": 5.516687467289343, - "grad_norm": 0.003098027780652046, - "learning_rate": 0.00019998499169118848, - "loss": 46.0, - "step": 34256 - }, - { - "epoch": 5.516848504368131, - "grad_norm": 0.006756963208317757, - "learning_rate": 0.00019998499081469762, - "loss": 46.0, - "step": 34257 - }, - { - "epoch": 5.517009541446918, - "grad_norm": 0.0010933625744655728, - "learning_rate": 0.00019998498993818123, - "loss": 46.0, - "step": 34258 - }, - { - "epoch": 5.517170578525706, - "grad_norm": 0.005616801790893078, - "learning_rate": 0.0001999849890616392, - "loss": 46.0, - "step": 34259 - }, - { - "epoch": 5.517331615604493, - "grad_norm": 0.005372984334826469, - "learning_rate": 0.0001999849881850716, - "loss": 46.0, - "step": 34260 - }, - { - "epoch": 5.517492652683281, - "grad_norm": 0.008079294115304947, - "learning_rate": 0.0001999849873084784, - "loss": 46.0, - "step": 34261 - }, - { - "epoch": 5.517653689762068, - "grad_norm": 0.005586813669651747, - "learning_rate": 0.00019998498643185962, - "loss": 46.0, - "step": 34262 - }, - { - "epoch": 5.5178147268408555, - "grad_norm": 0.003906476777046919, - "learning_rate": 0.00019998498555521527, - "loss": 46.0, - "step": 34263 - }, - { - "epoch": 5.517975763919642, - "grad_norm": 0.0027365367859601974, - "learning_rate": 0.00019998498467854527, - "loss": 46.0, - "step": 34264 - }, - { - "epoch": 5.5181368009984295, - "grad_norm": 0.003583196084946394, - "learning_rate": 0.00019998498380184972, - "loss": 46.0, - "step": 34265 - }, - { - "epoch": 5.518297838077217, - "grad_norm": 0.001992402831092477, - "learning_rate": 0.00019998498292512858, - "loss": 46.0, - "step": 34266 - }, - { - "epoch": 5.518458875156004, - "grad_norm": 0.0035375088918954134, - "learning_rate": 0.00019998498204838183, - "loss": 46.0, - "step": 34267 - }, - { - "epoch": 5.518619912234792, - "grad_norm": 0.0033124389592558146, - "learning_rate": 0.0001999849811716095, - "loss": 46.0, - "step": 34268 - }, - { - "epoch": 5.518780949313579, - "grad_norm": 0.005209075752645731, - "learning_rate": 0.00019998498029481158, - "loss": 46.0, - "step": 34269 - }, - { - "epoch": 5.518941986392367, - "grad_norm": 0.001894405111670494, - "learning_rate": 0.00019998497941798807, - "loss": 46.0, - "step": 34270 - }, - { - "epoch": 5.519103023471154, - "grad_norm": 0.012381664477288723, - "learning_rate": 0.00019998497854113896, - "loss": 46.0, - "step": 34271 - }, - { - "epoch": 5.519264060549942, - "grad_norm": 0.0037883417680859566, - "learning_rate": 0.00019998497766426424, - "loss": 46.0, - "step": 34272 - }, - { - "epoch": 5.519425097628729, - "grad_norm": 0.0057252817787230015, - "learning_rate": 0.00019998497678736397, - "loss": 46.0, - "step": 34273 - }, - { - "epoch": 5.519586134707517, - "grad_norm": 0.0020538833923637867, - "learning_rate": 0.0001999849759104381, - "loss": 46.0, - "step": 34274 - }, - { - "epoch": 5.519747171786304, - "grad_norm": 0.025955431163311005, - "learning_rate": 0.00019998497503348662, - "loss": 46.0, - "step": 34275 - }, - { - "epoch": 5.5199082088650915, - "grad_norm": 0.009899445809423923, - "learning_rate": 0.00019998497415650958, - "loss": 46.0, - "step": 34276 - }, - { - "epoch": 5.520069245943879, - "grad_norm": 0.004798884503543377, - "learning_rate": 0.00019998497327950693, - "loss": 46.0, - "step": 34277 - }, - { - "epoch": 5.520230283022666, - "grad_norm": 0.0034558449406176805, - "learning_rate": 0.0001999849724024787, - "loss": 46.0, - "step": 34278 - }, - { - "epoch": 5.520391320101453, - "grad_norm": 0.0031368527561426163, - "learning_rate": 0.00019998497152542486, - "loss": 46.0, - "step": 34279 - }, - { - "epoch": 5.52055235718024, - "grad_norm": 0.006401309743523598, - "learning_rate": 0.00019998497064834542, - "loss": 46.0, - "step": 34280 - }, - { - "epoch": 5.520713394259028, - "grad_norm": 0.002318466315045953, - "learning_rate": 0.00019998496977124042, - "loss": 46.0, - "step": 34281 - }, - { - "epoch": 5.520874431337815, - "grad_norm": 0.003139452775940299, - "learning_rate": 0.0001999849688941098, - "loss": 46.0, - "step": 34282 - }, - { - "epoch": 5.521035468416603, - "grad_norm": 0.005002114921808243, - "learning_rate": 0.0001999849680169536, - "loss": 46.0, - "step": 34283 - }, - { - "epoch": 5.52119650549539, - "grad_norm": 0.006424926221370697, - "learning_rate": 0.0001999849671397718, - "loss": 46.0, - "step": 34284 - }, - { - "epoch": 5.521357542574178, - "grad_norm": 0.004735100083053112, - "learning_rate": 0.00019998496626256446, - "loss": 46.0, - "step": 34285 - }, - { - "epoch": 5.521518579652965, - "grad_norm": 0.012039123103022575, - "learning_rate": 0.00019998496538533146, - "loss": 46.0, - "step": 34286 - }, - { - "epoch": 5.521679616731753, - "grad_norm": 0.003961151000112295, - "learning_rate": 0.0001999849645080729, - "loss": 46.0, - "step": 34287 - }, - { - "epoch": 5.52184065381054, - "grad_norm": 0.004428533837199211, - "learning_rate": 0.00019998496363078875, - "loss": 46.0, - "step": 34288 - }, - { - "epoch": 5.5220016908893275, - "grad_norm": 0.008572126738727093, - "learning_rate": 0.00019998496275347902, - "loss": 46.0, - "step": 34289 - }, - { - "epoch": 5.522162727968115, - "grad_norm": 0.005892967339605093, - "learning_rate": 0.00019998496187614368, - "loss": 46.0, - "step": 34290 - }, - { - "epoch": 5.522323765046902, - "grad_norm": 0.002398570068180561, - "learning_rate": 0.00019998496099878278, - "loss": 46.0, - "step": 34291 - }, - { - "epoch": 5.52248480212569, - "grad_norm": 0.0023990217596292496, - "learning_rate": 0.00019998496012139623, - "loss": 46.0, - "step": 34292 - }, - { - "epoch": 5.522645839204477, - "grad_norm": 0.003208345966413617, - "learning_rate": 0.00019998495924398416, - "loss": 46.0, - "step": 34293 - }, - { - "epoch": 5.522806876283264, - "grad_norm": 0.009863338433206081, - "learning_rate": 0.00019998495836654642, - "loss": 46.0, - "step": 34294 - }, - { - "epoch": 5.522967913362051, - "grad_norm": 0.0046867383643984795, - "learning_rate": 0.00019998495748908314, - "loss": 46.0, - "step": 34295 - }, - { - "epoch": 5.523128950440839, - "grad_norm": 0.00847650971263647, - "learning_rate": 0.00019998495661159427, - "loss": 46.0, - "step": 34296 - }, - { - "epoch": 5.523289987519626, - "grad_norm": 0.0016790707595646381, - "learning_rate": 0.0001999849557340798, - "loss": 46.0, - "step": 34297 - }, - { - "epoch": 5.523451024598414, - "grad_norm": 0.0022495663724839687, - "learning_rate": 0.00019998495485653973, - "loss": 46.0, - "step": 34298 - }, - { - "epoch": 5.523612061677201, - "grad_norm": 0.0069077154621481895, - "learning_rate": 0.00019998495397897408, - "loss": 46.0, - "step": 34299 - }, - { - "epoch": 5.5237730987559885, - "grad_norm": 0.015760261565446854, - "learning_rate": 0.00019998495310138284, - "loss": 46.0, - "step": 34300 - }, - { - "epoch": 5.523934135834776, - "grad_norm": 0.004451438318938017, - "learning_rate": 0.000199984952223766, - "loss": 46.0, - "step": 34301 - }, - { - "epoch": 5.5240951729135634, - "grad_norm": 0.015721792355179787, - "learning_rate": 0.00019998495134612357, - "loss": 46.0, - "step": 34302 - }, - { - "epoch": 5.524256209992351, - "grad_norm": 0.005324992351233959, - "learning_rate": 0.00019998495046845557, - "loss": 46.0, - "step": 34303 - }, - { - "epoch": 5.524417247071138, - "grad_norm": 0.0029777069576084614, - "learning_rate": 0.00019998494959076195, - "loss": 46.0, - "step": 34304 - }, - { - "epoch": 5.524578284149926, - "grad_norm": 0.0026553133502602577, - "learning_rate": 0.00019998494871304275, - "loss": 46.0, - "step": 34305 - }, - { - "epoch": 5.524739321228713, - "grad_norm": 0.009386973455548286, - "learning_rate": 0.000199984947835298, - "loss": 46.0, - "step": 34306 - }, - { - "epoch": 5.5249003583075, - "grad_norm": 0.0059987870045006275, - "learning_rate": 0.00019998494695752758, - "loss": 46.0, - "step": 34307 - }, - { - "epoch": 5.525061395386288, - "grad_norm": 0.010576811619102955, - "learning_rate": 0.00019998494607973162, - "loss": 46.0, - "step": 34308 - }, - { - "epoch": 5.525222432465075, - "grad_norm": 0.002587325172498822, - "learning_rate": 0.00019998494520191004, - "loss": 46.0, - "step": 34309 - }, - { - "epoch": 5.525383469543862, - "grad_norm": 0.00582084758207202, - "learning_rate": 0.0001999849443240629, - "loss": 46.0, - "step": 34310 - }, - { - "epoch": 5.52554450662265, - "grad_norm": 0.008977241814136505, - "learning_rate": 0.00019998494344619015, - "loss": 46.0, - "step": 34311 - }, - { - "epoch": 5.525705543701437, - "grad_norm": 0.004376677796244621, - "learning_rate": 0.0001999849425682918, - "loss": 46.0, - "step": 34312 - }, - { - "epoch": 5.5258665807802245, - "grad_norm": 0.008413034491240978, - "learning_rate": 0.0001999849416903679, - "loss": 46.0, - "step": 34313 - }, - { - "epoch": 5.526027617859012, - "grad_norm": 0.011920742690563202, - "learning_rate": 0.00019998494081241837, - "loss": 46.0, - "step": 34314 - }, - { - "epoch": 5.526188654937799, - "grad_norm": 0.0039012304041534662, - "learning_rate": 0.00019998493993444324, - "loss": 46.0, - "step": 34315 - }, - { - "epoch": 5.526349692016587, - "grad_norm": 0.009865395724773407, - "learning_rate": 0.00019998493905644255, - "loss": 46.0, - "step": 34316 - }, - { - "epoch": 5.526510729095374, - "grad_norm": 0.003301802324131131, - "learning_rate": 0.00019998493817841628, - "loss": 46.0, - "step": 34317 - }, - { - "epoch": 5.526671766174162, - "grad_norm": 0.005590891465544701, - "learning_rate": 0.00019998493730036439, - "loss": 46.0, - "step": 34318 - }, - { - "epoch": 5.526832803252949, - "grad_norm": 0.005014342255890369, - "learning_rate": 0.0001999849364222869, - "loss": 46.0, - "step": 34319 - }, - { - "epoch": 5.526993840331737, - "grad_norm": 0.016980236396193504, - "learning_rate": 0.00019998493554418384, - "loss": 46.0, - "step": 34320 - }, - { - "epoch": 5.527154877410524, - "grad_norm": 0.004121170844882727, - "learning_rate": 0.0001999849346660552, - "loss": 46.0, - "step": 34321 - }, - { - "epoch": 5.527315914489311, - "grad_norm": 0.0013843679334968328, - "learning_rate": 0.00019998493378790095, - "loss": 46.0, - "step": 34322 - }, - { - "epoch": 5.527476951568099, - "grad_norm": 0.002124807331711054, - "learning_rate": 0.0001999849329097211, - "loss": 46.0, - "step": 34323 - }, - { - "epoch": 5.527637988646886, - "grad_norm": 0.014505774714052677, - "learning_rate": 0.0001999849320315157, - "loss": 46.0, - "step": 34324 - }, - { - "epoch": 5.527799025725673, - "grad_norm": 0.00169506692327559, - "learning_rate": 0.00019998493115328466, - "loss": 46.0, - "step": 34325 - }, - { - "epoch": 5.5279600628044605, - "grad_norm": 0.0019217573571950197, - "learning_rate": 0.00019998493027502805, - "loss": 46.0, - "step": 34326 - }, - { - "epoch": 5.528121099883248, - "grad_norm": 0.006825822405517101, - "learning_rate": 0.00019998492939674584, - "loss": 46.0, - "step": 34327 - }, - { - "epoch": 5.528282136962035, - "grad_norm": 0.006091851741075516, - "learning_rate": 0.00019998492851843803, - "loss": 46.0, - "step": 34328 - }, - { - "epoch": 5.528443174040823, - "grad_norm": 0.0034964075312018394, - "learning_rate": 0.00019998492764010468, - "loss": 46.0, - "step": 34329 - }, - { - "epoch": 5.52860421111961, - "grad_norm": 0.002492989879101515, - "learning_rate": 0.0001999849267617457, - "loss": 46.0, - "step": 34330 - }, - { - "epoch": 5.528765248198398, - "grad_norm": 0.0023658943828195333, - "learning_rate": 0.00019998492588336114, - "loss": 46.0, - "step": 34331 - }, - { - "epoch": 5.528926285277185, - "grad_norm": 0.001708652707748115, - "learning_rate": 0.00019998492500495097, - "loss": 46.0, - "step": 34332 - }, - { - "epoch": 5.529087322355973, - "grad_norm": 0.009595518000423908, - "learning_rate": 0.00019998492412651522, - "loss": 46.0, - "step": 34333 - }, - { - "epoch": 5.52924835943476, - "grad_norm": 0.009467966854572296, - "learning_rate": 0.00019998492324805388, - "loss": 46.0, - "step": 34334 - }, - { - "epoch": 5.529409396513548, - "grad_norm": 0.010365890339016914, - "learning_rate": 0.00019998492236956696, - "loss": 46.0, - "step": 34335 - }, - { - "epoch": 5.529570433592335, - "grad_norm": 0.002486846875399351, - "learning_rate": 0.00019998492149105444, - "loss": 46.0, - "step": 34336 - }, - { - "epoch": 5.529731470671122, - "grad_norm": 0.003119267988950014, - "learning_rate": 0.00019998492061251632, - "loss": 46.0, - "step": 34337 - }, - { - "epoch": 5.529892507749909, - "grad_norm": 0.021245073527097702, - "learning_rate": 0.00019998491973395263, - "loss": 46.0, - "step": 34338 - }, - { - "epoch": 5.5300535448286965, - "grad_norm": 0.001042731455527246, - "learning_rate": 0.00019998491885536335, - "loss": 46.0, - "step": 34339 - }, - { - "epoch": 5.530214581907484, - "grad_norm": 0.003292615059763193, - "learning_rate": 0.00019998491797674844, - "loss": 46.0, - "step": 34340 - }, - { - "epoch": 5.530375618986271, - "grad_norm": 0.017703954130411148, - "learning_rate": 0.000199984917098108, - "loss": 46.0, - "step": 34341 - }, - { - "epoch": 5.530536656065059, - "grad_norm": 0.006306701805442572, - "learning_rate": 0.0001999849162194419, - "loss": 46.0, - "step": 34342 - }, - { - "epoch": 5.530697693143846, - "grad_norm": 0.0068973214365541935, - "learning_rate": 0.00019998491534075025, - "loss": 46.0, - "step": 34343 - }, - { - "epoch": 5.530858730222634, - "grad_norm": 0.001817410229705274, - "learning_rate": 0.000199984914462033, - "loss": 46.0, - "step": 34344 - }, - { - "epoch": 5.531019767301421, - "grad_norm": 0.009167851880192757, - "learning_rate": 0.00019998491358329016, - "loss": 46.0, - "step": 34345 - }, - { - "epoch": 5.531180804380209, - "grad_norm": 0.005233730189502239, - "learning_rate": 0.00019998491270452174, - "loss": 46.0, - "step": 34346 - }, - { - "epoch": 5.531341841458996, - "grad_norm": 0.01128034945577383, - "learning_rate": 0.00019998491182572772, - "loss": 46.0, - "step": 34347 - }, - { - "epoch": 5.5315028785377836, - "grad_norm": 0.005587833467870951, - "learning_rate": 0.0001999849109469081, - "loss": 46.0, - "step": 34348 - }, - { - "epoch": 5.531663915616571, - "grad_norm": 0.00860118493437767, - "learning_rate": 0.0001999849100680629, - "loss": 46.0, - "step": 34349 - }, - { - "epoch": 5.5318249526953585, - "grad_norm": 0.018641334027051926, - "learning_rate": 0.00019998490918919212, - "loss": 46.0, - "step": 34350 - }, - { - "epoch": 5.531985989774146, - "grad_norm": 0.00677757803350687, - "learning_rate": 0.00019998490831029574, - "loss": 46.0, - "step": 34351 - }, - { - "epoch": 5.5321470268529325, - "grad_norm": 0.0046311113983392715, - "learning_rate": 0.00019998490743137372, - "loss": 46.0, - "step": 34352 - }, - { - "epoch": 5.53230806393172, - "grad_norm": 0.004090859089046717, - "learning_rate": 0.00019998490655242617, - "loss": 46.0, - "step": 34353 - }, - { - "epoch": 5.532469101010507, - "grad_norm": 0.008302649483084679, - "learning_rate": 0.000199984905673453, - "loss": 46.0, - "step": 34354 - }, - { - "epoch": 5.532630138089295, - "grad_norm": 0.012410673312842846, - "learning_rate": 0.00019998490479445426, - "loss": 46.0, - "step": 34355 - }, - { - "epoch": 5.532791175168082, - "grad_norm": 0.0005821784725412726, - "learning_rate": 0.00019998490391542992, - "loss": 46.0, - "step": 34356 - }, - { - "epoch": 5.53295221224687, - "grad_norm": 0.0010559582151472569, - "learning_rate": 0.00019998490303637997, - "loss": 46.0, - "step": 34357 - }, - { - "epoch": 5.533113249325657, - "grad_norm": 0.0035826722159981728, - "learning_rate": 0.00019998490215730446, - "loss": 46.0, - "step": 34358 - }, - { - "epoch": 5.533274286404445, - "grad_norm": 0.005177249666303396, - "learning_rate": 0.00019998490127820336, - "loss": 46.0, - "step": 34359 - }, - { - "epoch": 5.533435323483232, - "grad_norm": 0.0010994458571076393, - "learning_rate": 0.00019998490039907664, - "loss": 46.0, - "step": 34360 - }, - { - "epoch": 5.5335963605620195, - "grad_norm": 0.004649078939110041, - "learning_rate": 0.00019998489951992437, - "loss": 46.0, - "step": 34361 - }, - { - "epoch": 5.533757397640807, - "grad_norm": 0.0019365191692486405, - "learning_rate": 0.00019998489864074648, - "loss": 46.0, - "step": 34362 - }, - { - "epoch": 5.533918434719594, - "grad_norm": 0.0029365073423832655, - "learning_rate": 0.00019998489776154298, - "loss": 46.0, - "step": 34363 - }, - { - "epoch": 5.534079471798382, - "grad_norm": 0.004521762486547232, - "learning_rate": 0.00019998489688231392, - "loss": 46.0, - "step": 34364 - }, - { - "epoch": 5.534240508877169, - "grad_norm": 0.0012761177495121956, - "learning_rate": 0.00019998489600305927, - "loss": 46.0, - "step": 34365 - }, - { - "epoch": 5.534401545955957, - "grad_norm": 0.009701911360025406, - "learning_rate": 0.000199984895123779, - "loss": 46.0, - "step": 34366 - }, - { - "epoch": 5.534562583034743, - "grad_norm": 0.003976393491029739, - "learning_rate": 0.00019998489424447318, - "loss": 46.0, - "step": 34367 - }, - { - "epoch": 5.534723620113531, - "grad_norm": 0.01857377216219902, - "learning_rate": 0.0001999848933651417, - "loss": 46.0, - "step": 34368 - }, - { - "epoch": 5.534884657192318, - "grad_norm": 0.0061077275313436985, - "learning_rate": 0.0001999848924857847, - "loss": 46.0, - "step": 34369 - }, - { - "epoch": 5.535045694271106, - "grad_norm": 0.0031367880292236805, - "learning_rate": 0.00019998489160640207, - "loss": 46.0, - "step": 34370 - }, - { - "epoch": 5.535206731349893, - "grad_norm": 0.001604948891326785, - "learning_rate": 0.0001999848907269939, - "loss": 46.0, - "step": 34371 - }, - { - "epoch": 5.535367768428681, - "grad_norm": 0.007231251336634159, - "learning_rate": 0.0001999848898475601, - "loss": 46.0, - "step": 34372 - }, - { - "epoch": 5.535528805507468, - "grad_norm": 0.0014801690122112632, - "learning_rate": 0.0001999848889681007, - "loss": 46.0, - "step": 34373 - }, - { - "epoch": 5.5356898425862555, - "grad_norm": 0.0026634891983121634, - "learning_rate": 0.00019998488808861572, - "loss": 46.0, - "step": 34374 - }, - { - "epoch": 5.535850879665043, - "grad_norm": 0.0036713636945933104, - "learning_rate": 0.00019998488720910515, - "loss": 46.0, - "step": 34375 - }, - { - "epoch": 5.53601191674383, - "grad_norm": 0.01738077960908413, - "learning_rate": 0.000199984886329569, - "loss": 46.0, - "step": 34376 - }, - { - "epoch": 5.536172953822618, - "grad_norm": 0.0049221874214708805, - "learning_rate": 0.00019998488545000724, - "loss": 46.0, - "step": 34377 - }, - { - "epoch": 5.536333990901405, - "grad_norm": 0.006742923520505428, - "learning_rate": 0.0001999848845704199, - "loss": 46.0, - "step": 34378 - }, - { - "epoch": 5.536495027980193, - "grad_norm": 0.005495220422744751, - "learning_rate": 0.00019998488369080697, - "loss": 46.0, - "step": 34379 - }, - { - "epoch": 5.536656065058979, - "grad_norm": 0.009002111852169037, - "learning_rate": 0.00019998488281116843, - "loss": 46.0, - "step": 34380 - }, - { - "epoch": 5.536817102137768, - "grad_norm": 0.003474907483905554, - "learning_rate": 0.00019998488193150434, - "loss": 46.0, - "step": 34381 - }, - { - "epoch": 5.536978139216554, - "grad_norm": 0.006822897586971521, - "learning_rate": 0.00019998488105181462, - "loss": 46.0, - "step": 34382 - }, - { - "epoch": 5.537139176295342, - "grad_norm": 0.0038658790290355682, - "learning_rate": 0.00019998488017209932, - "loss": 46.0, - "step": 34383 - }, - { - "epoch": 5.537300213374129, - "grad_norm": 0.0008098231046460569, - "learning_rate": 0.00019998487929235844, - "loss": 46.0, - "step": 34384 - }, - { - "epoch": 5.537461250452917, - "grad_norm": 0.004747644532471895, - "learning_rate": 0.00019998487841259193, - "loss": 46.0, - "step": 34385 - }, - { - "epoch": 5.537622287531704, - "grad_norm": 0.004653476178646088, - "learning_rate": 0.00019998487753279987, - "loss": 46.0, - "step": 34386 - }, - { - "epoch": 5.5377833246104915, - "grad_norm": 0.002107733627781272, - "learning_rate": 0.00019998487665298222, - "loss": 46.0, - "step": 34387 - }, - { - "epoch": 5.537944361689279, - "grad_norm": 0.0020892168395221233, - "learning_rate": 0.000199984875773139, - "loss": 46.0, - "step": 34388 - }, - { - "epoch": 5.538105398768066, - "grad_norm": 0.01438012532889843, - "learning_rate": 0.00019998487489327014, - "loss": 46.0, - "step": 34389 - }, - { - "epoch": 5.538266435846854, - "grad_norm": 0.002079607453197241, - "learning_rate": 0.0001999848740133757, - "loss": 46.0, - "step": 34390 - }, - { - "epoch": 5.538427472925641, - "grad_norm": 0.002929080743342638, - "learning_rate": 0.00019998487313345567, - "loss": 46.0, - "step": 34391 - }, - { - "epoch": 5.538588510004429, - "grad_norm": 0.0011994106462225318, - "learning_rate": 0.00019998487225351004, - "loss": 46.0, - "step": 34392 - }, - { - "epoch": 5.538749547083216, - "grad_norm": 0.003827650099992752, - "learning_rate": 0.00019998487137353884, - "loss": 46.0, - "step": 34393 - }, - { - "epoch": 5.538910584162004, - "grad_norm": 0.002340164501219988, - "learning_rate": 0.00019998487049354205, - "loss": 46.0, - "step": 34394 - }, - { - "epoch": 5.53907162124079, - "grad_norm": 0.0025572828017175198, - "learning_rate": 0.00019998486961351965, - "loss": 46.0, - "step": 34395 - }, - { - "epoch": 5.539232658319579, - "grad_norm": 0.023076998069882393, - "learning_rate": 0.0001999848687334717, - "loss": 46.0, - "step": 34396 - }, - { - "epoch": 5.539393695398365, - "grad_norm": 0.005896314978599548, - "learning_rate": 0.0001999848678533981, - "loss": 46.0, - "step": 34397 - }, - { - "epoch": 5.539554732477153, - "grad_norm": 0.0021342081017792225, - "learning_rate": 0.00019998486697329893, - "loss": 46.0, - "step": 34398 - }, - { - "epoch": 5.53971576955594, - "grad_norm": 0.0015544709749519825, - "learning_rate": 0.0001999848660931742, - "loss": 46.0, - "step": 34399 - }, - { - "epoch": 5.5398768066347275, - "grad_norm": 0.011797095648944378, - "learning_rate": 0.00019998486521302384, - "loss": 46.0, - "step": 34400 - }, - { - "epoch": 5.540037843713515, - "grad_norm": 0.001448853756301105, - "learning_rate": 0.00019998486433284792, - "loss": 46.0, - "step": 34401 - }, - { - "epoch": 5.540198880792302, - "grad_norm": 0.003398359287530184, - "learning_rate": 0.00019998486345264638, - "loss": 46.0, - "step": 34402 - }, - { - "epoch": 5.54035991787109, - "grad_norm": 0.014259311370551586, - "learning_rate": 0.00019998486257241925, - "loss": 46.0, - "step": 34403 - }, - { - "epoch": 5.540520954949877, - "grad_norm": 0.0010322092566639185, - "learning_rate": 0.00019998486169216654, - "loss": 46.0, - "step": 34404 - }, - { - "epoch": 5.540681992028665, - "grad_norm": 0.010638273321092129, - "learning_rate": 0.00019998486081188824, - "loss": 46.0, - "step": 34405 - }, - { - "epoch": 5.540843029107452, - "grad_norm": 0.006411506328731775, - "learning_rate": 0.00019998485993158436, - "loss": 46.0, - "step": 34406 - }, - { - "epoch": 5.54100406618624, - "grad_norm": 0.003926464822143316, - "learning_rate": 0.00019998485905125488, - "loss": 46.0, - "step": 34407 - }, - { - "epoch": 5.541165103265027, - "grad_norm": 0.005655432585626841, - "learning_rate": 0.0001999848581708998, - "loss": 46.0, - "step": 34408 - }, - { - "epoch": 5.5413261403438145, - "grad_norm": 0.004261812195181847, - "learning_rate": 0.00019998485729051915, - "loss": 46.0, - "step": 34409 - }, - { - "epoch": 5.541487177422601, - "grad_norm": 0.009570648893713951, - "learning_rate": 0.00019998485641011289, - "loss": 46.0, - "step": 34410 - }, - { - "epoch": 5.541648214501389, - "grad_norm": 0.004239460453391075, - "learning_rate": 0.00019998485552968104, - "loss": 46.0, - "step": 34411 - }, - { - "epoch": 5.541809251580176, - "grad_norm": 0.006265928503125906, - "learning_rate": 0.0001999848546492236, - "loss": 46.0, - "step": 34412 - }, - { - "epoch": 5.5419702886589635, - "grad_norm": 0.008009640499949455, - "learning_rate": 0.00019998485376874058, - "loss": 46.0, - "step": 34413 - }, - { - "epoch": 5.542131325737751, - "grad_norm": 0.00547971623018384, - "learning_rate": 0.00019998485288823194, - "loss": 46.0, - "step": 34414 - }, - { - "epoch": 5.542292362816538, - "grad_norm": 0.005356329958885908, - "learning_rate": 0.00019998485200769777, - "loss": 46.0, - "step": 34415 - }, - { - "epoch": 5.542453399895326, - "grad_norm": 0.004759754985570908, - "learning_rate": 0.00019998485112713795, - "loss": 46.0, - "step": 34416 - }, - { - "epoch": 5.542614436974113, - "grad_norm": 0.0008380708168260753, - "learning_rate": 0.00019998485024655255, - "loss": 46.0, - "step": 34417 - }, - { - "epoch": 5.542775474052901, - "grad_norm": 0.0019018686143681407, - "learning_rate": 0.00019998484936594157, - "loss": 46.0, - "step": 34418 - }, - { - "epoch": 5.542936511131688, - "grad_norm": 0.002970636123791337, - "learning_rate": 0.000199984848485305, - "loss": 46.0, - "step": 34419 - }, - { - "epoch": 5.543097548210476, - "grad_norm": 0.0031251846812665462, - "learning_rate": 0.00019998484760464283, - "loss": 46.0, - "step": 34420 - }, - { - "epoch": 5.543258585289263, - "grad_norm": 0.006115109194070101, - "learning_rate": 0.00019998484672395508, - "loss": 46.0, - "step": 34421 - }, - { - "epoch": 5.5434196223680505, - "grad_norm": 0.006192250642925501, - "learning_rate": 0.00019998484584324172, - "loss": 46.0, - "step": 34422 - }, - { - "epoch": 5.543580659446838, - "grad_norm": 0.006695727817714214, - "learning_rate": 0.0001999848449625028, - "loss": 46.0, - "step": 34423 - }, - { - "epoch": 5.543741696525625, - "grad_norm": 0.0019366793567314744, - "learning_rate": 0.00019998484408173826, - "loss": 46.0, - "step": 34424 - }, - { - "epoch": 5.543902733604412, - "grad_norm": 0.01719304360449314, - "learning_rate": 0.00019998484320094814, - "loss": 46.0, - "step": 34425 - }, - { - "epoch": 5.544063770683199, - "grad_norm": 0.010537405498325825, - "learning_rate": 0.00019998484232013243, - "loss": 46.0, - "step": 34426 - }, - { - "epoch": 5.544224807761987, - "grad_norm": 0.0034111528657376766, - "learning_rate": 0.00019998484143929113, - "loss": 46.0, - "step": 34427 - }, - { - "epoch": 5.544385844840774, - "grad_norm": 0.00249963509850204, - "learning_rate": 0.00019998484055842424, - "loss": 46.0, - "step": 34428 - }, - { - "epoch": 5.544546881919562, - "grad_norm": 0.005336133763194084, - "learning_rate": 0.00019998483967753177, - "loss": 46.0, - "step": 34429 - }, - { - "epoch": 5.544707918998349, - "grad_norm": 0.002840744098648429, - "learning_rate": 0.00019998483879661368, - "loss": 46.0, - "step": 34430 - }, - { - "epoch": 5.544868956077137, - "grad_norm": 0.0067572928965091705, - "learning_rate": 0.00019998483791567, - "loss": 46.0, - "step": 34431 - }, - { - "epoch": 5.545029993155924, - "grad_norm": 0.004984838422387838, - "learning_rate": 0.00019998483703470078, - "loss": 46.0, - "step": 34432 - }, - { - "epoch": 5.545191030234712, - "grad_norm": 0.005045051220804453, - "learning_rate": 0.0001999848361537059, - "loss": 46.0, - "step": 34433 - }, - { - "epoch": 5.545352067313499, - "grad_norm": 0.010295151732861996, - "learning_rate": 0.00019998483527268546, - "loss": 46.0, - "step": 34434 - }, - { - "epoch": 5.5455131043922865, - "grad_norm": 0.0077599212527275085, - "learning_rate": 0.00019998483439163947, - "loss": 46.0, - "step": 34435 - }, - { - "epoch": 5.545674141471074, - "grad_norm": 0.004394874442368746, - "learning_rate": 0.00019998483351056783, - "loss": 46.0, - "step": 34436 - }, - { - "epoch": 5.545835178549861, - "grad_norm": 0.003599482821300626, - "learning_rate": 0.0001999848326294706, - "loss": 46.0, - "step": 34437 - }, - { - "epoch": 5.545996215628649, - "grad_norm": 0.0029495202470570803, - "learning_rate": 0.00019998483174834782, - "loss": 46.0, - "step": 34438 - }, - { - "epoch": 5.546157252707436, - "grad_norm": 0.005832471419125795, - "learning_rate": 0.00019998483086719942, - "loss": 46.0, - "step": 34439 - }, - { - "epoch": 5.546318289786223, - "grad_norm": 0.0029884595423936844, - "learning_rate": 0.00019998482998602544, - "loss": 46.0, - "step": 34440 - }, - { - "epoch": 5.54647932686501, - "grad_norm": 0.001863309764303267, - "learning_rate": 0.00019998482910482587, - "loss": 46.0, - "step": 34441 - }, - { - "epoch": 5.546640363943798, - "grad_norm": 0.0016161770327016711, - "learning_rate": 0.0001999848282236007, - "loss": 46.0, - "step": 34442 - }, - { - "epoch": 5.546801401022585, - "grad_norm": 0.006914549972862005, - "learning_rate": 0.00019998482734234993, - "loss": 46.0, - "step": 34443 - }, - { - "epoch": 5.546962438101373, - "grad_norm": 0.004871016833931208, - "learning_rate": 0.0001999848264610736, - "loss": 46.0, - "step": 34444 - }, - { - "epoch": 5.54712347518016, - "grad_norm": 0.010751190595328808, - "learning_rate": 0.00019998482557977165, - "loss": 46.0, - "step": 34445 - }, - { - "epoch": 5.547284512258948, - "grad_norm": 0.011315125040709972, - "learning_rate": 0.0001999848246984441, - "loss": 46.0, - "step": 34446 - }, - { - "epoch": 5.547445549337735, - "grad_norm": 0.0029595200903713703, - "learning_rate": 0.00019998482381709102, - "loss": 46.0, - "step": 34447 - }, - { - "epoch": 5.5476065864165225, - "grad_norm": 0.007874802686274052, - "learning_rate": 0.00019998482293571228, - "loss": 46.0, - "step": 34448 - }, - { - "epoch": 5.54776762349531, - "grad_norm": 0.007983098737895489, - "learning_rate": 0.000199984822054308, - "loss": 46.0, - "step": 34449 - }, - { - "epoch": 5.547928660574097, - "grad_norm": 0.003110006684437394, - "learning_rate": 0.0001999848211728781, - "loss": 46.0, - "step": 34450 - }, - { - "epoch": 5.548089697652885, - "grad_norm": 0.0019746567122638226, - "learning_rate": 0.0001999848202914226, - "loss": 46.0, - "step": 34451 - }, - { - "epoch": 5.548250734731672, - "grad_norm": 0.005718163680285215, - "learning_rate": 0.00019998481940994154, - "loss": 46.0, - "step": 34452 - }, - { - "epoch": 5.548411771810459, - "grad_norm": 0.004172750283032656, - "learning_rate": 0.00019998481852843484, - "loss": 46.0, - "step": 34453 - }, - { - "epoch": 5.548572808889247, - "grad_norm": 0.007584495469927788, - "learning_rate": 0.0001999848176469026, - "loss": 46.0, - "step": 34454 - }, - { - "epoch": 5.548733845968034, - "grad_norm": 0.005572207272052765, - "learning_rate": 0.00019998481676534476, - "loss": 46.0, - "step": 34455 - }, - { - "epoch": 5.548894883046821, - "grad_norm": 0.0016179116209968925, - "learning_rate": 0.0001999848158837613, - "loss": 46.0, - "step": 34456 - }, - { - "epoch": 5.549055920125609, - "grad_norm": 0.0063423095270991325, - "learning_rate": 0.0001999848150021523, - "loss": 46.0, - "step": 34457 - }, - { - "epoch": 5.549216957204396, - "grad_norm": 0.006661117076873779, - "learning_rate": 0.00019998481412051767, - "loss": 46.0, - "step": 34458 - }, - { - "epoch": 5.549377994283184, - "grad_norm": 0.00895021017640829, - "learning_rate": 0.00019998481323885744, - "loss": 46.0, - "step": 34459 - }, - { - "epoch": 5.549539031361971, - "grad_norm": 0.01310550607740879, - "learning_rate": 0.00019998481235717166, - "loss": 46.0, - "step": 34460 - }, - { - "epoch": 5.5497000684407585, - "grad_norm": 0.015178152360022068, - "learning_rate": 0.00019998481147546023, - "loss": 46.0, - "step": 34461 - }, - { - "epoch": 5.549861105519546, - "grad_norm": 0.0159327182918787, - "learning_rate": 0.00019998481059372325, - "loss": 46.0, - "step": 34462 - }, - { - "epoch": 5.550022142598333, - "grad_norm": 0.0018634002190083265, - "learning_rate": 0.00019998480971196068, - "loss": 46.0, - "step": 34463 - }, - { - "epoch": 5.550183179677121, - "grad_norm": 0.003102337010204792, - "learning_rate": 0.00019998480883017252, - "loss": 46.0, - "step": 34464 - }, - { - "epoch": 5.550344216755908, - "grad_norm": 0.004125271923840046, - "learning_rate": 0.00019998480794835877, - "loss": 46.0, - "step": 34465 - }, - { - "epoch": 5.550505253834696, - "grad_norm": 0.006804870907217264, - "learning_rate": 0.0001999848070665194, - "loss": 46.0, - "step": 34466 - }, - { - "epoch": 5.550666290913483, - "grad_norm": 0.005887329578399658, - "learning_rate": 0.00019998480618465446, - "loss": 46.0, - "step": 34467 - }, - { - "epoch": 5.55082732799227, - "grad_norm": 0.004895301535725594, - "learning_rate": 0.00019998480530276393, - "loss": 46.0, - "step": 34468 - }, - { - "epoch": 5.550988365071058, - "grad_norm": 0.006397101562470198, - "learning_rate": 0.0001999848044208478, - "loss": 46.0, - "step": 34469 - }, - { - "epoch": 5.551149402149845, - "grad_norm": 0.004611080978065729, - "learning_rate": 0.00019998480353890612, - "loss": 46.0, - "step": 34470 - }, - { - "epoch": 5.551310439228632, - "grad_norm": 0.012511455453932285, - "learning_rate": 0.0001999848026569388, - "loss": 46.0, - "step": 34471 - }, - { - "epoch": 5.5514714763074196, - "grad_norm": 0.00209240335971117, - "learning_rate": 0.0001999848017749459, - "loss": 46.0, - "step": 34472 - }, - { - "epoch": 5.551632513386207, - "grad_norm": 0.004890891723334789, - "learning_rate": 0.00019998480089292742, - "loss": 46.0, - "step": 34473 - }, - { - "epoch": 5.5517935504649945, - "grad_norm": 0.012718337588012218, - "learning_rate": 0.00019998480001088333, - "loss": 46.0, - "step": 34474 - }, - { - "epoch": 5.551954587543782, - "grad_norm": 0.004170960746705532, - "learning_rate": 0.00019998479912881366, - "loss": 46.0, - "step": 34475 - }, - { - "epoch": 5.552115624622569, - "grad_norm": 0.001989784650504589, - "learning_rate": 0.0001999847982467184, - "loss": 46.0, - "step": 34476 - }, - { - "epoch": 5.552276661701357, - "grad_norm": 0.004002275876700878, - "learning_rate": 0.00019998479736459755, - "loss": 46.0, - "step": 34477 - }, - { - "epoch": 5.552437698780144, - "grad_norm": 0.004416029900312424, - "learning_rate": 0.00019998479648245112, - "loss": 46.0, - "step": 34478 - }, - { - "epoch": 5.552598735858932, - "grad_norm": 0.005273669492453337, - "learning_rate": 0.00019998479560027907, - "loss": 46.0, - "step": 34479 - }, - { - "epoch": 5.552759772937719, - "grad_norm": 0.0024761478416621685, - "learning_rate": 0.00019998479471808146, - "loss": 46.0, - "step": 34480 - }, - { - "epoch": 5.552920810016507, - "grad_norm": 0.004912256728857756, - "learning_rate": 0.00019998479383585824, - "loss": 46.0, - "step": 34481 - }, - { - "epoch": 5.553081847095294, - "grad_norm": 0.012811501510441303, - "learning_rate": 0.00019998479295360943, - "loss": 46.0, - "step": 34482 - }, - { - "epoch": 5.553242884174081, - "grad_norm": 0.0008873852202668786, - "learning_rate": 0.00019998479207133504, - "loss": 46.0, - "step": 34483 - }, - { - "epoch": 5.553403921252869, - "grad_norm": 0.006089046597480774, - "learning_rate": 0.00019998479118903505, - "loss": 46.0, - "step": 34484 - }, - { - "epoch": 5.5535649583316555, - "grad_norm": 0.013763438910245895, - "learning_rate": 0.00019998479030670948, - "loss": 46.0, - "step": 34485 - }, - { - "epoch": 5.553725995410443, - "grad_norm": 0.0037245890125632286, - "learning_rate": 0.00019998478942435832, - "loss": 46.0, - "step": 34486 - }, - { - "epoch": 5.55388703248923, - "grad_norm": 0.0022490578703582287, - "learning_rate": 0.00019998478854198155, - "loss": 46.0, - "step": 34487 - }, - { - "epoch": 5.554048069568018, - "grad_norm": 0.0031325221061706543, - "learning_rate": 0.0001999847876595792, - "loss": 46.0, - "step": 34488 - }, - { - "epoch": 5.554209106646805, - "grad_norm": 0.001481261570006609, - "learning_rate": 0.00019998478677715124, - "loss": 46.0, - "step": 34489 - }, - { - "epoch": 5.554370143725593, - "grad_norm": 0.0010920444037765265, - "learning_rate": 0.0001999847858946977, - "loss": 46.0, - "step": 34490 - }, - { - "epoch": 5.55453118080438, - "grad_norm": 0.004955823067575693, - "learning_rate": 0.0001999847850122186, - "loss": 46.0, - "step": 34491 - }, - { - "epoch": 5.554692217883168, - "grad_norm": 0.003760437248274684, - "learning_rate": 0.00019998478412971385, - "loss": 46.0, - "step": 34492 - }, - { - "epoch": 5.554853254961955, - "grad_norm": 0.001445003435947001, - "learning_rate": 0.00019998478324718358, - "loss": 46.0, - "step": 34493 - }, - { - "epoch": 5.555014292040743, - "grad_norm": 0.002647166606038809, - "learning_rate": 0.00019998478236462767, - "loss": 46.0, - "step": 34494 - }, - { - "epoch": 5.55517532911953, - "grad_norm": 0.0009021577425301075, - "learning_rate": 0.00019998478148204618, - "loss": 46.0, - "step": 34495 - }, - { - "epoch": 5.5553363661983175, - "grad_norm": 0.006988700479269028, - "learning_rate": 0.00019998478059943912, - "loss": 46.0, - "step": 34496 - }, - { - "epoch": 5.555497403277105, - "grad_norm": 0.0035412353463470936, - "learning_rate": 0.00019998477971680645, - "loss": 46.0, - "step": 34497 - }, - { - "epoch": 5.5556584403558915, - "grad_norm": 0.005361685995012522, - "learning_rate": 0.0001999847788341482, - "loss": 46.0, - "step": 34498 - }, - { - "epoch": 5.555819477434679, - "grad_norm": 0.005229577422142029, - "learning_rate": 0.00019998477795146432, - "loss": 46.0, - "step": 34499 - }, - { - "epoch": 5.555980514513466, - "grad_norm": 0.002730033593252301, - "learning_rate": 0.00019998477706875488, - "loss": 46.0, - "step": 34500 - }, - { - "epoch": 5.556141551592254, - "grad_norm": 0.00528239319100976, - "learning_rate": 0.00019998477618601986, - "loss": 46.0, - "step": 34501 - }, - { - "epoch": 5.556302588671041, - "grad_norm": 0.00353110465221107, - "learning_rate": 0.00019998477530325923, - "loss": 46.0, - "step": 34502 - }, - { - "epoch": 5.556463625749829, - "grad_norm": 0.007440636400133371, - "learning_rate": 0.000199984774420473, - "loss": 46.0, - "step": 34503 - }, - { - "epoch": 5.556624662828616, - "grad_norm": 0.024559518322348595, - "learning_rate": 0.0001999847735376612, - "loss": 46.0, - "step": 34504 - }, - { - "epoch": 5.556785699907404, - "grad_norm": 0.006311963312327862, - "learning_rate": 0.0001999847726548238, - "loss": 46.0, - "step": 34505 - }, - { - "epoch": 5.556946736986191, - "grad_norm": 0.001077562803402543, - "learning_rate": 0.00019998477177196082, - "loss": 46.0, - "step": 34506 - }, - { - "epoch": 5.557107774064979, - "grad_norm": 0.002573074772953987, - "learning_rate": 0.00019998477088907222, - "loss": 46.0, - "step": 34507 - }, - { - "epoch": 5.557268811143766, - "grad_norm": 0.015399484895169735, - "learning_rate": 0.00019998477000615807, - "loss": 46.0, - "step": 34508 - }, - { - "epoch": 5.5574298482225535, - "grad_norm": 0.0031322527211159468, - "learning_rate": 0.0001999847691232183, - "loss": 46.0, - "step": 34509 - }, - { - "epoch": 5.557590885301341, - "grad_norm": 0.0032818394247442484, - "learning_rate": 0.00019998476824025296, - "loss": 46.0, - "step": 34510 - }, - { - "epoch": 5.557751922380128, - "grad_norm": 0.0019908598624169827, - "learning_rate": 0.000199984767357262, - "loss": 46.0, - "step": 34511 - }, - { - "epoch": 5.557912959458916, - "grad_norm": 0.006744795944541693, - "learning_rate": 0.00019998476647424546, - "loss": 46.0, - "step": 34512 - }, - { - "epoch": 5.558073996537702, - "grad_norm": 0.00100703164935112, - "learning_rate": 0.00019998476559120336, - "loss": 46.0, - "step": 34513 - }, - { - "epoch": 5.55823503361649, - "grad_norm": 0.01440840121358633, - "learning_rate": 0.00019998476470813563, - "loss": 46.0, - "step": 34514 - }, - { - "epoch": 5.558396070695277, - "grad_norm": 0.008395075798034668, - "learning_rate": 0.0001999847638250423, - "loss": 46.0, - "step": 34515 - }, - { - "epoch": 5.558557107774065, - "grad_norm": 0.001035574940033257, - "learning_rate": 0.0001999847629419234, - "loss": 46.0, - "step": 34516 - }, - { - "epoch": 5.558718144852852, - "grad_norm": 0.004762306343764067, - "learning_rate": 0.00019998476205877893, - "loss": 46.0, - "step": 34517 - }, - { - "epoch": 5.55887918193164, - "grad_norm": 0.011833344586193562, - "learning_rate": 0.00019998476117560885, - "loss": 46.0, - "step": 34518 - }, - { - "epoch": 5.559040219010427, - "grad_norm": 0.010235494002699852, - "learning_rate": 0.00019998476029241318, - "loss": 46.0, - "step": 34519 - }, - { - "epoch": 5.559201256089215, - "grad_norm": 0.006829431280493736, - "learning_rate": 0.00019998475940919192, - "loss": 46.0, - "step": 34520 - }, - { - "epoch": 5.559362293168002, - "grad_norm": 0.0015620003687217832, - "learning_rate": 0.00019998475852594507, - "loss": 46.0, - "step": 34521 - }, - { - "epoch": 5.5595233302467895, - "grad_norm": 0.0023274957202374935, - "learning_rate": 0.00019998475764267261, - "loss": 46.0, - "step": 34522 - }, - { - "epoch": 5.559684367325577, - "grad_norm": 0.004059371072798967, - "learning_rate": 0.00019998475675937457, - "loss": 46.0, - "step": 34523 - }, - { - "epoch": 5.559845404404364, - "grad_norm": 0.001856443239375949, - "learning_rate": 0.00019998475587605093, - "loss": 46.0, - "step": 34524 - }, - { - "epoch": 5.560006441483152, - "grad_norm": 0.003952109254896641, - "learning_rate": 0.00019998475499270174, - "loss": 46.0, - "step": 34525 - }, - { - "epoch": 5.560167478561939, - "grad_norm": 0.001939662266522646, - "learning_rate": 0.00019998475410932693, - "loss": 46.0, - "step": 34526 - }, - { - "epoch": 5.560328515640727, - "grad_norm": 0.009591889567673206, - "learning_rate": 0.00019998475322592654, - "loss": 46.0, - "step": 34527 - }, - { - "epoch": 5.560489552719513, - "grad_norm": 0.013927983120083809, - "learning_rate": 0.00019998475234250056, - "loss": 46.0, - "step": 34528 - }, - { - "epoch": 5.560650589798301, - "grad_norm": 0.008656861260533333, - "learning_rate": 0.00019998475145904896, - "loss": 46.0, - "step": 34529 - }, - { - "epoch": 5.560811626877088, - "grad_norm": 0.0023376161698251963, - "learning_rate": 0.00019998475057557178, - "loss": 46.0, - "step": 34530 - }, - { - "epoch": 5.560972663955876, - "grad_norm": 0.004913882352411747, - "learning_rate": 0.00019998474969206903, - "loss": 46.0, - "step": 34531 - }, - { - "epoch": 5.561133701034663, - "grad_norm": 0.002141357399523258, - "learning_rate": 0.00019998474880854067, - "loss": 46.0, - "step": 34532 - }, - { - "epoch": 5.5612947381134505, - "grad_norm": 0.016392672434449196, - "learning_rate": 0.00019998474792498673, - "loss": 46.0, - "step": 34533 - }, - { - "epoch": 5.561455775192238, - "grad_norm": 0.0009976379806175828, - "learning_rate": 0.0001999847470414072, - "loss": 46.0, - "step": 34534 - }, - { - "epoch": 5.5616168122710254, - "grad_norm": 0.0015318581135943532, - "learning_rate": 0.00019998474615780208, - "loss": 46.0, - "step": 34535 - }, - { - "epoch": 5.561777849349813, - "grad_norm": 0.0175799410790205, - "learning_rate": 0.00019998474527417134, - "loss": 46.0, - "step": 34536 - }, - { - "epoch": 5.5619388864286, - "grad_norm": 0.004948609042912722, - "learning_rate": 0.00019998474439051505, - "loss": 46.0, - "step": 34537 - }, - { - "epoch": 5.562099923507388, - "grad_norm": 0.007577365264296532, - "learning_rate": 0.00019998474350683314, - "loss": 46.0, - "step": 34538 - }, - { - "epoch": 5.562260960586175, - "grad_norm": 0.017973581328988075, - "learning_rate": 0.00019998474262312565, - "loss": 46.0, - "step": 34539 - }, - { - "epoch": 5.562421997664963, - "grad_norm": 0.0057105026207864285, - "learning_rate": 0.00019998474173939256, - "loss": 46.0, - "step": 34540 - }, - { - "epoch": 5.562583034743749, - "grad_norm": 0.008196820504963398, - "learning_rate": 0.0001999847408556339, - "loss": 46.0, - "step": 34541 - }, - { - "epoch": 5.562744071822538, - "grad_norm": 0.0031462498009204865, - "learning_rate": 0.00019998473997184964, - "loss": 46.0, - "step": 34542 - }, - { - "epoch": 5.562905108901324, - "grad_norm": 0.005572670139372349, - "learning_rate": 0.0001999847390880398, - "loss": 46.0, - "step": 34543 - }, - { - "epoch": 5.563066145980112, - "grad_norm": 0.0031304722651839256, - "learning_rate": 0.00019998473820420434, - "loss": 46.0, - "step": 34544 - }, - { - "epoch": 5.563227183058899, - "grad_norm": 0.010518571361899376, - "learning_rate": 0.00019998473732034332, - "loss": 46.0, - "step": 34545 - }, - { - "epoch": 5.5633882201376865, - "grad_norm": 0.01010617520660162, - "learning_rate": 0.00019998473643645666, - "loss": 46.0, - "step": 34546 - }, - { - "epoch": 5.563549257216474, - "grad_norm": 0.008721606805920601, - "learning_rate": 0.00019998473555254444, - "loss": 46.0, - "step": 34547 - }, - { - "epoch": 5.563710294295261, - "grad_norm": 0.01649344526231289, - "learning_rate": 0.00019998473466860663, - "loss": 46.0, - "step": 34548 - }, - { - "epoch": 5.563871331374049, - "grad_norm": 0.0019041125196963549, - "learning_rate": 0.00019998473378464324, - "loss": 46.0, - "step": 34549 - }, - { - "epoch": 5.564032368452836, - "grad_norm": 0.008882109075784683, - "learning_rate": 0.00019998473290065426, - "loss": 46.0, - "step": 34550 - }, - { - "epoch": 5.564193405531624, - "grad_norm": 0.004673162009567022, - "learning_rate": 0.00019998473201663966, - "loss": 46.0, - "step": 34551 - }, - { - "epoch": 5.564354442610411, - "grad_norm": 0.008114958181977272, - "learning_rate": 0.00019998473113259948, - "loss": 46.0, - "step": 34552 - }, - { - "epoch": 5.564515479689199, - "grad_norm": 0.01687239110469818, - "learning_rate": 0.00019998473024853374, - "loss": 46.0, - "step": 34553 - }, - { - "epoch": 5.564676516767986, - "grad_norm": 0.0021339314989745617, - "learning_rate": 0.00019998472936444238, - "loss": 46.0, - "step": 34554 - }, - { - "epoch": 5.564837553846774, - "grad_norm": 0.0014605579199269414, - "learning_rate": 0.00019998472848032543, - "loss": 46.0, - "step": 34555 - }, - { - "epoch": 5.56499859092556, - "grad_norm": 0.003656551940366626, - "learning_rate": 0.0001999847275961829, - "loss": 46.0, - "step": 34556 - }, - { - "epoch": 5.5651596280043485, - "grad_norm": 0.008041848428547382, - "learning_rate": 0.00019998472671201476, - "loss": 46.0, - "step": 34557 - }, - { - "epoch": 5.565320665083135, - "grad_norm": 0.001565627520903945, - "learning_rate": 0.00019998472582782105, - "loss": 46.0, - "step": 34558 - }, - { - "epoch": 5.5654817021619225, - "grad_norm": 0.00947917066514492, - "learning_rate": 0.00019998472494360173, - "loss": 46.0, - "step": 34559 - }, - { - "epoch": 5.56564273924071, - "grad_norm": 0.010613704100251198, - "learning_rate": 0.00019998472405935683, - "loss": 46.0, - "step": 34560 - }, - { - "epoch": 5.565803776319497, - "grad_norm": 0.007723473943769932, - "learning_rate": 0.00019998472317508633, - "loss": 46.0, - "step": 34561 - }, - { - "epoch": 5.565964813398285, - "grad_norm": 0.0053221858106553555, - "learning_rate": 0.00019998472229079025, - "loss": 46.0, - "step": 34562 - }, - { - "epoch": 5.566125850477072, - "grad_norm": 0.0020912366453558207, - "learning_rate": 0.00019998472140646858, - "loss": 46.0, - "step": 34563 - }, - { - "epoch": 5.56628688755586, - "grad_norm": 0.013028525747358799, - "learning_rate": 0.00019998472052212133, - "loss": 46.0, - "step": 34564 - }, - { - "epoch": 5.566447924634647, - "grad_norm": 0.0028623254038393497, - "learning_rate": 0.00019998471963774845, - "loss": 46.0, - "step": 34565 - }, - { - "epoch": 5.566608961713435, - "grad_norm": 0.008594634011387825, - "learning_rate": 0.00019998471875335, - "loss": 46.0, - "step": 34566 - }, - { - "epoch": 5.566769998792222, - "grad_norm": 0.0036207567900419235, - "learning_rate": 0.00019998471786892598, - "loss": 46.0, - "step": 34567 - }, - { - "epoch": 5.56693103587101, - "grad_norm": 0.015687158331274986, - "learning_rate": 0.00019998471698447635, - "loss": 46.0, - "step": 34568 - }, - { - "epoch": 5.567092072949797, - "grad_norm": 0.002217196160927415, - "learning_rate": 0.0001999847161000011, - "loss": 46.0, - "step": 34569 - }, - { - "epoch": 5.5672531100285845, - "grad_norm": 0.005931479390710592, - "learning_rate": 0.00019998471521550032, - "loss": 46.0, - "step": 34570 - }, - { - "epoch": 5.567414147107371, - "grad_norm": 0.003628320759162307, - "learning_rate": 0.00019998471433097393, - "loss": 46.0, - "step": 34571 - }, - { - "epoch": 5.5675751841861585, - "grad_norm": 0.00676200445741415, - "learning_rate": 0.00019998471344642192, - "loss": 46.0, - "step": 34572 - }, - { - "epoch": 5.567736221264946, - "grad_norm": 0.001319525996223092, - "learning_rate": 0.00019998471256184433, - "loss": 46.0, - "step": 34573 - }, - { - "epoch": 5.567897258343733, - "grad_norm": 0.0035766279324889183, - "learning_rate": 0.00019998471167724117, - "loss": 46.0, - "step": 34574 - }, - { - "epoch": 5.568058295422521, - "grad_norm": 0.004922098014503717, - "learning_rate": 0.0001999847107926124, - "loss": 46.0, - "step": 34575 - }, - { - "epoch": 5.568219332501308, - "grad_norm": 0.011862557381391525, - "learning_rate": 0.00019998470990795805, - "loss": 46.0, - "step": 34576 - }, - { - "epoch": 5.568380369580096, - "grad_norm": 0.0017253529513254762, - "learning_rate": 0.00019998470902327808, - "loss": 46.0, - "step": 34577 - }, - { - "epoch": 5.568541406658883, - "grad_norm": 0.001838596654124558, - "learning_rate": 0.00019998470813857255, - "loss": 46.0, - "step": 34578 - }, - { - "epoch": 5.568702443737671, - "grad_norm": 0.00573623925447464, - "learning_rate": 0.00019998470725384143, - "loss": 46.0, - "step": 34579 - }, - { - "epoch": 5.568863480816458, - "grad_norm": 0.006743662059307098, - "learning_rate": 0.0001999847063690847, - "loss": 46.0, - "step": 34580 - }, - { - "epoch": 5.5690245178952456, - "grad_norm": 0.0019299580017104745, - "learning_rate": 0.00019998470548430238, - "loss": 46.0, - "step": 34581 - }, - { - "epoch": 5.569185554974033, - "grad_norm": 0.0016555042238906026, - "learning_rate": 0.00019998470459949447, - "loss": 46.0, - "step": 34582 - }, - { - "epoch": 5.5693465920528205, - "grad_norm": 0.007202469278126955, - "learning_rate": 0.000199984703714661, - "loss": 46.0, - "step": 34583 - }, - { - "epoch": 5.569507629131608, - "grad_norm": 0.006697407457977533, - "learning_rate": 0.0001999847028298019, - "loss": 46.0, - "step": 34584 - }, - { - "epoch": 5.569668666210395, - "grad_norm": 0.007447336800396442, - "learning_rate": 0.00019998470194491726, - "loss": 46.0, - "step": 34585 - }, - { - "epoch": 5.569829703289182, - "grad_norm": 0.0025324360467493534, - "learning_rate": 0.00019998470106000698, - "loss": 46.0, - "step": 34586 - }, - { - "epoch": 5.569990740367969, - "grad_norm": 0.005339859053492546, - "learning_rate": 0.0001999847001750711, - "loss": 46.0, - "step": 34587 - }, - { - "epoch": 5.570151777446757, - "grad_norm": 0.0028478519525378942, - "learning_rate": 0.00019998469929010968, - "loss": 46.0, - "step": 34588 - }, - { - "epoch": 5.570312814525544, - "grad_norm": 0.0012775326613336802, - "learning_rate": 0.0001999846984051226, - "loss": 46.0, - "step": 34589 - }, - { - "epoch": 5.570473851604332, - "grad_norm": 0.0027482009027153254, - "learning_rate": 0.00019998469752011, - "loss": 46.0, - "step": 34590 - }, - { - "epoch": 5.570634888683119, - "grad_norm": 0.005615755449980497, - "learning_rate": 0.0001999846966350718, - "loss": 46.0, - "step": 34591 - }, - { - "epoch": 5.570795925761907, - "grad_norm": 0.0012348960153758526, - "learning_rate": 0.00019998469575000796, - "loss": 46.0, - "step": 34592 - }, - { - "epoch": 5.570956962840694, - "grad_norm": 0.0013820379972457886, - "learning_rate": 0.00019998469486491856, - "loss": 46.0, - "step": 34593 - }, - { - "epoch": 5.5711179999194815, - "grad_norm": 0.0035438458435237408, - "learning_rate": 0.00019998469397980358, - "loss": 46.0, - "step": 34594 - }, - { - "epoch": 5.571279036998269, - "grad_norm": 0.009423158131539822, - "learning_rate": 0.000199984693094663, - "loss": 46.0, - "step": 34595 - }, - { - "epoch": 5.571440074077056, - "grad_norm": 0.004822954535484314, - "learning_rate": 0.00019998469220949684, - "loss": 46.0, - "step": 34596 - }, - { - "epoch": 5.571601111155844, - "grad_norm": 0.0024205835070461035, - "learning_rate": 0.00019998469132430504, - "loss": 46.0, - "step": 34597 - }, - { - "epoch": 5.571762148234631, - "grad_norm": 0.0015072142705321312, - "learning_rate": 0.00019998469043908769, - "loss": 46.0, - "step": 34598 - }, - { - "epoch": 5.571923185313419, - "grad_norm": 0.0084876399487257, - "learning_rate": 0.00019998468955384472, - "loss": 46.0, - "step": 34599 - }, - { - "epoch": 5.572084222392206, - "grad_norm": 0.0026530916802585125, - "learning_rate": 0.00019998468866857621, - "loss": 46.0, - "step": 34600 - }, - { - "epoch": 5.572245259470993, - "grad_norm": 0.00257415440864861, - "learning_rate": 0.00019998468778328207, - "loss": 46.0, - "step": 34601 - }, - { - "epoch": 5.57240629654978, - "grad_norm": 0.009400319308042526, - "learning_rate": 0.00019998468689796234, - "loss": 46.0, - "step": 34602 - }, - { - "epoch": 5.572567333628568, - "grad_norm": 0.0017204693285748363, - "learning_rate": 0.00019998468601261705, - "loss": 46.0, - "step": 34603 - }, - { - "epoch": 5.572728370707355, - "grad_norm": 0.001527863903902471, - "learning_rate": 0.00019998468512724614, - "loss": 46.0, - "step": 34604 - }, - { - "epoch": 5.572889407786143, - "grad_norm": 0.009493887424468994, - "learning_rate": 0.00019998468424184965, - "loss": 46.0, - "step": 34605 - }, - { - "epoch": 5.57305044486493, - "grad_norm": 0.003111830912530422, - "learning_rate": 0.00019998468335642757, - "loss": 46.0, - "step": 34606 - }, - { - "epoch": 5.5732114819437175, - "grad_norm": 0.0009197515901178122, - "learning_rate": 0.00019998468247097988, - "loss": 46.0, - "step": 34607 - }, - { - "epoch": 5.573372519022505, - "grad_norm": 0.0014241928001865745, - "learning_rate": 0.0001999846815855066, - "loss": 46.0, - "step": 34608 - }, - { - "epoch": 5.573533556101292, - "grad_norm": 0.005001269746571779, - "learning_rate": 0.00019998468070000776, - "loss": 46.0, - "step": 34609 - }, - { - "epoch": 5.57369459318008, - "grad_norm": 0.005033680237829685, - "learning_rate": 0.00019998467981448333, - "loss": 46.0, - "step": 34610 - }, - { - "epoch": 5.573855630258867, - "grad_norm": 0.005431140307337046, - "learning_rate": 0.00019998467892893326, - "loss": 46.0, - "step": 34611 - }, - { - "epoch": 5.574016667337655, - "grad_norm": 0.001355149783194065, - "learning_rate": 0.00019998467804335763, - "loss": 46.0, - "step": 34612 - }, - { - "epoch": 5.574177704416442, - "grad_norm": 0.010457421652972698, - "learning_rate": 0.0001999846771577564, - "loss": 46.0, - "step": 34613 - }, - { - "epoch": 5.574338741495229, - "grad_norm": 0.0013484186492860317, - "learning_rate": 0.0001999846762721296, - "loss": 46.0, - "step": 34614 - }, - { - "epoch": 5.574499778574017, - "grad_norm": 0.0023423228412866592, - "learning_rate": 0.0001999846753864772, - "loss": 46.0, - "step": 34615 - }, - { - "epoch": 5.574660815652804, - "grad_norm": 0.0015337332151830196, - "learning_rate": 0.0001999846745007992, - "loss": 46.0, - "step": 34616 - }, - { - "epoch": 5.574821852731591, - "grad_norm": 0.04067227616906166, - "learning_rate": 0.00019998467361509562, - "loss": 46.0, - "step": 34617 - }, - { - "epoch": 5.574982889810379, - "grad_norm": 0.005900025833398104, - "learning_rate": 0.00019998467272936644, - "loss": 46.0, - "step": 34618 - }, - { - "epoch": 5.575143926889166, - "grad_norm": 0.0018213591538369656, - "learning_rate": 0.00019998467184361167, - "loss": 46.0, - "step": 34619 - }, - { - "epoch": 5.5753049639679535, - "grad_norm": 0.0012349592288956046, - "learning_rate": 0.00019998467095783134, - "loss": 46.0, - "step": 34620 - }, - { - "epoch": 5.575466001046741, - "grad_norm": 0.0075744627974927425, - "learning_rate": 0.00019998467007202537, - "loss": 46.0, - "step": 34621 - }, - { - "epoch": 5.575627038125528, - "grad_norm": 0.0007443575304932892, - "learning_rate": 0.00019998466918619382, - "loss": 46.0, - "step": 34622 - }, - { - "epoch": 5.575788075204316, - "grad_norm": 0.005646517965942621, - "learning_rate": 0.0001999846683003367, - "loss": 46.0, - "step": 34623 - }, - { - "epoch": 5.575949112283103, - "grad_norm": 0.0017934864154085517, - "learning_rate": 0.00019998466741445397, - "loss": 46.0, - "step": 34624 - }, - { - "epoch": 5.576110149361891, - "grad_norm": 0.00148340268060565, - "learning_rate": 0.00019998466652854566, - "loss": 46.0, - "step": 34625 - }, - { - "epoch": 5.576271186440678, - "grad_norm": 0.010009623132646084, - "learning_rate": 0.00019998466564261175, - "loss": 46.0, - "step": 34626 - }, - { - "epoch": 5.576432223519466, - "grad_norm": 0.012023997493088245, - "learning_rate": 0.00019998466475665226, - "loss": 46.0, - "step": 34627 - }, - { - "epoch": 5.576593260598253, - "grad_norm": 0.00648694159463048, - "learning_rate": 0.00019998466387066718, - "loss": 46.0, - "step": 34628 - }, - { - "epoch": 5.57675429767704, - "grad_norm": 0.011362219229340553, - "learning_rate": 0.0001999846629846565, - "loss": 46.0, - "step": 34629 - }, - { - "epoch": 5.576915334755828, - "grad_norm": 0.005906771868467331, - "learning_rate": 0.00019998466209862023, - "loss": 46.0, - "step": 34630 - }, - { - "epoch": 5.577076371834615, - "grad_norm": 0.005813187919557095, - "learning_rate": 0.0001999846612125584, - "loss": 46.0, - "step": 34631 - }, - { - "epoch": 5.577237408913402, - "grad_norm": 0.035298146307468414, - "learning_rate": 0.0001999846603264709, - "loss": 46.0, - "step": 34632 - }, - { - "epoch": 5.5773984459921895, - "grad_norm": 0.001976283034309745, - "learning_rate": 0.00019998465944035787, - "loss": 46.0, - "step": 34633 - }, - { - "epoch": 5.577559483070977, - "grad_norm": 0.01080408412963152, - "learning_rate": 0.00019998465855421924, - "loss": 46.0, - "step": 34634 - }, - { - "epoch": 5.577720520149764, - "grad_norm": 0.0017003521788865328, - "learning_rate": 0.00019998465766805503, - "loss": 46.0, - "step": 34635 - }, - { - "epoch": 5.577881557228552, - "grad_norm": 0.013147912919521332, - "learning_rate": 0.00019998465678186522, - "loss": 46.0, - "step": 34636 - }, - { - "epoch": 5.578042594307339, - "grad_norm": 0.0027957179117947817, - "learning_rate": 0.0001999846558956498, - "loss": 46.0, - "step": 34637 - }, - { - "epoch": 5.578203631386127, - "grad_norm": 0.02427743747830391, - "learning_rate": 0.00019998465500940883, - "loss": 46.0, - "step": 34638 - }, - { - "epoch": 5.578364668464914, - "grad_norm": 0.002231636783108115, - "learning_rate": 0.00019998465412314224, - "loss": 46.0, - "step": 34639 - }, - { - "epoch": 5.578525705543702, - "grad_norm": 0.007636507041752338, - "learning_rate": 0.00019998465323685006, - "loss": 46.0, - "step": 34640 - }, - { - "epoch": 5.578686742622489, - "grad_norm": 0.003328710561618209, - "learning_rate": 0.0001999846523505323, - "loss": 46.0, - "step": 34641 - }, - { - "epoch": 5.5788477797012765, - "grad_norm": 0.0014751259004697204, - "learning_rate": 0.00019998465146418894, - "loss": 46.0, - "step": 34642 - }, - { - "epoch": 5.579008816780064, - "grad_norm": 0.005650544073432684, - "learning_rate": 0.00019998465057781997, - "loss": 46.0, - "step": 34643 - }, - { - "epoch": 5.579169853858851, - "grad_norm": 0.001545062754303217, - "learning_rate": 0.00019998464969142544, - "loss": 46.0, - "step": 34644 - }, - { - "epoch": 5.579330890937638, - "grad_norm": 0.001414950587786734, - "learning_rate": 0.00019998464880500533, - "loss": 46.0, - "step": 34645 - }, - { - "epoch": 5.5794919280164255, - "grad_norm": 0.005363972391933203, - "learning_rate": 0.0001999846479185596, - "loss": 46.0, - "step": 34646 - }, - { - "epoch": 5.579652965095213, - "grad_norm": 0.004626765847206116, - "learning_rate": 0.00019998464703208826, - "loss": 46.0, - "step": 34647 - }, - { - "epoch": 5.579814002174, - "grad_norm": 0.003942246548831463, - "learning_rate": 0.00019998464614559138, - "loss": 46.0, - "step": 34648 - }, - { - "epoch": 5.579975039252788, - "grad_norm": 0.0029159795958548784, - "learning_rate": 0.0001999846452590689, - "loss": 46.0, - "step": 34649 - }, - { - "epoch": 5.580136076331575, - "grad_norm": 0.006351748947054148, - "learning_rate": 0.00019998464437252078, - "loss": 46.0, - "step": 34650 - }, - { - "epoch": 5.580297113410363, - "grad_norm": 0.00282082031480968, - "learning_rate": 0.0001999846434859471, - "loss": 46.0, - "step": 34651 - }, - { - "epoch": 5.58045815048915, - "grad_norm": 0.005532176699489355, - "learning_rate": 0.00019998464259934784, - "loss": 46.0, - "step": 34652 - }, - { - "epoch": 5.580619187567938, - "grad_norm": 0.026183107867836952, - "learning_rate": 0.00019998464171272297, - "loss": 46.0, - "step": 34653 - }, - { - "epoch": 5.580780224646725, - "grad_norm": 0.001215551164932549, - "learning_rate": 0.00019998464082607255, - "loss": 46.0, - "step": 34654 - }, - { - "epoch": 5.5809412617255125, - "grad_norm": 0.004192876163870096, - "learning_rate": 0.00019998463993939648, - "loss": 46.0, - "step": 34655 - }, - { - "epoch": 5.5811022988043, - "grad_norm": 0.0006772354245185852, - "learning_rate": 0.00019998463905269485, - "loss": 46.0, - "step": 34656 - }, - { - "epoch": 5.581263335883087, - "grad_norm": 0.007355592679232359, - "learning_rate": 0.0001999846381659676, - "loss": 46.0, - "step": 34657 - }, - { - "epoch": 5.581424372961875, - "grad_norm": 0.004486621357500553, - "learning_rate": 0.0001999846372792148, - "loss": 46.0, - "step": 34658 - }, - { - "epoch": 5.581585410040661, - "grad_norm": 0.0008767768740653992, - "learning_rate": 0.00019998463639243642, - "loss": 46.0, - "step": 34659 - }, - { - "epoch": 5.581746447119449, - "grad_norm": 0.020514369010925293, - "learning_rate": 0.0001999846355056324, - "loss": 46.0, - "step": 34660 - }, - { - "epoch": 5.581907484198236, - "grad_norm": 0.016994360834360123, - "learning_rate": 0.00019998463461880282, - "loss": 46.0, - "step": 34661 - }, - { - "epoch": 5.582068521277024, - "grad_norm": 0.008145978674292564, - "learning_rate": 0.00019998463373194767, - "loss": 46.0, - "step": 34662 - }, - { - "epoch": 5.582229558355811, - "grad_norm": 0.002680091420188546, - "learning_rate": 0.00019998463284506688, - "loss": 46.0, - "step": 34663 - }, - { - "epoch": 5.582390595434599, - "grad_norm": 0.005467541981488466, - "learning_rate": 0.00019998463195816053, - "loss": 46.0, - "step": 34664 - }, - { - "epoch": 5.582551632513386, - "grad_norm": 0.007323038298636675, - "learning_rate": 0.0001999846310712286, - "loss": 46.0, - "step": 34665 - }, - { - "epoch": 5.582712669592174, - "grad_norm": 0.00705747352913022, - "learning_rate": 0.00019998463018427103, - "loss": 46.0, - "step": 34666 - }, - { - "epoch": 5.582873706670961, - "grad_norm": 0.0039236838929355145, - "learning_rate": 0.0001999846292972879, - "loss": 46.0, - "step": 34667 - }, - { - "epoch": 5.5830347437497485, - "grad_norm": 0.006195586174726486, - "learning_rate": 0.0001999846284102792, - "loss": 46.0, - "step": 34668 - }, - { - "epoch": 5.583195780828536, - "grad_norm": 0.009123623371124268, - "learning_rate": 0.00019998462752324488, - "loss": 46.0, - "step": 34669 - }, - { - "epoch": 5.583356817907323, - "grad_norm": 0.0026506390422582626, - "learning_rate": 0.00019998462663618495, - "loss": 46.0, - "step": 34670 - }, - { - "epoch": 5.583517854986111, - "grad_norm": 0.007771295960992575, - "learning_rate": 0.00019998462574909946, - "loss": 46.0, - "step": 34671 - }, - { - "epoch": 5.583678892064898, - "grad_norm": 0.0029733378905802965, - "learning_rate": 0.00019998462486198838, - "loss": 46.0, - "step": 34672 - }, - { - "epoch": 5.583839929143686, - "grad_norm": 0.006273067090660334, - "learning_rate": 0.0001999846239748517, - "loss": 46.0, - "step": 34673 - }, - { - "epoch": 5.584000966222472, - "grad_norm": 0.004027407616376877, - "learning_rate": 0.00019998462308768944, - "loss": 46.0, - "step": 34674 - }, - { - "epoch": 5.58416200330126, - "grad_norm": 0.006385553162544966, - "learning_rate": 0.00019998462220050157, - "loss": 46.0, - "step": 34675 - }, - { - "epoch": 5.584323040380047, - "grad_norm": 0.001051747240126133, - "learning_rate": 0.0001999846213132881, - "loss": 46.0, - "step": 34676 - }, - { - "epoch": 5.584484077458835, - "grad_norm": 0.004667689558118582, - "learning_rate": 0.00019998462042604905, - "loss": 46.0, - "step": 34677 - }, - { - "epoch": 5.584645114537622, - "grad_norm": 0.0025501823984086514, - "learning_rate": 0.00019998461953878445, - "loss": 46.0, - "step": 34678 - }, - { - "epoch": 5.58480615161641, - "grad_norm": 0.015013315714895725, - "learning_rate": 0.0001999846186514942, - "loss": 46.0, - "step": 34679 - }, - { - "epoch": 5.584967188695197, - "grad_norm": 0.0013518910855054855, - "learning_rate": 0.0001999846177641784, - "loss": 46.0, - "step": 34680 - }, - { - "epoch": 5.5851282257739845, - "grad_norm": 0.0013906301464885473, - "learning_rate": 0.000199984616876837, - "loss": 46.0, - "step": 34681 - }, - { - "epoch": 5.585289262852772, - "grad_norm": 0.013587641529738903, - "learning_rate": 0.00019998461598947, - "loss": 46.0, - "step": 34682 - }, - { - "epoch": 5.585450299931559, - "grad_norm": 0.004045858047902584, - "learning_rate": 0.0001999846151020774, - "loss": 46.0, - "step": 34683 - }, - { - "epoch": 5.585611337010347, - "grad_norm": 0.012017167173326015, - "learning_rate": 0.00019998461421465925, - "loss": 46.0, - "step": 34684 - }, - { - "epoch": 5.585772374089134, - "grad_norm": 0.006830972619354725, - "learning_rate": 0.00019998461332721546, - "loss": 46.0, - "step": 34685 - }, - { - "epoch": 5.585933411167922, - "grad_norm": 0.0012241060612723231, - "learning_rate": 0.0001999846124397461, - "loss": 46.0, - "step": 34686 - }, - { - "epoch": 5.586094448246708, - "grad_norm": 0.006106555461883545, - "learning_rate": 0.00019998461155225114, - "loss": 46.0, - "step": 34687 - }, - { - "epoch": 5.586255485325497, - "grad_norm": 0.0015198610490188003, - "learning_rate": 0.0001999846106647306, - "loss": 46.0, - "step": 34688 - }, - { - "epoch": 5.586416522404283, - "grad_norm": 0.006363158114254475, - "learning_rate": 0.00019998460977718448, - "loss": 46.0, - "step": 34689 - }, - { - "epoch": 5.586577559483071, - "grad_norm": 0.002384186489507556, - "learning_rate": 0.00019998460888961278, - "loss": 46.0, - "step": 34690 - }, - { - "epoch": 5.586738596561858, - "grad_norm": 0.013466891832649708, - "learning_rate": 0.00019998460800201544, - "loss": 46.0, - "step": 34691 - }, - { - "epoch": 5.586899633640646, - "grad_norm": 0.0026554784271866083, - "learning_rate": 0.00019998460711439253, - "loss": 46.0, - "step": 34692 - }, - { - "epoch": 5.587060670719433, - "grad_norm": 0.010037070140242577, - "learning_rate": 0.00019998460622674404, - "loss": 46.0, - "step": 34693 - }, - { - "epoch": 5.5872217077982205, - "grad_norm": 0.0013583747204393148, - "learning_rate": 0.00019998460533906994, - "loss": 46.0, - "step": 34694 - }, - { - "epoch": 5.587382744877008, - "grad_norm": 0.003949266392737627, - "learning_rate": 0.00019998460445137028, - "loss": 46.0, - "step": 34695 - }, - { - "epoch": 5.587543781955795, - "grad_norm": 0.003091406775638461, - "learning_rate": 0.000199984603563645, - "loss": 46.0, - "step": 34696 - }, - { - "epoch": 5.587704819034583, - "grad_norm": 0.0037286831066012383, - "learning_rate": 0.00019998460267589414, - "loss": 46.0, - "step": 34697 - }, - { - "epoch": 5.58786585611337, - "grad_norm": 0.0180059764534235, - "learning_rate": 0.00019998460178811768, - "loss": 46.0, - "step": 34698 - }, - { - "epoch": 5.588026893192158, - "grad_norm": 0.0033999825827777386, - "learning_rate": 0.00019998460090031565, - "loss": 46.0, - "step": 34699 - }, - { - "epoch": 5.588187930270945, - "grad_norm": 0.004126907326281071, - "learning_rate": 0.00019998460001248802, - "loss": 46.0, - "step": 34700 - }, - { - "epoch": 5.588348967349733, - "grad_norm": 0.0020060690585523844, - "learning_rate": 0.00019998459912463478, - "loss": 46.0, - "step": 34701 - }, - { - "epoch": 5.588510004428519, - "grad_norm": 0.012995957396924496, - "learning_rate": 0.000199984598236756, - "loss": 46.0, - "step": 34702 - }, - { - "epoch": 5.5886710415073075, - "grad_norm": 0.012879100628197193, - "learning_rate": 0.00019998459734885156, - "loss": 46.0, - "step": 34703 - }, - { - "epoch": 5.588832078586094, - "grad_norm": 0.004682404454797506, - "learning_rate": 0.0001999845964609216, - "loss": 46.0, - "step": 34704 - }, - { - "epoch": 5.5889931156648816, - "grad_norm": 0.0017997967079281807, - "learning_rate": 0.000199984595572966, - "loss": 46.0, - "step": 34705 - }, - { - "epoch": 5.589154152743669, - "grad_norm": 0.0016788548091426492, - "learning_rate": 0.0001999845946849848, - "loss": 46.0, - "step": 34706 - }, - { - "epoch": 5.5893151898224565, - "grad_norm": 0.011671469546854496, - "learning_rate": 0.00019998459379697803, - "loss": 46.0, - "step": 34707 - }, - { - "epoch": 5.589476226901244, - "grad_norm": 0.01430121622979641, - "learning_rate": 0.00019998459290894566, - "loss": 46.0, - "step": 34708 - }, - { - "epoch": 5.589637263980031, - "grad_norm": 0.001300667878240347, - "learning_rate": 0.00019998459202088772, - "loss": 46.0, - "step": 34709 - }, - { - "epoch": 5.589798301058819, - "grad_norm": 0.01275206170976162, - "learning_rate": 0.0001999845911328042, - "loss": 46.0, - "step": 34710 - }, - { - "epoch": 5.589959338137606, - "grad_norm": 0.006128380540758371, - "learning_rate": 0.00019998459024469508, - "loss": 46.0, - "step": 34711 - }, - { - "epoch": 5.590120375216394, - "grad_norm": 0.012952849268913269, - "learning_rate": 0.00019998458935656033, - "loss": 46.0, - "step": 34712 - }, - { - "epoch": 5.590281412295181, - "grad_norm": 0.0019022960914298892, - "learning_rate": 0.00019998458846840001, - "loss": 46.0, - "step": 34713 - }, - { - "epoch": 5.590442449373969, - "grad_norm": 0.005407972261309624, - "learning_rate": 0.00019998458758021411, - "loss": 46.0, - "step": 34714 - }, - { - "epoch": 5.590603486452756, - "grad_norm": 0.0009761581895872951, - "learning_rate": 0.00019998458669200263, - "loss": 46.0, - "step": 34715 - }, - { - "epoch": 5.5907645235315435, - "grad_norm": 0.004773018881678581, - "learning_rate": 0.00019998458580376552, - "loss": 46.0, - "step": 34716 - }, - { - "epoch": 5.59092556061033, - "grad_norm": 0.0037342195864766836, - "learning_rate": 0.00019998458491550286, - "loss": 46.0, - "step": 34717 - }, - { - "epoch": 5.5910865976891175, - "grad_norm": 0.010398722253739834, - "learning_rate": 0.00019998458402721458, - "loss": 46.0, - "step": 34718 - }, - { - "epoch": 5.591247634767905, - "grad_norm": 0.002075908239930868, - "learning_rate": 0.00019998458313890072, - "loss": 46.0, - "step": 34719 - }, - { - "epoch": 5.591408671846692, - "grad_norm": 0.0018005879828706384, - "learning_rate": 0.00019998458225056127, - "loss": 46.0, - "step": 34720 - }, - { - "epoch": 5.59156970892548, - "grad_norm": 0.003978176973760128, - "learning_rate": 0.00019998458136219623, - "loss": 46.0, - "step": 34721 - }, - { - "epoch": 5.591730746004267, - "grad_norm": 0.001142253284342587, - "learning_rate": 0.0001999845804738056, - "loss": 46.0, - "step": 34722 - }, - { - "epoch": 5.591891783083055, - "grad_norm": 0.0032236415427178144, - "learning_rate": 0.00019998457958538937, - "loss": 46.0, - "step": 34723 - }, - { - "epoch": 5.592052820161842, - "grad_norm": 0.0014490430476143956, - "learning_rate": 0.00019998457869694757, - "loss": 46.0, - "step": 34724 - }, - { - "epoch": 5.59221385724063, - "grad_norm": 0.002264301525428891, - "learning_rate": 0.00019998457780848015, - "loss": 46.0, - "step": 34725 - }, - { - "epoch": 5.592374894319417, - "grad_norm": 0.0034744602162390947, - "learning_rate": 0.00019998457691998715, - "loss": 46.0, - "step": 34726 - }, - { - "epoch": 5.592535931398205, - "grad_norm": 0.0016214636852964759, - "learning_rate": 0.00019998457603146857, - "loss": 46.0, - "step": 34727 - }, - { - "epoch": 5.592696968476992, - "grad_norm": 0.0033249491825699806, - "learning_rate": 0.0001999845751429244, - "loss": 46.0, - "step": 34728 - }, - { - "epoch": 5.5928580055557795, - "grad_norm": 0.007058362010866404, - "learning_rate": 0.0001999845742543546, - "loss": 46.0, - "step": 34729 - }, - { - "epoch": 5.593019042634567, - "grad_norm": 0.006156631745398045, - "learning_rate": 0.00019998457336575925, - "loss": 46.0, - "step": 34730 - }, - { - "epoch": 5.593180079713354, - "grad_norm": 0.008450658991932869, - "learning_rate": 0.0001999845724771383, - "loss": 46.0, - "step": 34731 - }, - { - "epoch": 5.593341116792141, - "grad_norm": 0.003843887010589242, - "learning_rate": 0.00019998457158849176, - "loss": 46.0, - "step": 34732 - }, - { - "epoch": 5.593502153870928, - "grad_norm": 0.004342313855886459, - "learning_rate": 0.00019998457069981962, - "loss": 46.0, - "step": 34733 - }, - { - "epoch": 5.593663190949716, - "grad_norm": 0.002976346295326948, - "learning_rate": 0.00019998456981112187, - "loss": 46.0, - "step": 34734 - }, - { - "epoch": 5.593824228028503, - "grad_norm": 0.002610750962048769, - "learning_rate": 0.00019998456892239856, - "loss": 46.0, - "step": 34735 - }, - { - "epoch": 5.593985265107291, - "grad_norm": 0.0047075035981833935, - "learning_rate": 0.00019998456803364966, - "loss": 46.0, - "step": 34736 - }, - { - "epoch": 5.594146302186078, - "grad_norm": 0.0028581772930920124, - "learning_rate": 0.00019998456714487517, - "loss": 46.0, - "step": 34737 - }, - { - "epoch": 5.594307339264866, - "grad_norm": 0.0022644835989922285, - "learning_rate": 0.0001999845662560751, - "loss": 46.0, - "step": 34738 - }, - { - "epoch": 5.594468376343653, - "grad_norm": 0.004189268220216036, - "learning_rate": 0.0001999845653672494, - "loss": 46.0, - "step": 34739 - }, - { - "epoch": 5.594629413422441, - "grad_norm": 0.00970964040607214, - "learning_rate": 0.00019998456447839816, - "loss": 46.0, - "step": 34740 - }, - { - "epoch": 5.594790450501228, - "grad_norm": 0.003984611015766859, - "learning_rate": 0.00019998456358952127, - "loss": 46.0, - "step": 34741 - }, - { - "epoch": 5.5949514875800155, - "grad_norm": 0.0022746813483536243, - "learning_rate": 0.00019998456270061882, - "loss": 46.0, - "step": 34742 - }, - { - "epoch": 5.595112524658803, - "grad_norm": 0.002649534959346056, - "learning_rate": 0.00019998456181169079, - "loss": 46.0, - "step": 34743 - }, - { - "epoch": 5.59527356173759, - "grad_norm": 0.0070677027106285095, - "learning_rate": 0.00019998456092273713, - "loss": 46.0, - "step": 34744 - }, - { - "epoch": 5.595434598816378, - "grad_norm": 0.0032268110662698746, - "learning_rate": 0.00019998456003375792, - "loss": 46.0, - "step": 34745 - }, - { - "epoch": 5.595595635895165, - "grad_norm": 0.004400305915623903, - "learning_rate": 0.00019998455914475312, - "loss": 46.0, - "step": 34746 - }, - { - "epoch": 5.595756672973952, - "grad_norm": 0.0008275015861727297, - "learning_rate": 0.0001999845582557227, - "loss": 46.0, - "step": 34747 - }, - { - "epoch": 5.595917710052739, - "grad_norm": 0.009321720339357853, - "learning_rate": 0.0001999845573666667, - "loss": 46.0, - "step": 34748 - }, - { - "epoch": 5.596078747131527, - "grad_norm": 0.004384103696793318, - "learning_rate": 0.00019998455647758512, - "loss": 46.0, - "step": 34749 - }, - { - "epoch": 5.596239784210314, - "grad_norm": 0.0025092782452702522, - "learning_rate": 0.00019998455558847792, - "loss": 46.0, - "step": 34750 - }, - { - "epoch": 5.596400821289102, - "grad_norm": 0.004068761132657528, - "learning_rate": 0.00019998455469934516, - "loss": 46.0, - "step": 34751 - }, - { - "epoch": 5.596561858367889, - "grad_norm": 0.005694488994777203, - "learning_rate": 0.0001999845538101868, - "loss": 46.0, - "step": 34752 - }, - { - "epoch": 5.596722895446677, - "grad_norm": 0.005226957146078348, - "learning_rate": 0.00019998455292100285, - "loss": 46.0, - "step": 34753 - }, - { - "epoch": 5.596883932525464, - "grad_norm": 0.003106221556663513, - "learning_rate": 0.0001999845520317933, - "loss": 46.0, - "step": 34754 - }, - { - "epoch": 5.5970449696042515, - "grad_norm": 0.002458963543176651, - "learning_rate": 0.00019998455114255816, - "loss": 46.0, - "step": 34755 - }, - { - "epoch": 5.597206006683039, - "grad_norm": 0.0013307415647432208, - "learning_rate": 0.00019998455025329746, - "loss": 46.0, - "step": 34756 - }, - { - "epoch": 5.597367043761826, - "grad_norm": 0.008632835932075977, - "learning_rate": 0.00019998454936401113, - "loss": 46.0, - "step": 34757 - }, - { - "epoch": 5.597528080840614, - "grad_norm": 0.010196693241596222, - "learning_rate": 0.00019998454847469923, - "loss": 46.0, - "step": 34758 - }, - { - "epoch": 5.597689117919401, - "grad_norm": 0.012990818358957767, - "learning_rate": 0.0001999845475853617, - "loss": 46.0, - "step": 34759 - }, - { - "epoch": 5.597850154998189, - "grad_norm": 0.008843060582876205, - "learning_rate": 0.00019998454669599864, - "loss": 46.0, - "step": 34760 - }, - { - "epoch": 5.598011192076976, - "grad_norm": 0.004096719902008772, - "learning_rate": 0.00019998454580660995, - "loss": 46.0, - "step": 34761 - }, - { - "epoch": 5.598172229155763, - "grad_norm": 0.002911268500611186, - "learning_rate": 0.0001999845449171957, - "loss": 46.0, - "step": 34762 - }, - { - "epoch": 5.59833326623455, - "grad_norm": 0.013834027573466301, - "learning_rate": 0.00019998454402775582, - "loss": 46.0, - "step": 34763 - }, - { - "epoch": 5.598494303313338, - "grad_norm": 0.0027927388437092304, - "learning_rate": 0.0001999845431382904, - "loss": 46.0, - "step": 34764 - }, - { - "epoch": 5.598655340392125, - "grad_norm": 0.002818124135956168, - "learning_rate": 0.00019998454224879933, - "loss": 46.0, - "step": 34765 - }, - { - "epoch": 5.5988163774709125, - "grad_norm": 0.008320937864482403, - "learning_rate": 0.0001999845413592827, - "loss": 46.0, - "step": 34766 - }, - { - "epoch": 5.5989774145497, - "grad_norm": 0.010386254638433456, - "learning_rate": 0.00019998454046974047, - "loss": 46.0, - "step": 34767 - }, - { - "epoch": 5.5991384516284874, - "grad_norm": 0.0020796344615519047, - "learning_rate": 0.00019998453958017267, - "loss": 46.0, - "step": 34768 - }, - { - "epoch": 5.599299488707275, - "grad_norm": 0.001319212606176734, - "learning_rate": 0.00019998453869057926, - "loss": 46.0, - "step": 34769 - }, - { - "epoch": 5.599460525786062, - "grad_norm": 0.004777243360877037, - "learning_rate": 0.00019998453780096026, - "loss": 46.0, - "step": 34770 - }, - { - "epoch": 5.59962156286485, - "grad_norm": 0.0022300691343843937, - "learning_rate": 0.00019998453691131567, - "loss": 46.0, - "step": 34771 - }, - { - "epoch": 5.599782599943637, - "grad_norm": 0.009431141428649426, - "learning_rate": 0.00019998453602164547, - "loss": 46.0, - "step": 34772 - }, - { - "epoch": 5.599943637022425, - "grad_norm": 0.0006995851290412247, - "learning_rate": 0.0001999845351319497, - "loss": 46.0, - "step": 34773 - }, - { - "epoch": 5.600104674101212, - "grad_norm": 0.0018728980794548988, - "learning_rate": 0.00019998453424222837, - "loss": 46.0, - "step": 34774 - }, - { - "epoch": 5.600265711179999, - "grad_norm": 0.010138577781617641, - "learning_rate": 0.00019998453335248143, - "loss": 46.0, - "step": 34775 - }, - { - "epoch": 5.600426748258787, - "grad_norm": 0.006323046050965786, - "learning_rate": 0.00019998453246270885, - "loss": 46.0, - "step": 34776 - }, - { - "epoch": 5.600587785337574, - "grad_norm": 0.0011384043609723449, - "learning_rate": 0.00019998453157291072, - "loss": 46.0, - "step": 34777 - }, - { - "epoch": 5.600748822416361, - "grad_norm": 0.009958439506590366, - "learning_rate": 0.000199984530683087, - "loss": 46.0, - "step": 34778 - }, - { - "epoch": 5.6009098594951485, - "grad_norm": 0.007385083008557558, - "learning_rate": 0.00019998452979323768, - "loss": 46.0, - "step": 34779 - }, - { - "epoch": 5.601070896573936, - "grad_norm": 0.002337296027690172, - "learning_rate": 0.00019998452890336279, - "loss": 46.0, - "step": 34780 - }, - { - "epoch": 5.601231933652723, - "grad_norm": 0.0006477715214714408, - "learning_rate": 0.00019998452801346227, - "loss": 46.0, - "step": 34781 - }, - { - "epoch": 5.601392970731511, - "grad_norm": 0.00799001194536686, - "learning_rate": 0.0001999845271235362, - "loss": 46.0, - "step": 34782 - }, - { - "epoch": 5.601554007810298, - "grad_norm": 0.0018834640504792333, - "learning_rate": 0.00019998452623358451, - "loss": 46.0, - "step": 34783 - }, - { - "epoch": 5.601715044889086, - "grad_norm": 0.0038098585791885853, - "learning_rate": 0.00019998452534360724, - "loss": 46.0, - "step": 34784 - }, - { - "epoch": 5.601876081967873, - "grad_norm": 0.006432806141674519, - "learning_rate": 0.00019998452445360438, - "loss": 46.0, - "step": 34785 - }, - { - "epoch": 5.602037119046661, - "grad_norm": 0.0069917780347168446, - "learning_rate": 0.00019998452356357593, - "loss": 46.0, - "step": 34786 - }, - { - "epoch": 5.602198156125448, - "grad_norm": 0.006083559710532427, - "learning_rate": 0.00019998452267352187, - "loss": 46.0, - "step": 34787 - }, - { - "epoch": 5.602359193204236, - "grad_norm": 0.0023505445569753647, - "learning_rate": 0.00019998452178344227, - "loss": 46.0, - "step": 34788 - }, - { - "epoch": 5.602520230283023, - "grad_norm": 0.004209406673908234, - "learning_rate": 0.00019998452089333704, - "loss": 46.0, - "step": 34789 - }, - { - "epoch": 5.60268126736181, - "grad_norm": 0.00223302049562335, - "learning_rate": 0.0001999845200032062, - "loss": 46.0, - "step": 34790 - }, - { - "epoch": 5.602842304440598, - "grad_norm": 0.001660041161812842, - "learning_rate": 0.0001999845191130498, - "loss": 46.0, - "step": 34791 - }, - { - "epoch": 5.6030033415193845, - "grad_norm": 0.004566125106066465, - "learning_rate": 0.0001999845182228678, - "loss": 46.0, - "step": 34792 - }, - { - "epoch": 5.603164378598172, - "grad_norm": 0.00858062133193016, - "learning_rate": 0.00019998451733266022, - "loss": 46.0, - "step": 34793 - }, - { - "epoch": 5.603325415676959, - "grad_norm": 0.013128002174198627, - "learning_rate": 0.00019998451644242705, - "loss": 46.0, - "step": 34794 - }, - { - "epoch": 5.603486452755747, - "grad_norm": 0.0027794018387794495, - "learning_rate": 0.0001999845155521683, - "loss": 46.0, - "step": 34795 - }, - { - "epoch": 5.603647489834534, - "grad_norm": 0.009374003857374191, - "learning_rate": 0.0001999845146618839, - "loss": 46.0, - "step": 34796 - }, - { - "epoch": 5.603808526913322, - "grad_norm": 0.011360440403223038, - "learning_rate": 0.00019998451377157398, - "loss": 46.0, - "step": 34797 - }, - { - "epoch": 5.603969563992109, - "grad_norm": 0.005560627672821283, - "learning_rate": 0.0001999845128812384, - "loss": 46.0, - "step": 34798 - }, - { - "epoch": 5.604130601070897, - "grad_norm": 0.009031318128108978, - "learning_rate": 0.0001999845119908773, - "loss": 46.0, - "step": 34799 - }, - { - "epoch": 5.604291638149684, - "grad_norm": 0.007395155727863312, - "learning_rate": 0.00019998451110049054, - "loss": 46.0, - "step": 34800 - }, - { - "epoch": 5.604452675228472, - "grad_norm": 0.0023184281308203936, - "learning_rate": 0.00019998451021007824, - "loss": 46.0, - "step": 34801 - }, - { - "epoch": 5.604613712307259, - "grad_norm": 0.0031362087465822697, - "learning_rate": 0.0001999845093196403, - "loss": 46.0, - "step": 34802 - }, - { - "epoch": 5.6047747493860465, - "grad_norm": 0.0045433202758431435, - "learning_rate": 0.00019998450842917685, - "loss": 46.0, - "step": 34803 - }, - { - "epoch": 5.604935786464834, - "grad_norm": 0.012602468021214008, - "learning_rate": 0.00019998450753868773, - "loss": 46.0, - "step": 34804 - }, - { - "epoch": 5.6050968235436205, - "grad_norm": 0.0040380205027759075, - "learning_rate": 0.00019998450664817307, - "loss": 46.0, - "step": 34805 - }, - { - "epoch": 5.605257860622408, - "grad_norm": 0.00943379383534193, - "learning_rate": 0.0001999845057576328, - "loss": 46.0, - "step": 34806 - }, - { - "epoch": 5.605418897701195, - "grad_norm": 0.007022729143500328, - "learning_rate": 0.00019998450486706694, - "loss": 46.0, - "step": 34807 - }, - { - "epoch": 5.605579934779983, - "grad_norm": 0.0029596660751849413, - "learning_rate": 0.0001999845039764755, - "loss": 46.0, - "step": 34808 - }, - { - "epoch": 5.60574097185877, - "grad_norm": 0.01099966000765562, - "learning_rate": 0.00019998450308585843, - "loss": 46.0, - "step": 34809 - }, - { - "epoch": 5.605902008937558, - "grad_norm": 0.004348713904619217, - "learning_rate": 0.0001999845021952158, - "loss": 46.0, - "step": 34810 - }, - { - "epoch": 5.606063046016345, - "grad_norm": 0.0012874577660113573, - "learning_rate": 0.0001999845013045476, - "loss": 46.0, - "step": 34811 - }, - { - "epoch": 5.606224083095133, - "grad_norm": 0.002988834399729967, - "learning_rate": 0.00019998450041385378, - "loss": 46.0, - "step": 34812 - }, - { - "epoch": 5.60638512017392, - "grad_norm": 0.016629081219434738, - "learning_rate": 0.00019998449952313437, - "loss": 46.0, - "step": 34813 - }, - { - "epoch": 5.6065461572527076, - "grad_norm": 0.013277328573167324, - "learning_rate": 0.00019998449863238937, - "loss": 46.0, - "step": 34814 - }, - { - "epoch": 5.606707194331495, - "grad_norm": 0.0037772334180772305, - "learning_rate": 0.00019998449774161878, - "loss": 46.0, - "step": 34815 - }, - { - "epoch": 5.6068682314102825, - "grad_norm": 0.002887663198634982, - "learning_rate": 0.0001999844968508226, - "loss": 46.0, - "step": 34816 - }, - { - "epoch": 5.60702926848907, - "grad_norm": 0.003901789430528879, - "learning_rate": 0.00019998449596000085, - "loss": 46.0, - "step": 34817 - }, - { - "epoch": 5.607190305567857, - "grad_norm": 0.0009883488528430462, - "learning_rate": 0.00019998449506915345, - "loss": 46.0, - "step": 34818 - }, - { - "epoch": 5.607351342646645, - "grad_norm": 0.003526639426127076, - "learning_rate": 0.00019998449417828052, - "loss": 46.0, - "step": 34819 - }, - { - "epoch": 5.607512379725431, - "grad_norm": 0.014215493574738503, - "learning_rate": 0.00019998449328738197, - "loss": 46.0, - "step": 34820 - }, - { - "epoch": 5.607673416804219, - "grad_norm": 0.006592354737222195, - "learning_rate": 0.00019998449239645784, - "loss": 46.0, - "step": 34821 - }, - { - "epoch": 5.607834453883006, - "grad_norm": 0.0026498534716665745, - "learning_rate": 0.00019998449150550812, - "loss": 46.0, - "step": 34822 - }, - { - "epoch": 5.607995490961794, - "grad_norm": 0.0035986576694995165, - "learning_rate": 0.0001999844906145328, - "loss": 46.0, - "step": 34823 - }, - { - "epoch": 5.608156528040581, - "grad_norm": 0.0033094820100814104, - "learning_rate": 0.0001999844897235319, - "loss": 46.0, - "step": 34824 - }, - { - "epoch": 5.608317565119369, - "grad_norm": 0.004507584497332573, - "learning_rate": 0.0001999844888325054, - "loss": 46.0, - "step": 34825 - }, - { - "epoch": 5.608478602198156, - "grad_norm": 0.009295037016272545, - "learning_rate": 0.00019998448794145333, - "loss": 46.0, - "step": 34826 - }, - { - "epoch": 5.6086396392769435, - "grad_norm": 0.008341183885931969, - "learning_rate": 0.00019998448705037562, - "loss": 46.0, - "step": 34827 - }, - { - "epoch": 5.608800676355731, - "grad_norm": 0.0016650771722197533, - "learning_rate": 0.00019998448615927237, - "loss": 46.0, - "step": 34828 - }, - { - "epoch": 5.608961713434518, - "grad_norm": 0.01993122324347496, - "learning_rate": 0.0001999844852681435, - "loss": 46.0, - "step": 34829 - }, - { - "epoch": 5.609122750513306, - "grad_norm": 0.002327660797163844, - "learning_rate": 0.00019998448437698904, - "loss": 46.0, - "step": 34830 - }, - { - "epoch": 5.609283787592093, - "grad_norm": 0.00765073811635375, - "learning_rate": 0.000199984483485809, - "loss": 46.0, - "step": 34831 - }, - { - "epoch": 5.609444824670881, - "grad_norm": 0.011233063414692879, - "learning_rate": 0.0001999844825946034, - "loss": 46.0, - "step": 34832 - }, - { - "epoch": 5.609605861749668, - "grad_norm": 0.005589455831795931, - "learning_rate": 0.00019998448170337215, - "loss": 46.0, - "step": 34833 - }, - { - "epoch": 5.609766898828456, - "grad_norm": 0.01829095557332039, - "learning_rate": 0.00019998448081211533, - "loss": 46.0, - "step": 34834 - }, - { - "epoch": 5.609927935907242, - "grad_norm": 0.001708773779682815, - "learning_rate": 0.00019998447992083295, - "loss": 46.0, - "step": 34835 - }, - { - "epoch": 5.61008897298603, - "grad_norm": 0.010568246245384216, - "learning_rate": 0.00019998447902952492, - "loss": 46.0, - "step": 34836 - }, - { - "epoch": 5.610250010064817, - "grad_norm": 0.002612903481349349, - "learning_rate": 0.00019998447813819134, - "loss": 46.0, - "step": 34837 - }, - { - "epoch": 5.610411047143605, - "grad_norm": 0.007799685001373291, - "learning_rate": 0.00019998447724683217, - "loss": 46.0, - "step": 34838 - }, - { - "epoch": 5.610572084222392, - "grad_norm": 0.012057727202773094, - "learning_rate": 0.00019998447635544739, - "loss": 46.0, - "step": 34839 - }, - { - "epoch": 5.6107331213011795, - "grad_norm": 0.010302739217877388, - "learning_rate": 0.000199984475464037, - "loss": 46.0, - "step": 34840 - }, - { - "epoch": 5.610894158379967, - "grad_norm": 0.004059287719428539, - "learning_rate": 0.00019998447457260108, - "loss": 46.0, - "step": 34841 - }, - { - "epoch": 5.611055195458754, - "grad_norm": 0.010391690768301487, - "learning_rate": 0.00019998447368113956, - "loss": 46.0, - "step": 34842 - }, - { - "epoch": 5.611216232537542, - "grad_norm": 0.0031577947083860636, - "learning_rate": 0.0001999844727896524, - "loss": 46.0, - "step": 34843 - }, - { - "epoch": 5.611377269616329, - "grad_norm": 0.006777295842766762, - "learning_rate": 0.00019998447189813968, - "loss": 46.0, - "step": 34844 - }, - { - "epoch": 5.611538306695117, - "grad_norm": 0.004861742723733187, - "learning_rate": 0.00019998447100660137, - "loss": 46.0, - "step": 34845 - }, - { - "epoch": 5.611699343773904, - "grad_norm": 0.006205307319760323, - "learning_rate": 0.00019998447011503748, - "loss": 46.0, - "step": 34846 - }, - { - "epoch": 5.611860380852692, - "grad_norm": 0.002145164180546999, - "learning_rate": 0.00019998446922344797, - "loss": 46.0, - "step": 34847 - }, - { - "epoch": 5.612021417931478, - "grad_norm": 0.0015847224276512861, - "learning_rate": 0.00019998446833183287, - "loss": 46.0, - "step": 34848 - }, - { - "epoch": 5.612182455010267, - "grad_norm": 0.0029122214764356613, - "learning_rate": 0.0001999844674401922, - "loss": 46.0, - "step": 34849 - }, - { - "epoch": 5.612343492089053, - "grad_norm": 0.001439048908650875, - "learning_rate": 0.00019998446654852594, - "loss": 46.0, - "step": 34850 - }, - { - "epoch": 5.612504529167841, - "grad_norm": 0.007039692252874374, - "learning_rate": 0.00019998446565683406, - "loss": 46.0, - "step": 34851 - }, - { - "epoch": 5.612665566246628, - "grad_norm": 0.00641253124922514, - "learning_rate": 0.0001999844647651166, - "loss": 46.0, - "step": 34852 - }, - { - "epoch": 5.6128266033254155, - "grad_norm": 0.008297199383378029, - "learning_rate": 0.00019998446387337358, - "loss": 46.0, - "step": 34853 - }, - { - "epoch": 5.612987640404203, - "grad_norm": 0.014836383983492851, - "learning_rate": 0.00019998446298160493, - "loss": 46.0, - "step": 34854 - }, - { - "epoch": 5.61314867748299, - "grad_norm": 0.0032842091750353575, - "learning_rate": 0.00019998446208981073, - "loss": 46.0, - "step": 34855 - }, - { - "epoch": 5.613309714561778, - "grad_norm": 0.009159413166344166, - "learning_rate": 0.0001999844611979909, - "loss": 46.0, - "step": 34856 - }, - { - "epoch": 5.613470751640565, - "grad_norm": 0.018917739391326904, - "learning_rate": 0.0001999844603061455, - "loss": 46.0, - "step": 34857 - }, - { - "epoch": 5.613631788719353, - "grad_norm": 0.004317612387239933, - "learning_rate": 0.00019998445941427447, - "loss": 46.0, - "step": 34858 - }, - { - "epoch": 5.61379282579814, - "grad_norm": 0.001880093477666378, - "learning_rate": 0.0001999844585223779, - "loss": 46.0, - "step": 34859 - }, - { - "epoch": 5.613953862876928, - "grad_norm": 0.013252083212137222, - "learning_rate": 0.00019998445763045572, - "loss": 46.0, - "step": 34860 - }, - { - "epoch": 5.614114899955715, - "grad_norm": 0.0020481296814978123, - "learning_rate": 0.00019998445673850796, - "loss": 46.0, - "step": 34861 - }, - { - "epoch": 5.614275937034503, - "grad_norm": 0.0031048671808093786, - "learning_rate": 0.0001999844558465346, - "loss": 46.0, - "step": 34862 - }, - { - "epoch": 5.614436974113289, - "grad_norm": 0.00937010906636715, - "learning_rate": 0.00019998445495453563, - "loss": 46.0, - "step": 34863 - }, - { - "epoch": 5.6145980111920775, - "grad_norm": 0.0033450659830123186, - "learning_rate": 0.0001999844540625111, - "loss": 46.0, - "step": 34864 - }, - { - "epoch": 5.614759048270864, - "grad_norm": 0.007181660737842321, - "learning_rate": 0.00019998445317046098, - "loss": 46.0, - "step": 34865 - }, - { - "epoch": 5.6149200853496515, - "grad_norm": 0.00501789478585124, - "learning_rate": 0.00019998445227838524, - "loss": 46.0, - "step": 34866 - }, - { - "epoch": 5.615081122428439, - "grad_norm": 0.0010603123810142279, - "learning_rate": 0.00019998445138628396, - "loss": 46.0, - "step": 34867 - }, - { - "epoch": 5.615242159507226, - "grad_norm": 0.00131454027723521, - "learning_rate": 0.00019998445049415704, - "loss": 46.0, - "step": 34868 - }, - { - "epoch": 5.615403196586014, - "grad_norm": 0.002484876662492752, - "learning_rate": 0.00019998444960200453, - "loss": 46.0, - "step": 34869 - }, - { - "epoch": 5.615564233664801, - "grad_norm": 0.017979202792048454, - "learning_rate": 0.00019998444870982643, - "loss": 46.0, - "step": 34870 - }, - { - "epoch": 5.615725270743589, - "grad_norm": 0.008086731657385826, - "learning_rate": 0.00019998444781762275, - "loss": 46.0, - "step": 34871 - }, - { - "epoch": 5.615886307822376, - "grad_norm": 0.010307800024747849, - "learning_rate": 0.00019998444692539348, - "loss": 46.0, - "step": 34872 - }, - { - "epoch": 5.616047344901164, - "grad_norm": 0.005028403364121914, - "learning_rate": 0.00019998444603313865, - "loss": 46.0, - "step": 34873 - }, - { - "epoch": 5.616208381979951, - "grad_norm": 0.007739054970443249, - "learning_rate": 0.00019998444514085818, - "loss": 46.0, - "step": 34874 - }, - { - "epoch": 5.6163694190587385, - "grad_norm": 0.003156814491376281, - "learning_rate": 0.00019998444424855214, - "loss": 46.0, - "step": 34875 - }, - { - "epoch": 5.616530456137526, - "grad_norm": 0.00888319406658411, - "learning_rate": 0.0001999844433562205, - "loss": 46.0, - "step": 34876 - }, - { - "epoch": 5.6166914932163134, - "grad_norm": 0.017981408163905144, - "learning_rate": 0.00019998444246386327, - "loss": 46.0, - "step": 34877 - }, - { - "epoch": 5.6168525302951, - "grad_norm": 0.004737422801554203, - "learning_rate": 0.00019998444157148047, - "loss": 46.0, - "step": 34878 - }, - { - "epoch": 5.6170135673738875, - "grad_norm": 0.004558139014989138, - "learning_rate": 0.00019998444067907207, - "loss": 46.0, - "step": 34879 - }, - { - "epoch": 5.617174604452675, - "grad_norm": 0.004083253908902407, - "learning_rate": 0.00019998443978663807, - "loss": 46.0, - "step": 34880 - }, - { - "epoch": 5.617335641531462, - "grad_norm": 0.0034036540891975164, - "learning_rate": 0.0001999844388941785, - "loss": 46.0, - "step": 34881 - }, - { - "epoch": 5.61749667861025, - "grad_norm": 0.020808685570955276, - "learning_rate": 0.00019998443800169332, - "loss": 46.0, - "step": 34882 - }, - { - "epoch": 5.617657715689037, - "grad_norm": 0.004127429332584143, - "learning_rate": 0.00019998443710918254, - "loss": 46.0, - "step": 34883 - }, - { - "epoch": 5.617818752767825, - "grad_norm": 0.008937763050198555, - "learning_rate": 0.00019998443621664617, - "loss": 46.0, - "step": 34884 - }, - { - "epoch": 5.617979789846612, - "grad_norm": 0.0021199514158070087, - "learning_rate": 0.00019998443532408424, - "loss": 46.0, - "step": 34885 - }, - { - "epoch": 5.6181408269254, - "grad_norm": 0.009585599415004253, - "learning_rate": 0.0001999844344314967, - "loss": 46.0, - "step": 34886 - }, - { - "epoch": 5.618301864004187, - "grad_norm": 0.0030037083197385073, - "learning_rate": 0.00019998443353888353, - "loss": 46.0, - "step": 34887 - }, - { - "epoch": 5.6184629010829745, - "grad_norm": 0.008482609875500202, - "learning_rate": 0.00019998443264624481, - "loss": 46.0, - "step": 34888 - }, - { - "epoch": 5.618623938161762, - "grad_norm": 0.0035214766394346952, - "learning_rate": 0.00019998443175358053, - "loss": 46.0, - "step": 34889 - }, - { - "epoch": 5.618784975240549, - "grad_norm": 0.004729463253170252, - "learning_rate": 0.00019998443086089061, - "loss": 46.0, - "step": 34890 - }, - { - "epoch": 5.618946012319337, - "grad_norm": 0.0066327378153800964, - "learning_rate": 0.0001999844299681751, - "loss": 46.0, - "step": 34891 - }, - { - "epoch": 5.619107049398124, - "grad_norm": 0.0029992908239364624, - "learning_rate": 0.000199984429075434, - "loss": 46.0, - "step": 34892 - }, - { - "epoch": 5.619268086476911, - "grad_norm": 0.005086920224130154, - "learning_rate": 0.00019998442818266736, - "loss": 46.0, - "step": 34893 - }, - { - "epoch": 5.619429123555698, - "grad_norm": 0.0014695702120661736, - "learning_rate": 0.0001999844272898751, - "loss": 46.0, - "step": 34894 - }, - { - "epoch": 5.619590160634486, - "grad_norm": 0.002051666146144271, - "learning_rate": 0.0001999844263970572, - "loss": 46.0, - "step": 34895 - }, - { - "epoch": 5.619751197713273, - "grad_norm": 0.00768818985670805, - "learning_rate": 0.00019998442550421376, - "loss": 46.0, - "step": 34896 - }, - { - "epoch": 5.619912234792061, - "grad_norm": 0.006151929497718811, - "learning_rate": 0.00019998442461134473, - "loss": 46.0, - "step": 34897 - }, - { - "epoch": 5.620073271870848, - "grad_norm": 0.002680000849068165, - "learning_rate": 0.00019998442371845009, - "loss": 46.0, - "step": 34898 - }, - { - "epoch": 5.620234308949636, - "grad_norm": 0.005544870160520077, - "learning_rate": 0.00019998442282552988, - "loss": 46.0, - "step": 34899 - }, - { - "epoch": 5.620395346028423, - "grad_norm": 0.009820745326578617, - "learning_rate": 0.00019998442193258406, - "loss": 46.0, - "step": 34900 - }, - { - "epoch": 5.6205563831072105, - "grad_norm": 0.00863034836947918, - "learning_rate": 0.00019998442103961263, - "loss": 46.0, - "step": 34901 - }, - { - "epoch": 5.620717420185998, - "grad_norm": 0.0014620324363932014, - "learning_rate": 0.00019998442014661566, - "loss": 46.0, - "step": 34902 - }, - { - "epoch": 5.620878457264785, - "grad_norm": 0.012427587062120438, - "learning_rate": 0.00019998441925359308, - "loss": 46.0, - "step": 34903 - }, - { - "epoch": 5.621039494343573, - "grad_norm": 0.0019669944886118174, - "learning_rate": 0.00019998441836054488, - "loss": 46.0, - "step": 34904 - }, - { - "epoch": 5.62120053142236, - "grad_norm": 0.0017034054035320878, - "learning_rate": 0.00019998441746747113, - "loss": 46.0, - "step": 34905 - }, - { - "epoch": 5.621361568501148, - "grad_norm": 0.005487917456775904, - "learning_rate": 0.00019998441657437176, - "loss": 46.0, - "step": 34906 - }, - { - "epoch": 5.621522605579935, - "grad_norm": 0.0028911111876368523, - "learning_rate": 0.0001999844156812468, - "loss": 46.0, - "step": 34907 - }, - { - "epoch": 5.621683642658722, - "grad_norm": 0.005043532233685255, - "learning_rate": 0.00019998441478809626, - "loss": 46.0, - "step": 34908 - }, - { - "epoch": 5.621844679737509, - "grad_norm": 0.018376145511865616, - "learning_rate": 0.00019998441389492016, - "loss": 46.0, - "step": 34909 - }, - { - "epoch": 5.622005716816297, - "grad_norm": 0.002302020788192749, - "learning_rate": 0.0001999844130017184, - "loss": 46.0, - "step": 34910 - }, - { - "epoch": 5.622166753895084, - "grad_norm": 0.001525710104033351, - "learning_rate": 0.0001999844121084911, - "loss": 46.0, - "step": 34911 - }, - { - "epoch": 5.622327790973872, - "grad_norm": 0.004479615483433008, - "learning_rate": 0.0001999844112152382, - "loss": 46.0, - "step": 34912 - }, - { - "epoch": 5.622488828052659, - "grad_norm": 0.0035913384053856134, - "learning_rate": 0.00019998441032195968, - "loss": 46.0, - "step": 34913 - }, - { - "epoch": 5.6226498651314465, - "grad_norm": 0.0014180107973515987, - "learning_rate": 0.0001999844094286556, - "loss": 46.0, - "step": 34914 - }, - { - "epoch": 5.622810902210234, - "grad_norm": 0.010112438350915909, - "learning_rate": 0.00019998440853532593, - "loss": 46.0, - "step": 34915 - }, - { - "epoch": 5.622971939289021, - "grad_norm": 0.0008956961100921035, - "learning_rate": 0.00019998440764197066, - "loss": 46.0, - "step": 34916 - }, - { - "epoch": 5.623132976367809, - "grad_norm": 0.005347180180251598, - "learning_rate": 0.00019998440674858984, - "loss": 46.0, - "step": 34917 - }, - { - "epoch": 5.623294013446596, - "grad_norm": 0.0007655193912796676, - "learning_rate": 0.00019998440585518337, - "loss": 46.0, - "step": 34918 - }, - { - "epoch": 5.623455050525384, - "grad_norm": 0.009878878481686115, - "learning_rate": 0.00019998440496175134, - "loss": 46.0, - "step": 34919 - }, - { - "epoch": 5.623616087604171, - "grad_norm": 0.015162709169089794, - "learning_rate": 0.0001999844040682937, - "loss": 46.0, - "step": 34920 - }, - { - "epoch": 5.623777124682958, - "grad_norm": 0.008933644741773605, - "learning_rate": 0.00019998440317481046, - "loss": 46.0, - "step": 34921 - }, - { - "epoch": 5.623938161761746, - "grad_norm": 0.008423752151429653, - "learning_rate": 0.00019998440228130164, - "loss": 46.0, - "step": 34922 - }, - { - "epoch": 5.624099198840533, - "grad_norm": 0.010797766037285328, - "learning_rate": 0.00019998440138776724, - "loss": 46.0, - "step": 34923 - }, - { - "epoch": 5.62426023591932, - "grad_norm": 0.001507542678155005, - "learning_rate": 0.00019998440049420727, - "loss": 46.0, - "step": 34924 - }, - { - "epoch": 5.624421272998108, - "grad_norm": 0.0038560088723897934, - "learning_rate": 0.00019998439960062166, - "loss": 46.0, - "step": 34925 - }, - { - "epoch": 5.624582310076895, - "grad_norm": 0.0227101631462574, - "learning_rate": 0.0001999843987070105, - "loss": 46.0, - "step": 34926 - }, - { - "epoch": 5.6247433471556825, - "grad_norm": 0.00186068972107023, - "learning_rate": 0.00019998439781337372, - "loss": 46.0, - "step": 34927 - }, - { - "epoch": 5.62490438423447, - "grad_norm": 0.00811269972473383, - "learning_rate": 0.00019998439691971135, - "loss": 46.0, - "step": 34928 - }, - { - "epoch": 5.625065421313257, - "grad_norm": 0.0022010605316609144, - "learning_rate": 0.00019998439602602342, - "loss": 46.0, - "step": 34929 - }, - { - "epoch": 5.625226458392045, - "grad_norm": 0.005052570253610611, - "learning_rate": 0.00019998439513230988, - "loss": 46.0, - "step": 34930 - }, - { - "epoch": 5.625387495470832, - "grad_norm": 0.00520938728004694, - "learning_rate": 0.00019998439423857075, - "loss": 46.0, - "step": 34931 - }, - { - "epoch": 5.62554853254962, - "grad_norm": 0.0055502429604530334, - "learning_rate": 0.000199984393344806, - "loss": 46.0, - "step": 34932 - }, - { - "epoch": 5.625709569628407, - "grad_norm": 0.005287414416670799, - "learning_rate": 0.00019998439245101572, - "loss": 46.0, - "step": 34933 - }, - { - "epoch": 5.625870606707195, - "grad_norm": 0.007076835725456476, - "learning_rate": 0.0001999843915571998, - "loss": 46.0, - "step": 34934 - }, - { - "epoch": 5.626031643785982, - "grad_norm": 0.0077497451566159725, - "learning_rate": 0.00019998439066335833, - "loss": 46.0, - "step": 34935 - }, - { - "epoch": 5.626192680864769, - "grad_norm": 0.00195614667609334, - "learning_rate": 0.0001999843897694912, - "loss": 46.0, - "step": 34936 - }, - { - "epoch": 5.626353717943557, - "grad_norm": 0.00780531158670783, - "learning_rate": 0.00019998438887559855, - "loss": 46.0, - "step": 34937 - }, - { - "epoch": 5.6265147550223436, - "grad_norm": 0.0029682728927582502, - "learning_rate": 0.00019998438798168029, - "loss": 46.0, - "step": 34938 - }, - { - "epoch": 5.626675792101131, - "grad_norm": 0.0064385319128632545, - "learning_rate": 0.00019998438708773643, - "loss": 46.0, - "step": 34939 - }, - { - "epoch": 5.6268368291799185, - "grad_norm": 0.00434201629832387, - "learning_rate": 0.00019998438619376696, - "loss": 46.0, - "step": 34940 - }, - { - "epoch": 5.626997866258706, - "grad_norm": 0.002590473974123597, - "learning_rate": 0.00019998438529977196, - "loss": 46.0, - "step": 34941 - }, - { - "epoch": 5.627158903337493, - "grad_norm": 0.0050941938534379005, - "learning_rate": 0.00019998438440575132, - "loss": 46.0, - "step": 34942 - }, - { - "epoch": 5.627319940416281, - "grad_norm": 0.0027847914025187492, - "learning_rate": 0.0001999843835117051, - "loss": 46.0, - "step": 34943 - }, - { - "epoch": 5.627480977495068, - "grad_norm": 0.008297148160636425, - "learning_rate": 0.00019998438261763327, - "loss": 46.0, - "step": 34944 - }, - { - "epoch": 5.627642014573856, - "grad_norm": 0.0013164766132831573, - "learning_rate": 0.00019998438172353587, - "loss": 46.0, - "step": 34945 - }, - { - "epoch": 5.627803051652643, - "grad_norm": 0.014140353538095951, - "learning_rate": 0.00019998438082941287, - "loss": 46.0, - "step": 34946 - }, - { - "epoch": 5.627964088731431, - "grad_norm": 0.01257090549916029, - "learning_rate": 0.0001999843799352643, - "loss": 46.0, - "step": 34947 - }, - { - "epoch": 5.628125125810218, - "grad_norm": 0.012158217839896679, - "learning_rate": 0.0001999843790410901, - "loss": 46.0, - "step": 34948 - }, - { - "epoch": 5.6282861628890055, - "grad_norm": 0.005803667474538088, - "learning_rate": 0.00019998437814689032, - "loss": 46.0, - "step": 34949 - }, - { - "epoch": 5.628447199967793, - "grad_norm": 0.010137118399143219, - "learning_rate": 0.00019998437725266495, - "loss": 46.0, - "step": 34950 - }, - { - "epoch": 5.6286082370465795, - "grad_norm": 0.002603465225547552, - "learning_rate": 0.00019998437635841403, - "loss": 46.0, - "step": 34951 - }, - { - "epoch": 5.628769274125367, - "grad_norm": 0.008865314535796642, - "learning_rate": 0.0001999843754641375, - "loss": 46.0, - "step": 34952 - }, - { - "epoch": 5.628930311204154, - "grad_norm": 0.0072078160010278225, - "learning_rate": 0.00019998437456983536, - "loss": 46.0, - "step": 34953 - }, - { - "epoch": 5.629091348282942, - "grad_norm": 0.0025421488098800182, - "learning_rate": 0.00019998437367550764, - "loss": 46.0, - "step": 34954 - }, - { - "epoch": 5.629252385361729, - "grad_norm": 0.0029978107195347548, - "learning_rate": 0.00019998437278115434, - "loss": 46.0, - "step": 34955 - }, - { - "epoch": 5.629413422440517, - "grad_norm": 0.004747950471937656, - "learning_rate": 0.00019998437188677542, - "loss": 46.0, - "step": 34956 - }, - { - "epoch": 5.629574459519304, - "grad_norm": 0.02354920655488968, - "learning_rate": 0.00019998437099237094, - "loss": 46.0, - "step": 34957 - }, - { - "epoch": 5.629735496598092, - "grad_norm": 0.005115113221108913, - "learning_rate": 0.00019998437009794085, - "loss": 46.0, - "step": 34958 - }, - { - "epoch": 5.629896533676879, - "grad_norm": 0.0012572644045576453, - "learning_rate": 0.00019998436920348517, - "loss": 46.0, - "step": 34959 - }, - { - "epoch": 5.630057570755667, - "grad_norm": 0.001496284268796444, - "learning_rate": 0.0001999843683090039, - "loss": 46.0, - "step": 34960 - }, - { - "epoch": 5.630218607834454, - "grad_norm": 0.0006963298073969781, - "learning_rate": 0.00019998436741449705, - "loss": 46.0, - "step": 34961 - }, - { - "epoch": 5.6303796449132415, - "grad_norm": 0.017010528594255447, - "learning_rate": 0.00019998436651996458, - "loss": 46.0, - "step": 34962 - }, - { - "epoch": 5.630540681992029, - "grad_norm": 0.0035441096406430006, - "learning_rate": 0.00019998436562540658, - "loss": 46.0, - "step": 34963 - }, - { - "epoch": 5.630701719070816, - "grad_norm": 0.002389661269262433, - "learning_rate": 0.0001999843647308229, - "loss": 46.0, - "step": 34964 - }, - { - "epoch": 5.630862756149604, - "grad_norm": 0.0030964016914367676, - "learning_rate": 0.0001999843638362137, - "loss": 46.0, - "step": 34965 - }, - { - "epoch": 5.63102379322839, - "grad_norm": 0.0011341158533468843, - "learning_rate": 0.0001999843629415789, - "loss": 46.0, - "step": 34966 - }, - { - "epoch": 5.631184830307178, - "grad_norm": 0.0012163082137703896, - "learning_rate": 0.0001999843620469185, - "loss": 46.0, - "step": 34967 - }, - { - "epoch": 5.631345867385965, - "grad_norm": 0.006449075415730476, - "learning_rate": 0.00019998436115223252, - "loss": 46.0, - "step": 34968 - }, - { - "epoch": 5.631506904464753, - "grad_norm": 0.014941618777811527, - "learning_rate": 0.00019998436025752092, - "loss": 46.0, - "step": 34969 - }, - { - "epoch": 5.63166794154354, - "grad_norm": 0.005751014221459627, - "learning_rate": 0.00019998435936278375, - "loss": 46.0, - "step": 34970 - }, - { - "epoch": 5.631828978622328, - "grad_norm": 0.00354705355130136, - "learning_rate": 0.00019998435846802097, - "loss": 46.0, - "step": 34971 - }, - { - "epoch": 5.631990015701115, - "grad_norm": 0.0028325680177658796, - "learning_rate": 0.0001999843575732326, - "loss": 46.0, - "step": 34972 - }, - { - "epoch": 5.632151052779903, - "grad_norm": 0.004960860125720501, - "learning_rate": 0.00019998435667841865, - "loss": 46.0, - "step": 34973 - }, - { - "epoch": 5.63231208985869, - "grad_norm": 0.02809079922735691, - "learning_rate": 0.0001999843557835791, - "loss": 46.0, - "step": 34974 - }, - { - "epoch": 5.6324731269374775, - "grad_norm": 0.011901609599590302, - "learning_rate": 0.000199984354888714, - "loss": 46.0, - "step": 34975 - }, - { - "epoch": 5.632634164016265, - "grad_norm": 0.005198607221245766, - "learning_rate": 0.00019998435399382327, - "loss": 46.0, - "step": 34976 - }, - { - "epoch": 5.632795201095052, - "grad_norm": 0.020701995119452477, - "learning_rate": 0.00019998435309890697, - "loss": 46.0, - "step": 34977 - }, - { - "epoch": 5.63295623817384, - "grad_norm": 0.015023778192698956, - "learning_rate": 0.00019998435220396505, - "loss": 46.0, - "step": 34978 - }, - { - "epoch": 5.633117275252627, - "grad_norm": 0.014441955834627151, - "learning_rate": 0.00019998435130899757, - "loss": 46.0, - "step": 34979 - }, - { - "epoch": 5.633278312331415, - "grad_norm": 0.0021402090787887573, - "learning_rate": 0.00019998435041400448, - "loss": 46.0, - "step": 34980 - }, - { - "epoch": 5.633439349410201, - "grad_norm": 0.004062502179294825, - "learning_rate": 0.0001999843495189858, - "loss": 46.0, - "step": 34981 - }, - { - "epoch": 5.633600386488989, - "grad_norm": 0.006026627030223608, - "learning_rate": 0.00019998434862394157, - "loss": 46.0, - "step": 34982 - }, - { - "epoch": 5.633761423567776, - "grad_norm": 0.0017059509409591556, - "learning_rate": 0.00019998434772887169, - "loss": 46.0, - "step": 34983 - }, - { - "epoch": 5.633922460646564, - "grad_norm": 0.009850052185356617, - "learning_rate": 0.00019998434683377625, - "loss": 46.0, - "step": 34984 - }, - { - "epoch": 5.634083497725351, - "grad_norm": 0.0048669311217963696, - "learning_rate": 0.0001999843459386552, - "loss": 46.0, - "step": 34985 - }, - { - "epoch": 5.634244534804139, - "grad_norm": 0.004842791706323624, - "learning_rate": 0.00019998434504350858, - "loss": 46.0, - "step": 34986 - }, - { - "epoch": 5.634405571882926, - "grad_norm": 0.0023919185623526573, - "learning_rate": 0.00019998434414833635, - "loss": 46.0, - "step": 34987 - }, - { - "epoch": 5.6345666089617135, - "grad_norm": 0.005478554870933294, - "learning_rate": 0.00019998434325313856, - "loss": 46.0, - "step": 34988 - }, - { - "epoch": 5.634727646040501, - "grad_norm": 0.006147760897874832, - "learning_rate": 0.00019998434235791513, - "loss": 46.0, - "step": 34989 - }, - { - "epoch": 5.634888683119288, - "grad_norm": 0.002799938665702939, - "learning_rate": 0.00019998434146266617, - "loss": 46.0, - "step": 34990 - }, - { - "epoch": 5.635049720198076, - "grad_norm": 0.004861291963607073, - "learning_rate": 0.00019998434056739156, - "loss": 46.0, - "step": 34991 - }, - { - "epoch": 5.635210757276863, - "grad_norm": 0.01334360335022211, - "learning_rate": 0.0001999843396720914, - "loss": 46.0, - "step": 34992 - }, - { - "epoch": 5.635371794355651, - "grad_norm": 0.007166545372456312, - "learning_rate": 0.00019998433877676562, - "loss": 46.0, - "step": 34993 - }, - { - "epoch": 5.635532831434437, - "grad_norm": 0.0013273741351440549, - "learning_rate": 0.00019998433788141426, - "loss": 46.0, - "step": 34994 - }, - { - "epoch": 5.635693868513226, - "grad_norm": 0.0039773741737008095, - "learning_rate": 0.00019998433698603733, - "loss": 46.0, - "step": 34995 - }, - { - "epoch": 5.635854905592012, - "grad_norm": 0.004289644304662943, - "learning_rate": 0.0001999843360906348, - "loss": 46.0, - "step": 34996 - }, - { - "epoch": 5.6360159426708, - "grad_norm": 0.0035704439505934715, - "learning_rate": 0.00019998433519520666, - "loss": 46.0, - "step": 34997 - }, - { - "epoch": 5.636176979749587, - "grad_norm": 0.0017077274387702346, - "learning_rate": 0.00019998433429975295, - "loss": 46.0, - "step": 34998 - }, - { - "epoch": 5.6363380168283745, - "grad_norm": 0.008026796393096447, - "learning_rate": 0.00019998433340427365, - "loss": 46.0, - "step": 34999 - }, - { - "epoch": 5.636499053907162, - "grad_norm": 0.010748764500021935, - "learning_rate": 0.00019998433250876873, - "loss": 46.0, - "step": 35000 - }, - { - "epoch": 5.6366600909859494, - "grad_norm": 0.003786412300541997, - "learning_rate": 0.00019998433161323823, - "loss": 46.0, - "step": 35001 - }, - { - "epoch": 5.636821128064737, - "grad_norm": 0.006463125813752413, - "learning_rate": 0.00019998433071768216, - "loss": 46.0, - "step": 35002 - }, - { - "epoch": 5.636982165143524, - "grad_norm": 0.0012253804598003626, - "learning_rate": 0.00019998432982210046, - "loss": 46.0, - "step": 35003 - }, - { - "epoch": 5.637143202222312, - "grad_norm": 0.007236053701490164, - "learning_rate": 0.00019998432892649325, - "loss": 46.0, - "step": 35004 - }, - { - "epoch": 5.637304239301099, - "grad_norm": 0.0021753734908998013, - "learning_rate": 0.00019998432803086037, - "loss": 46.0, - "step": 35005 - }, - { - "epoch": 5.637465276379887, - "grad_norm": 0.0015616790624335408, - "learning_rate": 0.00019998432713520193, - "loss": 46.0, - "step": 35006 - }, - { - "epoch": 5.637626313458674, - "grad_norm": 0.0020285514183342457, - "learning_rate": 0.00019998432623951788, - "loss": 46.0, - "step": 35007 - }, - { - "epoch": 5.637787350537462, - "grad_norm": 0.0070483749732375145, - "learning_rate": 0.00019998432534380824, - "loss": 46.0, - "step": 35008 - }, - { - "epoch": 5.637948387616248, - "grad_norm": 0.002363958628848195, - "learning_rate": 0.000199984324448073, - "loss": 46.0, - "step": 35009 - }, - { - "epoch": 5.6381094246950365, - "grad_norm": 0.009358027018606663, - "learning_rate": 0.00019998432355231222, - "loss": 46.0, - "step": 35010 - }, - { - "epoch": 5.638270461773823, - "grad_norm": 0.0037781603168696165, - "learning_rate": 0.00019998432265652582, - "loss": 46.0, - "step": 35011 - }, - { - "epoch": 5.6384314988526105, - "grad_norm": 0.0015166779048740864, - "learning_rate": 0.00019998432176071383, - "loss": 46.0, - "step": 35012 - }, - { - "epoch": 5.638592535931398, - "grad_norm": 0.0035782321356236935, - "learning_rate": 0.00019998432086487626, - "loss": 46.0, - "step": 35013 - }, - { - "epoch": 5.638753573010185, - "grad_norm": 0.014220730401575565, - "learning_rate": 0.00019998431996901307, - "loss": 46.0, - "step": 35014 - }, - { - "epoch": 5.638914610088973, - "grad_norm": 0.0014554974623024464, - "learning_rate": 0.00019998431907312431, - "loss": 46.0, - "step": 35015 - }, - { - "epoch": 5.63907564716776, - "grad_norm": 0.010806080885231495, - "learning_rate": 0.00019998431817720995, - "loss": 46.0, - "step": 35016 - }, - { - "epoch": 5.639236684246548, - "grad_norm": 0.0052486443892121315, - "learning_rate": 0.00019998431728127, - "loss": 46.0, - "step": 35017 - }, - { - "epoch": 5.639397721325335, - "grad_norm": 0.006050060968846083, - "learning_rate": 0.00019998431638530449, - "loss": 46.0, - "step": 35018 - }, - { - "epoch": 5.639558758404123, - "grad_norm": 0.0034774153027683496, - "learning_rate": 0.00019998431548931333, - "loss": 46.0, - "step": 35019 - }, - { - "epoch": 5.63971979548291, - "grad_norm": 0.01162722334265709, - "learning_rate": 0.00019998431459329662, - "loss": 46.0, - "step": 35020 - }, - { - "epoch": 5.639880832561698, - "grad_norm": 0.0065436819568276405, - "learning_rate": 0.00019998431369725432, - "loss": 46.0, - "step": 35021 - }, - { - "epoch": 5.640041869640485, - "grad_norm": 0.0037487749941647053, - "learning_rate": 0.0001999843128011864, - "loss": 46.0, - "step": 35022 - }, - { - "epoch": 5.6402029067192725, - "grad_norm": 0.003316734917461872, - "learning_rate": 0.00019998431190509293, - "loss": 46.0, - "step": 35023 - }, - { - "epoch": 5.640363943798059, - "grad_norm": 0.0021634784061461687, - "learning_rate": 0.00019998431100897387, - "loss": 46.0, - "step": 35024 - }, - { - "epoch": 5.6405249808768465, - "grad_norm": 0.0069904625415802, - "learning_rate": 0.0001999843101128292, - "loss": 46.0, - "step": 35025 - }, - { - "epoch": 5.640686017955634, - "grad_norm": 0.006361029576510191, - "learning_rate": 0.00019998430921665893, - "loss": 46.0, - "step": 35026 - }, - { - "epoch": 5.640847055034421, - "grad_norm": 0.007781515829265118, - "learning_rate": 0.00019998430832046307, - "loss": 46.0, - "step": 35027 - }, - { - "epoch": 5.641008092113209, - "grad_norm": 0.0010851513361558318, - "learning_rate": 0.0001999843074242416, - "loss": 46.0, - "step": 35028 - }, - { - "epoch": 5.641169129191996, - "grad_norm": 0.009033090434968472, - "learning_rate": 0.00019998430652799458, - "loss": 46.0, - "step": 35029 - }, - { - "epoch": 5.641330166270784, - "grad_norm": 0.017299111932516098, - "learning_rate": 0.00019998430563172195, - "loss": 46.0, - "step": 35030 - }, - { - "epoch": 5.641491203349571, - "grad_norm": 0.004509763792157173, - "learning_rate": 0.00019998430473542375, - "loss": 46.0, - "step": 35031 - }, - { - "epoch": 5.641652240428359, - "grad_norm": 0.0031622734386473894, - "learning_rate": 0.0001999843038390999, - "loss": 46.0, - "step": 35032 - }, - { - "epoch": 5.641813277507146, - "grad_norm": 0.0007791734533384442, - "learning_rate": 0.00019998430294275053, - "loss": 46.0, - "step": 35033 - }, - { - "epoch": 5.641974314585934, - "grad_norm": 0.008482211269438267, - "learning_rate": 0.00019998430204637554, - "loss": 46.0, - "step": 35034 - }, - { - "epoch": 5.642135351664721, - "grad_norm": 0.0015810014447197318, - "learning_rate": 0.00019998430114997494, - "loss": 46.0, - "step": 35035 - }, - { - "epoch": 5.6422963887435085, - "grad_norm": 0.004794060252606869, - "learning_rate": 0.00019998430025354878, - "loss": 46.0, - "step": 35036 - }, - { - "epoch": 5.642457425822296, - "grad_norm": 0.0037337609101086855, - "learning_rate": 0.00019998429935709703, - "loss": 46.0, - "step": 35037 - }, - { - "epoch": 5.642618462901083, - "grad_norm": 0.004877662286162376, - "learning_rate": 0.0001999842984606197, - "loss": 46.0, - "step": 35038 - }, - { - "epoch": 5.64277949997987, - "grad_norm": 0.0019458206370472908, - "learning_rate": 0.0001999842975641167, - "loss": 46.0, - "step": 35039 - }, - { - "epoch": 5.642940537058657, - "grad_norm": 0.004025063943117857, - "learning_rate": 0.00019998429666758818, - "loss": 46.0, - "step": 35040 - }, - { - "epoch": 5.643101574137445, - "grad_norm": 0.013048715889453888, - "learning_rate": 0.00019998429577103405, - "loss": 46.0, - "step": 35041 - }, - { - "epoch": 5.643262611216232, - "grad_norm": 0.004395760595798492, - "learning_rate": 0.00019998429487445436, - "loss": 46.0, - "step": 35042 - }, - { - "epoch": 5.64342364829502, - "grad_norm": 0.001118919113650918, - "learning_rate": 0.00019998429397784904, - "loss": 46.0, - "step": 35043 - }, - { - "epoch": 5.643584685373807, - "grad_norm": 0.002619420178234577, - "learning_rate": 0.00019998429308121812, - "loss": 46.0, - "step": 35044 - }, - { - "epoch": 5.643745722452595, - "grad_norm": 0.015735985711216927, - "learning_rate": 0.00019998429218456165, - "loss": 46.0, - "step": 35045 - }, - { - "epoch": 5.643906759531382, - "grad_norm": 0.0015861950814723969, - "learning_rate": 0.00019998429128787956, - "loss": 46.0, - "step": 35046 - }, - { - "epoch": 5.6440677966101696, - "grad_norm": 0.004166461061686277, - "learning_rate": 0.0001999842903911719, - "loss": 46.0, - "step": 35047 - }, - { - "epoch": 5.644228833688957, - "grad_norm": 0.0022016323637217283, - "learning_rate": 0.00019998428949443865, - "loss": 46.0, - "step": 35048 - }, - { - "epoch": 5.6443898707677445, - "grad_norm": 0.00824781134724617, - "learning_rate": 0.0001999842885976798, - "loss": 46.0, - "step": 35049 - }, - { - "epoch": 5.644550907846532, - "grad_norm": 0.00235876371152699, - "learning_rate": 0.00019998428770089534, - "loss": 46.0, - "step": 35050 - }, - { - "epoch": 5.644711944925319, - "grad_norm": 0.009497219696640968, - "learning_rate": 0.00019998428680408528, - "loss": 46.0, - "step": 35051 - }, - { - "epoch": 5.644872982004107, - "grad_norm": 0.007655688561499119, - "learning_rate": 0.0001999842859072497, - "loss": 46.0, - "step": 35052 - }, - { - "epoch": 5.645034019082894, - "grad_norm": 0.00322348834015429, - "learning_rate": 0.00019998428501038847, - "loss": 46.0, - "step": 35053 - }, - { - "epoch": 5.645195056161681, - "grad_norm": 0.004492009524255991, - "learning_rate": 0.00019998428411350166, - "loss": 46.0, - "step": 35054 - }, - { - "epoch": 5.645356093240468, - "grad_norm": 0.0016542176017537713, - "learning_rate": 0.00019998428321658923, - "loss": 46.0, - "step": 35055 - }, - { - "epoch": 5.645517130319256, - "grad_norm": 0.006261840928345919, - "learning_rate": 0.00019998428231965125, - "loss": 46.0, - "step": 35056 - }, - { - "epoch": 5.645678167398043, - "grad_norm": 0.0060275401920080185, - "learning_rate": 0.00019998428142268767, - "loss": 46.0, - "step": 35057 - }, - { - "epoch": 5.645839204476831, - "grad_norm": 0.00190097582526505, - "learning_rate": 0.00019998428052569854, - "loss": 46.0, - "step": 35058 - }, - { - "epoch": 5.646000241555618, - "grad_norm": 0.00442290585488081, - "learning_rate": 0.00019998427962868376, - "loss": 46.0, - "step": 35059 - }, - { - "epoch": 5.6461612786344055, - "grad_norm": 0.0058402190916240215, - "learning_rate": 0.0001999842787316434, - "loss": 46.0, - "step": 35060 - }, - { - "epoch": 5.646322315713193, - "grad_norm": 0.0011229125084355474, - "learning_rate": 0.00019998427783457748, - "loss": 46.0, - "step": 35061 - }, - { - "epoch": 5.64648335279198, - "grad_norm": 0.011011681519448757, - "learning_rate": 0.0001999842769374859, - "loss": 46.0, - "step": 35062 - }, - { - "epoch": 5.646644389870768, - "grad_norm": 0.005655078683048487, - "learning_rate": 0.00019998427604036881, - "loss": 46.0, - "step": 35063 - }, - { - "epoch": 5.646805426949555, - "grad_norm": 0.003370630322024226, - "learning_rate": 0.00019998427514322608, - "loss": 46.0, - "step": 35064 - }, - { - "epoch": 5.646966464028343, - "grad_norm": 0.018145233392715454, - "learning_rate": 0.00019998427424605778, - "loss": 46.0, - "step": 35065 - }, - { - "epoch": 5.64712750110713, - "grad_norm": 0.003374564927071333, - "learning_rate": 0.0001999842733488639, - "loss": 46.0, - "step": 35066 - }, - { - "epoch": 5.647288538185918, - "grad_norm": 0.0029213225934654474, - "learning_rate": 0.0001999842724516444, - "loss": 46.0, - "step": 35067 - }, - { - "epoch": 5.647449575264705, - "grad_norm": 0.004805019125342369, - "learning_rate": 0.0001999842715543993, - "loss": 46.0, - "step": 35068 - }, - { - "epoch": 5.647610612343492, - "grad_norm": 0.0009276751079596579, - "learning_rate": 0.00019998427065712866, - "loss": 46.0, - "step": 35069 - }, - { - "epoch": 5.647771649422279, - "grad_norm": 0.0015234776074066758, - "learning_rate": 0.00019998426975983237, - "loss": 46.0, - "step": 35070 - }, - { - "epoch": 5.647932686501067, - "grad_norm": 0.010391012765467167, - "learning_rate": 0.00019998426886251052, - "loss": 46.0, - "step": 35071 - }, - { - "epoch": 5.648093723579854, - "grad_norm": 0.0023990606423467398, - "learning_rate": 0.00019998426796516308, - "loss": 46.0, - "step": 35072 - }, - { - "epoch": 5.6482547606586415, - "grad_norm": 0.0038860745262354612, - "learning_rate": 0.00019998426706779006, - "loss": 46.0, - "step": 35073 - }, - { - "epoch": 5.648415797737429, - "grad_norm": 0.005859003867954016, - "learning_rate": 0.00019998426617039142, - "loss": 46.0, - "step": 35074 - }, - { - "epoch": 5.648576834816216, - "grad_norm": 0.0016620560782030225, - "learning_rate": 0.00019998426527296722, - "loss": 46.0, - "step": 35075 - }, - { - "epoch": 5.648737871895004, - "grad_norm": 0.007439692039042711, - "learning_rate": 0.0001999842643755174, - "loss": 46.0, - "step": 35076 - }, - { - "epoch": 5.648898908973791, - "grad_norm": 0.0034636766649782658, - "learning_rate": 0.000199984263478042, - "loss": 46.0, - "step": 35077 - }, - { - "epoch": 5.649059946052579, - "grad_norm": 0.004421850200742483, - "learning_rate": 0.00019998426258054103, - "loss": 46.0, - "step": 35078 - }, - { - "epoch": 5.649220983131366, - "grad_norm": 0.004357545170933008, - "learning_rate": 0.00019998426168301445, - "loss": 46.0, - "step": 35079 - }, - { - "epoch": 5.649382020210154, - "grad_norm": 0.0015708237187936902, - "learning_rate": 0.0001999842607854623, - "loss": 46.0, - "step": 35080 - }, - { - "epoch": 5.649543057288941, - "grad_norm": 0.005285588093101978, - "learning_rate": 0.00019998425988788452, - "loss": 46.0, - "step": 35081 - }, - { - "epoch": 5.649704094367728, - "grad_norm": 0.010058462619781494, - "learning_rate": 0.00019998425899028118, - "loss": 46.0, - "step": 35082 - }, - { - "epoch": 5.649865131446516, - "grad_norm": 0.009400948882102966, - "learning_rate": 0.00019998425809265223, - "loss": 46.0, - "step": 35083 - }, - { - "epoch": 5.650026168525303, - "grad_norm": 0.003386115189641714, - "learning_rate": 0.0001999842571949977, - "loss": 46.0, - "step": 35084 - }, - { - "epoch": 5.65018720560409, - "grad_norm": 0.023702038452029228, - "learning_rate": 0.00019998425629731757, - "loss": 46.0, - "step": 35085 - }, - { - "epoch": 5.6503482426828775, - "grad_norm": 0.0018651328282430768, - "learning_rate": 0.00019998425539961186, - "loss": 46.0, - "step": 35086 - }, - { - "epoch": 5.650509279761665, - "grad_norm": 0.00218061450868845, - "learning_rate": 0.00019998425450188054, - "loss": 46.0, - "step": 35087 - }, - { - "epoch": 5.650670316840452, - "grad_norm": 0.002754851710051298, - "learning_rate": 0.00019998425360412365, - "loss": 46.0, - "step": 35088 - }, - { - "epoch": 5.65083135391924, - "grad_norm": 0.001908688573166728, - "learning_rate": 0.00019998425270634118, - "loss": 46.0, - "step": 35089 - }, - { - "epoch": 5.650992390998027, - "grad_norm": 0.011842873878777027, - "learning_rate": 0.0001999842518085331, - "loss": 46.0, - "step": 35090 - }, - { - "epoch": 5.651153428076815, - "grad_norm": 0.0058770552277565, - "learning_rate": 0.00019998425091069942, - "loss": 46.0, - "step": 35091 - }, - { - "epoch": 5.651314465155602, - "grad_norm": 0.011124215088784695, - "learning_rate": 0.00019998425001284016, - "loss": 46.0, - "step": 35092 - }, - { - "epoch": 5.65147550223439, - "grad_norm": 0.0013297568075358868, - "learning_rate": 0.00019998424911495534, - "loss": 46.0, - "step": 35093 - }, - { - "epoch": 5.651636539313177, - "grad_norm": 0.0024605095386505127, - "learning_rate": 0.00019998424821704487, - "loss": 46.0, - "step": 35094 - }, - { - "epoch": 5.651797576391965, - "grad_norm": 0.0023545182775706053, - "learning_rate": 0.00019998424731910882, - "loss": 46.0, - "step": 35095 - }, - { - "epoch": 5.651958613470752, - "grad_norm": 0.00208808365277946, - "learning_rate": 0.00019998424642114722, - "loss": 46.0, - "step": 35096 - }, - { - "epoch": 5.652119650549539, - "grad_norm": 0.006952462252229452, - "learning_rate": 0.00019998424552316002, - "loss": 46.0, - "step": 35097 - }, - { - "epoch": 5.652280687628327, - "grad_norm": 0.0029632486402988434, - "learning_rate": 0.0001999842446251472, - "loss": 46.0, - "step": 35098 - }, - { - "epoch": 5.6524417247071135, - "grad_norm": 0.006912900134921074, - "learning_rate": 0.0001999842437271088, - "loss": 46.0, - "step": 35099 - }, - { - "epoch": 5.652602761785901, - "grad_norm": 0.012486535124480724, - "learning_rate": 0.00019998424282904482, - "loss": 46.0, - "step": 35100 - }, - { - "epoch": 5.652763798864688, - "grad_norm": 0.002211576560512185, - "learning_rate": 0.00019998424193095525, - "loss": 46.0, - "step": 35101 - }, - { - "epoch": 5.652924835943476, - "grad_norm": 0.0016587079735472798, - "learning_rate": 0.0001999842410328401, - "loss": 46.0, - "step": 35102 - }, - { - "epoch": 5.653085873022263, - "grad_norm": 0.0007916645845398307, - "learning_rate": 0.00019998424013469932, - "loss": 46.0, - "step": 35103 - }, - { - "epoch": 5.653246910101051, - "grad_norm": 0.006694192532449961, - "learning_rate": 0.00019998423923653298, - "loss": 46.0, - "step": 35104 - }, - { - "epoch": 5.653407947179838, - "grad_norm": 0.006458644289523363, - "learning_rate": 0.000199984238338341, - "loss": 46.0, - "step": 35105 - }, - { - "epoch": 5.653568984258626, - "grad_norm": 0.0032324877101927996, - "learning_rate": 0.0001999842374401235, - "loss": 46.0, - "step": 35106 - }, - { - "epoch": 5.653730021337413, - "grad_norm": 0.0024469299241900444, - "learning_rate": 0.00019998423654188038, - "loss": 46.0, - "step": 35107 - }, - { - "epoch": 5.6538910584162005, - "grad_norm": 0.0029714880511164665, - "learning_rate": 0.00019998423564361167, - "loss": 46.0, - "step": 35108 - }, - { - "epoch": 5.654052095494988, - "grad_norm": 0.006573381833732128, - "learning_rate": 0.00019998423474531734, - "loss": 46.0, - "step": 35109 - }, - { - "epoch": 5.6542131325737754, - "grad_norm": 0.005313725210726261, - "learning_rate": 0.00019998423384699746, - "loss": 46.0, - "step": 35110 - }, - { - "epoch": 5.654374169652563, - "grad_norm": 0.00250240508466959, - "learning_rate": 0.000199984232948652, - "loss": 46.0, - "step": 35111 - }, - { - "epoch": 5.6545352067313495, - "grad_norm": 0.010545754805207253, - "learning_rate": 0.0001999842320502809, - "loss": 46.0, - "step": 35112 - }, - { - "epoch": 5.654696243810137, - "grad_norm": 0.009056560695171356, - "learning_rate": 0.00019998423115188426, - "loss": 46.0, - "step": 35113 - }, - { - "epoch": 5.654857280888924, - "grad_norm": 0.01301497407257557, - "learning_rate": 0.00019998423025346197, - "loss": 46.0, - "step": 35114 - }, - { - "epoch": 5.655018317967712, - "grad_norm": 0.0027683754451572895, - "learning_rate": 0.00019998422935501412, - "loss": 46.0, - "step": 35115 - }, - { - "epoch": 5.655179355046499, - "grad_norm": 0.0025818690191954374, - "learning_rate": 0.0001999842284565407, - "loss": 46.0, - "step": 35116 - }, - { - "epoch": 5.655340392125287, - "grad_norm": 0.009465574286878109, - "learning_rate": 0.00019998422755804164, - "loss": 46.0, - "step": 35117 - }, - { - "epoch": 5.655501429204074, - "grad_norm": 0.006048796698451042, - "learning_rate": 0.00019998422665951703, - "loss": 46.0, - "step": 35118 - }, - { - "epoch": 5.655662466282862, - "grad_norm": 0.0030552742537111044, - "learning_rate": 0.00019998422576096683, - "loss": 46.0, - "step": 35119 - }, - { - "epoch": 5.655823503361649, - "grad_norm": 0.012277650646865368, - "learning_rate": 0.00019998422486239102, - "loss": 46.0, - "step": 35120 - }, - { - "epoch": 5.6559845404404365, - "grad_norm": 0.004479520954191685, - "learning_rate": 0.00019998422396378965, - "loss": 46.0, - "step": 35121 - }, - { - "epoch": 5.656145577519224, - "grad_norm": 0.001483603729866445, - "learning_rate": 0.00019998422306516264, - "loss": 46.0, - "step": 35122 - }, - { - "epoch": 5.656306614598011, - "grad_norm": 0.002450986998155713, - "learning_rate": 0.00019998422216651007, - "loss": 46.0, - "step": 35123 - }, - { - "epoch": 5.656467651676799, - "grad_norm": 0.008994616568088531, - "learning_rate": 0.0001999842212678319, - "loss": 46.0, - "step": 35124 - }, - { - "epoch": 5.656628688755586, - "grad_norm": 0.014238941483199596, - "learning_rate": 0.00019998422036912814, - "loss": 46.0, - "step": 35125 - }, - { - "epoch": 5.656789725834374, - "grad_norm": 0.0024231786374002695, - "learning_rate": 0.0001999842194703988, - "loss": 46.0, - "step": 35126 - }, - { - "epoch": 5.65695076291316, - "grad_norm": 0.009421883150935173, - "learning_rate": 0.00019998421857164386, - "loss": 46.0, - "step": 35127 - }, - { - "epoch": 5.657111799991948, - "grad_norm": 0.011003999970853329, - "learning_rate": 0.00019998421767286332, - "loss": 46.0, - "step": 35128 - }, - { - "epoch": 5.657272837070735, - "grad_norm": 0.009684274904429913, - "learning_rate": 0.00019998421677405723, - "loss": 46.0, - "step": 35129 - }, - { - "epoch": 5.657433874149523, - "grad_norm": 0.0013297874247655272, - "learning_rate": 0.00019998421587522552, - "loss": 46.0, - "step": 35130 - }, - { - "epoch": 5.65759491122831, - "grad_norm": 0.006396141368895769, - "learning_rate": 0.00019998421497636822, - "loss": 46.0, - "step": 35131 - }, - { - "epoch": 5.657755948307098, - "grad_norm": 0.0022974489256739616, - "learning_rate": 0.00019998421407748531, - "loss": 46.0, - "step": 35132 - }, - { - "epoch": 5.657916985385885, - "grad_norm": 0.003934463020414114, - "learning_rate": 0.00019998421317857682, - "loss": 46.0, - "step": 35133 - }, - { - "epoch": 5.6580780224646725, - "grad_norm": 0.0018786523723974824, - "learning_rate": 0.00019998421227964276, - "loss": 46.0, - "step": 35134 - }, - { - "epoch": 5.65823905954346, - "grad_norm": 0.0068613323383033276, - "learning_rate": 0.0001999842113806831, - "loss": 46.0, - "step": 35135 - }, - { - "epoch": 5.658400096622247, - "grad_norm": 0.005072134081274271, - "learning_rate": 0.00019998421048169785, - "loss": 46.0, - "step": 35136 - }, - { - "epoch": 5.658561133701035, - "grad_norm": 0.010304098017513752, - "learning_rate": 0.00019998420958268698, - "loss": 46.0, - "step": 35137 - }, - { - "epoch": 5.658722170779822, - "grad_norm": 0.004165279679000378, - "learning_rate": 0.00019998420868365055, - "loss": 46.0, - "step": 35138 - }, - { - "epoch": 5.65888320785861, - "grad_norm": 0.01088697835803032, - "learning_rate": 0.0001999842077845885, - "loss": 46.0, - "step": 35139 - }, - { - "epoch": 5.659044244937397, - "grad_norm": 0.0009569205576553941, - "learning_rate": 0.0001999842068855009, - "loss": 46.0, - "step": 35140 - }, - { - "epoch": 5.659205282016185, - "grad_norm": 0.025378096848726273, - "learning_rate": 0.0001999842059863877, - "loss": 46.0, - "step": 35141 - }, - { - "epoch": 5.659366319094971, - "grad_norm": 0.002211647806689143, - "learning_rate": 0.0001999842050872489, - "loss": 46.0, - "step": 35142 - }, - { - "epoch": 5.659527356173759, - "grad_norm": 0.013512805104255676, - "learning_rate": 0.0001999842041880845, - "loss": 46.0, - "step": 35143 - }, - { - "epoch": 5.659688393252546, - "grad_norm": 0.0023886284325271845, - "learning_rate": 0.0001999842032888945, - "loss": 46.0, - "step": 35144 - }, - { - "epoch": 5.659849430331334, - "grad_norm": 0.004233606159687042, - "learning_rate": 0.00019998420238967894, - "loss": 46.0, - "step": 35145 - }, - { - "epoch": 5.660010467410121, - "grad_norm": 0.00212499825283885, - "learning_rate": 0.0001999842014904378, - "loss": 46.0, - "step": 35146 - }, - { - "epoch": 5.6601715044889085, - "grad_norm": 0.0006795204244554043, - "learning_rate": 0.00019998420059117104, - "loss": 46.0, - "step": 35147 - }, - { - "epoch": 5.660332541567696, - "grad_norm": 0.025922708213329315, - "learning_rate": 0.00019998419969187868, - "loss": 46.0, - "step": 35148 - }, - { - "epoch": 5.660493578646483, - "grad_norm": 0.026236068457365036, - "learning_rate": 0.00019998419879256073, - "loss": 46.0, - "step": 35149 - }, - { - "epoch": 5.660654615725271, - "grad_norm": 0.004925851244479418, - "learning_rate": 0.00019998419789321722, - "loss": 46.0, - "step": 35150 - }, - { - "epoch": 5.660815652804058, - "grad_norm": 0.004672850947827101, - "learning_rate": 0.0001999841969938481, - "loss": 46.0, - "step": 35151 - }, - { - "epoch": 5.660976689882846, - "grad_norm": 0.00997094251215458, - "learning_rate": 0.00019998419609445337, - "loss": 46.0, - "step": 35152 - }, - { - "epoch": 5.661137726961633, - "grad_norm": 0.005512923467904329, - "learning_rate": 0.0001999841951950331, - "loss": 46.0, - "step": 35153 - }, - { - "epoch": 5.661298764040421, - "grad_norm": 0.005155325401574373, - "learning_rate": 0.00019998419429558722, - "loss": 46.0, - "step": 35154 - }, - { - "epoch": 5.661459801119207, - "grad_norm": 0.0025486720260232687, - "learning_rate": 0.00019998419339611572, - "loss": 46.0, - "step": 35155 - }, - { - "epoch": 5.6616208381979956, - "grad_norm": 0.006066937930881977, - "learning_rate": 0.00019998419249661864, - "loss": 46.0, - "step": 35156 - }, - { - "epoch": 5.661781875276782, - "grad_norm": 0.005126535426825285, - "learning_rate": 0.000199984191597096, - "loss": 46.0, - "step": 35157 - }, - { - "epoch": 5.66194291235557, - "grad_norm": 0.010907979682087898, - "learning_rate": 0.00019998419069754773, - "loss": 46.0, - "step": 35158 - }, - { - "epoch": 5.662103949434357, - "grad_norm": 0.002863509114831686, - "learning_rate": 0.00019998418979797386, - "loss": 46.0, - "step": 35159 - }, - { - "epoch": 5.6622649865131445, - "grad_norm": 0.004635497462004423, - "learning_rate": 0.00019998418889837446, - "loss": 46.0, - "step": 35160 - }, - { - "epoch": 5.662426023591932, - "grad_norm": 0.0017995884409174323, - "learning_rate": 0.0001999841879987494, - "loss": 46.0, - "step": 35161 - }, - { - "epoch": 5.662587060670719, - "grad_norm": 0.0023648450151085854, - "learning_rate": 0.00019998418709909878, - "loss": 46.0, - "step": 35162 - }, - { - "epoch": 5.662748097749507, - "grad_norm": 0.007384988013654947, - "learning_rate": 0.00019998418619942258, - "loss": 46.0, - "step": 35163 - }, - { - "epoch": 5.662909134828294, - "grad_norm": 0.005359169561415911, - "learning_rate": 0.0001999841852997208, - "loss": 46.0, - "step": 35164 - }, - { - "epoch": 5.663070171907082, - "grad_norm": 0.0016436147270724177, - "learning_rate": 0.0001999841843999934, - "loss": 46.0, - "step": 35165 - }, - { - "epoch": 5.663231208985869, - "grad_norm": 0.002862224355340004, - "learning_rate": 0.00019998418350024042, - "loss": 46.0, - "step": 35166 - }, - { - "epoch": 5.663392246064657, - "grad_norm": 0.007666591554880142, - "learning_rate": 0.00019998418260046185, - "loss": 46.0, - "step": 35167 - }, - { - "epoch": 5.663553283143444, - "grad_norm": 0.002966654021292925, - "learning_rate": 0.0001999841817006577, - "loss": 46.0, - "step": 35168 - }, - { - "epoch": 5.6637143202222315, - "grad_norm": 0.0016130894655361772, - "learning_rate": 0.00019998418080082795, - "loss": 46.0, - "step": 35169 - }, - { - "epoch": 5.663875357301018, - "grad_norm": 0.003145878668874502, - "learning_rate": 0.0001999841799009726, - "loss": 46.0, - "step": 35170 - }, - { - "epoch": 5.664036394379806, - "grad_norm": 0.017086086794734, - "learning_rate": 0.00019998417900109165, - "loss": 46.0, - "step": 35171 - }, - { - "epoch": 5.664197431458593, - "grad_norm": 0.005891021341085434, - "learning_rate": 0.00019998417810118511, - "loss": 46.0, - "step": 35172 - }, - { - "epoch": 5.6643584685373805, - "grad_norm": 0.006925859954208136, - "learning_rate": 0.00019998417720125302, - "loss": 46.0, - "step": 35173 - }, - { - "epoch": 5.664519505616168, - "grad_norm": 0.0031833990942686796, - "learning_rate": 0.00019998417630129531, - "loss": 46.0, - "step": 35174 - }, - { - "epoch": 5.664680542694955, - "grad_norm": 0.006922585424035788, - "learning_rate": 0.000199984175401312, - "loss": 46.0, - "step": 35175 - }, - { - "epoch": 5.664841579773743, - "grad_norm": 0.0012990660034120083, - "learning_rate": 0.00019998417450130314, - "loss": 46.0, - "step": 35176 - }, - { - "epoch": 5.66500261685253, - "grad_norm": 0.002073422772809863, - "learning_rate": 0.00019998417360126864, - "loss": 46.0, - "step": 35177 - }, - { - "epoch": 5.665163653931318, - "grad_norm": 0.011119670234620571, - "learning_rate": 0.00019998417270120856, - "loss": 46.0, - "step": 35178 - }, - { - "epoch": 5.665324691010105, - "grad_norm": 0.004305449780076742, - "learning_rate": 0.00019998417180112292, - "loss": 46.0, - "step": 35179 - }, - { - "epoch": 5.665485728088893, - "grad_norm": 0.004370627924799919, - "learning_rate": 0.00019998417090101166, - "loss": 46.0, - "step": 35180 - }, - { - "epoch": 5.66564676516768, - "grad_norm": 0.002213856903836131, - "learning_rate": 0.00019998417000087482, - "loss": 46.0, - "step": 35181 - }, - { - "epoch": 5.6658078022464675, - "grad_norm": 0.015276379883289337, - "learning_rate": 0.00019998416910071238, - "loss": 46.0, - "step": 35182 - }, - { - "epoch": 5.665968839325255, - "grad_norm": 0.018438534811139107, - "learning_rate": 0.00019998416820052434, - "loss": 46.0, - "step": 35183 - }, - { - "epoch": 5.666129876404042, - "grad_norm": 0.0010997239733114839, - "learning_rate": 0.00019998416730031076, - "loss": 46.0, - "step": 35184 - }, - { - "epoch": 5.666290913482829, - "grad_norm": 0.015043821185827255, - "learning_rate": 0.00019998416640007154, - "loss": 46.0, - "step": 35185 - }, - { - "epoch": 5.666451950561616, - "grad_norm": 0.0033091683872044086, - "learning_rate": 0.00019998416549980673, - "loss": 46.0, - "step": 35186 - }, - { - "epoch": 5.666612987640404, - "grad_norm": 0.002052116673439741, - "learning_rate": 0.00019998416459951636, - "loss": 46.0, - "step": 35187 - }, - { - "epoch": 5.666774024719191, - "grad_norm": 0.004926176276057959, - "learning_rate": 0.00019998416369920036, - "loss": 46.0, - "step": 35188 - }, - { - "epoch": 5.666935061797979, - "grad_norm": 0.0025779197458177805, - "learning_rate": 0.00019998416279885881, - "loss": 46.0, - "step": 35189 - }, - { - "epoch": 5.667096098876766, - "grad_norm": 0.012536870315670967, - "learning_rate": 0.00019998416189849163, - "loss": 46.0, - "step": 35190 - }, - { - "epoch": 5.667257135955554, - "grad_norm": 0.004109465982764959, - "learning_rate": 0.0001999841609980989, - "loss": 46.0, - "step": 35191 - }, - { - "epoch": 5.667418173034341, - "grad_norm": 0.0008402433013543487, - "learning_rate": 0.00019998416009768056, - "loss": 46.0, - "step": 35192 - }, - { - "epoch": 5.667579210113129, - "grad_norm": 0.004334953147917986, - "learning_rate": 0.00019998415919723664, - "loss": 46.0, - "step": 35193 - }, - { - "epoch": 5.667740247191916, - "grad_norm": 0.004303778056055307, - "learning_rate": 0.0001999841582967671, - "loss": 46.0, - "step": 35194 - }, - { - "epoch": 5.6679012842707035, - "grad_norm": 0.008808623068034649, - "learning_rate": 0.00019998415739627196, - "loss": 46.0, - "step": 35195 - }, - { - "epoch": 5.668062321349491, - "grad_norm": 0.00121975876390934, - "learning_rate": 0.00019998415649575128, - "loss": 46.0, - "step": 35196 - }, - { - "epoch": 5.668223358428278, - "grad_norm": 0.002060552593320608, - "learning_rate": 0.000199984155595205, - "loss": 46.0, - "step": 35197 - }, - { - "epoch": 5.668384395507066, - "grad_norm": 0.0032270464580506086, - "learning_rate": 0.0001999841546946331, - "loss": 46.0, - "step": 35198 - }, - { - "epoch": 5.668545432585853, - "grad_norm": 0.002035292563959956, - "learning_rate": 0.00019998415379403561, - "loss": 46.0, - "step": 35199 - }, - { - "epoch": 5.66870646966464, - "grad_norm": 0.002801191294565797, - "learning_rate": 0.00019998415289341256, - "loss": 46.0, - "step": 35200 - }, - { - "epoch": 5.668867506743427, - "grad_norm": 0.011713326908648014, - "learning_rate": 0.0001999841519927639, - "loss": 46.0, - "step": 35201 - }, - { - "epoch": 5.669028543822215, - "grad_norm": 0.004615049343556166, - "learning_rate": 0.00019998415109208963, - "loss": 46.0, - "step": 35202 - }, - { - "epoch": 5.669189580901002, - "grad_norm": 0.01085527054965496, - "learning_rate": 0.00019998415019138982, - "loss": 46.0, - "step": 35203 - }, - { - "epoch": 5.66935061797979, - "grad_norm": 0.0012078335275873542, - "learning_rate": 0.00019998414929066436, - "loss": 46.0, - "step": 35204 - }, - { - "epoch": 5.669511655058577, - "grad_norm": 0.005233896430581808, - "learning_rate": 0.00019998414838991334, - "loss": 46.0, - "step": 35205 - }, - { - "epoch": 5.669672692137365, - "grad_norm": 0.001537130563519895, - "learning_rate": 0.0001999841474891367, - "loss": 46.0, - "step": 35206 - }, - { - "epoch": 5.669833729216152, - "grad_norm": 0.011775372549891472, - "learning_rate": 0.00019998414658833454, - "loss": 46.0, - "step": 35207 - }, - { - "epoch": 5.6699947662949395, - "grad_norm": 0.008319418877363205, - "learning_rate": 0.0001999841456875067, - "loss": 46.0, - "step": 35208 - }, - { - "epoch": 5.670155803373727, - "grad_norm": 0.0018707100534811616, - "learning_rate": 0.00019998414478665334, - "loss": 46.0, - "step": 35209 - }, - { - "epoch": 5.670316840452514, - "grad_norm": 0.0029868409037590027, - "learning_rate": 0.00019998414388577436, - "loss": 46.0, - "step": 35210 - }, - { - "epoch": 5.670477877531302, - "grad_norm": 0.01568084955215454, - "learning_rate": 0.0001999841429848698, - "loss": 46.0, - "step": 35211 - }, - { - "epoch": 5.670638914610089, - "grad_norm": 0.005840482655912638, - "learning_rate": 0.00019998414208393964, - "loss": 46.0, - "step": 35212 - }, - { - "epoch": 5.670799951688877, - "grad_norm": 0.0013728828635066748, - "learning_rate": 0.0001999841411829839, - "loss": 46.0, - "step": 35213 - }, - { - "epoch": 5.670960988767664, - "grad_norm": 0.006242575589567423, - "learning_rate": 0.00019998414028200254, - "loss": 46.0, - "step": 35214 - }, - { - "epoch": 5.671122025846451, - "grad_norm": 0.004143235739320517, - "learning_rate": 0.00019998413938099562, - "loss": 46.0, - "step": 35215 - }, - { - "epoch": 5.671283062925238, - "grad_norm": 0.005876758135855198, - "learning_rate": 0.0001999841384799631, - "loss": 46.0, - "step": 35216 - }, - { - "epoch": 5.671444100004026, - "grad_norm": 0.0059021892957389355, - "learning_rate": 0.000199984137578905, - "loss": 46.0, - "step": 35217 - }, - { - "epoch": 5.671605137082813, - "grad_norm": 0.012267417274415493, - "learning_rate": 0.00019998413667782127, - "loss": 46.0, - "step": 35218 - }, - { - "epoch": 5.671766174161601, - "grad_norm": 0.0027927905321121216, - "learning_rate": 0.000199984135776712, - "loss": 46.0, - "step": 35219 - }, - { - "epoch": 5.671927211240388, - "grad_norm": 0.0007919935742393136, - "learning_rate": 0.0001999841348755771, - "loss": 46.0, - "step": 35220 - }, - { - "epoch": 5.6720882483191755, - "grad_norm": 0.002068153116852045, - "learning_rate": 0.00019998413397441663, - "loss": 46.0, - "step": 35221 - }, - { - "epoch": 5.672249285397963, - "grad_norm": 0.006332676392048597, - "learning_rate": 0.00019998413307323055, - "loss": 46.0, - "step": 35222 - }, - { - "epoch": 5.67241032247675, - "grad_norm": 0.001940408954396844, - "learning_rate": 0.00019998413217201894, - "loss": 46.0, - "step": 35223 - }, - { - "epoch": 5.672571359555538, - "grad_norm": 0.0012789057800546288, - "learning_rate": 0.00019998413127078165, - "loss": 46.0, - "step": 35224 - }, - { - "epoch": 5.672732396634325, - "grad_norm": 0.01430367399007082, - "learning_rate": 0.0001999841303695188, - "loss": 46.0, - "step": 35225 - }, - { - "epoch": 5.672893433713113, - "grad_norm": 0.0017292052507400513, - "learning_rate": 0.0001999841294682304, - "loss": 46.0, - "step": 35226 - }, - { - "epoch": 5.6730544707919, - "grad_norm": 0.004862557630985975, - "learning_rate": 0.00019998412856691636, - "loss": 46.0, - "step": 35227 - }, - { - "epoch": 5.673215507870687, - "grad_norm": 0.0033276767935603857, - "learning_rate": 0.00019998412766557676, - "loss": 46.0, - "step": 35228 - }, - { - "epoch": 5.673376544949475, - "grad_norm": 0.001874297740869224, - "learning_rate": 0.00019998412676421157, - "loss": 46.0, - "step": 35229 - }, - { - "epoch": 5.673537582028262, - "grad_norm": 0.011564243584871292, - "learning_rate": 0.0001999841258628208, - "loss": 46.0, - "step": 35230 - }, - { - "epoch": 5.673698619107049, - "grad_norm": 0.01121477223932743, - "learning_rate": 0.00019998412496140437, - "loss": 46.0, - "step": 35231 - }, - { - "epoch": 5.6738596561858365, - "grad_norm": 0.002650787588208914, - "learning_rate": 0.00019998412405996242, - "loss": 46.0, - "step": 35232 - }, - { - "epoch": 5.674020693264624, - "grad_norm": 0.004733751993626356, - "learning_rate": 0.00019998412315849485, - "loss": 46.0, - "step": 35233 - }, - { - "epoch": 5.6741817303434114, - "grad_norm": 0.006988368928432465, - "learning_rate": 0.0001999841222570017, - "loss": 46.0, - "step": 35234 - }, - { - "epoch": 5.674342767422199, - "grad_norm": 0.0037831293884664774, - "learning_rate": 0.00019998412135548296, - "loss": 46.0, - "step": 35235 - }, - { - "epoch": 5.674503804500986, - "grad_norm": 0.024250324815511703, - "learning_rate": 0.0001999841204539386, - "loss": 46.0, - "step": 35236 - }, - { - "epoch": 5.674664841579774, - "grad_norm": 0.0014020624803379178, - "learning_rate": 0.00019998411955236866, - "loss": 46.0, - "step": 35237 - }, - { - "epoch": 5.674825878658561, - "grad_norm": 0.019380206242203712, - "learning_rate": 0.00019998411865077315, - "loss": 46.0, - "step": 35238 - }, - { - "epoch": 5.674986915737349, - "grad_norm": 0.006248376332223415, - "learning_rate": 0.00019998411774915204, - "loss": 46.0, - "step": 35239 - }, - { - "epoch": 5.675147952816136, - "grad_norm": 0.0027235010638833046, - "learning_rate": 0.00019998411684750536, - "loss": 46.0, - "step": 35240 - }, - { - "epoch": 5.675308989894924, - "grad_norm": 0.007138578686863184, - "learning_rate": 0.00019998411594583307, - "loss": 46.0, - "step": 35241 - }, - { - "epoch": 5.675470026973711, - "grad_norm": 0.006366404704749584, - "learning_rate": 0.0001999841150441352, - "loss": 46.0, - "step": 35242 - }, - { - "epoch": 5.675631064052498, - "grad_norm": 0.0036704582162201405, - "learning_rate": 0.0001999841141424117, - "loss": 46.0, - "step": 35243 - }, - { - "epoch": 5.675792101131286, - "grad_norm": 0.01133080292493105, - "learning_rate": 0.00019998411324066264, - "loss": 46.0, - "step": 35244 - }, - { - "epoch": 5.6759531382100725, - "grad_norm": 0.003278769087046385, - "learning_rate": 0.00019998411233888798, - "loss": 46.0, - "step": 35245 - }, - { - "epoch": 5.67611417528886, - "grad_norm": 0.007010200992226601, - "learning_rate": 0.00019998411143708772, - "loss": 46.0, - "step": 35246 - }, - { - "epoch": 5.676275212367647, - "grad_norm": 0.013183454982936382, - "learning_rate": 0.0001999841105352619, - "loss": 46.0, - "step": 35247 - }, - { - "epoch": 5.676436249446435, - "grad_norm": 0.0028443082701414824, - "learning_rate": 0.00019998410963341048, - "loss": 46.0, - "step": 35248 - }, - { - "epoch": 5.676597286525222, - "grad_norm": 0.0029535042122006416, - "learning_rate": 0.00019998410873153346, - "loss": 46.0, - "step": 35249 - }, - { - "epoch": 5.67675832360401, - "grad_norm": 0.0029641715809702873, - "learning_rate": 0.00019998410782963086, - "loss": 46.0, - "step": 35250 - }, - { - "epoch": 5.676919360682797, - "grad_norm": 0.0032623030710965395, - "learning_rate": 0.00019998410692770264, - "loss": 46.0, - "step": 35251 - }, - { - "epoch": 5.677080397761585, - "grad_norm": 0.006206602323800325, - "learning_rate": 0.00019998410602574886, - "loss": 46.0, - "step": 35252 - }, - { - "epoch": 5.677241434840372, - "grad_norm": 0.004512494429945946, - "learning_rate": 0.00019998410512376947, - "loss": 46.0, - "step": 35253 - }, - { - "epoch": 5.67740247191916, - "grad_norm": 0.005040169693529606, - "learning_rate": 0.0001999841042217645, - "loss": 46.0, - "step": 35254 - }, - { - "epoch": 5.677563508997947, - "grad_norm": 0.002800359157845378, - "learning_rate": 0.00019998410331973393, - "loss": 46.0, - "step": 35255 - }, - { - "epoch": 5.6777245460767345, - "grad_norm": 0.009813678450882435, - "learning_rate": 0.0001999841024176778, - "loss": 46.0, - "step": 35256 - }, - { - "epoch": 5.677885583155522, - "grad_norm": 0.0008741789497435093, - "learning_rate": 0.00019998410151559606, - "loss": 46.0, - "step": 35257 - }, - { - "epoch": 5.6780466202343085, - "grad_norm": 0.001780940336175263, - "learning_rate": 0.0001999841006134887, - "loss": 46.0, - "step": 35258 - }, - { - "epoch": 5.678207657313096, - "grad_norm": 0.003297362709417939, - "learning_rate": 0.0001999840997113558, - "loss": 46.0, - "step": 35259 - }, - { - "epoch": 5.678368694391883, - "grad_norm": 0.013721270486712456, - "learning_rate": 0.00019998409880919726, - "loss": 46.0, - "step": 35260 - }, - { - "epoch": 5.678529731470671, - "grad_norm": 0.003660049755126238, - "learning_rate": 0.00019998409790701315, - "loss": 46.0, - "step": 35261 - }, - { - "epoch": 5.678690768549458, - "grad_norm": 0.005717565305531025, - "learning_rate": 0.00019998409700480347, - "loss": 46.0, - "step": 35262 - }, - { - "epoch": 5.678851805628246, - "grad_norm": 0.002729204948991537, - "learning_rate": 0.00019998409610256815, - "loss": 46.0, - "step": 35263 - }, - { - "epoch": 5.679012842707033, - "grad_norm": 0.008691055700182915, - "learning_rate": 0.00019998409520030727, - "loss": 46.0, - "step": 35264 - }, - { - "epoch": 5.679173879785821, - "grad_norm": 0.011858818121254444, - "learning_rate": 0.0001999840942980208, - "loss": 46.0, - "step": 35265 - }, - { - "epoch": 5.679334916864608, - "grad_norm": 0.008600740693509579, - "learning_rate": 0.00019998409339570876, - "loss": 46.0, - "step": 35266 - }, - { - "epoch": 5.679495953943396, - "grad_norm": 0.010730131529271603, - "learning_rate": 0.00019998409249337112, - "loss": 46.0, - "step": 35267 - }, - { - "epoch": 5.679656991022183, - "grad_norm": 0.001740789390169084, - "learning_rate": 0.00019998409159100786, - "loss": 46.0, - "step": 35268 - }, - { - "epoch": 5.6798180281009705, - "grad_norm": 0.003853529691696167, - "learning_rate": 0.00019998409068861902, - "loss": 46.0, - "step": 35269 - }, - { - "epoch": 5.679979065179758, - "grad_norm": 0.003495418466627598, - "learning_rate": 0.0001999840897862046, - "loss": 46.0, - "step": 35270 - }, - { - "epoch": 5.680140102258545, - "grad_norm": 0.0079538868740201, - "learning_rate": 0.00019998408888376458, - "loss": 46.0, - "step": 35271 - }, - { - "epoch": 5.680301139337333, - "grad_norm": 0.0051788100972771645, - "learning_rate": 0.000199984087981299, - "loss": 46.0, - "step": 35272 - }, - { - "epoch": 5.680462176416119, - "grad_norm": 0.010263034142553806, - "learning_rate": 0.0001999840870788078, - "loss": 46.0, - "step": 35273 - }, - { - "epoch": 5.680623213494907, - "grad_norm": 0.003974579274654388, - "learning_rate": 0.00019998408617629099, - "loss": 46.0, - "step": 35274 - }, - { - "epoch": 5.680784250573694, - "grad_norm": 0.003546706400811672, - "learning_rate": 0.00019998408527374862, - "loss": 46.0, - "step": 35275 - }, - { - "epoch": 5.680945287652482, - "grad_norm": 0.012054373510181904, - "learning_rate": 0.00019998408437118067, - "loss": 46.0, - "step": 35276 - }, - { - "epoch": 5.681106324731269, - "grad_norm": 0.004586960654705763, - "learning_rate": 0.0001999840834685871, - "loss": 46.0, - "step": 35277 - }, - { - "epoch": 5.681267361810057, - "grad_norm": 0.0035653235390782356, - "learning_rate": 0.00019998408256596793, - "loss": 46.0, - "step": 35278 - }, - { - "epoch": 5.681428398888844, - "grad_norm": 0.0019957139156758785, - "learning_rate": 0.00019998408166332321, - "loss": 46.0, - "step": 35279 - }, - { - "epoch": 5.6815894359676316, - "grad_norm": 0.007851950824260712, - "learning_rate": 0.00019998408076065286, - "loss": 46.0, - "step": 35280 - }, - { - "epoch": 5.681750473046419, - "grad_norm": 0.015349440276622772, - "learning_rate": 0.00019998407985795695, - "loss": 46.0, - "step": 35281 - }, - { - "epoch": 5.6819115101252065, - "grad_norm": 0.0078217051923275, - "learning_rate": 0.00019998407895523544, - "loss": 46.0, - "step": 35282 - }, - { - "epoch": 5.682072547203994, - "grad_norm": 0.002218423644080758, - "learning_rate": 0.00019998407805248833, - "loss": 46.0, - "step": 35283 - }, - { - "epoch": 5.682233584282781, - "grad_norm": 0.0015029021305963397, - "learning_rate": 0.00019998407714971563, - "loss": 46.0, - "step": 35284 - }, - { - "epoch": 5.682394621361569, - "grad_norm": 0.0012599461479112506, - "learning_rate": 0.00019998407624691736, - "loss": 46.0, - "step": 35285 - }, - { - "epoch": 5.682555658440356, - "grad_norm": 0.004044000990688801, - "learning_rate": 0.00019998407534409343, - "loss": 46.0, - "step": 35286 - }, - { - "epoch": 5.682716695519144, - "grad_norm": 0.002554306061938405, - "learning_rate": 0.000199984074441244, - "loss": 46.0, - "step": 35287 - }, - { - "epoch": 5.68287773259793, - "grad_norm": 0.004146645776927471, - "learning_rate": 0.00019998407353836895, - "loss": 46.0, - "step": 35288 - }, - { - "epoch": 5.683038769676718, - "grad_norm": 0.009110577404499054, - "learning_rate": 0.0001999840726354683, - "loss": 46.0, - "step": 35289 - }, - { - "epoch": 5.683199806755505, - "grad_norm": 0.016336863860487938, - "learning_rate": 0.00019998407173254203, - "loss": 46.0, - "step": 35290 - }, - { - "epoch": 5.683360843834293, - "grad_norm": 0.01600031740963459, - "learning_rate": 0.00019998407082959021, - "loss": 46.0, - "step": 35291 - }, - { - "epoch": 5.68352188091308, - "grad_norm": 0.0050653861835598946, - "learning_rate": 0.00019998406992661281, - "loss": 46.0, - "step": 35292 - }, - { - "epoch": 5.6836829179918675, - "grad_norm": 0.00461761187762022, - "learning_rate": 0.00019998406902360977, - "loss": 46.0, - "step": 35293 - }, - { - "epoch": 5.683843955070655, - "grad_norm": 0.013072383590042591, - "learning_rate": 0.00019998406812058117, - "loss": 46.0, - "step": 35294 - }, - { - "epoch": 5.684004992149442, - "grad_norm": 0.0012824031291529536, - "learning_rate": 0.00019998406721752698, - "loss": 46.0, - "step": 35295 - }, - { - "epoch": 5.68416602922823, - "grad_norm": 0.00237314123660326, - "learning_rate": 0.00019998406631444718, - "loss": 46.0, - "step": 35296 - }, - { - "epoch": 5.684327066307017, - "grad_norm": 0.0017943476559594274, - "learning_rate": 0.0001999840654113418, - "loss": 46.0, - "step": 35297 - }, - { - "epoch": 5.684488103385805, - "grad_norm": 0.011635672301054, - "learning_rate": 0.00019998406450821087, - "loss": 46.0, - "step": 35298 - }, - { - "epoch": 5.684649140464592, - "grad_norm": 0.01504228450357914, - "learning_rate": 0.00019998406360505428, - "loss": 46.0, - "step": 35299 - }, - { - "epoch": 5.68481017754338, - "grad_norm": 0.003236522199586034, - "learning_rate": 0.00019998406270187212, - "loss": 46.0, - "step": 35300 - }, - { - "epoch": 5.684971214622166, - "grad_norm": 0.0021517241839319468, - "learning_rate": 0.0001999840617986644, - "loss": 46.0, - "step": 35301 - }, - { - "epoch": 5.685132251700955, - "grad_norm": 0.0018133294070139527, - "learning_rate": 0.0001999840608954311, - "loss": 46.0, - "step": 35302 - }, - { - "epoch": 5.685293288779741, - "grad_norm": 0.00377635401673615, - "learning_rate": 0.00019998405999217215, - "loss": 46.0, - "step": 35303 - }, - { - "epoch": 5.685454325858529, - "grad_norm": 0.002605409361422062, - "learning_rate": 0.00019998405908888765, - "loss": 46.0, - "step": 35304 - }, - { - "epoch": 5.685615362937316, - "grad_norm": 0.0031169280409812927, - "learning_rate": 0.00019998405818557756, - "loss": 46.0, - "step": 35305 - }, - { - "epoch": 5.6857764000161035, - "grad_norm": 0.0050070141442120075, - "learning_rate": 0.00019998405728224183, - "loss": 46.0, - "step": 35306 - }, - { - "epoch": 5.685937437094891, - "grad_norm": 0.005279757548123598, - "learning_rate": 0.00019998405637888057, - "loss": 46.0, - "step": 35307 - }, - { - "epoch": 5.686098474173678, - "grad_norm": 0.004055046942085028, - "learning_rate": 0.0001999840554754937, - "loss": 46.0, - "step": 35308 - }, - { - "epoch": 5.686259511252466, - "grad_norm": 0.002828560769557953, - "learning_rate": 0.00019998405457208123, - "loss": 46.0, - "step": 35309 - }, - { - "epoch": 5.686420548331253, - "grad_norm": 0.0008795047178864479, - "learning_rate": 0.00019998405366864318, - "loss": 46.0, - "step": 35310 - }, - { - "epoch": 5.686581585410041, - "grad_norm": 0.005033840425312519, - "learning_rate": 0.00019998405276517952, - "loss": 46.0, - "step": 35311 - }, - { - "epoch": 5.686742622488828, - "grad_norm": 0.003187753725796938, - "learning_rate": 0.00019998405186169026, - "loss": 46.0, - "step": 35312 - }, - { - "epoch": 5.686903659567616, - "grad_norm": 0.006427199114114046, - "learning_rate": 0.00019998405095817545, - "loss": 46.0, - "step": 35313 - }, - { - "epoch": 5.687064696646403, - "grad_norm": 0.0017722093034535646, - "learning_rate": 0.00019998405005463503, - "loss": 46.0, - "step": 35314 - }, - { - "epoch": 5.687225733725191, - "grad_norm": 0.015021676197648048, - "learning_rate": 0.00019998404915106904, - "loss": 46.0, - "step": 35315 - }, - { - "epoch": 5.687386770803977, - "grad_norm": 0.00340224988758564, - "learning_rate": 0.00019998404824747744, - "loss": 46.0, - "step": 35316 - }, - { - "epoch": 5.6875478078827655, - "grad_norm": 0.0029090321622788906, - "learning_rate": 0.00019998404734386022, - "loss": 46.0, - "step": 35317 - }, - { - "epoch": 5.687708844961552, - "grad_norm": 0.010697041638195515, - "learning_rate": 0.00019998404644021745, - "loss": 46.0, - "step": 35318 - }, - { - "epoch": 5.6878698820403395, - "grad_norm": 0.0056275296956300735, - "learning_rate": 0.00019998404553654906, - "loss": 46.0, - "step": 35319 - }, - { - "epoch": 5.688030919119127, - "grad_norm": 0.002728802850469947, - "learning_rate": 0.00019998404463285508, - "loss": 46.0, - "step": 35320 - }, - { - "epoch": 5.688191956197914, - "grad_norm": 0.007858066819608212, - "learning_rate": 0.00019998404372913555, - "loss": 46.0, - "step": 35321 - }, - { - "epoch": 5.688352993276702, - "grad_norm": 0.0014615998370572925, - "learning_rate": 0.0001999840428253904, - "loss": 46.0, - "step": 35322 - }, - { - "epoch": 5.688514030355489, - "grad_norm": 0.009267511777579784, - "learning_rate": 0.0001999840419216197, - "loss": 46.0, - "step": 35323 - }, - { - "epoch": 5.688675067434277, - "grad_norm": 0.0037768869660794735, - "learning_rate": 0.00019998404101782336, - "loss": 46.0, - "step": 35324 - }, - { - "epoch": 5.688836104513064, - "grad_norm": 0.0030597448348999023, - "learning_rate": 0.00019998404011400142, - "loss": 46.0, - "step": 35325 - }, - { - "epoch": 5.688997141591852, - "grad_norm": 0.010552025400102139, - "learning_rate": 0.00019998403921015392, - "loss": 46.0, - "step": 35326 - }, - { - "epoch": 5.689158178670639, - "grad_norm": 0.0018648904515430331, - "learning_rate": 0.0001999840383062808, - "loss": 46.0, - "step": 35327 - }, - { - "epoch": 5.689319215749427, - "grad_norm": 0.0015851183561608195, - "learning_rate": 0.0001999840374023821, - "loss": 46.0, - "step": 35328 - }, - { - "epoch": 5.689480252828214, - "grad_norm": 0.004243799950927496, - "learning_rate": 0.00019998403649845785, - "loss": 46.0, - "step": 35329 - }, - { - "epoch": 5.6896412899070015, - "grad_norm": 0.006032241974025965, - "learning_rate": 0.00019998403559450795, - "loss": 46.0, - "step": 35330 - }, - { - "epoch": 5.689802326985788, - "grad_norm": 0.0018145754002034664, - "learning_rate": 0.0001999840346905325, - "loss": 46.0, - "step": 35331 - }, - { - "epoch": 5.689963364064576, - "grad_norm": 0.0034648312721401453, - "learning_rate": 0.00019998403378653146, - "loss": 46.0, - "step": 35332 - }, - { - "epoch": 5.690124401143363, - "grad_norm": 0.0028540475759655237, - "learning_rate": 0.0001999840328825048, - "loss": 46.0, - "step": 35333 - }, - { - "epoch": 5.69028543822215, - "grad_norm": 0.022783741354942322, - "learning_rate": 0.0001999840319784526, - "loss": 46.0, - "step": 35334 - }, - { - "epoch": 5.690446475300938, - "grad_norm": 0.0026501058600842953, - "learning_rate": 0.00019998403107437476, - "loss": 46.0, - "step": 35335 - }, - { - "epoch": 5.690607512379725, - "grad_norm": 0.005448977928608656, - "learning_rate": 0.00019998403017027134, - "loss": 46.0, - "step": 35336 - }, - { - "epoch": 5.690768549458513, - "grad_norm": 0.000529517128597945, - "learning_rate": 0.00019998402926614233, - "loss": 46.0, - "step": 35337 - }, - { - "epoch": 5.6909295865373, - "grad_norm": 0.0025161313824355602, - "learning_rate": 0.00019998402836198773, - "loss": 46.0, - "step": 35338 - }, - { - "epoch": 5.691090623616088, - "grad_norm": 0.011775963939726353, - "learning_rate": 0.00019998402745780754, - "loss": 46.0, - "step": 35339 - }, - { - "epoch": 5.691251660694875, - "grad_norm": 0.017960337921977043, - "learning_rate": 0.00019998402655360177, - "loss": 46.0, - "step": 35340 - }, - { - "epoch": 5.6914126977736625, - "grad_norm": 0.0029451975133270025, - "learning_rate": 0.0001999840256493704, - "loss": 46.0, - "step": 35341 - }, - { - "epoch": 5.69157373485245, - "grad_norm": 0.003964627161622047, - "learning_rate": 0.00019998402474511343, - "loss": 46.0, - "step": 35342 - }, - { - "epoch": 5.6917347719312374, - "grad_norm": 0.0017763259820640087, - "learning_rate": 0.00019998402384083087, - "loss": 46.0, - "step": 35343 - }, - { - "epoch": 5.691895809010025, - "grad_norm": 0.005671083461493254, - "learning_rate": 0.00019998402293652272, - "loss": 46.0, - "step": 35344 - }, - { - "epoch": 5.692056846088812, - "grad_norm": 0.012492327019572258, - "learning_rate": 0.000199984022032189, - "loss": 46.0, - "step": 35345 - }, - { - "epoch": 5.692217883167599, - "grad_norm": 0.007014209404587746, - "learning_rate": 0.00019998402112782966, - "loss": 46.0, - "step": 35346 - }, - { - "epoch": 5.692378920246386, - "grad_norm": 0.010842365212738514, - "learning_rate": 0.00019998402022344475, - "loss": 46.0, - "step": 35347 - }, - { - "epoch": 5.692539957325174, - "grad_norm": 0.023855527862906456, - "learning_rate": 0.00019998401931903425, - "loss": 46.0, - "step": 35348 - }, - { - "epoch": 5.692700994403961, - "grad_norm": 0.0020728951785713434, - "learning_rate": 0.00019998401841459816, - "loss": 46.0, - "step": 35349 - }, - { - "epoch": 5.692862031482749, - "grad_norm": 0.0015600932529196143, - "learning_rate": 0.00019998401751013646, - "loss": 46.0, - "step": 35350 - }, - { - "epoch": 5.693023068561536, - "grad_norm": 0.00148162676487118, - "learning_rate": 0.0001999840166056492, - "loss": 46.0, - "step": 35351 - }, - { - "epoch": 5.693184105640324, - "grad_norm": 0.0023797908797860146, - "learning_rate": 0.00019998401570113633, - "loss": 46.0, - "step": 35352 - }, - { - "epoch": 5.693345142719111, - "grad_norm": 0.008714855648577213, - "learning_rate": 0.00019998401479659784, - "loss": 46.0, - "step": 35353 - }, - { - "epoch": 5.6935061797978985, - "grad_norm": 0.0029996950179338455, - "learning_rate": 0.0001999840138920338, - "loss": 46.0, - "step": 35354 - }, - { - "epoch": 5.693667216876686, - "grad_norm": 0.0018862314755097032, - "learning_rate": 0.00019998401298744418, - "loss": 46.0, - "step": 35355 - }, - { - "epoch": 5.693828253955473, - "grad_norm": 0.004545945208519697, - "learning_rate": 0.00019998401208282893, - "loss": 46.0, - "step": 35356 - }, - { - "epoch": 5.693989291034261, - "grad_norm": 0.008998136967420578, - "learning_rate": 0.0001999840111781881, - "loss": 46.0, - "step": 35357 - }, - { - "epoch": 5.694150328113048, - "grad_norm": 0.0057633561082184315, - "learning_rate": 0.0001999840102735217, - "loss": 46.0, - "step": 35358 - }, - { - "epoch": 5.694311365191836, - "grad_norm": 0.006195182912051678, - "learning_rate": 0.00019998400936882972, - "loss": 46.0, - "step": 35359 - }, - { - "epoch": 5.694472402270623, - "grad_norm": 0.002205311320722103, - "learning_rate": 0.0001999840084641121, - "loss": 46.0, - "step": 35360 - }, - { - "epoch": 5.69463343934941, - "grad_norm": 0.007667542900890112, - "learning_rate": 0.0001999840075593689, - "loss": 46.0, - "step": 35361 - }, - { - "epoch": 5.694794476428197, - "grad_norm": 0.008176380768418312, - "learning_rate": 0.00019998400665460013, - "loss": 46.0, - "step": 35362 - }, - { - "epoch": 5.694955513506985, - "grad_norm": 0.00937776267528534, - "learning_rate": 0.00019998400574980577, - "loss": 46.0, - "step": 35363 - }, - { - "epoch": 5.695116550585772, - "grad_norm": 0.0012654049787670374, - "learning_rate": 0.0001999840048449858, - "loss": 46.0, - "step": 35364 - }, - { - "epoch": 5.69527758766456, - "grad_norm": 0.003352086991071701, - "learning_rate": 0.00019998400394014026, - "loss": 46.0, - "step": 35365 - }, - { - "epoch": 5.695438624743347, - "grad_norm": 0.003260578028857708, - "learning_rate": 0.0001999840030352691, - "loss": 46.0, - "step": 35366 - }, - { - "epoch": 5.6955996618221345, - "grad_norm": 0.0036888134200125933, - "learning_rate": 0.00019998400213037238, - "loss": 46.0, - "step": 35367 - }, - { - "epoch": 5.695760698900922, - "grad_norm": 0.00892405305057764, - "learning_rate": 0.00019998400122545008, - "loss": 46.0, - "step": 35368 - }, - { - "epoch": 5.695921735979709, - "grad_norm": 0.003858861979097128, - "learning_rate": 0.00019998400032050214, - "loss": 46.0, - "step": 35369 - }, - { - "epoch": 5.696082773058497, - "grad_norm": 0.004568117670714855, - "learning_rate": 0.00019998399941552865, - "loss": 46.0, - "step": 35370 - }, - { - "epoch": 5.696243810137284, - "grad_norm": 0.008835841901600361, - "learning_rate": 0.00019998399851052956, - "loss": 46.0, - "step": 35371 - }, - { - "epoch": 5.696404847216072, - "grad_norm": 0.012812651693820953, - "learning_rate": 0.00019998399760550486, - "loss": 46.0, - "step": 35372 - }, - { - "epoch": 5.696565884294859, - "grad_norm": 0.002240388421341777, - "learning_rate": 0.0001999839967004546, - "loss": 46.0, - "step": 35373 - }, - { - "epoch": 5.696726921373647, - "grad_norm": 0.00512683903798461, - "learning_rate": 0.00019998399579537873, - "loss": 46.0, - "step": 35374 - }, - { - "epoch": 5.696887958452434, - "grad_norm": 0.005152825731784105, - "learning_rate": 0.00019998399489027727, - "loss": 46.0, - "step": 35375 - }, - { - "epoch": 5.697048995531221, - "grad_norm": 0.0022141211666166782, - "learning_rate": 0.00019998399398515022, - "loss": 46.0, - "step": 35376 - }, - { - "epoch": 5.697210032610008, - "grad_norm": 0.0014364334056153893, - "learning_rate": 0.0001999839930799976, - "loss": 46.0, - "step": 35377 - }, - { - "epoch": 5.697371069688796, - "grad_norm": 0.001976205036044121, - "learning_rate": 0.00019998399217481937, - "loss": 46.0, - "step": 35378 - }, - { - "epoch": 5.697532106767583, - "grad_norm": 0.0047792126424610615, - "learning_rate": 0.00019998399126961553, - "loss": 46.0, - "step": 35379 - }, - { - "epoch": 5.6976931438463705, - "grad_norm": 0.0037175470497459173, - "learning_rate": 0.00019998399036438613, - "loss": 46.0, - "step": 35380 - }, - { - "epoch": 5.697854180925158, - "grad_norm": 0.007630035746842623, - "learning_rate": 0.00019998398945913112, - "loss": 46.0, - "step": 35381 - }, - { - "epoch": 5.698015218003945, - "grad_norm": 0.0025430191308259964, - "learning_rate": 0.00019998398855385055, - "loss": 46.0, - "step": 35382 - }, - { - "epoch": 5.698176255082733, - "grad_norm": 0.001435289392247796, - "learning_rate": 0.00019998398764854437, - "loss": 46.0, - "step": 35383 - }, - { - "epoch": 5.69833729216152, - "grad_norm": 0.0018268882995471358, - "learning_rate": 0.00019998398674321257, - "loss": 46.0, - "step": 35384 - }, - { - "epoch": 5.698498329240308, - "grad_norm": 0.0006946501671336591, - "learning_rate": 0.0001999839858378552, - "loss": 46.0, - "step": 35385 - }, - { - "epoch": 5.698659366319095, - "grad_norm": 0.010200402699410915, - "learning_rate": 0.00019998398493247226, - "loss": 46.0, - "step": 35386 - }, - { - "epoch": 5.698820403397883, - "grad_norm": 0.0021359389647841454, - "learning_rate": 0.0001999839840270637, - "loss": 46.0, - "step": 35387 - }, - { - "epoch": 5.69898144047667, - "grad_norm": 0.008337379433214664, - "learning_rate": 0.00019998398312162956, - "loss": 46.0, - "step": 35388 - }, - { - "epoch": 5.699142477555457, - "grad_norm": 0.0247037410736084, - "learning_rate": 0.00019998398221616985, - "loss": 46.0, - "step": 35389 - }, - { - "epoch": 5.699303514634245, - "grad_norm": 0.00543738529086113, - "learning_rate": 0.00019998398131068453, - "loss": 46.0, - "step": 35390 - }, - { - "epoch": 5.699464551713032, - "grad_norm": 0.0012550480896607041, - "learning_rate": 0.00019998398040517362, - "loss": 46.0, - "step": 35391 - }, - { - "epoch": 5.699625588791819, - "grad_norm": 0.005019910167902708, - "learning_rate": 0.00019998397949963712, - "loss": 46.0, - "step": 35392 - }, - { - "epoch": 5.6997866258706065, - "grad_norm": 0.019118789583444595, - "learning_rate": 0.000199983978594075, - "loss": 46.0, - "step": 35393 - }, - { - "epoch": 5.699947662949394, - "grad_norm": 0.0034586978144943714, - "learning_rate": 0.00019998397768848737, - "loss": 46.0, - "step": 35394 - }, - { - "epoch": 5.700108700028181, - "grad_norm": 0.00217807712033391, - "learning_rate": 0.00019998397678287408, - "loss": 46.0, - "step": 35395 - }, - { - "epoch": 5.700269737106969, - "grad_norm": 0.0029005524702370167, - "learning_rate": 0.0001999839758772352, - "loss": 46.0, - "step": 35396 - }, - { - "epoch": 5.700430774185756, - "grad_norm": 0.0030601024627685547, - "learning_rate": 0.00019998397497157075, - "loss": 46.0, - "step": 35397 - }, - { - "epoch": 5.700591811264544, - "grad_norm": 0.002155889756977558, - "learning_rate": 0.0001999839740658807, - "loss": 46.0, - "step": 35398 - }, - { - "epoch": 5.700752848343331, - "grad_norm": 0.0024871493224054575, - "learning_rate": 0.00019998397316016507, - "loss": 46.0, - "step": 35399 - }, - { - "epoch": 5.700913885422119, - "grad_norm": 0.001718775718472898, - "learning_rate": 0.00019998397225442385, - "loss": 46.0, - "step": 35400 - }, - { - "epoch": 5.701074922500906, - "grad_norm": 0.0031118469778448343, - "learning_rate": 0.00019998397134865704, - "loss": 46.0, - "step": 35401 - }, - { - "epoch": 5.7012359595796935, - "grad_norm": 0.013429944403469563, - "learning_rate": 0.00019998397044286464, - "loss": 46.0, - "step": 35402 - }, - { - "epoch": 5.701396996658481, - "grad_norm": 0.005788861308246851, - "learning_rate": 0.00019998396953704663, - "loss": 46.0, - "step": 35403 - }, - { - "epoch": 5.7015580337372676, - "grad_norm": 0.002211272483691573, - "learning_rate": 0.00019998396863120304, - "loss": 46.0, - "step": 35404 - }, - { - "epoch": 5.701719070816056, - "grad_norm": 0.00576765276491642, - "learning_rate": 0.00019998396772533385, - "loss": 46.0, - "step": 35405 - }, - { - "epoch": 5.7018801078948425, - "grad_norm": 0.0010375359561294317, - "learning_rate": 0.00019998396681943908, - "loss": 46.0, - "step": 35406 - }, - { - "epoch": 5.70204114497363, - "grad_norm": 0.00444755470380187, - "learning_rate": 0.00019998396591351872, - "loss": 46.0, - "step": 35407 - }, - { - "epoch": 5.702202182052417, - "grad_norm": 0.008206761442124844, - "learning_rate": 0.00019998396500757278, - "loss": 46.0, - "step": 35408 - }, - { - "epoch": 5.702363219131205, - "grad_norm": 0.002685726620256901, - "learning_rate": 0.00019998396410160122, - "loss": 46.0, - "step": 35409 - }, - { - "epoch": 5.702524256209992, - "grad_norm": 0.0029550758190453053, - "learning_rate": 0.00019998396319560407, - "loss": 46.0, - "step": 35410 - }, - { - "epoch": 5.70268529328878, - "grad_norm": 0.00653796223923564, - "learning_rate": 0.00019998396228958137, - "loss": 46.0, - "step": 35411 - }, - { - "epoch": 5.702846330367567, - "grad_norm": 0.007673420011997223, - "learning_rate": 0.00019998396138353304, - "loss": 46.0, - "step": 35412 - }, - { - "epoch": 5.703007367446355, - "grad_norm": 0.008077666163444519, - "learning_rate": 0.00019998396047745916, - "loss": 46.0, - "step": 35413 - }, - { - "epoch": 5.703168404525142, - "grad_norm": 0.019116917625069618, - "learning_rate": 0.00019998395957135964, - "loss": 46.0, - "step": 35414 - }, - { - "epoch": 5.7033294416039295, - "grad_norm": 0.006062595173716545, - "learning_rate": 0.00019998395866523456, - "loss": 46.0, - "step": 35415 - }, - { - "epoch": 5.703490478682717, - "grad_norm": 0.007338200695812702, - "learning_rate": 0.00019998395775908386, - "loss": 46.0, - "step": 35416 - }, - { - "epoch": 5.703651515761504, - "grad_norm": 0.003945605829358101, - "learning_rate": 0.0001999839568529076, - "loss": 46.0, - "step": 35417 - }, - { - "epoch": 5.703812552840292, - "grad_norm": 0.004946451168507338, - "learning_rate": 0.00019998395594670573, - "loss": 46.0, - "step": 35418 - }, - { - "epoch": 5.703973589919078, - "grad_norm": 0.0020150833297520876, - "learning_rate": 0.0001999839550404783, - "loss": 46.0, - "step": 35419 - }, - { - "epoch": 5.704134626997866, - "grad_norm": 0.012988763861358166, - "learning_rate": 0.00019998395413422525, - "loss": 46.0, - "step": 35420 - }, - { - "epoch": 5.704295664076653, - "grad_norm": 0.004056087229400873, - "learning_rate": 0.0001999839532279466, - "loss": 46.0, - "step": 35421 - }, - { - "epoch": 5.704456701155441, - "grad_norm": 0.00551618542522192, - "learning_rate": 0.0001999839523216424, - "loss": 46.0, - "step": 35422 - }, - { - "epoch": 5.704617738234228, - "grad_norm": 0.0016281232237815857, - "learning_rate": 0.0001999839514153126, - "loss": 46.0, - "step": 35423 - }, - { - "epoch": 5.704778775313016, - "grad_norm": 0.0025894027203321457, - "learning_rate": 0.00019998395050895717, - "loss": 46.0, - "step": 35424 - }, - { - "epoch": 5.704939812391803, - "grad_norm": 0.0021113725379109383, - "learning_rate": 0.0001999839496025762, - "loss": 46.0, - "step": 35425 - }, - { - "epoch": 5.705100849470591, - "grad_norm": 0.013794437982141972, - "learning_rate": 0.00019998394869616962, - "loss": 46.0, - "step": 35426 - }, - { - "epoch": 5.705261886549378, - "grad_norm": 0.008361543528735638, - "learning_rate": 0.00019998394778973744, - "loss": 46.0, - "step": 35427 - }, - { - "epoch": 5.7054229236281655, - "grad_norm": 0.005546377040445805, - "learning_rate": 0.00019998394688327964, - "loss": 46.0, - "step": 35428 - }, - { - "epoch": 5.705583960706953, - "grad_norm": 0.003545439802110195, - "learning_rate": 0.0001999839459767963, - "loss": 46.0, - "step": 35429 - }, - { - "epoch": 5.70574499778574, - "grad_norm": 0.0045602587051689625, - "learning_rate": 0.00019998394507028734, - "loss": 46.0, - "step": 35430 - }, - { - "epoch": 5.705906034864528, - "grad_norm": 0.0029252043459564447, - "learning_rate": 0.0001999839441637528, - "loss": 46.0, - "step": 35431 - }, - { - "epoch": 5.706067071943315, - "grad_norm": 0.0065379999577999115, - "learning_rate": 0.00019998394325719266, - "loss": 46.0, - "step": 35432 - }, - { - "epoch": 5.706228109022103, - "grad_norm": 0.0016173160402104259, - "learning_rate": 0.00019998394235060696, - "loss": 46.0, - "step": 35433 - }, - { - "epoch": 5.706389146100889, - "grad_norm": 0.02710697054862976, - "learning_rate": 0.00019998394144399564, - "loss": 46.0, - "step": 35434 - }, - { - "epoch": 5.706550183179677, - "grad_norm": 0.008578195236623287, - "learning_rate": 0.00019998394053735873, - "loss": 46.0, - "step": 35435 - }, - { - "epoch": 5.706711220258464, - "grad_norm": 0.013228898867964745, - "learning_rate": 0.00019998393963069626, - "loss": 46.0, - "step": 35436 - }, - { - "epoch": 5.706872257337252, - "grad_norm": 0.003564579878002405, - "learning_rate": 0.00019998393872400815, - "loss": 46.0, - "step": 35437 - }, - { - "epoch": 5.707033294416039, - "grad_norm": 0.005520430859178305, - "learning_rate": 0.00019998393781729448, - "loss": 46.0, - "step": 35438 - }, - { - "epoch": 5.707194331494827, - "grad_norm": 0.004034971818327904, - "learning_rate": 0.00019998393691055523, - "loss": 46.0, - "step": 35439 - }, - { - "epoch": 5.707355368573614, - "grad_norm": 0.0052180602215230465, - "learning_rate": 0.00019998393600379038, - "loss": 46.0, - "step": 35440 - }, - { - "epoch": 5.7075164056524015, - "grad_norm": 0.0028647335711866617, - "learning_rate": 0.0001999839350969999, - "loss": 46.0, - "step": 35441 - }, - { - "epoch": 5.707677442731189, - "grad_norm": 0.00260867178440094, - "learning_rate": 0.00019998393419018385, - "loss": 46.0, - "step": 35442 - }, - { - "epoch": 5.707838479809976, - "grad_norm": 0.01264884416013956, - "learning_rate": 0.00019998393328334225, - "loss": 46.0, - "step": 35443 - }, - { - "epoch": 5.707999516888764, - "grad_norm": 0.0020286054350435734, - "learning_rate": 0.00019998393237647503, - "loss": 46.0, - "step": 35444 - }, - { - "epoch": 5.708160553967551, - "grad_norm": 0.005394205451011658, - "learning_rate": 0.0001999839314695822, - "loss": 46.0, - "step": 35445 - }, - { - "epoch": 5.708321591046339, - "grad_norm": 0.002989766653627157, - "learning_rate": 0.0001999839305626638, - "loss": 46.0, - "step": 35446 - }, - { - "epoch": 5.708482628125126, - "grad_norm": 0.0017508756136521697, - "learning_rate": 0.00019998392965571982, - "loss": 46.0, - "step": 35447 - }, - { - "epoch": 5.708643665203914, - "grad_norm": 0.010966694913804531, - "learning_rate": 0.00019998392874875026, - "loss": 46.0, - "step": 35448 - }, - { - "epoch": 5.7088047022827, - "grad_norm": 0.0068923430517315865, - "learning_rate": 0.00019998392784175505, - "loss": 46.0, - "step": 35449 - }, - { - "epoch": 5.708965739361488, - "grad_norm": 0.0124368192628026, - "learning_rate": 0.00019998392693473428, - "loss": 46.0, - "step": 35450 - }, - { - "epoch": 5.709126776440275, - "grad_norm": 0.01080751046538353, - "learning_rate": 0.00019998392602768795, - "loss": 46.0, - "step": 35451 - }, - { - "epoch": 5.709287813519063, - "grad_norm": 0.0024276564363390207, - "learning_rate": 0.000199983925120616, - "loss": 46.0, - "step": 35452 - }, - { - "epoch": 5.70944885059785, - "grad_norm": 0.0005492082564160228, - "learning_rate": 0.00019998392421351845, - "loss": 46.0, - "step": 35453 - }, - { - "epoch": 5.7096098876766375, - "grad_norm": 0.003871447639539838, - "learning_rate": 0.00019998392330639533, - "loss": 46.0, - "step": 35454 - }, - { - "epoch": 5.709770924755425, - "grad_norm": 0.004460901487618685, - "learning_rate": 0.00019998392239924662, - "loss": 46.0, - "step": 35455 - }, - { - "epoch": 5.709931961834212, - "grad_norm": 0.0016366359777748585, - "learning_rate": 0.00019998392149207233, - "loss": 46.0, - "step": 35456 - }, - { - "epoch": 5.710092998913, - "grad_norm": 0.0019701600540429354, - "learning_rate": 0.0001999839205848724, - "loss": 46.0, - "step": 35457 - }, - { - "epoch": 5.710254035991787, - "grad_norm": 0.003288273001089692, - "learning_rate": 0.0001999839196776469, - "loss": 46.0, - "step": 35458 - }, - { - "epoch": 5.710415073070575, - "grad_norm": 0.010506383143365383, - "learning_rate": 0.00019998391877039585, - "loss": 46.0, - "step": 35459 - }, - { - "epoch": 5.710576110149362, - "grad_norm": 0.011146649718284607, - "learning_rate": 0.00019998391786311915, - "loss": 46.0, - "step": 35460 - }, - { - "epoch": 5.71073714722815, - "grad_norm": 0.008965682238340378, - "learning_rate": 0.0001999839169558169, - "loss": 46.0, - "step": 35461 - }, - { - "epoch": 5.710898184306936, - "grad_norm": 0.0030571077950298786, - "learning_rate": 0.00019998391604848906, - "loss": 46.0, - "step": 35462 - }, - { - "epoch": 5.7110592213857245, - "grad_norm": 0.0015387525781989098, - "learning_rate": 0.00019998391514113563, - "loss": 46.0, - "step": 35463 - }, - { - "epoch": 5.711220258464511, - "grad_norm": 0.003867823863402009, - "learning_rate": 0.00019998391423375658, - "loss": 46.0, - "step": 35464 - }, - { - "epoch": 5.7113812955432985, - "grad_norm": 0.004653410520404577, - "learning_rate": 0.00019998391332635195, - "loss": 46.0, - "step": 35465 - }, - { - "epoch": 5.711542332622086, - "grad_norm": 0.0009822207503020763, - "learning_rate": 0.00019998391241892173, - "loss": 46.0, - "step": 35466 - }, - { - "epoch": 5.7117033697008734, - "grad_norm": 0.007330250460654497, - "learning_rate": 0.00019998391151146593, - "loss": 46.0, - "step": 35467 - }, - { - "epoch": 5.711864406779661, - "grad_norm": 0.003616988891735673, - "learning_rate": 0.0001999839106039845, - "loss": 46.0, - "step": 35468 - }, - { - "epoch": 5.712025443858448, - "grad_norm": 0.0022851997055113316, - "learning_rate": 0.00019998390969647753, - "loss": 46.0, - "step": 35469 - }, - { - "epoch": 5.712186480937236, - "grad_norm": 0.010202893987298012, - "learning_rate": 0.00019998390878894496, - "loss": 46.0, - "step": 35470 - }, - { - "epoch": 5.712347518016023, - "grad_norm": 0.016665631905198097, - "learning_rate": 0.00019998390788138678, - "loss": 46.0, - "step": 35471 - }, - { - "epoch": 5.712508555094811, - "grad_norm": 0.014059074223041534, - "learning_rate": 0.000199983906973803, - "loss": 46.0, - "step": 35472 - }, - { - "epoch": 5.712669592173598, - "grad_norm": 0.006730437278747559, - "learning_rate": 0.00019998390606619366, - "loss": 46.0, - "step": 35473 - }, - { - "epoch": 5.712830629252386, - "grad_norm": 0.006679036654531956, - "learning_rate": 0.00019998390515855871, - "loss": 46.0, - "step": 35474 - }, - { - "epoch": 5.712991666331173, - "grad_norm": 0.003336266614496708, - "learning_rate": 0.00019998390425089818, - "loss": 46.0, - "step": 35475 - }, - { - "epoch": 5.7131527034099605, - "grad_norm": 0.005979595240205526, - "learning_rate": 0.0001999839033432121, - "loss": 46.0, - "step": 35476 - }, - { - "epoch": 5.713313740488747, - "grad_norm": 0.018231607973575592, - "learning_rate": 0.00019998390243550036, - "loss": 46.0, - "step": 35477 - }, - { - "epoch": 5.713474777567535, - "grad_norm": 0.002581343287602067, - "learning_rate": 0.00019998390152776305, - "loss": 46.0, - "step": 35478 - }, - { - "epoch": 5.713635814646322, - "grad_norm": 0.004995923954993486, - "learning_rate": 0.00019998390062000017, - "loss": 46.0, - "step": 35479 - }, - { - "epoch": 5.713796851725109, - "grad_norm": 0.0024382679257541895, - "learning_rate": 0.00019998389971221167, - "loss": 46.0, - "step": 35480 - }, - { - "epoch": 5.713957888803897, - "grad_norm": 0.010854941792786121, - "learning_rate": 0.0001999838988043976, - "loss": 46.0, - "step": 35481 - }, - { - "epoch": 5.714118925882684, - "grad_norm": 0.01824612356722355, - "learning_rate": 0.00019998389789655793, - "loss": 46.0, - "step": 35482 - }, - { - "epoch": 5.714279962961472, - "grad_norm": 0.002907045418396592, - "learning_rate": 0.00019998389698869267, - "loss": 46.0, - "step": 35483 - }, - { - "epoch": 5.714441000040259, - "grad_norm": 0.010969860479235649, - "learning_rate": 0.0001999838960808018, - "loss": 46.0, - "step": 35484 - }, - { - "epoch": 5.714602037119047, - "grad_norm": 0.0015488305361941457, - "learning_rate": 0.00019998389517288535, - "loss": 46.0, - "step": 35485 - }, - { - "epoch": 5.714763074197834, - "grad_norm": 0.0008086160523816943, - "learning_rate": 0.00019998389426494333, - "loss": 46.0, - "step": 35486 - }, - { - "epoch": 5.714924111276622, - "grad_norm": 0.0008083141874521971, - "learning_rate": 0.0001999838933569757, - "loss": 46.0, - "step": 35487 - }, - { - "epoch": 5.715085148355409, - "grad_norm": 0.00237140990793705, - "learning_rate": 0.0001999838924489825, - "loss": 46.0, - "step": 35488 - }, - { - "epoch": 5.7152461854341965, - "grad_norm": 0.006972777657210827, - "learning_rate": 0.0001999838915409637, - "loss": 46.0, - "step": 35489 - }, - { - "epoch": 5.715407222512984, - "grad_norm": 0.006063186097890139, - "learning_rate": 0.00019998389063291932, - "loss": 46.0, - "step": 35490 - }, - { - "epoch": 5.715568259591771, - "grad_norm": 0.004452622961252928, - "learning_rate": 0.00019998388972484934, - "loss": 46.0, - "step": 35491 - }, - { - "epoch": 5.715729296670558, - "grad_norm": 0.00532748457044363, - "learning_rate": 0.00019998388881675374, - "loss": 46.0, - "step": 35492 - }, - { - "epoch": 5.715890333749345, - "grad_norm": 0.002177457557991147, - "learning_rate": 0.0001999838879086326, - "loss": 46.0, - "step": 35493 - }, - { - "epoch": 5.716051370828133, - "grad_norm": 0.007527903653681278, - "learning_rate": 0.00019998388700048582, - "loss": 46.0, - "step": 35494 - }, - { - "epoch": 5.71621240790692, - "grad_norm": 0.0010966056725010276, - "learning_rate": 0.00019998388609231347, - "loss": 46.0, - "step": 35495 - }, - { - "epoch": 5.716373444985708, - "grad_norm": 0.005752748344093561, - "learning_rate": 0.00019998388518411555, - "loss": 46.0, - "step": 35496 - }, - { - "epoch": 5.716534482064495, - "grad_norm": 0.003387514501810074, - "learning_rate": 0.000199983884275892, - "loss": 46.0, - "step": 35497 - }, - { - "epoch": 5.716695519143283, - "grad_norm": 0.007491863798350096, - "learning_rate": 0.00019998388336764288, - "loss": 46.0, - "step": 35498 - }, - { - "epoch": 5.71685655622207, - "grad_norm": 0.002125262515619397, - "learning_rate": 0.00019998388245936818, - "loss": 46.0, - "step": 35499 - }, - { - "epoch": 5.717017593300858, - "grad_norm": 0.0027601609472185373, - "learning_rate": 0.00019998388155106786, - "loss": 46.0, - "step": 35500 - }, - { - "epoch": 5.717178630379645, - "grad_norm": 0.005047936458140612, - "learning_rate": 0.00019998388064274198, - "loss": 46.0, - "step": 35501 - }, - { - "epoch": 5.7173396674584325, - "grad_norm": 0.0047386400401592255, - "learning_rate": 0.0001999838797343905, - "loss": 46.0, - "step": 35502 - }, - { - "epoch": 5.71750070453722, - "grad_norm": 0.009273567236959934, - "learning_rate": 0.0001999838788260134, - "loss": 46.0, - "step": 35503 - }, - { - "epoch": 5.717661741616007, - "grad_norm": 0.003139413660392165, - "learning_rate": 0.00019998387791761077, - "loss": 46.0, - "step": 35504 - }, - { - "epoch": 5.717822778694795, - "grad_norm": 0.0024542633909732103, - "learning_rate": 0.00019998387700918252, - "loss": 46.0, - "step": 35505 - }, - { - "epoch": 5.717983815773582, - "grad_norm": 0.004819979891180992, - "learning_rate": 0.00019998387610072865, - "loss": 46.0, - "step": 35506 - }, - { - "epoch": 5.718144852852369, - "grad_norm": 0.01143036037683487, - "learning_rate": 0.00019998387519224922, - "loss": 46.0, - "step": 35507 - }, - { - "epoch": 5.718305889931156, - "grad_norm": 0.013003865256905556, - "learning_rate": 0.00019998387428374418, - "loss": 46.0, - "step": 35508 - }, - { - "epoch": 5.718466927009944, - "grad_norm": 0.009061514399945736, - "learning_rate": 0.00019998387337521355, - "loss": 46.0, - "step": 35509 - }, - { - "epoch": 5.718627964088731, - "grad_norm": 0.028800861909985542, - "learning_rate": 0.00019998387246665736, - "loss": 46.0, - "step": 35510 - }, - { - "epoch": 5.718789001167519, - "grad_norm": 0.002898689592257142, - "learning_rate": 0.00019998387155807556, - "loss": 46.0, - "step": 35511 - }, - { - "epoch": 5.718950038246306, - "grad_norm": 0.0011271587572991848, - "learning_rate": 0.00019998387064946817, - "loss": 46.0, - "step": 35512 - }, - { - "epoch": 5.7191110753250936, - "grad_norm": 0.014553409069776535, - "learning_rate": 0.00019998386974083522, - "loss": 46.0, - "step": 35513 - }, - { - "epoch": 5.719272112403881, - "grad_norm": 0.004143801983445883, - "learning_rate": 0.00019998386883217663, - "loss": 46.0, - "step": 35514 - }, - { - "epoch": 5.7194331494826685, - "grad_norm": 0.0005808683345094323, - "learning_rate": 0.00019998386792349245, - "loss": 46.0, - "step": 35515 - }, - { - "epoch": 5.719594186561456, - "grad_norm": 0.006136034149676561, - "learning_rate": 0.0001999838670147827, - "loss": 46.0, - "step": 35516 - }, - { - "epoch": 5.719755223640243, - "grad_norm": 0.002585548674687743, - "learning_rate": 0.00019998386610604735, - "loss": 46.0, - "step": 35517 - }, - { - "epoch": 5.719916260719031, - "grad_norm": 0.005272881127893925, - "learning_rate": 0.00019998386519728641, - "loss": 46.0, - "step": 35518 - }, - { - "epoch": 5.720077297797818, - "grad_norm": 0.007837477140128613, - "learning_rate": 0.0001999838642884999, - "loss": 46.0, - "step": 35519 - }, - { - "epoch": 5.720238334876606, - "grad_norm": 0.0058410619385540485, - "learning_rate": 0.00019998386337968777, - "loss": 46.0, - "step": 35520 - }, - { - "epoch": 5.720399371955393, - "grad_norm": 0.004429119173437357, - "learning_rate": 0.00019998386247085007, - "loss": 46.0, - "step": 35521 - }, - { - "epoch": 5.72056040903418, - "grad_norm": 0.005085925571620464, - "learning_rate": 0.00019998386156198675, - "loss": 46.0, - "step": 35522 - }, - { - "epoch": 5.720721446112967, - "grad_norm": 0.00603752164170146, - "learning_rate": 0.00019998386065309787, - "loss": 46.0, - "step": 35523 - }, - { - "epoch": 5.720882483191755, - "grad_norm": 0.007471588905900717, - "learning_rate": 0.0001999838597441834, - "loss": 46.0, - "step": 35524 - }, - { - "epoch": 5.721043520270542, - "grad_norm": 0.0014969459734857082, - "learning_rate": 0.00019998385883524333, - "loss": 46.0, - "step": 35525 - }, - { - "epoch": 5.7212045573493295, - "grad_norm": 0.0057539427652955055, - "learning_rate": 0.00019998385792627767, - "loss": 46.0, - "step": 35526 - }, - { - "epoch": 5.721365594428117, - "grad_norm": 0.004889465402811766, - "learning_rate": 0.00019998385701728641, - "loss": 46.0, - "step": 35527 - }, - { - "epoch": 5.721526631506904, - "grad_norm": 0.010457836091518402, - "learning_rate": 0.0001999838561082696, - "loss": 46.0, - "step": 35528 - }, - { - "epoch": 5.721687668585692, - "grad_norm": 0.001264468184672296, - "learning_rate": 0.00019998385519922715, - "loss": 46.0, - "step": 35529 - }, - { - "epoch": 5.721848705664479, - "grad_norm": 0.0017279409803450108, - "learning_rate": 0.00019998385429015913, - "loss": 46.0, - "step": 35530 - }, - { - "epoch": 5.722009742743267, - "grad_norm": 0.0076323822140693665, - "learning_rate": 0.0001999838533810655, - "loss": 46.0, - "step": 35531 - }, - { - "epoch": 5.722170779822054, - "grad_norm": 0.01451136451214552, - "learning_rate": 0.00019998385247194632, - "loss": 46.0, - "step": 35532 - }, - { - "epoch": 5.722331816900842, - "grad_norm": 0.007001708727329969, - "learning_rate": 0.00019998385156280152, - "loss": 46.0, - "step": 35533 - }, - { - "epoch": 5.722492853979629, - "grad_norm": 0.001599399489350617, - "learning_rate": 0.00019998385065363113, - "loss": 46.0, - "step": 35534 - }, - { - "epoch": 5.722653891058416, - "grad_norm": 0.0036499109119176865, - "learning_rate": 0.00019998384974443515, - "loss": 46.0, - "step": 35535 - }, - { - "epoch": 5.722814928137204, - "grad_norm": 0.0013848617672920227, - "learning_rate": 0.00019998384883521358, - "loss": 46.0, - "step": 35536 - }, - { - "epoch": 5.722975965215991, - "grad_norm": 0.0069944728165864944, - "learning_rate": 0.0001999838479259664, - "loss": 46.0, - "step": 35537 - }, - { - "epoch": 5.723137002294778, - "grad_norm": 0.00514630414545536, - "learning_rate": 0.00019998384701669367, - "loss": 46.0, - "step": 35538 - }, - { - "epoch": 5.7232980393735655, - "grad_norm": 0.0030399570241570473, - "learning_rate": 0.00019998384610739534, - "loss": 46.0, - "step": 35539 - }, - { - "epoch": 5.723459076452353, - "grad_norm": 0.003940983675420284, - "learning_rate": 0.00019998384519807137, - "loss": 46.0, - "step": 35540 - }, - { - "epoch": 5.72362011353114, - "grad_norm": 0.021705487743020058, - "learning_rate": 0.00019998384428872185, - "loss": 46.0, - "step": 35541 - }, - { - "epoch": 5.723781150609928, - "grad_norm": 0.003187623806297779, - "learning_rate": 0.00019998384337934676, - "loss": 46.0, - "step": 35542 - }, - { - "epoch": 5.723942187688715, - "grad_norm": 0.0025115148164331913, - "learning_rate": 0.00019998384246994606, - "loss": 46.0, - "step": 35543 - }, - { - "epoch": 5.724103224767503, - "grad_norm": 0.019701754674315453, - "learning_rate": 0.00019998384156051974, - "loss": 46.0, - "step": 35544 - }, - { - "epoch": 5.72426426184629, - "grad_norm": 0.022417917847633362, - "learning_rate": 0.00019998384065106787, - "loss": 46.0, - "step": 35545 - }, - { - "epoch": 5.724425298925078, - "grad_norm": 0.010307450778782368, - "learning_rate": 0.00019998383974159038, - "loss": 46.0, - "step": 35546 - }, - { - "epoch": 5.724586336003865, - "grad_norm": 0.004518875852227211, - "learning_rate": 0.00019998383883208733, - "loss": 46.0, - "step": 35547 - }, - { - "epoch": 5.724747373082653, - "grad_norm": 0.013035601004958153, - "learning_rate": 0.00019998383792255866, - "loss": 46.0, - "step": 35548 - }, - { - "epoch": 5.72490841016144, - "grad_norm": 0.00627890694886446, - "learning_rate": 0.00019998383701300444, - "loss": 46.0, - "step": 35549 - }, - { - "epoch": 5.725069447240227, - "grad_norm": 0.001404904993250966, - "learning_rate": 0.00019998383610342457, - "loss": 46.0, - "step": 35550 - }, - { - "epoch": 5.725230484319015, - "grad_norm": 0.007445406634360552, - "learning_rate": 0.00019998383519381915, - "loss": 46.0, - "step": 35551 - }, - { - "epoch": 5.7253915213978015, - "grad_norm": 0.0019616286735981703, - "learning_rate": 0.00019998383428418813, - "loss": 46.0, - "step": 35552 - }, - { - "epoch": 5.725552558476589, - "grad_norm": 0.0012010824866592884, - "learning_rate": 0.00019998383337453153, - "loss": 46.0, - "step": 35553 - }, - { - "epoch": 5.725713595555376, - "grad_norm": 0.010856564156711102, - "learning_rate": 0.00019998383246484932, - "loss": 46.0, - "step": 35554 - }, - { - "epoch": 5.725874632634164, - "grad_norm": 0.009345943108201027, - "learning_rate": 0.00019998383155514154, - "loss": 46.0, - "step": 35555 - }, - { - "epoch": 5.726035669712951, - "grad_norm": 0.0037518595345318317, - "learning_rate": 0.00019998383064540813, - "loss": 46.0, - "step": 35556 - }, - { - "epoch": 5.726196706791739, - "grad_norm": 0.007128858473151922, - "learning_rate": 0.00019998382973564918, - "loss": 46.0, - "step": 35557 - }, - { - "epoch": 5.726357743870526, - "grad_norm": 0.007594176102429628, - "learning_rate": 0.0001999838288258646, - "loss": 46.0, - "step": 35558 - }, - { - "epoch": 5.726518780949314, - "grad_norm": 0.0034325586166232824, - "learning_rate": 0.00019998382791605447, - "loss": 46.0, - "step": 35559 - }, - { - "epoch": 5.726679818028101, - "grad_norm": 0.0015808983007445931, - "learning_rate": 0.0001999838270062187, - "loss": 46.0, - "step": 35560 - }, - { - "epoch": 5.726840855106889, - "grad_norm": 0.01398675236850977, - "learning_rate": 0.00019998382609635738, - "loss": 46.0, - "step": 35561 - }, - { - "epoch": 5.727001892185676, - "grad_norm": 0.010379220359027386, - "learning_rate": 0.00019998382518647044, - "loss": 46.0, - "step": 35562 - }, - { - "epoch": 5.7271629292644635, - "grad_norm": 0.0023658093996345997, - "learning_rate": 0.00019998382427655794, - "loss": 46.0, - "step": 35563 - }, - { - "epoch": 5.727323966343251, - "grad_norm": 0.003652440384030342, - "learning_rate": 0.0001999838233666198, - "loss": 46.0, - "step": 35564 - }, - { - "epoch": 5.7274850034220375, - "grad_norm": 0.007258872967213392, - "learning_rate": 0.0001999838224566561, - "loss": 46.0, - "step": 35565 - }, - { - "epoch": 5.727646040500825, - "grad_norm": 0.00116466183681041, - "learning_rate": 0.0001999838215466668, - "loss": 46.0, - "step": 35566 - }, - { - "epoch": 5.727807077579612, - "grad_norm": 0.0013377966824918985, - "learning_rate": 0.00019998382063665196, - "loss": 46.0, - "step": 35567 - }, - { - "epoch": 5.7279681146584, - "grad_norm": 0.0017657188000157475, - "learning_rate": 0.00019998381972661147, - "loss": 46.0, - "step": 35568 - }, - { - "epoch": 5.728129151737187, - "grad_norm": 0.0041077835485339165, - "learning_rate": 0.0001999838188165454, - "loss": 46.0, - "step": 35569 - }, - { - "epoch": 5.728290188815975, - "grad_norm": 0.008129140362143517, - "learning_rate": 0.00019998381790645376, - "loss": 46.0, - "step": 35570 - }, - { - "epoch": 5.728451225894762, - "grad_norm": 0.0026882204692810774, - "learning_rate": 0.00019998381699633654, - "loss": 46.0, - "step": 35571 - }, - { - "epoch": 5.72861226297355, - "grad_norm": 0.0014871583553031087, - "learning_rate": 0.00019998381608619367, - "loss": 46.0, - "step": 35572 - }, - { - "epoch": 5.728773300052337, - "grad_norm": 0.0032231139484792948, - "learning_rate": 0.00019998381517602525, - "loss": 46.0, - "step": 35573 - }, - { - "epoch": 5.7289343371311245, - "grad_norm": 0.0014955142978578806, - "learning_rate": 0.00019998381426583123, - "loss": 46.0, - "step": 35574 - }, - { - "epoch": 5.729095374209912, - "grad_norm": 0.005558602046221495, - "learning_rate": 0.00019998381335561163, - "loss": 46.0, - "step": 35575 - }, - { - "epoch": 5.7292564112886994, - "grad_norm": 0.0031127077527344227, - "learning_rate": 0.00019998381244536645, - "loss": 46.0, - "step": 35576 - }, - { - "epoch": 5.729417448367487, - "grad_norm": 0.0014645750634372234, - "learning_rate": 0.00019998381153509565, - "loss": 46.0, - "step": 35577 - }, - { - "epoch": 5.729578485446274, - "grad_norm": 0.005130200181156397, - "learning_rate": 0.00019998381062479926, - "loss": 46.0, - "step": 35578 - }, - { - "epoch": 5.729739522525062, - "grad_norm": 0.021106954663991928, - "learning_rate": 0.0001999838097144773, - "loss": 46.0, - "step": 35579 - }, - { - "epoch": 5.729900559603848, - "grad_norm": 0.017933879047632217, - "learning_rate": 0.00019998380880412975, - "loss": 46.0, - "step": 35580 - }, - { - "epoch": 5.730061596682636, - "grad_norm": 0.00688158068805933, - "learning_rate": 0.0001999838078937566, - "loss": 46.0, - "step": 35581 - }, - { - "epoch": 5.730222633761423, - "grad_norm": 0.0023649160284549, - "learning_rate": 0.00019998380698335784, - "loss": 46.0, - "step": 35582 - }, - { - "epoch": 5.730383670840211, - "grad_norm": 0.004906385205686092, - "learning_rate": 0.0001999838060729335, - "loss": 46.0, - "step": 35583 - }, - { - "epoch": 5.730544707918998, - "grad_norm": 0.0034793447703123093, - "learning_rate": 0.00019998380516248357, - "loss": 46.0, - "step": 35584 - }, - { - "epoch": 5.730705744997786, - "grad_norm": 0.008268315345048904, - "learning_rate": 0.00019998380425200808, - "loss": 46.0, - "step": 35585 - }, - { - "epoch": 5.730866782076573, - "grad_norm": 0.0066382382065057755, - "learning_rate": 0.00019998380334150694, - "loss": 46.0, - "step": 35586 - }, - { - "epoch": 5.7310278191553605, - "grad_norm": 0.0013370001688599586, - "learning_rate": 0.00019998380243098026, - "loss": 46.0, - "step": 35587 - }, - { - "epoch": 5.731188856234148, - "grad_norm": 0.016605084761977196, - "learning_rate": 0.00019998380152042798, - "loss": 46.0, - "step": 35588 - }, - { - "epoch": 5.731349893312935, - "grad_norm": 0.0036441953852772713, - "learning_rate": 0.0001999838006098501, - "loss": 46.0, - "step": 35589 - }, - { - "epoch": 5.731510930391723, - "grad_norm": 0.0015499975997954607, - "learning_rate": 0.00019998379969924664, - "loss": 46.0, - "step": 35590 - }, - { - "epoch": 5.73167196747051, - "grad_norm": 0.0052222684025764465, - "learning_rate": 0.0001999837987886176, - "loss": 46.0, - "step": 35591 - }, - { - "epoch": 5.731833004549298, - "grad_norm": 0.002013799035921693, - "learning_rate": 0.00019998379787796293, - "loss": 46.0, - "step": 35592 - }, - { - "epoch": 5.731994041628085, - "grad_norm": 0.001408213283866644, - "learning_rate": 0.00019998379696728268, - "loss": 46.0, - "step": 35593 - }, - { - "epoch": 5.732155078706873, - "grad_norm": 0.0074705276638269424, - "learning_rate": 0.00019998379605657687, - "loss": 46.0, - "step": 35594 - }, - { - "epoch": 5.732316115785659, - "grad_norm": 0.002812921768054366, - "learning_rate": 0.00019998379514584542, - "loss": 46.0, - "step": 35595 - }, - { - "epoch": 5.732477152864447, - "grad_norm": 0.0022542367223650217, - "learning_rate": 0.00019998379423508844, - "loss": 46.0, - "step": 35596 - }, - { - "epoch": 5.732638189943234, - "grad_norm": 0.002422074321657419, - "learning_rate": 0.00019998379332430584, - "loss": 46.0, - "step": 35597 - }, - { - "epoch": 5.732799227022022, - "grad_norm": 0.009186848066747189, - "learning_rate": 0.00019998379241349762, - "loss": 46.0, - "step": 35598 - }, - { - "epoch": 5.732960264100809, - "grad_norm": 0.0007388254161924124, - "learning_rate": 0.00019998379150266385, - "loss": 46.0, - "step": 35599 - }, - { - "epoch": 5.7331213011795965, - "grad_norm": 0.0061682178638875484, - "learning_rate": 0.00019998379059180447, - "loss": 46.0, - "step": 35600 - }, - { - "epoch": 5.733282338258384, - "grad_norm": 0.003723063040524721, - "learning_rate": 0.0001999837896809195, - "loss": 46.0, - "step": 35601 - }, - { - "epoch": 5.733443375337171, - "grad_norm": 0.0012781383702531457, - "learning_rate": 0.00019998378877000893, - "loss": 46.0, - "step": 35602 - }, - { - "epoch": 5.733604412415959, - "grad_norm": 0.0009361369884572923, - "learning_rate": 0.0001999837878590728, - "loss": 46.0, - "step": 35603 - }, - { - "epoch": 5.733765449494746, - "grad_norm": 0.00432397099211812, - "learning_rate": 0.00019998378694811107, - "loss": 46.0, - "step": 35604 - }, - { - "epoch": 5.733926486573534, - "grad_norm": 0.0022777237463742495, - "learning_rate": 0.00019998378603712372, - "loss": 46.0, - "step": 35605 - }, - { - "epoch": 5.734087523652321, - "grad_norm": 0.0024943219032138586, - "learning_rate": 0.0001999837851261108, - "loss": 46.0, - "step": 35606 - }, - { - "epoch": 5.734248560731109, - "grad_norm": 0.003432539524510503, - "learning_rate": 0.0001999837842150723, - "loss": 46.0, - "step": 35607 - }, - { - "epoch": 5.734409597809895, - "grad_norm": 0.010588981211185455, - "learning_rate": 0.0001999837833040082, - "loss": 46.0, - "step": 35608 - }, - { - "epoch": 5.734570634888684, - "grad_norm": 0.002912891795858741, - "learning_rate": 0.0001999837823929185, - "loss": 46.0, - "step": 35609 - }, - { - "epoch": 5.73473167196747, - "grad_norm": 0.006764731369912624, - "learning_rate": 0.00019998378148180322, - "loss": 46.0, - "step": 35610 - }, - { - "epoch": 5.734892709046258, - "grad_norm": 0.019105613231658936, - "learning_rate": 0.00019998378057066237, - "loss": 46.0, - "step": 35611 - }, - { - "epoch": 5.735053746125045, - "grad_norm": 0.005462784320116043, - "learning_rate": 0.00019998377965949589, - "loss": 46.0, - "step": 35612 - }, - { - "epoch": 5.7352147832038325, - "grad_norm": 0.005559720564633608, - "learning_rate": 0.00019998377874830384, - "loss": 46.0, - "step": 35613 - }, - { - "epoch": 5.73537582028262, - "grad_norm": 0.006342420820146799, - "learning_rate": 0.0001999837778370862, - "loss": 46.0, - "step": 35614 - }, - { - "epoch": 5.735536857361407, - "grad_norm": 0.0035092777106910944, - "learning_rate": 0.00019998377692584295, - "loss": 46.0, - "step": 35615 - }, - { - "epoch": 5.735697894440195, - "grad_norm": 0.0015751500613987446, - "learning_rate": 0.00019998377601457415, - "loss": 46.0, - "step": 35616 - }, - { - "epoch": 5.735858931518982, - "grad_norm": 0.013914305716753006, - "learning_rate": 0.00019998377510327972, - "loss": 46.0, - "step": 35617 - }, - { - "epoch": 5.73601996859777, - "grad_norm": 0.009146296419203281, - "learning_rate": 0.0001999837741919597, - "loss": 46.0, - "step": 35618 - }, - { - "epoch": 5.736181005676557, - "grad_norm": 0.003716784995049238, - "learning_rate": 0.00019998377328061411, - "loss": 46.0, - "step": 35619 - }, - { - "epoch": 5.736342042755345, - "grad_norm": 0.00288646318949759, - "learning_rate": 0.0001999837723692429, - "loss": 46.0, - "step": 35620 - }, - { - "epoch": 5.736503079834132, - "grad_norm": 0.003845304949209094, - "learning_rate": 0.00019998377145784613, - "loss": 46.0, - "step": 35621 - }, - { - "epoch": 5.7366641169129196, - "grad_norm": 0.0012538401642814279, - "learning_rate": 0.00019998377054642377, - "loss": 46.0, - "step": 35622 - }, - { - "epoch": 5.736825153991706, - "grad_norm": 0.0018852229695767164, - "learning_rate": 0.0001999837696349758, - "loss": 46.0, - "step": 35623 - }, - { - "epoch": 5.7369861910704945, - "grad_norm": 0.003224159125238657, - "learning_rate": 0.00019998376872350227, - "loss": 46.0, - "step": 35624 - }, - { - "epoch": 5.737147228149281, - "grad_norm": 0.00536219822242856, - "learning_rate": 0.00019998376781200312, - "loss": 46.0, - "step": 35625 - }, - { - "epoch": 5.7373082652280685, - "grad_norm": 0.016085563227534294, - "learning_rate": 0.00019998376690047838, - "loss": 46.0, - "step": 35626 - }, - { - "epoch": 5.737469302306856, - "grad_norm": 0.004196074325591326, - "learning_rate": 0.00019998376598892806, - "loss": 46.0, - "step": 35627 - }, - { - "epoch": 5.737630339385643, - "grad_norm": 0.006946604233235121, - "learning_rate": 0.00019998376507735212, - "loss": 46.0, - "step": 35628 - }, - { - "epoch": 5.737791376464431, - "grad_norm": 0.0011422872776165605, - "learning_rate": 0.00019998376416575063, - "loss": 46.0, - "step": 35629 - }, - { - "epoch": 5.737952413543218, - "grad_norm": 0.007601715158671141, - "learning_rate": 0.00019998376325412355, - "loss": 46.0, - "step": 35630 - }, - { - "epoch": 5.738113450622006, - "grad_norm": 0.0023788739927113056, - "learning_rate": 0.00019998376234247085, - "loss": 46.0, - "step": 35631 - }, - { - "epoch": 5.738274487700793, - "grad_norm": 0.004944286309182644, - "learning_rate": 0.0001999837614307926, - "loss": 46.0, - "step": 35632 - }, - { - "epoch": 5.738435524779581, - "grad_norm": 0.007749716751277447, - "learning_rate": 0.00019998376051908872, - "loss": 46.0, - "step": 35633 - }, - { - "epoch": 5.738596561858368, - "grad_norm": 0.004866868257522583, - "learning_rate": 0.00019998375960735923, - "loss": 46.0, - "step": 35634 - }, - { - "epoch": 5.7387575989371555, - "grad_norm": 0.001834242488257587, - "learning_rate": 0.00019998375869560418, - "loss": 46.0, - "step": 35635 - }, - { - "epoch": 5.738918636015943, - "grad_norm": 0.007009696215391159, - "learning_rate": 0.00019998375778382355, - "loss": 46.0, - "step": 35636 - }, - { - "epoch": 5.73907967309473, - "grad_norm": 0.009853570722043514, - "learning_rate": 0.0001999837568720173, - "loss": 46.0, - "step": 35637 - }, - { - "epoch": 5.739240710173517, - "grad_norm": 0.005257311277091503, - "learning_rate": 0.0001999837559601855, - "loss": 46.0, - "step": 35638 - }, - { - "epoch": 5.739401747252305, - "grad_norm": 0.0093264514580369, - "learning_rate": 0.00019998375504832807, - "loss": 46.0, - "step": 35639 - }, - { - "epoch": 5.739562784331092, - "grad_norm": 0.006017458625137806, - "learning_rate": 0.0001999837541364451, - "loss": 46.0, - "step": 35640 - }, - { - "epoch": 5.739723821409879, - "grad_norm": 0.005904871504753828, - "learning_rate": 0.0001999837532245365, - "loss": 46.0, - "step": 35641 - }, - { - "epoch": 5.739884858488667, - "grad_norm": 0.0026297178119421005, - "learning_rate": 0.00019998375231260228, - "loss": 46.0, - "step": 35642 - }, - { - "epoch": 5.740045895567454, - "grad_norm": 0.004746863152831793, - "learning_rate": 0.00019998375140064254, - "loss": 46.0, - "step": 35643 - }, - { - "epoch": 5.740206932646242, - "grad_norm": 0.007847358472645283, - "learning_rate": 0.00019998375048865715, - "loss": 46.0, - "step": 35644 - }, - { - "epoch": 5.740367969725029, - "grad_norm": 0.001639272435568273, - "learning_rate": 0.0001999837495766462, - "loss": 46.0, - "step": 35645 - }, - { - "epoch": 5.740529006803817, - "grad_norm": 0.006071700248867273, - "learning_rate": 0.00019998374866460965, - "loss": 46.0, - "step": 35646 - }, - { - "epoch": 5.740690043882604, - "grad_norm": 0.0010262627620249987, - "learning_rate": 0.0001999837477525475, - "loss": 46.0, - "step": 35647 - }, - { - "epoch": 5.7408510809613915, - "grad_norm": 0.0063830227591097355, - "learning_rate": 0.00019998374684045977, - "loss": 46.0, - "step": 35648 - }, - { - "epoch": 5.741012118040179, - "grad_norm": 0.0019633148331195116, - "learning_rate": 0.00019998374592834645, - "loss": 46.0, - "step": 35649 - }, - { - "epoch": 5.741173155118966, - "grad_norm": 0.005420413333922625, - "learning_rate": 0.00019998374501620757, - "loss": 46.0, - "step": 35650 - }, - { - "epoch": 5.741334192197754, - "grad_norm": 0.0025122330989688635, - "learning_rate": 0.00019998374410404304, - "loss": 46.0, - "step": 35651 - }, - { - "epoch": 5.741495229276541, - "grad_norm": 0.0027262959629297256, - "learning_rate": 0.00019998374319185296, - "loss": 46.0, - "step": 35652 - }, - { - "epoch": 5.741656266355328, - "grad_norm": 0.0015047615161165595, - "learning_rate": 0.00019998374227963726, - "loss": 46.0, - "step": 35653 - }, - { - "epoch": 5.741817303434115, - "grad_norm": 0.0015291034942492843, - "learning_rate": 0.00019998374136739598, - "loss": 46.0, - "step": 35654 - }, - { - "epoch": 5.741978340512903, - "grad_norm": 0.0024005360901355743, - "learning_rate": 0.00019998374045512914, - "loss": 46.0, - "step": 35655 - }, - { - "epoch": 5.74213937759169, - "grad_norm": 0.014162642881274223, - "learning_rate": 0.00019998373954283668, - "loss": 46.0, - "step": 35656 - }, - { - "epoch": 5.742300414670478, - "grad_norm": 0.002535266103222966, - "learning_rate": 0.00019998373863051866, - "loss": 46.0, - "step": 35657 - }, - { - "epoch": 5.742461451749265, - "grad_norm": 0.008849915117025375, - "learning_rate": 0.000199983737718175, - "loss": 46.0, - "step": 35658 - }, - { - "epoch": 5.742622488828053, - "grad_norm": 0.0026538653764873743, - "learning_rate": 0.00019998373680580578, - "loss": 46.0, - "step": 35659 - }, - { - "epoch": 5.74278352590684, - "grad_norm": 0.0032929491717368364, - "learning_rate": 0.00019998373589341095, - "loss": 46.0, - "step": 35660 - }, - { - "epoch": 5.7429445629856275, - "grad_norm": 0.013679970987141132, - "learning_rate": 0.00019998373498099055, - "loss": 46.0, - "step": 35661 - }, - { - "epoch": 5.743105600064415, - "grad_norm": 0.0015535237034782767, - "learning_rate": 0.00019998373406854457, - "loss": 46.0, - "step": 35662 - }, - { - "epoch": 5.743266637143202, - "grad_norm": 0.004194140899926424, - "learning_rate": 0.00019998373315607295, - "loss": 46.0, - "step": 35663 - }, - { - "epoch": 5.74342767422199, - "grad_norm": 0.00208999658934772, - "learning_rate": 0.00019998373224357576, - "loss": 46.0, - "step": 35664 - }, - { - "epoch": 5.743588711300777, - "grad_norm": 0.004345456603914499, - "learning_rate": 0.000199983731331053, - "loss": 46.0, - "step": 35665 - }, - { - "epoch": 5.743749748379565, - "grad_norm": 0.004026954062283039, - "learning_rate": 0.00019998373041850464, - "loss": 46.0, - "step": 35666 - }, - { - "epoch": 5.743910785458352, - "grad_norm": 0.003164366353303194, - "learning_rate": 0.00019998372950593067, - "loss": 46.0, - "step": 35667 - }, - { - "epoch": 5.744071822537139, - "grad_norm": 0.002806995762512088, - "learning_rate": 0.00019998372859333113, - "loss": 46.0, - "step": 35668 - }, - { - "epoch": 5.744232859615926, - "grad_norm": 0.004286506678909063, - "learning_rate": 0.00019998372768070602, - "loss": 46.0, - "step": 35669 - }, - { - "epoch": 5.744393896694714, - "grad_norm": 0.0030181854963302612, - "learning_rate": 0.00019998372676805528, - "loss": 46.0, - "step": 35670 - }, - { - "epoch": 5.744554933773501, - "grad_norm": 0.002859077649191022, - "learning_rate": 0.00019998372585537896, - "loss": 46.0, - "step": 35671 - }, - { - "epoch": 5.744715970852289, - "grad_norm": 0.021344603970646858, - "learning_rate": 0.00019998372494267708, - "loss": 46.0, - "step": 35672 - }, - { - "epoch": 5.744877007931076, - "grad_norm": 0.0027133887633681297, - "learning_rate": 0.0001999837240299496, - "loss": 46.0, - "step": 35673 - }, - { - "epoch": 5.7450380450098635, - "grad_norm": 0.003130651544779539, - "learning_rate": 0.00019998372311719645, - "loss": 46.0, - "step": 35674 - }, - { - "epoch": 5.745199082088651, - "grad_norm": 0.003432266879826784, - "learning_rate": 0.0001999837222044178, - "loss": 46.0, - "step": 35675 - }, - { - "epoch": 5.745360119167438, - "grad_norm": 0.007205731235444546, - "learning_rate": 0.00019998372129161353, - "loss": 46.0, - "step": 35676 - }, - { - "epoch": 5.745521156246226, - "grad_norm": 0.008296342566609383, - "learning_rate": 0.00019998372037878368, - "loss": 46.0, - "step": 35677 - }, - { - "epoch": 5.745682193325013, - "grad_norm": 0.002843861235305667, - "learning_rate": 0.00019998371946592823, - "loss": 46.0, - "step": 35678 - }, - { - "epoch": 5.745843230403801, - "grad_norm": 0.005493230652064085, - "learning_rate": 0.00019998371855304718, - "loss": 46.0, - "step": 35679 - }, - { - "epoch": 5.746004267482588, - "grad_norm": 0.01543631311506033, - "learning_rate": 0.00019998371764014055, - "loss": 46.0, - "step": 35680 - }, - { - "epoch": 5.746165304561376, - "grad_norm": 0.006336414720863104, - "learning_rate": 0.00019998371672720833, - "loss": 46.0, - "step": 35681 - }, - { - "epoch": 5.746326341640163, - "grad_norm": 0.0014595752581954002, - "learning_rate": 0.00019998371581425052, - "loss": 46.0, - "step": 35682 - }, - { - "epoch": 5.74648737871895, - "grad_norm": 0.01127290166914463, - "learning_rate": 0.0001999837149012671, - "loss": 46.0, - "step": 35683 - }, - { - "epoch": 5.746648415797737, - "grad_norm": 0.0035653971135616302, - "learning_rate": 0.0001999837139882581, - "loss": 46.0, - "step": 35684 - }, - { - "epoch": 5.746809452876525, - "grad_norm": 0.007793842814862728, - "learning_rate": 0.00019998371307522353, - "loss": 46.0, - "step": 35685 - }, - { - "epoch": 5.746970489955312, - "grad_norm": 0.008492912165820599, - "learning_rate": 0.00019998371216216337, - "loss": 46.0, - "step": 35686 - }, - { - "epoch": 5.7471315270340995, - "grad_norm": 0.0024247774854302406, - "learning_rate": 0.00019998371124907758, - "loss": 46.0, - "step": 35687 - }, - { - "epoch": 5.747292564112887, - "grad_norm": 0.0038225636817514896, - "learning_rate": 0.00019998371033596622, - "loss": 46.0, - "step": 35688 - }, - { - "epoch": 5.747453601191674, - "grad_norm": 0.001692612306214869, - "learning_rate": 0.00019998370942282928, - "loss": 46.0, - "step": 35689 - }, - { - "epoch": 5.747614638270462, - "grad_norm": 0.021703489124774933, - "learning_rate": 0.00019998370850966675, - "loss": 46.0, - "step": 35690 - }, - { - "epoch": 5.747775675349249, - "grad_norm": 0.0015950225060805678, - "learning_rate": 0.0001999837075964786, - "loss": 46.0, - "step": 35691 - }, - { - "epoch": 5.747936712428037, - "grad_norm": 0.004989693872630596, - "learning_rate": 0.0001999837066832649, - "loss": 46.0, - "step": 35692 - }, - { - "epoch": 5.748097749506824, - "grad_norm": 0.014336420223116875, - "learning_rate": 0.00019998370577002555, - "loss": 46.0, - "step": 35693 - }, - { - "epoch": 5.748258786585612, - "grad_norm": 0.014689628034830093, - "learning_rate": 0.00019998370485676067, - "loss": 46.0, - "step": 35694 - }, - { - "epoch": 5.748419823664399, - "grad_norm": 0.005761393811553717, - "learning_rate": 0.00019998370394347018, - "loss": 46.0, - "step": 35695 - }, - { - "epoch": 5.748580860743186, - "grad_norm": 0.0015850585186854005, - "learning_rate": 0.0001999837030301541, - "loss": 46.0, - "step": 35696 - }, - { - "epoch": 5.748741897821974, - "grad_norm": 0.012452409602701664, - "learning_rate": 0.00019998370211681243, - "loss": 46.0, - "step": 35697 - }, - { - "epoch": 5.7489029349007605, - "grad_norm": 0.0020173839293420315, - "learning_rate": 0.00019998370120344517, - "loss": 46.0, - "step": 35698 - }, - { - "epoch": 5.749063971979548, - "grad_norm": 0.020098872482776642, - "learning_rate": 0.0001999837002900523, - "loss": 46.0, - "step": 35699 - }, - { - "epoch": 5.7492250090583354, - "grad_norm": 0.002782142488285899, - "learning_rate": 0.00019998369937663385, - "loss": 46.0, - "step": 35700 - }, - { - "epoch": 5.749386046137123, - "grad_norm": 0.007855231873691082, - "learning_rate": 0.00019998369846318983, - "loss": 46.0, - "step": 35701 - }, - { - "epoch": 5.74954708321591, - "grad_norm": 0.0024741862434893847, - "learning_rate": 0.0001999836975497202, - "loss": 46.0, - "step": 35702 - }, - { - "epoch": 5.749708120294698, - "grad_norm": 0.02188832126557827, - "learning_rate": 0.00019998369663622498, - "loss": 46.0, - "step": 35703 - }, - { - "epoch": 5.749869157373485, - "grad_norm": 0.01413187850266695, - "learning_rate": 0.00019998369572270418, - "loss": 46.0, - "step": 35704 - }, - { - "epoch": 5.750030194452273, - "grad_norm": 0.0059817396104335785, - "learning_rate": 0.00019998369480915779, - "loss": 46.0, - "step": 35705 - }, - { - "epoch": 5.75019123153106, - "grad_norm": 0.0071333060041069984, - "learning_rate": 0.00019998369389558578, - "loss": 46.0, - "step": 35706 - }, - { - "epoch": 5.750352268609848, - "grad_norm": 0.009184440597891808, - "learning_rate": 0.0001999836929819882, - "loss": 46.0, - "step": 35707 - }, - { - "epoch": 5.750513305688635, - "grad_norm": 0.0009316659416072071, - "learning_rate": 0.00019998369206836503, - "loss": 46.0, - "step": 35708 - }, - { - "epoch": 5.7506743427674225, - "grad_norm": 0.016463695093989372, - "learning_rate": 0.00019998369115471626, - "loss": 46.0, - "step": 35709 - }, - { - "epoch": 5.75083537984621, - "grad_norm": 0.005868117790669203, - "learning_rate": 0.0001999836902410419, - "loss": 46.0, - "step": 35710 - }, - { - "epoch": 5.7509964169249965, - "grad_norm": 0.0034173561725765467, - "learning_rate": 0.00019998368932734197, - "loss": 46.0, - "step": 35711 - }, - { - "epoch": 5.751157454003785, - "grad_norm": 0.0017595123499631882, - "learning_rate": 0.00019998368841361644, - "loss": 46.0, - "step": 35712 - }, - { - "epoch": 5.751318491082571, - "grad_norm": 0.003993146121501923, - "learning_rate": 0.00019998368749986532, - "loss": 46.0, - "step": 35713 - }, - { - "epoch": 5.751479528161359, - "grad_norm": 0.005458069033920765, - "learning_rate": 0.0001999836865860886, - "loss": 46.0, - "step": 35714 - }, - { - "epoch": 5.751640565240146, - "grad_norm": 0.004291501361876726, - "learning_rate": 0.0001999836856722863, - "loss": 46.0, - "step": 35715 - }, - { - "epoch": 5.751801602318934, - "grad_norm": 0.004495375324040651, - "learning_rate": 0.0001999836847584584, - "loss": 46.0, - "step": 35716 - }, - { - "epoch": 5.751962639397721, - "grad_norm": 0.004737691953778267, - "learning_rate": 0.0001999836838446049, - "loss": 46.0, - "step": 35717 - }, - { - "epoch": 5.752123676476509, - "grad_norm": 0.014758976176381111, - "learning_rate": 0.00019998368293072582, - "loss": 46.0, - "step": 35718 - }, - { - "epoch": 5.752284713555296, - "grad_norm": 0.0030611951369792223, - "learning_rate": 0.00019998368201682115, - "loss": 46.0, - "step": 35719 - }, - { - "epoch": 5.752445750634084, - "grad_norm": 0.0074820262379944324, - "learning_rate": 0.0001999836811028909, - "loss": 46.0, - "step": 35720 - }, - { - "epoch": 5.752606787712871, - "grad_norm": 0.016567546874284744, - "learning_rate": 0.00019998368018893503, - "loss": 46.0, - "step": 35721 - }, - { - "epoch": 5.7527678247916585, - "grad_norm": 0.002446531318128109, - "learning_rate": 0.0001999836792749536, - "loss": 46.0, - "step": 35722 - }, - { - "epoch": 5.752928861870446, - "grad_norm": 0.002018690574914217, - "learning_rate": 0.00019998367836094656, - "loss": 46.0, - "step": 35723 - }, - { - "epoch": 5.753089898949233, - "grad_norm": 0.004184822551906109, - "learning_rate": 0.00019998367744691395, - "loss": 46.0, - "step": 35724 - }, - { - "epoch": 5.753250936028021, - "grad_norm": 0.004262144677340984, - "learning_rate": 0.00019998367653285574, - "loss": 46.0, - "step": 35725 - }, - { - "epoch": 5.753411973106807, - "grad_norm": 0.009587960317730904, - "learning_rate": 0.00019998367561877193, - "loss": 46.0, - "step": 35726 - }, - { - "epoch": 5.753573010185595, - "grad_norm": 0.0008531140047125518, - "learning_rate": 0.00019998367470466251, - "loss": 46.0, - "step": 35727 - }, - { - "epoch": 5.753734047264382, - "grad_norm": 0.001791357877664268, - "learning_rate": 0.00019998367379052754, - "loss": 46.0, - "step": 35728 - }, - { - "epoch": 5.75389508434317, - "grad_norm": 0.0028573365416377783, - "learning_rate": 0.00019998367287636697, - "loss": 46.0, - "step": 35729 - }, - { - "epoch": 5.754056121421957, - "grad_norm": 0.005356810986995697, - "learning_rate": 0.00019998367196218082, - "loss": 46.0, - "step": 35730 - }, - { - "epoch": 5.754217158500745, - "grad_norm": 0.0017005581175908446, - "learning_rate": 0.00019998367104796905, - "loss": 46.0, - "step": 35731 - }, - { - "epoch": 5.754378195579532, - "grad_norm": 0.0020110388286411762, - "learning_rate": 0.0001999836701337317, - "loss": 46.0, - "step": 35732 - }, - { - "epoch": 5.75453923265832, - "grad_norm": 0.0012521251337602735, - "learning_rate": 0.00019998366921946875, - "loss": 46.0, - "step": 35733 - }, - { - "epoch": 5.754700269737107, - "grad_norm": 0.009996892884373665, - "learning_rate": 0.00019998366830518025, - "loss": 46.0, - "step": 35734 - }, - { - "epoch": 5.7548613068158945, - "grad_norm": 0.0013321025762706995, - "learning_rate": 0.0001999836673908661, - "loss": 46.0, - "step": 35735 - }, - { - "epoch": 5.755022343894682, - "grad_norm": 0.0023752159904688597, - "learning_rate": 0.0001999836664765264, - "loss": 46.0, - "step": 35736 - }, - { - "epoch": 5.755183380973469, - "grad_norm": 0.002858733059838414, - "learning_rate": 0.00019998366556216112, - "loss": 46.0, - "step": 35737 - }, - { - "epoch": 5.755344418052257, - "grad_norm": 0.0037748587783426046, - "learning_rate": 0.0001999836646477702, - "loss": 46.0, - "step": 35738 - }, - { - "epoch": 5.755505455131044, - "grad_norm": 0.004789280705153942, - "learning_rate": 0.00019998366373335372, - "loss": 46.0, - "step": 35739 - }, - { - "epoch": 5.755666492209832, - "grad_norm": 0.007376205641776323, - "learning_rate": 0.00019998366281891167, - "loss": 46.0, - "step": 35740 - }, - { - "epoch": 5.755827529288618, - "grad_norm": 0.005412856582552195, - "learning_rate": 0.000199983661904444, - "loss": 46.0, - "step": 35741 - }, - { - "epoch": 5.755988566367406, - "grad_norm": 0.005582737270742655, - "learning_rate": 0.00019998366098995075, - "loss": 46.0, - "step": 35742 - }, - { - "epoch": 5.756149603446193, - "grad_norm": 0.019184790551662445, - "learning_rate": 0.0001999836600754319, - "loss": 46.0, - "step": 35743 - }, - { - "epoch": 5.756310640524981, - "grad_norm": 0.008804554119706154, - "learning_rate": 0.00019998365916088745, - "loss": 46.0, - "step": 35744 - }, - { - "epoch": 5.756471677603768, - "grad_norm": 0.008403409272432327, - "learning_rate": 0.00019998365824631744, - "loss": 46.0, - "step": 35745 - }, - { - "epoch": 5.7566327146825556, - "grad_norm": 0.0019840106833726168, - "learning_rate": 0.0001999836573317218, - "loss": 46.0, - "step": 35746 - }, - { - "epoch": 5.756793751761343, - "grad_norm": 0.00988280214369297, - "learning_rate": 0.0001999836564171006, - "loss": 46.0, - "step": 35747 - }, - { - "epoch": 5.7569547888401305, - "grad_norm": 0.0018794741481542587, - "learning_rate": 0.00019998365550245382, - "loss": 46.0, - "step": 35748 - }, - { - "epoch": 5.757115825918918, - "grad_norm": 0.002330838004127145, - "learning_rate": 0.0001999836545877814, - "loss": 46.0, - "step": 35749 - }, - { - "epoch": 5.757276862997705, - "grad_norm": 0.01236631441861391, - "learning_rate": 0.00019998365367308342, - "loss": 46.0, - "step": 35750 - }, - { - "epoch": 5.757437900076493, - "grad_norm": 0.001956024905666709, - "learning_rate": 0.00019998365275835986, - "loss": 46.0, - "step": 35751 - }, - { - "epoch": 5.75759893715528, - "grad_norm": 0.006627036724239588, - "learning_rate": 0.0001999836518436107, - "loss": 46.0, - "step": 35752 - }, - { - "epoch": 5.757759974234068, - "grad_norm": 0.002990341978147626, - "learning_rate": 0.00019998365092883594, - "loss": 46.0, - "step": 35753 - }, - { - "epoch": 5.757921011312855, - "grad_norm": 0.006379584316164255, - "learning_rate": 0.0001999836500140356, - "loss": 46.0, - "step": 35754 - }, - { - "epoch": 5.758082048391643, - "grad_norm": 0.004898947663605213, - "learning_rate": 0.00019998364909920967, - "loss": 46.0, - "step": 35755 - }, - { - "epoch": 5.758243085470429, - "grad_norm": 0.007700498215854168, - "learning_rate": 0.00019998364818435814, - "loss": 46.0, - "step": 35756 - }, - { - "epoch": 5.758404122549217, - "grad_norm": 0.006034488324075937, - "learning_rate": 0.00019998364726948103, - "loss": 46.0, - "step": 35757 - }, - { - "epoch": 5.758565159628004, - "grad_norm": 0.00562693364918232, - "learning_rate": 0.00019998364635457833, - "loss": 46.0, - "step": 35758 - }, - { - "epoch": 5.7587261967067915, - "grad_norm": 0.003450853517279029, - "learning_rate": 0.00019998364543965004, - "loss": 46.0, - "step": 35759 - }, - { - "epoch": 5.758887233785579, - "grad_norm": 0.0028900671750307083, - "learning_rate": 0.00019998364452469613, - "loss": 46.0, - "step": 35760 - }, - { - "epoch": 5.759048270864366, - "grad_norm": 0.009149179793894291, - "learning_rate": 0.00019998364360971667, - "loss": 46.0, - "step": 35761 - }, - { - "epoch": 5.759209307943154, - "grad_norm": 0.0018029846251010895, - "learning_rate": 0.0001999836426947116, - "loss": 46.0, - "step": 35762 - }, - { - "epoch": 5.759370345021941, - "grad_norm": 0.0027878256514668465, - "learning_rate": 0.00019998364177968095, - "loss": 46.0, - "step": 35763 - }, - { - "epoch": 5.759531382100729, - "grad_norm": 0.0033499824348837137, - "learning_rate": 0.0001999836408646247, - "loss": 46.0, - "step": 35764 - }, - { - "epoch": 5.759692419179516, - "grad_norm": 0.003248290391638875, - "learning_rate": 0.00019998363994954286, - "loss": 46.0, - "step": 35765 - }, - { - "epoch": 5.759853456258304, - "grad_norm": 0.004588032606989145, - "learning_rate": 0.0001999836390344354, - "loss": 46.0, - "step": 35766 - }, - { - "epoch": 5.760014493337091, - "grad_norm": 0.0025692398194223642, - "learning_rate": 0.0001999836381193024, - "loss": 46.0, - "step": 35767 - }, - { - "epoch": 5.760175530415879, - "grad_norm": 0.0012325189309194684, - "learning_rate": 0.00019998363720414376, - "loss": 46.0, - "step": 35768 - }, - { - "epoch": 5.760336567494665, - "grad_norm": 0.007198629900813103, - "learning_rate": 0.00019998363628895958, - "loss": 46.0, - "step": 35769 - }, - { - "epoch": 5.7604976045734535, - "grad_norm": 0.005350594874471426, - "learning_rate": 0.00019998363537374977, - "loss": 46.0, - "step": 35770 - }, - { - "epoch": 5.76065864165224, - "grad_norm": 0.008711155503988266, - "learning_rate": 0.00019998363445851438, - "loss": 46.0, - "step": 35771 - }, - { - "epoch": 5.7608196787310275, - "grad_norm": 0.002675512572750449, - "learning_rate": 0.0001999836335432534, - "loss": 46.0, - "step": 35772 - }, - { - "epoch": 5.760980715809815, - "grad_norm": 0.014440230093896389, - "learning_rate": 0.00019998363262796684, - "loss": 46.0, - "step": 35773 - }, - { - "epoch": 5.761141752888602, - "grad_norm": 0.0012159671168774366, - "learning_rate": 0.00019998363171265472, - "loss": 46.0, - "step": 35774 - }, - { - "epoch": 5.76130278996739, - "grad_norm": 0.0015782787231728435, - "learning_rate": 0.00019998363079731695, - "loss": 46.0, - "step": 35775 - }, - { - "epoch": 5.761463827046177, - "grad_norm": 0.001120796543546021, - "learning_rate": 0.0001999836298819536, - "loss": 46.0, - "step": 35776 - }, - { - "epoch": 5.761624864124965, - "grad_norm": 0.010463908314704895, - "learning_rate": 0.0001999836289665647, - "loss": 46.0, - "step": 35777 - }, - { - "epoch": 5.761785901203752, - "grad_norm": 0.0022101362701505423, - "learning_rate": 0.00019998362805115016, - "loss": 46.0, - "step": 35778 - }, - { - "epoch": 5.76194693828254, - "grad_norm": 0.0038292030803859234, - "learning_rate": 0.00019998362713571005, - "loss": 46.0, - "step": 35779 - }, - { - "epoch": 5.762107975361327, - "grad_norm": 0.010869057849049568, - "learning_rate": 0.00019998362622024435, - "loss": 46.0, - "step": 35780 - }, - { - "epoch": 5.762269012440115, - "grad_norm": 0.002579993335530162, - "learning_rate": 0.00019998362530475306, - "loss": 46.0, - "step": 35781 - }, - { - "epoch": 5.762430049518902, - "grad_norm": 0.012837952934205532, - "learning_rate": 0.00019998362438923616, - "loss": 46.0, - "step": 35782 - }, - { - "epoch": 5.7625910865976895, - "grad_norm": 0.006787641905248165, - "learning_rate": 0.0001999836234736937, - "loss": 46.0, - "step": 35783 - }, - { - "epoch": 5.762752123676476, - "grad_norm": 0.018284711986780167, - "learning_rate": 0.00019998362255812564, - "loss": 46.0, - "step": 35784 - }, - { - "epoch": 5.762913160755264, - "grad_norm": 0.009029623121023178, - "learning_rate": 0.00019998362164253198, - "loss": 46.0, - "step": 35785 - }, - { - "epoch": 5.763074197834051, - "grad_norm": 0.004047457128763199, - "learning_rate": 0.00019998362072691275, - "loss": 46.0, - "step": 35786 - }, - { - "epoch": 5.763235234912838, - "grad_norm": 0.0036357128992676735, - "learning_rate": 0.0001999836198112679, - "loss": 46.0, - "step": 35787 - }, - { - "epoch": 5.763396271991626, - "grad_norm": 0.012972780503332615, - "learning_rate": 0.00019998361889559746, - "loss": 46.0, - "step": 35788 - }, - { - "epoch": 5.763557309070413, - "grad_norm": 0.0011942959390580654, - "learning_rate": 0.00019998361797990145, - "loss": 46.0, - "step": 35789 - }, - { - "epoch": 5.763718346149201, - "grad_norm": 0.010222087614238262, - "learning_rate": 0.00019998361706417985, - "loss": 46.0, - "step": 35790 - }, - { - "epoch": 5.763879383227988, - "grad_norm": 0.0014351614518091083, - "learning_rate": 0.00019998361614843266, - "loss": 46.0, - "step": 35791 - }, - { - "epoch": 5.764040420306776, - "grad_norm": 0.03153061121702194, - "learning_rate": 0.00019998361523265986, - "loss": 46.0, - "step": 35792 - }, - { - "epoch": 5.764201457385563, - "grad_norm": 0.01333332434296608, - "learning_rate": 0.00019998361431686147, - "loss": 46.0, - "step": 35793 - }, - { - "epoch": 5.764362494464351, - "grad_norm": 0.0015930727822706103, - "learning_rate": 0.0001999836134010375, - "loss": 46.0, - "step": 35794 - }, - { - "epoch": 5.764523531543138, - "grad_norm": 0.00261515355668962, - "learning_rate": 0.00019998361248518796, - "loss": 46.0, - "step": 35795 - }, - { - "epoch": 5.7646845686219255, - "grad_norm": 0.003101665060967207, - "learning_rate": 0.0001999836115693128, - "loss": 46.0, - "step": 35796 - }, - { - "epoch": 5.764845605700713, - "grad_norm": 0.002982706530019641, - "learning_rate": 0.00019998361065341205, - "loss": 46.0, - "step": 35797 - }, - { - "epoch": 5.7650066427795, - "grad_norm": 0.003136037616059184, - "learning_rate": 0.00019998360973748572, - "loss": 46.0, - "step": 35798 - }, - { - "epoch": 5.765167679858287, - "grad_norm": 0.009190799668431282, - "learning_rate": 0.0001999836088215338, - "loss": 46.0, - "step": 35799 - }, - { - "epoch": 5.765328716937074, - "grad_norm": 0.00977477990090847, - "learning_rate": 0.00019998360790555628, - "loss": 46.0, - "step": 35800 - }, - { - "epoch": 5.765489754015862, - "grad_norm": 0.0027433226350694895, - "learning_rate": 0.0001999836069895532, - "loss": 46.0, - "step": 35801 - }, - { - "epoch": 5.765650791094649, - "grad_norm": 0.00669348007068038, - "learning_rate": 0.0001999836060735245, - "loss": 46.0, - "step": 35802 - }, - { - "epoch": 5.765811828173437, - "grad_norm": 0.007097512949258089, - "learning_rate": 0.0001999836051574702, - "loss": 46.0, - "step": 35803 - }, - { - "epoch": 5.765972865252224, - "grad_norm": 0.0016350100049749017, - "learning_rate": 0.00019998360424139033, - "loss": 46.0, - "step": 35804 - }, - { - "epoch": 5.766133902331012, - "grad_norm": 0.021562431007623672, - "learning_rate": 0.00019998360332528485, - "loss": 46.0, - "step": 35805 - }, - { - "epoch": 5.766294939409799, - "grad_norm": 0.012011711485683918, - "learning_rate": 0.0001999836024091538, - "loss": 46.0, - "step": 35806 - }, - { - "epoch": 5.7664559764885865, - "grad_norm": 0.017741721123456955, - "learning_rate": 0.00019998360149299713, - "loss": 46.0, - "step": 35807 - }, - { - "epoch": 5.766617013567374, - "grad_norm": 0.004392742179334164, - "learning_rate": 0.00019998360057681488, - "loss": 46.0, - "step": 35808 - }, - { - "epoch": 5.7667780506461614, - "grad_norm": 0.01222624909132719, - "learning_rate": 0.00019998359966060707, - "loss": 46.0, - "step": 35809 - }, - { - "epoch": 5.766939087724949, - "grad_norm": 0.009547607973217964, - "learning_rate": 0.00019998359874437367, - "loss": 46.0, - "step": 35810 - }, - { - "epoch": 5.767100124803736, - "grad_norm": 0.0027848128229379654, - "learning_rate": 0.00019998359782811463, - "loss": 46.0, - "step": 35811 - }, - { - "epoch": 5.767261161882524, - "grad_norm": 0.0015407265163958073, - "learning_rate": 0.00019998359691183004, - "loss": 46.0, - "step": 35812 - }, - { - "epoch": 5.767422198961311, - "grad_norm": 0.002375328214839101, - "learning_rate": 0.00019998359599551985, - "loss": 46.0, - "step": 35813 - }, - { - "epoch": 5.767583236040098, - "grad_norm": 0.003431180026382208, - "learning_rate": 0.00019998359507918405, - "loss": 46.0, - "step": 35814 - }, - { - "epoch": 5.767744273118885, - "grad_norm": 0.007888766936957836, - "learning_rate": 0.0001999835941628227, - "loss": 46.0, - "step": 35815 - }, - { - "epoch": 5.767905310197673, - "grad_norm": 0.008914914913475513, - "learning_rate": 0.0001999835932464357, - "loss": 46.0, - "step": 35816 - }, - { - "epoch": 5.76806634727646, - "grad_norm": 0.002494643209502101, - "learning_rate": 0.00019998359233002315, - "loss": 46.0, - "step": 35817 - }, - { - "epoch": 5.768227384355248, - "grad_norm": 0.004079343751072884, - "learning_rate": 0.00019998359141358503, - "loss": 46.0, - "step": 35818 - }, - { - "epoch": 5.768388421434035, - "grad_norm": 0.0044632237404584885, - "learning_rate": 0.00019998359049712127, - "loss": 46.0, - "step": 35819 - }, - { - "epoch": 5.7685494585128225, - "grad_norm": 0.007729186676442623, - "learning_rate": 0.00019998358958063194, - "loss": 46.0, - "step": 35820 - }, - { - "epoch": 5.76871049559161, - "grad_norm": 0.0020596070680767298, - "learning_rate": 0.00019998358866411703, - "loss": 46.0, - "step": 35821 - }, - { - "epoch": 5.768871532670397, - "grad_norm": 0.0034962501376867294, - "learning_rate": 0.00019998358774757653, - "loss": 46.0, - "step": 35822 - }, - { - "epoch": 5.769032569749185, - "grad_norm": 0.0007968193385750055, - "learning_rate": 0.00019998358683101045, - "loss": 46.0, - "step": 35823 - }, - { - "epoch": 5.769193606827972, - "grad_norm": 0.0038508260622620583, - "learning_rate": 0.00019998358591441875, - "loss": 46.0, - "step": 35824 - }, - { - "epoch": 5.76935464390676, - "grad_norm": 0.0031962452922016382, - "learning_rate": 0.00019998358499780146, - "loss": 46.0, - "step": 35825 - }, - { - "epoch": 5.769515680985547, - "grad_norm": 0.009239432401955128, - "learning_rate": 0.0001999835840811586, - "loss": 46.0, - "step": 35826 - }, - { - "epoch": 5.769676718064335, - "grad_norm": 0.002511995378881693, - "learning_rate": 0.0001999835831644901, - "loss": 46.0, - "step": 35827 - }, - { - "epoch": 5.769837755143122, - "grad_norm": 0.001880061929114163, - "learning_rate": 0.00019998358224779605, - "loss": 46.0, - "step": 35828 - }, - { - "epoch": 5.769998792221909, - "grad_norm": 0.011502335779368877, - "learning_rate": 0.00019998358133107642, - "loss": 46.0, - "step": 35829 - }, - { - "epoch": 5.770159829300696, - "grad_norm": 0.0020040462259203196, - "learning_rate": 0.0001999835804143312, - "loss": 46.0, - "step": 35830 - }, - { - "epoch": 5.770320866379484, - "grad_norm": 0.0027627963572740555, - "learning_rate": 0.00019998357949756036, - "loss": 46.0, - "step": 35831 - }, - { - "epoch": 5.770481903458271, - "grad_norm": 0.0052684820257127285, - "learning_rate": 0.00019998357858076396, - "loss": 46.0, - "step": 35832 - }, - { - "epoch": 5.7706429405370585, - "grad_norm": 0.00824128556996584, - "learning_rate": 0.00019998357766394195, - "loss": 46.0, - "step": 35833 - }, - { - "epoch": 5.770803977615846, - "grad_norm": 0.0081133171916008, - "learning_rate": 0.00019998357674709432, - "loss": 46.0, - "step": 35834 - }, - { - "epoch": 5.770965014694633, - "grad_norm": 0.00839622039347887, - "learning_rate": 0.00019998357583022117, - "loss": 46.0, - "step": 35835 - }, - { - "epoch": 5.771126051773421, - "grad_norm": 0.003653403138741851, - "learning_rate": 0.0001999835749133224, - "loss": 46.0, - "step": 35836 - }, - { - "epoch": 5.771287088852208, - "grad_norm": 0.007322989404201508, - "learning_rate": 0.000199983573996398, - "loss": 46.0, - "step": 35837 - }, - { - "epoch": 5.771448125930996, - "grad_norm": 0.010368783958256245, - "learning_rate": 0.00019998357307944806, - "loss": 46.0, - "step": 35838 - }, - { - "epoch": 5.771609163009783, - "grad_norm": 0.0215225238353014, - "learning_rate": 0.0001999835721624725, - "loss": 46.0, - "step": 35839 - }, - { - "epoch": 5.771770200088571, - "grad_norm": 0.01646846905350685, - "learning_rate": 0.00019998357124547135, - "loss": 46.0, - "step": 35840 - }, - { - "epoch": 5.771931237167358, - "grad_norm": 0.00138341065030545, - "learning_rate": 0.00019998357032844464, - "loss": 46.0, - "step": 35841 - }, - { - "epoch": 5.772092274246145, - "grad_norm": 0.005273793358355761, - "learning_rate": 0.0001999835694113923, - "loss": 46.0, - "step": 35842 - }, - { - "epoch": 5.772253311324933, - "grad_norm": 0.0074575068429112434, - "learning_rate": 0.00019998356849431438, - "loss": 46.0, - "step": 35843 - }, - { - "epoch": 5.77241434840372, - "grad_norm": 0.008774465881288052, - "learning_rate": 0.00019998356757721088, - "loss": 46.0, - "step": 35844 - }, - { - "epoch": 5.772575385482507, - "grad_norm": 0.001621044473722577, - "learning_rate": 0.00019998356666008177, - "loss": 46.0, - "step": 35845 - }, - { - "epoch": 5.7727364225612945, - "grad_norm": 0.004730668850243092, - "learning_rate": 0.00019998356574292713, - "loss": 46.0, - "step": 35846 - }, - { - "epoch": 5.772897459640082, - "grad_norm": 0.0032650940120220184, - "learning_rate": 0.00019998356482574682, - "loss": 46.0, - "step": 35847 - }, - { - "epoch": 5.773058496718869, - "grad_norm": 0.004273801576346159, - "learning_rate": 0.00019998356390854097, - "loss": 46.0, - "step": 35848 - }, - { - "epoch": 5.773219533797657, - "grad_norm": 0.007242194842547178, - "learning_rate": 0.0001999835629913095, - "loss": 46.0, - "step": 35849 - }, - { - "epoch": 5.773380570876444, - "grad_norm": 0.002563401823863387, - "learning_rate": 0.00019998356207405243, - "loss": 46.0, - "step": 35850 - }, - { - "epoch": 5.773541607955232, - "grad_norm": 0.001910101855173707, - "learning_rate": 0.00019998356115676983, - "loss": 46.0, - "step": 35851 - }, - { - "epoch": 5.773702645034019, - "grad_norm": 0.003323824843391776, - "learning_rate": 0.00019998356023946158, - "loss": 46.0, - "step": 35852 - }, - { - "epoch": 5.773863682112807, - "grad_norm": 0.01273073349148035, - "learning_rate": 0.00019998355932212774, - "loss": 46.0, - "step": 35853 - }, - { - "epoch": 5.774024719191594, - "grad_norm": 0.007262049242854118, - "learning_rate": 0.00019998355840476837, - "loss": 46.0, - "step": 35854 - }, - { - "epoch": 5.7741857562703816, - "grad_norm": 0.0019591215532273054, - "learning_rate": 0.00019998355748738336, - "loss": 46.0, - "step": 35855 - }, - { - "epoch": 5.774346793349169, - "grad_norm": 0.001541677862405777, - "learning_rate": 0.00019998355656997277, - "loss": 46.0, - "step": 35856 - }, - { - "epoch": 5.774507830427956, - "grad_norm": 0.003725646995007992, - "learning_rate": 0.00019998355565253658, - "loss": 46.0, - "step": 35857 - }, - { - "epoch": 5.774668867506744, - "grad_norm": 0.02196687087416649, - "learning_rate": 0.00019998355473507478, - "loss": 46.0, - "step": 35858 - }, - { - "epoch": 5.7748299045855305, - "grad_norm": 0.004947022069245577, - "learning_rate": 0.00019998355381758745, - "loss": 46.0, - "step": 35859 - }, - { - "epoch": 5.774990941664318, - "grad_norm": 0.004878837149590254, - "learning_rate": 0.00019998355290007448, - "loss": 46.0, - "step": 35860 - }, - { - "epoch": 5.775151978743105, - "grad_norm": 0.0014319252222776413, - "learning_rate": 0.00019998355198253595, - "loss": 46.0, - "step": 35861 - }, - { - "epoch": 5.775313015821893, - "grad_norm": 0.0016407968942075968, - "learning_rate": 0.00019998355106497182, - "loss": 46.0, - "step": 35862 - }, - { - "epoch": 5.77547405290068, - "grad_norm": 0.00904154498130083, - "learning_rate": 0.0001999835501473821, - "loss": 46.0, - "step": 35863 - }, - { - "epoch": 5.775635089979468, - "grad_norm": 0.002735934918746352, - "learning_rate": 0.00019998354922976677, - "loss": 46.0, - "step": 35864 - }, - { - "epoch": 5.775796127058255, - "grad_norm": 0.0019785065669566393, - "learning_rate": 0.00019998354831212586, - "loss": 46.0, - "step": 35865 - }, - { - "epoch": 5.775957164137043, - "grad_norm": 0.0018252504523843527, - "learning_rate": 0.00019998354739445934, - "loss": 46.0, - "step": 35866 - }, - { - "epoch": 5.77611820121583, - "grad_norm": 0.0024098071735352278, - "learning_rate": 0.00019998354647676728, - "loss": 46.0, - "step": 35867 - }, - { - "epoch": 5.7762792382946175, - "grad_norm": 0.0038775037974119186, - "learning_rate": 0.00019998354555904958, - "loss": 46.0, - "step": 35868 - }, - { - "epoch": 5.776440275373405, - "grad_norm": 0.003925892990082502, - "learning_rate": 0.0001999835446413063, - "loss": 46.0, - "step": 35869 - }, - { - "epoch": 5.776601312452192, - "grad_norm": 0.007865316234529018, - "learning_rate": 0.00019998354372353748, - "loss": 46.0, - "step": 35870 - }, - { - "epoch": 5.77676234953098, - "grad_norm": 0.0033923930022865534, - "learning_rate": 0.00019998354280574302, - "loss": 46.0, - "step": 35871 - }, - { - "epoch": 5.7769233866097665, - "grad_norm": 0.002170517109334469, - "learning_rate": 0.00019998354188792297, - "loss": 46.0, - "step": 35872 - }, - { - "epoch": 5.777084423688554, - "grad_norm": 0.002143422607332468, - "learning_rate": 0.00019998354097007734, - "loss": 46.0, - "step": 35873 - }, - { - "epoch": 5.777245460767341, - "grad_norm": 0.00580269331112504, - "learning_rate": 0.0001999835400522061, - "loss": 46.0, - "step": 35874 - }, - { - "epoch": 5.777406497846129, - "grad_norm": 0.013656793162226677, - "learning_rate": 0.00019998353913430929, - "loss": 46.0, - "step": 35875 - }, - { - "epoch": 5.777567534924916, - "grad_norm": 0.003275335766375065, - "learning_rate": 0.0001999835382163869, - "loss": 46.0, - "step": 35876 - }, - { - "epoch": 5.777728572003704, - "grad_norm": 0.0021338691003620625, - "learning_rate": 0.00019998353729843888, - "loss": 46.0, - "step": 35877 - }, - { - "epoch": 5.777889609082491, - "grad_norm": 0.0022901701740920544, - "learning_rate": 0.0001999835363804653, - "loss": 46.0, - "step": 35878 - }, - { - "epoch": 5.778050646161279, - "grad_norm": 0.0018274775939062238, - "learning_rate": 0.00019998353546246613, - "loss": 46.0, - "step": 35879 - }, - { - "epoch": 5.778211683240066, - "grad_norm": 0.0016587969148531556, - "learning_rate": 0.00019998353454444136, - "loss": 46.0, - "step": 35880 - }, - { - "epoch": 5.7783727203188535, - "grad_norm": 0.003965029958635569, - "learning_rate": 0.00019998353362639103, - "loss": 46.0, - "step": 35881 - }, - { - "epoch": 5.778533757397641, - "grad_norm": 0.005459026899188757, - "learning_rate": 0.00019998353270831505, - "loss": 46.0, - "step": 35882 - }, - { - "epoch": 5.778694794476428, - "grad_norm": 0.0031608333811163902, - "learning_rate": 0.00019998353179021352, - "loss": 46.0, - "step": 35883 - }, - { - "epoch": 5.778855831555216, - "grad_norm": 0.0010634392965584993, - "learning_rate": 0.00019998353087208638, - "loss": 46.0, - "step": 35884 - }, - { - "epoch": 5.779016868634003, - "grad_norm": 0.0027363006956875324, - "learning_rate": 0.00019998352995393367, - "loss": 46.0, - "step": 35885 - }, - { - "epoch": 5.779177905712791, - "grad_norm": 0.00758574390783906, - "learning_rate": 0.00019998352903575537, - "loss": 46.0, - "step": 35886 - }, - { - "epoch": 5.779338942791577, - "grad_norm": 0.002652771072462201, - "learning_rate": 0.00019998352811755147, - "loss": 46.0, - "step": 35887 - }, - { - "epoch": 5.779499979870365, - "grad_norm": 0.008481164462864399, - "learning_rate": 0.00019998352719932197, - "loss": 46.0, - "step": 35888 - }, - { - "epoch": 5.779661016949152, - "grad_norm": 0.0027792537584900856, - "learning_rate": 0.0001999835262810669, - "loss": 46.0, - "step": 35889 - }, - { - "epoch": 5.77982205402794, - "grad_norm": 0.033703941851854324, - "learning_rate": 0.0001999835253627862, - "loss": 46.0, - "step": 35890 - }, - { - "epoch": 5.779983091106727, - "grad_norm": 0.008566014468669891, - "learning_rate": 0.0001999835244444799, - "loss": 46.0, - "step": 35891 - }, - { - "epoch": 5.780144128185515, - "grad_norm": 0.0037682177498936653, - "learning_rate": 0.00019998352352614806, - "loss": 46.0, - "step": 35892 - }, - { - "epoch": 5.780305165264302, - "grad_norm": 0.002675719792023301, - "learning_rate": 0.00019998352260779063, - "loss": 46.0, - "step": 35893 - }, - { - "epoch": 5.7804662023430895, - "grad_norm": 0.013582653366029263, - "learning_rate": 0.0001999835216894076, - "loss": 46.0, - "step": 35894 - }, - { - "epoch": 5.780627239421877, - "grad_norm": 0.004690753761678934, - "learning_rate": 0.00019998352077099898, - "loss": 46.0, - "step": 35895 - }, - { - "epoch": 5.780788276500664, - "grad_norm": 0.004897431004792452, - "learning_rate": 0.00019998351985256476, - "loss": 46.0, - "step": 35896 - }, - { - "epoch": 5.780949313579452, - "grad_norm": 0.002507381374016404, - "learning_rate": 0.00019998351893410493, - "loss": 46.0, - "step": 35897 - }, - { - "epoch": 5.781110350658239, - "grad_norm": 0.0008928921306505799, - "learning_rate": 0.00019998351801561953, - "loss": 46.0, - "step": 35898 - }, - { - "epoch": 5.781271387737027, - "grad_norm": 0.037514083087444305, - "learning_rate": 0.00019998351709710855, - "loss": 46.0, - "step": 35899 - }, - { - "epoch": 5.781432424815814, - "grad_norm": 0.002529543824493885, - "learning_rate": 0.00019998351617857196, - "loss": 46.0, - "step": 35900 - }, - { - "epoch": 5.781593461894602, - "grad_norm": 0.017014116048812866, - "learning_rate": 0.0001999835152600098, - "loss": 46.0, - "step": 35901 - }, - { - "epoch": 5.781754498973388, - "grad_norm": 0.002385465195402503, - "learning_rate": 0.00019998351434142203, - "loss": 46.0, - "step": 35902 - }, - { - "epoch": 5.781915536052176, - "grad_norm": 0.00308678369037807, - "learning_rate": 0.0001999835134228087, - "loss": 46.0, - "step": 35903 - }, - { - "epoch": 5.782076573130963, - "grad_norm": 0.003813399001955986, - "learning_rate": 0.00019998351250416976, - "loss": 46.0, - "step": 35904 - }, - { - "epoch": 5.782237610209751, - "grad_norm": 0.011835162527859211, - "learning_rate": 0.0001999835115855052, - "loss": 46.0, - "step": 35905 - }, - { - "epoch": 5.782398647288538, - "grad_norm": 0.004578090272843838, - "learning_rate": 0.00019998351066681508, - "loss": 46.0, - "step": 35906 - }, - { - "epoch": 5.7825596843673255, - "grad_norm": 0.007764613721519709, - "learning_rate": 0.00019998350974809938, - "loss": 46.0, - "step": 35907 - }, - { - "epoch": 5.782720721446113, - "grad_norm": 0.002877760212868452, - "learning_rate": 0.00019998350882935806, - "loss": 46.0, - "step": 35908 - }, - { - "epoch": 5.7828817585249, - "grad_norm": 0.002632005140185356, - "learning_rate": 0.00019998350791059118, - "loss": 46.0, - "step": 35909 - }, - { - "epoch": 5.783042795603688, - "grad_norm": 0.0037167989648878574, - "learning_rate": 0.00019998350699179868, - "loss": 46.0, - "step": 35910 - }, - { - "epoch": 5.783203832682475, - "grad_norm": 0.003435267833992839, - "learning_rate": 0.0001999835060729806, - "loss": 46.0, - "step": 35911 - }, - { - "epoch": 5.783364869761263, - "grad_norm": 0.005022314842790365, - "learning_rate": 0.00019998350515413693, - "loss": 46.0, - "step": 35912 - }, - { - "epoch": 5.78352590684005, - "grad_norm": 0.008201629854738712, - "learning_rate": 0.00019998350423526765, - "loss": 46.0, - "step": 35913 - }, - { - "epoch": 5.783686943918838, - "grad_norm": 0.001541821868158877, - "learning_rate": 0.00019998350331637283, - "loss": 46.0, - "step": 35914 - }, - { - "epoch": 5.783847980997624, - "grad_norm": 0.006183285266160965, - "learning_rate": 0.00019998350239745237, - "loss": 46.0, - "step": 35915 - }, - { - "epoch": 5.7840090180764125, - "grad_norm": 0.018910681828856468, - "learning_rate": 0.00019998350147850636, - "loss": 46.0, - "step": 35916 - }, - { - "epoch": 5.784170055155199, - "grad_norm": 0.00981220230460167, - "learning_rate": 0.00019998350055953475, - "loss": 46.0, - "step": 35917 - }, - { - "epoch": 5.784331092233987, - "grad_norm": 0.006154462695121765, - "learning_rate": 0.0001999834996405375, - "loss": 46.0, - "step": 35918 - }, - { - "epoch": 5.784492129312774, - "grad_norm": 0.019950637593865395, - "learning_rate": 0.00019998349872151473, - "loss": 46.0, - "step": 35919 - }, - { - "epoch": 5.7846531663915615, - "grad_norm": 0.006219244562089443, - "learning_rate": 0.00019998349780246633, - "loss": 46.0, - "step": 35920 - }, - { - "epoch": 5.784814203470349, - "grad_norm": 0.0019006497459486127, - "learning_rate": 0.00019998349688339233, - "loss": 46.0, - "step": 35921 - }, - { - "epoch": 5.784975240549136, - "grad_norm": 0.0022500548511743546, - "learning_rate": 0.0001999834959642928, - "loss": 46.0, - "step": 35922 - }, - { - "epoch": 5.785136277627924, - "grad_norm": 0.006770461797714233, - "learning_rate": 0.0001999834950451676, - "loss": 46.0, - "step": 35923 - }, - { - "epoch": 5.785297314706711, - "grad_norm": 0.0028491022530943155, - "learning_rate": 0.00019998349412601684, - "loss": 46.0, - "step": 35924 - }, - { - "epoch": 5.785458351785499, - "grad_norm": 0.001747211441397667, - "learning_rate": 0.0001999834932068405, - "loss": 46.0, - "step": 35925 - }, - { - "epoch": 5.785619388864286, - "grad_norm": 0.000976066745352, - "learning_rate": 0.00019998349228763856, - "loss": 46.0, - "step": 35926 - }, - { - "epoch": 5.785780425943074, - "grad_norm": 0.004377948585897684, - "learning_rate": 0.000199983491368411, - "loss": 46.0, - "step": 35927 - }, - { - "epoch": 5.785941463021861, - "grad_norm": 0.0019161331001669168, - "learning_rate": 0.0001999834904491579, - "loss": 46.0, - "step": 35928 - }, - { - "epoch": 5.7861025001006485, - "grad_norm": 0.01532067358493805, - "learning_rate": 0.0001999834895298792, - "loss": 46.0, - "step": 35929 - }, - { - "epoch": 5.786263537179435, - "grad_norm": 0.0036150391679257154, - "learning_rate": 0.0001999834886105749, - "loss": 46.0, - "step": 35930 - }, - { - "epoch": 5.786424574258223, - "grad_norm": 0.0017206162447109818, - "learning_rate": 0.000199983487691245, - "loss": 46.0, - "step": 35931 - }, - { - "epoch": 5.78658561133701, - "grad_norm": 0.0028035107534378767, - "learning_rate": 0.00019998348677188955, - "loss": 46.0, - "step": 35932 - }, - { - "epoch": 5.7867466484157974, - "grad_norm": 0.0009451525984331965, - "learning_rate": 0.00019998348585250847, - "loss": 46.0, - "step": 35933 - }, - { - "epoch": 5.786907685494585, - "grad_norm": 0.012045511975884438, - "learning_rate": 0.0001999834849331018, - "loss": 46.0, - "step": 35934 - }, - { - "epoch": 5.787068722573372, - "grad_norm": 0.004879689309746027, - "learning_rate": 0.00019998348401366954, - "loss": 46.0, - "step": 35935 - }, - { - "epoch": 5.78722975965216, - "grad_norm": 0.0011944433208554983, - "learning_rate": 0.0001999834830942117, - "loss": 46.0, - "step": 35936 - }, - { - "epoch": 5.787390796730947, - "grad_norm": 0.009134566411376, - "learning_rate": 0.00019998348217472827, - "loss": 46.0, - "step": 35937 - }, - { - "epoch": 5.787551833809735, - "grad_norm": 0.0070863342843949795, - "learning_rate": 0.00019998348125521923, - "loss": 46.0, - "step": 35938 - }, - { - "epoch": 5.787712870888522, - "grad_norm": 0.0019568628631532192, - "learning_rate": 0.00019998348033568462, - "loss": 46.0, - "step": 35939 - }, - { - "epoch": 5.78787390796731, - "grad_norm": 0.007232495583593845, - "learning_rate": 0.00019998347941612443, - "loss": 46.0, - "step": 35940 - }, - { - "epoch": 5.788034945046097, - "grad_norm": 0.006487681530416012, - "learning_rate": 0.00019998347849653863, - "loss": 46.0, - "step": 35941 - }, - { - "epoch": 5.7881959821248845, - "grad_norm": 0.002533015562221408, - "learning_rate": 0.00019998347757692724, - "loss": 46.0, - "step": 35942 - }, - { - "epoch": 5.788357019203672, - "grad_norm": 0.002100419718772173, - "learning_rate": 0.00019998347665729026, - "loss": 46.0, - "step": 35943 - }, - { - "epoch": 5.788518056282459, - "grad_norm": 0.004103851970285177, - "learning_rate": 0.0001999834757376277, - "loss": 46.0, - "step": 35944 - }, - { - "epoch": 5.788679093361246, - "grad_norm": 0.002914935350418091, - "learning_rate": 0.00019998347481793954, - "loss": 46.0, - "step": 35945 - }, - { - "epoch": 5.788840130440034, - "grad_norm": 0.0027448509354144335, - "learning_rate": 0.00019998347389822577, - "loss": 46.0, - "step": 35946 - }, - { - "epoch": 5.789001167518821, - "grad_norm": 0.0037534795701503754, - "learning_rate": 0.00019998347297848644, - "loss": 46.0, - "step": 35947 - }, - { - "epoch": 5.789162204597608, - "grad_norm": 0.004226305056363344, - "learning_rate": 0.0001999834720587215, - "loss": 46.0, - "step": 35948 - }, - { - "epoch": 5.789323241676396, - "grad_norm": 0.001660239533521235, - "learning_rate": 0.00019998347113893097, - "loss": 46.0, - "step": 35949 - }, - { - "epoch": 5.789484278755183, - "grad_norm": 0.0029238113202154636, - "learning_rate": 0.00019998347021911488, - "loss": 46.0, - "step": 35950 - }, - { - "epoch": 5.789645315833971, - "grad_norm": 0.011482351459562778, - "learning_rate": 0.00019998346929927318, - "loss": 46.0, - "step": 35951 - }, - { - "epoch": 5.789806352912758, - "grad_norm": 0.004890913609415293, - "learning_rate": 0.00019998346837940586, - "loss": 46.0, - "step": 35952 - }, - { - "epoch": 5.789967389991546, - "grad_norm": 0.0017212658422067761, - "learning_rate": 0.000199983467459513, - "loss": 46.0, - "step": 35953 - }, - { - "epoch": 5.790128427070333, - "grad_norm": 0.009074416011571884, - "learning_rate": 0.00019998346653959449, - "loss": 46.0, - "step": 35954 - }, - { - "epoch": 5.7902894641491205, - "grad_norm": 0.004019788466393948, - "learning_rate": 0.00019998346561965043, - "loss": 46.0, - "step": 35955 - }, - { - "epoch": 5.790450501227908, - "grad_norm": 0.00943315215408802, - "learning_rate": 0.0001999834646996808, - "loss": 46.0, - "step": 35956 - }, - { - "epoch": 5.790611538306695, - "grad_norm": 0.003384312381967902, - "learning_rate": 0.00019998346377968554, - "loss": 46.0, - "step": 35957 - }, - { - "epoch": 5.790772575385483, - "grad_norm": 0.0022111611906439066, - "learning_rate": 0.0001999834628596647, - "loss": 46.0, - "step": 35958 - }, - { - "epoch": 5.79093361246427, - "grad_norm": 0.001141060027293861, - "learning_rate": 0.00019998346193961827, - "loss": 46.0, - "step": 35959 - }, - { - "epoch": 5.791094649543057, - "grad_norm": 0.007703473791480064, - "learning_rate": 0.00019998346101954625, - "loss": 46.0, - "step": 35960 - }, - { - "epoch": 5.791255686621844, - "grad_norm": 0.0030749852303415537, - "learning_rate": 0.00019998346009944862, - "loss": 46.0, - "step": 35961 - }, - { - "epoch": 5.791416723700632, - "grad_norm": 0.001100547262467444, - "learning_rate": 0.00019998345917932544, - "loss": 46.0, - "step": 35962 - }, - { - "epoch": 5.791577760779419, - "grad_norm": 0.007251678965985775, - "learning_rate": 0.00019998345825917663, - "loss": 46.0, - "step": 35963 - }, - { - "epoch": 5.791738797858207, - "grad_norm": 0.004754495806992054, - "learning_rate": 0.00019998345733900227, - "loss": 46.0, - "step": 35964 - }, - { - "epoch": 5.791899834936994, - "grad_norm": 0.002452654065564275, - "learning_rate": 0.0001999834564188023, - "loss": 46.0, - "step": 35965 - }, - { - "epoch": 5.792060872015782, - "grad_norm": 0.007526395842432976, - "learning_rate": 0.00019998345549857672, - "loss": 46.0, - "step": 35966 - }, - { - "epoch": 5.792221909094569, - "grad_norm": 0.007444023620337248, - "learning_rate": 0.0001999834545783256, - "loss": 46.0, - "step": 35967 - }, - { - "epoch": 5.7923829461733565, - "grad_norm": 0.0014156947145238519, - "learning_rate": 0.00019998345365804886, - "loss": 46.0, - "step": 35968 - }, - { - "epoch": 5.792543983252144, - "grad_norm": 0.0034335653763264418, - "learning_rate": 0.0001999834527377465, - "loss": 46.0, - "step": 35969 - }, - { - "epoch": 5.792705020330931, - "grad_norm": 0.0055056908167898655, - "learning_rate": 0.0001999834518174186, - "loss": 46.0, - "step": 35970 - }, - { - "epoch": 5.792866057409719, - "grad_norm": 0.011387539096176624, - "learning_rate": 0.00019998345089706506, - "loss": 46.0, - "step": 35971 - }, - { - "epoch": 5.793027094488506, - "grad_norm": 0.030026080086827278, - "learning_rate": 0.00019998344997668594, - "loss": 46.0, - "step": 35972 - }, - { - "epoch": 5.793188131567294, - "grad_norm": 0.002667773747816682, - "learning_rate": 0.00019998344905628127, - "loss": 46.0, - "step": 35973 - }, - { - "epoch": 5.793349168646081, - "grad_norm": 0.01391933299601078, - "learning_rate": 0.00019998344813585095, - "loss": 46.0, - "step": 35974 - }, - { - "epoch": 5.793510205724868, - "grad_norm": 0.0012938538566231728, - "learning_rate": 0.00019998344721539507, - "loss": 46.0, - "step": 35975 - }, - { - "epoch": 5.793671242803655, - "grad_norm": 0.002052146242931485, - "learning_rate": 0.0001999834462949136, - "loss": 46.0, - "step": 35976 - }, - { - "epoch": 5.793832279882443, - "grad_norm": 0.0018925995100289583, - "learning_rate": 0.00019998344537440653, - "loss": 46.0, - "step": 35977 - }, - { - "epoch": 5.79399331696123, - "grad_norm": 0.0006377461249940097, - "learning_rate": 0.00019998344445387392, - "loss": 46.0, - "step": 35978 - }, - { - "epoch": 5.7941543540400176, - "grad_norm": 0.011470134370028973, - "learning_rate": 0.00019998344353331567, - "loss": 46.0, - "step": 35979 - }, - { - "epoch": 5.794315391118805, - "grad_norm": 0.007166468072682619, - "learning_rate": 0.00019998344261273182, - "loss": 46.0, - "step": 35980 - }, - { - "epoch": 5.7944764281975925, - "grad_norm": 0.002545740222558379, - "learning_rate": 0.00019998344169212242, - "loss": 46.0, - "step": 35981 - }, - { - "epoch": 5.79463746527638, - "grad_norm": 0.0030606503132730722, - "learning_rate": 0.00019998344077148738, - "loss": 46.0, - "step": 35982 - }, - { - "epoch": 5.794798502355167, - "grad_norm": 0.002506427001208067, - "learning_rate": 0.00019998343985082678, - "loss": 46.0, - "step": 35983 - }, - { - "epoch": 5.794959539433955, - "grad_norm": 0.006900331936776638, - "learning_rate": 0.0001999834389301406, - "loss": 46.0, - "step": 35984 - }, - { - "epoch": 5.795120576512742, - "grad_norm": 0.015905145555734634, - "learning_rate": 0.00019998343800942882, - "loss": 46.0, - "step": 35985 - }, - { - "epoch": 5.79528161359153, - "grad_norm": 0.015668099746108055, - "learning_rate": 0.00019998343708869145, - "loss": 46.0, - "step": 35986 - }, - { - "epoch": 5.795442650670317, - "grad_norm": 0.002261025132611394, - "learning_rate": 0.00019998343616792848, - "loss": 46.0, - "step": 35987 - }, - { - "epoch": 5.795603687749105, - "grad_norm": 0.00249306159093976, - "learning_rate": 0.0001999834352471399, - "loss": 46.0, - "step": 35988 - }, - { - "epoch": 5.795764724827892, - "grad_norm": 0.003147156210616231, - "learning_rate": 0.00019998343432632579, - "loss": 46.0, - "step": 35989 - }, - { - "epoch": 5.795925761906679, - "grad_norm": 0.008631477132439613, - "learning_rate": 0.00019998343340548602, - "loss": 46.0, - "step": 35990 - }, - { - "epoch": 5.796086798985466, - "grad_norm": 0.001991981640458107, - "learning_rate": 0.0001999834324846207, - "loss": 46.0, - "step": 35991 - }, - { - "epoch": 5.7962478360642535, - "grad_norm": 0.005137927830219269, - "learning_rate": 0.00019998343156372978, - "loss": 46.0, - "step": 35992 - }, - { - "epoch": 5.796408873143041, - "grad_norm": 0.0016772891394793987, - "learning_rate": 0.00019998343064281325, - "loss": 46.0, - "step": 35993 - }, - { - "epoch": 5.796569910221828, - "grad_norm": 0.002501783426851034, - "learning_rate": 0.0001999834297218712, - "loss": 46.0, - "step": 35994 - }, - { - "epoch": 5.796730947300616, - "grad_norm": 0.004704023711383343, - "learning_rate": 0.0001999834288009035, - "loss": 46.0, - "step": 35995 - }, - { - "epoch": 5.796891984379403, - "grad_norm": 0.0033317310735583305, - "learning_rate": 0.0001999834278799102, - "loss": 46.0, - "step": 35996 - }, - { - "epoch": 5.797053021458191, - "grad_norm": 0.001641907263547182, - "learning_rate": 0.00019998342695889132, - "loss": 46.0, - "step": 35997 - }, - { - "epoch": 5.797214058536978, - "grad_norm": 0.0008138873963616788, - "learning_rate": 0.00019998342603784686, - "loss": 46.0, - "step": 35998 - }, - { - "epoch": 5.797375095615766, - "grad_norm": 0.011361321434378624, - "learning_rate": 0.00019998342511677678, - "loss": 46.0, - "step": 35999 - }, - { - "epoch": 5.797536132694553, - "grad_norm": 0.002221940318122506, - "learning_rate": 0.00019998342419568114, - "loss": 46.0, - "step": 36000 - }, - { - "epoch": 5.797697169773341, - "grad_norm": 0.005986008793115616, - "learning_rate": 0.00019998342327455992, - "loss": 46.0, - "step": 36001 - }, - { - "epoch": 5.797858206852128, - "grad_norm": 0.003031674772500992, - "learning_rate": 0.0001999834223534131, - "loss": 46.0, - "step": 36002 - }, - { - "epoch": 5.798019243930915, - "grad_norm": 0.0016920288326218724, - "learning_rate": 0.00019998342143224068, - "loss": 46.0, - "step": 36003 - }, - { - "epoch": 5.798180281009703, - "grad_norm": 0.030749212950468063, - "learning_rate": 0.0001999834205110427, - "loss": 46.0, - "step": 36004 - }, - { - "epoch": 5.7983413180884895, - "grad_norm": 0.008248284459114075, - "learning_rate": 0.00019998341958981907, - "loss": 46.0, - "step": 36005 - }, - { - "epoch": 5.798502355167277, - "grad_norm": 0.0041768429800868034, - "learning_rate": 0.00019998341866856988, - "loss": 46.0, - "step": 36006 - }, - { - "epoch": 5.798663392246064, - "grad_norm": 0.010321359150111675, - "learning_rate": 0.0001999834177472951, - "loss": 46.0, - "step": 36007 - }, - { - "epoch": 5.798824429324852, - "grad_norm": 0.013382167555391788, - "learning_rate": 0.00019998341682599474, - "loss": 46.0, - "step": 36008 - }, - { - "epoch": 5.798985466403639, - "grad_norm": 0.007584656123071909, - "learning_rate": 0.0001999834159046688, - "loss": 46.0, - "step": 36009 - }, - { - "epoch": 5.799146503482427, - "grad_norm": 0.0026863585226237774, - "learning_rate": 0.00019998341498331723, - "loss": 46.0, - "step": 36010 - }, - { - "epoch": 5.799307540561214, - "grad_norm": 0.007454379461705685, - "learning_rate": 0.0001999834140619401, - "loss": 46.0, - "step": 36011 - }, - { - "epoch": 5.799468577640002, - "grad_norm": 0.00596819119527936, - "learning_rate": 0.00019998341314053736, - "loss": 46.0, - "step": 36012 - }, - { - "epoch": 5.799629614718789, - "grad_norm": 0.002117754425853491, - "learning_rate": 0.00019998341221910904, - "loss": 46.0, - "step": 36013 - }, - { - "epoch": 5.799790651797577, - "grad_norm": 0.009177391417324543, - "learning_rate": 0.00019998341129765513, - "loss": 46.0, - "step": 36014 - }, - { - "epoch": 5.799951688876364, - "grad_norm": 0.006089060567319393, - "learning_rate": 0.0001999834103761756, - "loss": 46.0, - "step": 36015 - }, - { - "epoch": 5.8001127259551515, - "grad_norm": 0.0027072851080447435, - "learning_rate": 0.0001999834094546705, - "loss": 46.0, - "step": 36016 - }, - { - "epoch": 5.800273763033939, - "grad_norm": 0.008395212702453136, - "learning_rate": 0.00019998340853313984, - "loss": 46.0, - "step": 36017 - }, - { - "epoch": 5.8004348001127255, - "grad_norm": 0.0014350964920595288, - "learning_rate": 0.00019998340761158355, - "loss": 46.0, - "step": 36018 - }, - { - "epoch": 5.800595837191514, - "grad_norm": 0.006828527897596359, - "learning_rate": 0.0001999834066900017, - "loss": 46.0, - "step": 36019 - }, - { - "epoch": 5.8007568742703, - "grad_norm": 0.00606875866651535, - "learning_rate": 0.00019998340576839424, - "loss": 46.0, - "step": 36020 - }, - { - "epoch": 5.800917911349088, - "grad_norm": 0.0013484942028298974, - "learning_rate": 0.0001999834048467612, - "loss": 46.0, - "step": 36021 - }, - { - "epoch": 5.801078948427875, - "grad_norm": 0.0018275863258168101, - "learning_rate": 0.00019998340392510255, - "loss": 46.0, - "step": 36022 - }, - { - "epoch": 5.801239985506663, - "grad_norm": 0.00551186315715313, - "learning_rate": 0.0001999834030034183, - "loss": 46.0, - "step": 36023 - }, - { - "epoch": 5.80140102258545, - "grad_norm": 0.0013101280201226473, - "learning_rate": 0.0001999834020817085, - "loss": 46.0, - "step": 36024 - }, - { - "epoch": 5.801562059664238, - "grad_norm": 0.0013992469757795334, - "learning_rate": 0.0001999834011599731, - "loss": 46.0, - "step": 36025 - }, - { - "epoch": 5.801723096743025, - "grad_norm": 0.0021910525392740965, - "learning_rate": 0.0001999834002382121, - "loss": 46.0, - "step": 36026 - }, - { - "epoch": 5.801884133821813, - "grad_norm": 0.0008445768035016954, - "learning_rate": 0.0001999833993164255, - "loss": 46.0, - "step": 36027 - }, - { - "epoch": 5.8020451709006, - "grad_norm": 0.0015970169333741069, - "learning_rate": 0.00019998339839461332, - "loss": 46.0, - "step": 36028 - }, - { - "epoch": 5.8022062079793875, - "grad_norm": 0.0012416945537552238, - "learning_rate": 0.00019998339747277554, - "loss": 46.0, - "step": 36029 - }, - { - "epoch": 5.802367245058175, - "grad_norm": 0.00425972742959857, - "learning_rate": 0.00019998339655091218, - "loss": 46.0, - "step": 36030 - }, - { - "epoch": 5.802528282136962, - "grad_norm": 0.002603266155347228, - "learning_rate": 0.0001999833956290232, - "loss": 46.0, - "step": 36031 - }, - { - "epoch": 5.80268931921575, - "grad_norm": 0.013226257637143135, - "learning_rate": 0.00019998339470710867, - "loss": 46.0, - "step": 36032 - }, - { - "epoch": 5.802850356294536, - "grad_norm": 0.014850325882434845, - "learning_rate": 0.00019998339378516855, - "loss": 46.0, - "step": 36033 - }, - { - "epoch": 5.803011393373324, - "grad_norm": 0.0041097248904407024, - "learning_rate": 0.0001999833928632028, - "loss": 46.0, - "step": 36034 - }, - { - "epoch": 5.803172430452111, - "grad_norm": 0.002324201399460435, - "learning_rate": 0.00019998339194121151, - "loss": 46.0, - "step": 36035 - }, - { - "epoch": 5.803333467530899, - "grad_norm": 0.005739324726164341, - "learning_rate": 0.0001999833910191946, - "loss": 46.0, - "step": 36036 - }, - { - "epoch": 5.803494504609686, - "grad_norm": 0.012247636914253235, - "learning_rate": 0.0001999833900971521, - "loss": 46.0, - "step": 36037 - }, - { - "epoch": 5.803655541688474, - "grad_norm": 0.0008543399744667113, - "learning_rate": 0.00019998338917508402, - "loss": 46.0, - "step": 36038 - }, - { - "epoch": 5.803816578767261, - "grad_norm": 0.002412332920357585, - "learning_rate": 0.00019998338825299032, - "loss": 46.0, - "step": 36039 - }, - { - "epoch": 5.8039776158460485, - "grad_norm": 0.02353476732969284, - "learning_rate": 0.00019998338733087106, - "loss": 46.0, - "step": 36040 - }, - { - "epoch": 5.804138652924836, - "grad_norm": 0.006275770720094442, - "learning_rate": 0.0001999833864087262, - "loss": 46.0, - "step": 36041 - }, - { - "epoch": 5.8042996900036234, - "grad_norm": 0.008256159722805023, - "learning_rate": 0.00019998338548655578, - "loss": 46.0, - "step": 36042 - }, - { - "epoch": 5.804460727082411, - "grad_norm": 0.030541103333234787, - "learning_rate": 0.00019998338456435973, - "loss": 46.0, - "step": 36043 - }, - { - "epoch": 5.804621764161198, - "grad_norm": 0.0055891480296850204, - "learning_rate": 0.0001999833836421381, - "loss": 46.0, - "step": 36044 - }, - { - "epoch": 5.804782801239986, - "grad_norm": 0.011011507362127304, - "learning_rate": 0.00019998338271989087, - "loss": 46.0, - "step": 36045 - }, - { - "epoch": 5.804943838318773, - "grad_norm": 0.011860811151564121, - "learning_rate": 0.00019998338179761803, - "loss": 46.0, - "step": 36046 - }, - { - "epoch": 5.805104875397561, - "grad_norm": 0.01165796723216772, - "learning_rate": 0.00019998338087531966, - "loss": 46.0, - "step": 36047 - }, - { - "epoch": 5.805265912476347, - "grad_norm": 0.0028356791008263826, - "learning_rate": 0.00019998337995299565, - "loss": 46.0, - "step": 36048 - }, - { - "epoch": 5.805426949555135, - "grad_norm": 0.00123371253721416, - "learning_rate": 0.00019998337903064605, - "loss": 46.0, - "step": 36049 - }, - { - "epoch": 5.805587986633922, - "grad_norm": 0.01672019436955452, - "learning_rate": 0.0001999833781082709, - "loss": 46.0, - "step": 36050 - }, - { - "epoch": 5.80574902371271, - "grad_norm": 0.004154611844569445, - "learning_rate": 0.00019998337718587012, - "loss": 46.0, - "step": 36051 - }, - { - "epoch": 5.805910060791497, - "grad_norm": 0.007643844000995159, - "learning_rate": 0.00019998337626344375, - "loss": 46.0, - "step": 36052 - }, - { - "epoch": 5.8060710978702845, - "grad_norm": 0.0030058391857892275, - "learning_rate": 0.00019998337534099183, - "loss": 46.0, - "step": 36053 - }, - { - "epoch": 5.806232134949072, - "grad_norm": 0.006475790403783321, - "learning_rate": 0.00019998337441851427, - "loss": 46.0, - "step": 36054 - }, - { - "epoch": 5.806393172027859, - "grad_norm": 0.004651803057640791, - "learning_rate": 0.00019998337349601115, - "loss": 46.0, - "step": 36055 - }, - { - "epoch": 5.806554209106647, - "grad_norm": 0.0032809628173708916, - "learning_rate": 0.00019998337257348244, - "loss": 46.0, - "step": 36056 - }, - { - "epoch": 5.806715246185434, - "grad_norm": 0.004969972651451826, - "learning_rate": 0.00019998337165092812, - "loss": 46.0, - "step": 36057 - }, - { - "epoch": 5.806876283264222, - "grad_norm": 0.002350902184844017, - "learning_rate": 0.00019998337072834823, - "loss": 46.0, - "step": 36058 - }, - { - "epoch": 5.807037320343009, - "grad_norm": 0.006224953103810549, - "learning_rate": 0.00019998336980574274, - "loss": 46.0, - "step": 36059 - }, - { - "epoch": 5.807198357421797, - "grad_norm": 0.018788155168294907, - "learning_rate": 0.00019998336888311165, - "loss": 46.0, - "step": 36060 - }, - { - "epoch": 5.807359394500584, - "grad_norm": 0.0013146103592589498, - "learning_rate": 0.00019998336796045498, - "loss": 46.0, - "step": 36061 - }, - { - "epoch": 5.807520431579372, - "grad_norm": 0.002196866786107421, - "learning_rate": 0.00019998336703777272, - "loss": 46.0, - "step": 36062 - }, - { - "epoch": 5.807681468658158, - "grad_norm": 0.00149641465395689, - "learning_rate": 0.00019998336611506487, - "loss": 46.0, - "step": 36063 - }, - { - "epoch": 5.807842505736946, - "grad_norm": 0.005241080652922392, - "learning_rate": 0.00019998336519233144, - "loss": 46.0, - "step": 36064 - }, - { - "epoch": 5.808003542815733, - "grad_norm": 0.0010303921299055219, - "learning_rate": 0.0001999833642695724, - "loss": 46.0, - "step": 36065 - }, - { - "epoch": 5.8081645798945205, - "grad_norm": 0.004168648272752762, - "learning_rate": 0.00019998336334678776, - "loss": 46.0, - "step": 36066 - }, - { - "epoch": 5.808325616973308, - "grad_norm": 0.006564165931195021, - "learning_rate": 0.00019998336242397753, - "loss": 46.0, - "step": 36067 - }, - { - "epoch": 5.808486654052095, - "grad_norm": 0.005773855838924646, - "learning_rate": 0.00019998336150114175, - "loss": 46.0, - "step": 36068 - }, - { - "epoch": 5.808647691130883, - "grad_norm": 0.014673485420644283, - "learning_rate": 0.00019998336057828033, - "loss": 46.0, - "step": 36069 - }, - { - "epoch": 5.80880872820967, - "grad_norm": 0.006893686484545469, - "learning_rate": 0.00019998335965539337, - "loss": 46.0, - "step": 36070 - }, - { - "epoch": 5.808969765288458, - "grad_norm": 0.0011093391804024577, - "learning_rate": 0.00019998335873248077, - "loss": 46.0, - "step": 36071 - }, - { - "epoch": 5.809130802367245, - "grad_norm": 0.009555998258292675, - "learning_rate": 0.00019998335780954261, - "loss": 46.0, - "step": 36072 - }, - { - "epoch": 5.809291839446033, - "grad_norm": 0.005293944850564003, - "learning_rate": 0.00019998335688657884, - "loss": 46.0, - "step": 36073 - }, - { - "epoch": 5.80945287652482, - "grad_norm": 0.0034722699783742428, - "learning_rate": 0.00019998335596358948, - "loss": 46.0, - "step": 36074 - }, - { - "epoch": 5.809613913603608, - "grad_norm": 0.005422583315521479, - "learning_rate": 0.00019998335504057456, - "loss": 46.0, - "step": 36075 - }, - { - "epoch": 5.809774950682394, - "grad_norm": 0.0013842072803527117, - "learning_rate": 0.00019998335411753403, - "loss": 46.0, - "step": 36076 - }, - { - "epoch": 5.8099359877611825, - "grad_norm": 0.004719553515315056, - "learning_rate": 0.0001999833531944679, - "loss": 46.0, - "step": 36077 - }, - { - "epoch": 5.810097024839969, - "grad_norm": 0.0023871390148997307, - "learning_rate": 0.0001999833522713762, - "loss": 46.0, - "step": 36078 - }, - { - "epoch": 5.8102580619187565, - "grad_norm": 0.004307557828724384, - "learning_rate": 0.00019998335134825888, - "loss": 46.0, - "step": 36079 - }, - { - "epoch": 5.810419098997544, - "grad_norm": 0.003083202987909317, - "learning_rate": 0.00019998335042511597, - "loss": 46.0, - "step": 36080 - }, - { - "epoch": 5.810580136076331, - "grad_norm": 0.0027006755117326975, - "learning_rate": 0.0001999833495019475, - "loss": 46.0, - "step": 36081 - }, - { - "epoch": 5.810741173155119, - "grad_norm": 0.0022710133343935013, - "learning_rate": 0.00019998334857875344, - "loss": 46.0, - "step": 36082 - }, - { - "epoch": 5.810902210233906, - "grad_norm": 0.006473921705037355, - "learning_rate": 0.00019998334765553377, - "loss": 46.0, - "step": 36083 - }, - { - "epoch": 5.811063247312694, - "grad_norm": 0.009408203884959221, - "learning_rate": 0.0001999833467322885, - "loss": 46.0, - "step": 36084 - }, - { - "epoch": 5.811224284391481, - "grad_norm": 0.002478943672031164, - "learning_rate": 0.00019998334580901766, - "loss": 46.0, - "step": 36085 - }, - { - "epoch": 5.811385321470269, - "grad_norm": 0.0016110965516418219, - "learning_rate": 0.0001999833448857212, - "loss": 46.0, - "step": 36086 - }, - { - "epoch": 5.811546358549056, - "grad_norm": 0.0034495352301746607, - "learning_rate": 0.00019998334396239918, - "loss": 46.0, - "step": 36087 - }, - { - "epoch": 5.8117073956278436, - "grad_norm": 0.000996622839011252, - "learning_rate": 0.00019998334303905158, - "loss": 46.0, - "step": 36088 - }, - { - "epoch": 5.811868432706631, - "grad_norm": 0.02360527217388153, - "learning_rate": 0.00019998334211567836, - "loss": 46.0, - "step": 36089 - }, - { - "epoch": 5.8120294697854185, - "grad_norm": 0.008745023980736732, - "learning_rate": 0.00019998334119227957, - "loss": 46.0, - "step": 36090 - }, - { - "epoch": 5.812190506864205, - "grad_norm": 0.0018846254097297788, - "learning_rate": 0.00019998334026885518, - "loss": 46.0, - "step": 36091 - }, - { - "epoch": 5.812351543942993, - "grad_norm": 0.00911658350378275, - "learning_rate": 0.0001999833393454052, - "loss": 46.0, - "step": 36092 - }, - { - "epoch": 5.81251258102178, - "grad_norm": 0.0025152629241347313, - "learning_rate": 0.00019998333842192963, - "loss": 46.0, - "step": 36093 - }, - { - "epoch": 5.812673618100567, - "grad_norm": 0.004125799052417278, - "learning_rate": 0.00019998333749842847, - "loss": 46.0, - "step": 36094 - }, - { - "epoch": 5.812834655179355, - "grad_norm": 0.0018885155441239476, - "learning_rate": 0.00019998333657490172, - "loss": 46.0, - "step": 36095 - }, - { - "epoch": 5.812995692258142, - "grad_norm": 0.004321923479437828, - "learning_rate": 0.00019998333565134937, - "loss": 46.0, - "step": 36096 - }, - { - "epoch": 5.81315672933693, - "grad_norm": 0.006099647842347622, - "learning_rate": 0.00019998333472777145, - "loss": 46.0, - "step": 36097 - }, - { - "epoch": 5.813317766415717, - "grad_norm": 0.00210051448084414, - "learning_rate": 0.0001999833338041679, - "loss": 46.0, - "step": 36098 - }, - { - "epoch": 5.813478803494505, - "grad_norm": 0.002787800971418619, - "learning_rate": 0.0001999833328805388, - "loss": 46.0, - "step": 36099 - }, - { - "epoch": 5.813639840573292, - "grad_norm": 0.01858663558959961, - "learning_rate": 0.00019998333195688409, - "loss": 46.0, - "step": 36100 - }, - { - "epoch": 5.8138008776520795, - "grad_norm": 0.00549480551853776, - "learning_rate": 0.0001999833310332038, - "loss": 46.0, - "step": 36101 - }, - { - "epoch": 5.813961914730867, - "grad_norm": 0.002554436679929495, - "learning_rate": 0.0001999833301094979, - "loss": 46.0, - "step": 36102 - }, - { - "epoch": 5.814122951809654, - "grad_norm": 0.0012868873309344053, - "learning_rate": 0.00019998332918576641, - "loss": 46.0, - "step": 36103 - }, - { - "epoch": 5.814283988888442, - "grad_norm": 0.014549266546964645, - "learning_rate": 0.00019998332826200933, - "loss": 46.0, - "step": 36104 - }, - { - "epoch": 5.814445025967229, - "grad_norm": 0.0019923283252865076, - "learning_rate": 0.0001999833273382267, - "loss": 46.0, - "step": 36105 - }, - { - "epoch": 5.814606063046016, - "grad_norm": 0.0033107066992670298, - "learning_rate": 0.00019998332641441846, - "loss": 46.0, - "step": 36106 - }, - { - "epoch": 5.814767100124803, - "grad_norm": 0.003413548693060875, - "learning_rate": 0.0001999833254905846, - "loss": 46.0, - "step": 36107 - }, - { - "epoch": 5.814928137203591, - "grad_norm": 0.0015034214593470097, - "learning_rate": 0.00019998332456672518, - "loss": 46.0, - "step": 36108 - }, - { - "epoch": 5.815089174282378, - "grad_norm": 0.011482627131044865, - "learning_rate": 0.00019998332364284016, - "loss": 46.0, - "step": 36109 - }, - { - "epoch": 5.815250211361166, - "grad_norm": 0.0032607412431389093, - "learning_rate": 0.00019998332271892955, - "loss": 46.0, - "step": 36110 - }, - { - "epoch": 5.815411248439953, - "grad_norm": 0.00533420080319047, - "learning_rate": 0.00019998332179499336, - "loss": 46.0, - "step": 36111 - }, - { - "epoch": 5.815572285518741, - "grad_norm": 0.008268029429018497, - "learning_rate": 0.00019998332087103155, - "loss": 46.0, - "step": 36112 - }, - { - "epoch": 5.815733322597528, - "grad_norm": 0.0037971429992467165, - "learning_rate": 0.00019998331994704416, - "loss": 46.0, - "step": 36113 - }, - { - "epoch": 5.8158943596763155, - "grad_norm": 0.008767115883529186, - "learning_rate": 0.00019998331902303118, - "loss": 46.0, - "step": 36114 - }, - { - "epoch": 5.816055396755103, - "grad_norm": 0.0005402269889600575, - "learning_rate": 0.00019998331809899263, - "loss": 46.0, - "step": 36115 - }, - { - "epoch": 5.81621643383389, - "grad_norm": 0.003751007141545415, - "learning_rate": 0.00019998331717492845, - "loss": 46.0, - "step": 36116 - }, - { - "epoch": 5.816377470912678, - "grad_norm": 0.007433861494064331, - "learning_rate": 0.00019998331625083873, - "loss": 46.0, - "step": 36117 - }, - { - "epoch": 5.816538507991465, - "grad_norm": 0.0012924092588946223, - "learning_rate": 0.00019998331532672338, - "loss": 46.0, - "step": 36118 - }, - { - "epoch": 5.816699545070253, - "grad_norm": 0.009297915734350681, - "learning_rate": 0.00019998331440258243, - "loss": 46.0, - "step": 36119 - }, - { - "epoch": 5.81686058214904, - "grad_norm": 0.002273964462801814, - "learning_rate": 0.00019998331347841593, - "loss": 46.0, - "step": 36120 - }, - { - "epoch": 5.817021619227827, - "grad_norm": 0.01792997121810913, - "learning_rate": 0.00019998331255422383, - "loss": 46.0, - "step": 36121 - }, - { - "epoch": 5.817182656306614, - "grad_norm": 0.0037858830764889717, - "learning_rate": 0.00019998331163000616, - "loss": 46.0, - "step": 36122 - }, - { - "epoch": 5.817343693385402, - "grad_norm": 0.005455431528389454, - "learning_rate": 0.00019998331070576283, - "loss": 46.0, - "step": 36123 - }, - { - "epoch": 5.817504730464189, - "grad_norm": 0.0041198753751814365, - "learning_rate": 0.00019998330978149395, - "loss": 46.0, - "step": 36124 - }, - { - "epoch": 5.817665767542977, - "grad_norm": 0.003341143950819969, - "learning_rate": 0.00019998330885719949, - "loss": 46.0, - "step": 36125 - }, - { - "epoch": 5.817826804621764, - "grad_norm": 0.004304523579776287, - "learning_rate": 0.00019998330793287943, - "loss": 46.0, - "step": 36126 - }, - { - "epoch": 5.8179878417005515, - "grad_norm": 0.024700313806533813, - "learning_rate": 0.0001999833070085338, - "loss": 46.0, - "step": 36127 - }, - { - "epoch": 5.818148878779339, - "grad_norm": 0.0043274881318211555, - "learning_rate": 0.00019998330608416253, - "loss": 46.0, - "step": 36128 - }, - { - "epoch": 5.818309915858126, - "grad_norm": 0.0014909901656210423, - "learning_rate": 0.00019998330515976572, - "loss": 46.0, - "step": 36129 - }, - { - "epoch": 5.818470952936914, - "grad_norm": 0.015636015683412552, - "learning_rate": 0.00019998330423534326, - "loss": 46.0, - "step": 36130 - }, - { - "epoch": 5.818631990015701, - "grad_norm": 0.004163338802754879, - "learning_rate": 0.00019998330331089527, - "loss": 46.0, - "step": 36131 - }, - { - "epoch": 5.818793027094489, - "grad_norm": 0.001807473599910736, - "learning_rate": 0.00019998330238642166, - "loss": 46.0, - "step": 36132 - }, - { - "epoch": 5.818954064173276, - "grad_norm": 0.0048170145601034164, - "learning_rate": 0.00019998330146192247, - "loss": 46.0, - "step": 36133 - }, - { - "epoch": 5.819115101252064, - "grad_norm": 0.013081304728984833, - "learning_rate": 0.00019998330053739766, - "loss": 46.0, - "step": 36134 - }, - { - "epoch": 5.819276138330851, - "grad_norm": 0.0010845721699297428, - "learning_rate": 0.0001999832996128473, - "loss": 46.0, - "step": 36135 - }, - { - "epoch": 5.819437175409638, - "grad_norm": 0.015675462782382965, - "learning_rate": 0.0001999832986882713, - "loss": 46.0, - "step": 36136 - }, - { - "epoch": 5.819598212488425, - "grad_norm": 0.0058937156572937965, - "learning_rate": 0.00019998329776366977, - "loss": 46.0, - "step": 36137 - }, - { - "epoch": 5.819759249567213, - "grad_norm": 0.0021797495428472757, - "learning_rate": 0.00019998329683904262, - "loss": 46.0, - "step": 36138 - }, - { - "epoch": 5.819920286646, - "grad_norm": 0.009142951108515263, - "learning_rate": 0.00019998329591438987, - "loss": 46.0, - "step": 36139 - }, - { - "epoch": 5.8200813237247875, - "grad_norm": 0.007929841056466103, - "learning_rate": 0.00019998329498971154, - "loss": 46.0, - "step": 36140 - }, - { - "epoch": 5.820242360803575, - "grad_norm": 0.0022620921954512596, - "learning_rate": 0.00019998329406500763, - "loss": 46.0, - "step": 36141 - }, - { - "epoch": 5.820403397882362, - "grad_norm": 0.009755589067935944, - "learning_rate": 0.00019998329314027812, - "loss": 46.0, - "step": 36142 - }, - { - "epoch": 5.82056443496115, - "grad_norm": 0.01037575677037239, - "learning_rate": 0.000199983292215523, - "loss": 46.0, - "step": 36143 - }, - { - "epoch": 5.820725472039937, - "grad_norm": 0.0069565302692353725, - "learning_rate": 0.00019998329129074233, - "loss": 46.0, - "step": 36144 - }, - { - "epoch": 5.820886509118725, - "grad_norm": 0.008227053098380566, - "learning_rate": 0.00019998329036593603, - "loss": 46.0, - "step": 36145 - }, - { - "epoch": 5.821047546197512, - "grad_norm": 0.009839381091296673, - "learning_rate": 0.00019998328944110418, - "loss": 46.0, - "step": 36146 - }, - { - "epoch": 5.8212085832763, - "grad_norm": 0.009177645668387413, - "learning_rate": 0.00019998328851624671, - "loss": 46.0, - "step": 36147 - }, - { - "epoch": 5.821369620355087, - "grad_norm": 0.0018640889320522547, - "learning_rate": 0.00019998328759136363, - "loss": 46.0, - "step": 36148 - }, - { - "epoch": 5.821530657433874, - "grad_norm": 0.006727991160005331, - "learning_rate": 0.00019998328666645502, - "loss": 46.0, - "step": 36149 - }, - { - "epoch": 5.821691694512662, - "grad_norm": 0.0049310834147036076, - "learning_rate": 0.0001999832857415208, - "loss": 46.0, - "step": 36150 - }, - { - "epoch": 5.821852731591449, - "grad_norm": 0.001063590869307518, - "learning_rate": 0.00019998328481656095, - "loss": 46.0, - "step": 36151 - }, - { - "epoch": 5.822013768670236, - "grad_norm": 0.011167946271598339, - "learning_rate": 0.00019998328389157552, - "loss": 46.0, - "step": 36152 - }, - { - "epoch": 5.8221748057490235, - "grad_norm": 0.003994588274508715, - "learning_rate": 0.00019998328296656453, - "loss": 46.0, - "step": 36153 - }, - { - "epoch": 5.822335842827811, - "grad_norm": 0.012875864282250404, - "learning_rate": 0.00019998328204152792, - "loss": 46.0, - "step": 36154 - }, - { - "epoch": 5.822496879906598, - "grad_norm": 0.001265470520593226, - "learning_rate": 0.00019998328111646573, - "loss": 46.0, - "step": 36155 - }, - { - "epoch": 5.822657916985386, - "grad_norm": 0.0006662584492005408, - "learning_rate": 0.00019998328019137795, - "loss": 46.0, - "step": 36156 - }, - { - "epoch": 5.822818954064173, - "grad_norm": 0.0029247228521853685, - "learning_rate": 0.00019998327926626459, - "loss": 46.0, - "step": 36157 - }, - { - "epoch": 5.822979991142961, - "grad_norm": 0.00492109265178442, - "learning_rate": 0.00019998327834112563, - "loss": 46.0, - "step": 36158 - }, - { - "epoch": 5.823141028221748, - "grad_norm": 0.010264286771416664, - "learning_rate": 0.00019998327741596107, - "loss": 46.0, - "step": 36159 - }, - { - "epoch": 5.823302065300536, - "grad_norm": 0.0062038651667535305, - "learning_rate": 0.00019998327649077094, - "loss": 46.0, - "step": 36160 - }, - { - "epoch": 5.823463102379323, - "grad_norm": 0.006900831591337919, - "learning_rate": 0.0001999832755655552, - "loss": 46.0, - "step": 36161 - }, - { - "epoch": 5.8236241394581105, - "grad_norm": 0.007807180285453796, - "learning_rate": 0.00019998327464031387, - "loss": 46.0, - "step": 36162 - }, - { - "epoch": 5.823785176536898, - "grad_norm": 0.0009305981802754104, - "learning_rate": 0.00019998327371504698, - "loss": 46.0, - "step": 36163 - }, - { - "epoch": 5.8239462136156845, - "grad_norm": 0.004487654194235802, - "learning_rate": 0.00019998327278975445, - "loss": 46.0, - "step": 36164 - }, - { - "epoch": 5.824107250694473, - "grad_norm": 0.0012005011085420847, - "learning_rate": 0.00019998327186443639, - "loss": 46.0, - "step": 36165 - }, - { - "epoch": 5.8242682877732594, - "grad_norm": 0.005168136674910784, - "learning_rate": 0.00019998327093909268, - "loss": 46.0, - "step": 36166 - }, - { - "epoch": 5.824429324852047, - "grad_norm": 0.006324934773147106, - "learning_rate": 0.00019998327001372342, - "loss": 46.0, - "step": 36167 - }, - { - "epoch": 5.824590361930834, - "grad_norm": 0.004080827347934246, - "learning_rate": 0.00019998326908832854, - "loss": 46.0, - "step": 36168 - }, - { - "epoch": 5.824751399009622, - "grad_norm": 0.012046740390360355, - "learning_rate": 0.0001999832681629081, - "loss": 46.0, - "step": 36169 - }, - { - "epoch": 5.824912436088409, - "grad_norm": 0.012234746478497982, - "learning_rate": 0.00019998326723746202, - "loss": 46.0, - "step": 36170 - }, - { - "epoch": 5.825073473167197, - "grad_norm": 0.0036519921850413084, - "learning_rate": 0.0001999832663119904, - "loss": 46.0, - "step": 36171 - }, - { - "epoch": 5.825234510245984, - "grad_norm": 0.00487400870770216, - "learning_rate": 0.00019998326538649318, - "loss": 46.0, - "step": 36172 - }, - { - "epoch": 5.825395547324772, - "grad_norm": 0.022288190200924873, - "learning_rate": 0.00019998326446097036, - "loss": 46.0, - "step": 36173 - }, - { - "epoch": 5.825556584403559, - "grad_norm": 0.00806272029876709, - "learning_rate": 0.00019998326353542193, - "loss": 46.0, - "step": 36174 - }, - { - "epoch": 5.8257176214823465, - "grad_norm": 0.007910950109362602, - "learning_rate": 0.00019998326260984794, - "loss": 46.0, - "step": 36175 - }, - { - "epoch": 5.825878658561134, - "grad_norm": 0.0019355147378519177, - "learning_rate": 0.00019998326168424834, - "loss": 46.0, - "step": 36176 - }, - { - "epoch": 5.826039695639921, - "grad_norm": 0.0026842239312827587, - "learning_rate": 0.00019998326075862318, - "loss": 46.0, - "step": 36177 - }, - { - "epoch": 5.826200732718709, - "grad_norm": 0.012238512746989727, - "learning_rate": 0.0001999832598329724, - "loss": 46.0, - "step": 36178 - }, - { - "epoch": 5.826361769797495, - "grad_norm": 0.0280852522701025, - "learning_rate": 0.00019998325890729603, - "loss": 46.0, - "step": 36179 - }, - { - "epoch": 5.826522806876283, - "grad_norm": 0.00547018414363265, - "learning_rate": 0.00019998325798159408, - "loss": 46.0, - "step": 36180 - }, - { - "epoch": 5.82668384395507, - "grad_norm": 0.005220636259764433, - "learning_rate": 0.00019998325705586654, - "loss": 46.0, - "step": 36181 - }, - { - "epoch": 5.826844881033858, - "grad_norm": 0.007964136078953743, - "learning_rate": 0.0001999832561301134, - "loss": 46.0, - "step": 36182 - }, - { - "epoch": 5.827005918112645, - "grad_norm": 0.0066356235183775425, - "learning_rate": 0.00019998325520433467, - "loss": 46.0, - "step": 36183 - }, - { - "epoch": 5.827166955191433, - "grad_norm": 0.005015928763896227, - "learning_rate": 0.00019998325427853035, - "loss": 46.0, - "step": 36184 - }, - { - "epoch": 5.82732799227022, - "grad_norm": 0.005267851520329714, - "learning_rate": 0.00019998325335270046, - "loss": 46.0, - "step": 36185 - }, - { - "epoch": 5.827489029349008, - "grad_norm": 0.000808143406175077, - "learning_rate": 0.00019998325242684496, - "loss": 46.0, - "step": 36186 - }, - { - "epoch": 5.827650066427795, - "grad_norm": 0.010387449525296688, - "learning_rate": 0.00019998325150096387, - "loss": 46.0, - "step": 36187 - }, - { - "epoch": 5.8278111035065825, - "grad_norm": 0.015588668175041676, - "learning_rate": 0.0001999832505750572, - "loss": 46.0, - "step": 36188 - }, - { - "epoch": 5.82797214058537, - "grad_norm": 0.010332321748137474, - "learning_rate": 0.00019998324964912493, - "loss": 46.0, - "step": 36189 - }, - { - "epoch": 5.828133177664157, - "grad_norm": 0.0015874354867264628, - "learning_rate": 0.00019998324872316705, - "loss": 46.0, - "step": 36190 - }, - { - "epoch": 5.828294214742945, - "grad_norm": 0.0019170109881088138, - "learning_rate": 0.0001999832477971836, - "loss": 46.0, - "step": 36191 - }, - { - "epoch": 5.828455251821732, - "grad_norm": 0.0032974437344819307, - "learning_rate": 0.00019998324687117456, - "loss": 46.0, - "step": 36192 - }, - { - "epoch": 5.82861628890052, - "grad_norm": 0.003428238909691572, - "learning_rate": 0.00019998324594513992, - "loss": 46.0, - "step": 36193 - }, - { - "epoch": 5.828777325979306, - "grad_norm": 0.006415089126676321, - "learning_rate": 0.00019998324501907972, - "loss": 46.0, - "step": 36194 - }, - { - "epoch": 5.828938363058094, - "grad_norm": 0.0022081760689616203, - "learning_rate": 0.0001999832440929939, - "loss": 46.0, - "step": 36195 - }, - { - "epoch": 5.829099400136881, - "grad_norm": 0.002088650595396757, - "learning_rate": 0.00019998324316688248, - "loss": 46.0, - "step": 36196 - }, - { - "epoch": 5.829260437215669, - "grad_norm": 0.0030758059583604336, - "learning_rate": 0.0001999832422407455, - "loss": 46.0, - "step": 36197 - }, - { - "epoch": 5.829421474294456, - "grad_norm": 0.001338680856861174, - "learning_rate": 0.0001999832413145829, - "loss": 46.0, - "step": 36198 - }, - { - "epoch": 5.829582511373244, - "grad_norm": 0.0012243675300851464, - "learning_rate": 0.00019998324038839472, - "loss": 46.0, - "step": 36199 - }, - { - "epoch": 5.829743548452031, - "grad_norm": 0.005510313436388969, - "learning_rate": 0.00019998323946218097, - "loss": 46.0, - "step": 36200 - }, - { - "epoch": 5.8299045855308185, - "grad_norm": 0.004977809730917215, - "learning_rate": 0.0001999832385359416, - "loss": 46.0, - "step": 36201 - }, - { - "epoch": 5.830065622609606, - "grad_norm": 0.002307595917955041, - "learning_rate": 0.00019998323760967668, - "loss": 46.0, - "step": 36202 - }, - { - "epoch": 5.830226659688393, - "grad_norm": 0.0046867793425917625, - "learning_rate": 0.00019998323668338615, - "loss": 46.0, - "step": 36203 - }, - { - "epoch": 5.830387696767181, - "grad_norm": 0.0035534542985260487, - "learning_rate": 0.00019998323575707002, - "loss": 46.0, - "step": 36204 - }, - { - "epoch": 5.830548733845968, - "grad_norm": 0.006141949445009232, - "learning_rate": 0.00019998323483072828, - "loss": 46.0, - "step": 36205 - }, - { - "epoch": 5.830709770924756, - "grad_norm": 0.007672695908695459, - "learning_rate": 0.00019998323390436098, - "loss": 46.0, - "step": 36206 - }, - { - "epoch": 5.830870808003543, - "grad_norm": 0.001081821508705616, - "learning_rate": 0.0001999832329779681, - "loss": 46.0, - "step": 36207 - }, - { - "epoch": 5.831031845082331, - "grad_norm": 0.0015900125727057457, - "learning_rate": 0.00019998323205154962, - "loss": 46.0, - "step": 36208 - }, - { - "epoch": 5.831192882161117, - "grad_norm": 0.006711884867399931, - "learning_rate": 0.00019998323112510553, - "loss": 46.0, - "step": 36209 - }, - { - "epoch": 5.831353919239905, - "grad_norm": 0.006019486580044031, - "learning_rate": 0.00019998323019863586, - "loss": 46.0, - "step": 36210 - }, - { - "epoch": 5.831514956318692, - "grad_norm": 0.0029552823398262262, - "learning_rate": 0.0001999832292721406, - "loss": 46.0, - "step": 36211 - }, - { - "epoch": 5.8316759933974796, - "grad_norm": 0.005867298226803541, - "learning_rate": 0.00019998322834561974, - "loss": 46.0, - "step": 36212 - }, - { - "epoch": 5.831837030476267, - "grad_norm": 0.011626696214079857, - "learning_rate": 0.0001999832274190733, - "loss": 46.0, - "step": 36213 - }, - { - "epoch": 5.8319980675550545, - "grad_norm": 0.003413523081690073, - "learning_rate": 0.00019998322649250125, - "loss": 46.0, - "step": 36214 - }, - { - "epoch": 5.832159104633842, - "grad_norm": 0.005220881197601557, - "learning_rate": 0.00019998322556590364, - "loss": 46.0, - "step": 36215 - }, - { - "epoch": 5.832320141712629, - "grad_norm": 0.005811305250972509, - "learning_rate": 0.00019998322463928042, - "loss": 46.0, - "step": 36216 - }, - { - "epoch": 5.832481178791417, - "grad_norm": 0.0025376961566507816, - "learning_rate": 0.0001999832237126316, - "loss": 46.0, - "step": 36217 - }, - { - "epoch": 5.832642215870204, - "grad_norm": 0.0055940719321370125, - "learning_rate": 0.00019998322278595723, - "loss": 46.0, - "step": 36218 - }, - { - "epoch": 5.832803252948992, - "grad_norm": 0.004168991465121508, - "learning_rate": 0.00019998322185925722, - "loss": 46.0, - "step": 36219 - }, - { - "epoch": 5.832964290027779, - "grad_norm": 0.0012310562888160348, - "learning_rate": 0.00019998322093253167, - "loss": 46.0, - "step": 36220 - }, - { - "epoch": 5.833125327106567, - "grad_norm": 0.0031461971811950207, - "learning_rate": 0.00019998322000578048, - "loss": 46.0, - "step": 36221 - }, - { - "epoch": 5.833286364185353, - "grad_norm": 0.0010055401362478733, - "learning_rate": 0.00019998321907900373, - "loss": 46.0, - "step": 36222 - }, - { - "epoch": 5.8334474012641415, - "grad_norm": 0.004415613599121571, - "learning_rate": 0.00019998321815220137, - "loss": 46.0, - "step": 36223 - }, - { - "epoch": 5.833608438342928, - "grad_norm": 0.008054523728787899, - "learning_rate": 0.00019998321722537344, - "loss": 46.0, - "step": 36224 - }, - { - "epoch": 5.8337694754217155, - "grad_norm": 0.0010442027123644948, - "learning_rate": 0.00019998321629851993, - "loss": 46.0, - "step": 36225 - }, - { - "epoch": 5.833930512500503, - "grad_norm": 0.0011952792992815375, - "learning_rate": 0.0001999832153716408, - "loss": 46.0, - "step": 36226 - }, - { - "epoch": 5.83409154957929, - "grad_norm": 0.005662546958774328, - "learning_rate": 0.0001999832144447361, - "loss": 46.0, - "step": 36227 - }, - { - "epoch": 5.834252586658078, - "grad_norm": 0.006468662992119789, - "learning_rate": 0.00019998321351780577, - "loss": 46.0, - "step": 36228 - }, - { - "epoch": 5.834413623736865, - "grad_norm": 0.006029517389833927, - "learning_rate": 0.00019998321259084988, - "loss": 46.0, - "step": 36229 - }, - { - "epoch": 5.834574660815653, - "grad_norm": 0.00356446229852736, - "learning_rate": 0.0001999832116638684, - "loss": 46.0, - "step": 36230 - }, - { - "epoch": 5.83473569789444, - "grad_norm": 0.0048367357812821865, - "learning_rate": 0.00019998321073686132, - "loss": 46.0, - "step": 36231 - }, - { - "epoch": 5.834896734973228, - "grad_norm": 0.0046705519780516624, - "learning_rate": 0.00019998320980982868, - "loss": 46.0, - "step": 36232 - }, - { - "epoch": 5.835057772052015, - "grad_norm": 0.005171570926904678, - "learning_rate": 0.00019998320888277041, - "loss": 46.0, - "step": 36233 - }, - { - "epoch": 5.835218809130803, - "grad_norm": 0.0043447124771773815, - "learning_rate": 0.00019998320795568657, - "loss": 46.0, - "step": 36234 - }, - { - "epoch": 5.83537984620959, - "grad_norm": 0.000931622285861522, - "learning_rate": 0.00019998320702857713, - "loss": 46.0, - "step": 36235 - }, - { - "epoch": 5.8355408832883775, - "grad_norm": 0.015586710534989834, - "learning_rate": 0.00019998320610144213, - "loss": 46.0, - "step": 36236 - }, - { - "epoch": 5.835701920367164, - "grad_norm": 0.00594808254390955, - "learning_rate": 0.0001999832051742815, - "loss": 46.0, - "step": 36237 - }, - { - "epoch": 5.835862957445952, - "grad_norm": 0.004046519286930561, - "learning_rate": 0.0001999832042470953, - "loss": 46.0, - "step": 36238 - }, - { - "epoch": 5.836023994524739, - "grad_norm": 0.009893613867461681, - "learning_rate": 0.00019998320331988348, - "loss": 46.0, - "step": 36239 - }, - { - "epoch": 5.836185031603526, - "grad_norm": 0.009663325734436512, - "learning_rate": 0.00019998320239264609, - "loss": 46.0, - "step": 36240 - }, - { - "epoch": 5.836346068682314, - "grad_norm": 0.0025598008651286364, - "learning_rate": 0.00019998320146538313, - "loss": 46.0, - "step": 36241 - }, - { - "epoch": 5.836507105761101, - "grad_norm": 0.006545356009155512, - "learning_rate": 0.00019998320053809455, - "loss": 46.0, - "step": 36242 - }, - { - "epoch": 5.836668142839889, - "grad_norm": 0.01965847983956337, - "learning_rate": 0.0001999831996107804, - "loss": 46.0, - "step": 36243 - }, - { - "epoch": 5.836829179918676, - "grad_norm": 0.007745584473013878, - "learning_rate": 0.00019998319868344064, - "loss": 46.0, - "step": 36244 - }, - { - "epoch": 5.836990216997464, - "grad_norm": 0.003792156232520938, - "learning_rate": 0.0001999831977560753, - "loss": 46.0, - "step": 36245 - }, - { - "epoch": 5.837151254076251, - "grad_norm": 0.0022242029663175344, - "learning_rate": 0.00019998319682868436, - "loss": 46.0, - "step": 36246 - }, - { - "epoch": 5.837312291155039, - "grad_norm": 0.005726187955588102, - "learning_rate": 0.00019998319590126785, - "loss": 46.0, - "step": 36247 - }, - { - "epoch": 5.837473328233826, - "grad_norm": 0.006139697041362524, - "learning_rate": 0.00019998319497382573, - "loss": 46.0, - "step": 36248 - }, - { - "epoch": 5.8376343653126135, - "grad_norm": 0.0064367749728262424, - "learning_rate": 0.00019998319404635804, - "loss": 46.0, - "step": 36249 - }, - { - "epoch": 5.837795402391401, - "grad_norm": 0.0029867093544453382, - "learning_rate": 0.00019998319311886474, - "loss": 46.0, - "step": 36250 - }, - { - "epoch": 5.837956439470188, - "grad_norm": 0.002268476877361536, - "learning_rate": 0.00019998319219134586, - "loss": 46.0, - "step": 36251 - }, - { - "epoch": 5.838117476548975, - "grad_norm": 0.002606463385745883, - "learning_rate": 0.00019998319126380139, - "loss": 46.0, - "step": 36252 - }, - { - "epoch": 5.838278513627763, - "grad_norm": 0.013271412812173367, - "learning_rate": 0.00019998319033623133, - "loss": 46.0, - "step": 36253 - }, - { - "epoch": 5.83843955070655, - "grad_norm": 0.009845655411481857, - "learning_rate": 0.00019998318940863565, - "loss": 46.0, - "step": 36254 - }, - { - "epoch": 5.838600587785337, - "grad_norm": 0.006141947582364082, - "learning_rate": 0.00019998318848101442, - "loss": 46.0, - "step": 36255 - }, - { - "epoch": 5.838761624864125, - "grad_norm": 0.00165889086201787, - "learning_rate": 0.00019998318755336757, - "loss": 46.0, - "step": 36256 - }, - { - "epoch": 5.838922661942912, - "grad_norm": 0.0033974875696003437, - "learning_rate": 0.00019998318662569516, - "loss": 46.0, - "step": 36257 - }, - { - "epoch": 5.8390836990217, - "grad_norm": 0.000862635497469455, - "learning_rate": 0.00019998318569799714, - "loss": 46.0, - "step": 36258 - }, - { - "epoch": 5.839244736100487, - "grad_norm": 0.002606874331831932, - "learning_rate": 0.0001999831847702735, - "loss": 46.0, - "step": 36259 - }, - { - "epoch": 5.839405773179275, - "grad_norm": 0.01638873852789402, - "learning_rate": 0.00019998318384252433, - "loss": 46.0, - "step": 36260 - }, - { - "epoch": 5.839566810258062, - "grad_norm": 0.008939722552895546, - "learning_rate": 0.00019998318291474952, - "loss": 46.0, - "step": 36261 - }, - { - "epoch": 5.8397278473368495, - "grad_norm": 0.021137308329343796, - "learning_rate": 0.00019998318198694915, - "loss": 46.0, - "step": 36262 - }, - { - "epoch": 5.839888884415637, - "grad_norm": 0.00622069276869297, - "learning_rate": 0.0001999831810591232, - "loss": 46.0, - "step": 36263 - }, - { - "epoch": 5.840049921494424, - "grad_norm": 0.002902314765378833, - "learning_rate": 0.00019998318013127162, - "loss": 46.0, - "step": 36264 - }, - { - "epoch": 5.840210958573212, - "grad_norm": 0.0034124949015676975, - "learning_rate": 0.00019998317920339446, - "loss": 46.0, - "step": 36265 - }, - { - "epoch": 5.840371995651999, - "grad_norm": 0.0028374777175486088, - "learning_rate": 0.00019998317827549174, - "loss": 46.0, - "step": 36266 - }, - { - "epoch": 5.840533032730786, - "grad_norm": 0.008170254528522491, - "learning_rate": 0.00019998317734756338, - "loss": 46.0, - "step": 36267 - }, - { - "epoch": 5.840694069809573, - "grad_norm": 0.0020993370562791824, - "learning_rate": 0.00019998317641960946, - "loss": 46.0, - "step": 36268 - }, - { - "epoch": 5.840855106888361, - "grad_norm": 0.028592543676495552, - "learning_rate": 0.00019998317549162995, - "loss": 46.0, - "step": 36269 - }, - { - "epoch": 5.841016143967148, - "grad_norm": 0.0036306146066635847, - "learning_rate": 0.00019998317456362483, - "loss": 46.0, - "step": 36270 - }, - { - "epoch": 5.841177181045936, - "grad_norm": 0.007978690788149834, - "learning_rate": 0.00019998317363559417, - "loss": 46.0, - "step": 36271 - }, - { - "epoch": 5.841338218124723, - "grad_norm": 0.022357061505317688, - "learning_rate": 0.00019998317270753785, - "loss": 46.0, - "step": 36272 - }, - { - "epoch": 5.8414992552035105, - "grad_norm": 0.0019512284779921174, - "learning_rate": 0.000199983171779456, - "loss": 46.0, - "step": 36273 - }, - { - "epoch": 5.841660292282298, - "grad_norm": 0.015916481614112854, - "learning_rate": 0.00019998317085134852, - "loss": 46.0, - "step": 36274 - }, - { - "epoch": 5.8418213293610854, - "grad_norm": 0.013155380263924599, - "learning_rate": 0.00019998316992321546, - "loss": 46.0, - "step": 36275 - }, - { - "epoch": 5.841982366439873, - "grad_norm": 0.003157972823828459, - "learning_rate": 0.0001999831689950568, - "loss": 46.0, - "step": 36276 - }, - { - "epoch": 5.84214340351866, - "grad_norm": 0.0020603672601282597, - "learning_rate": 0.00019998316806687258, - "loss": 46.0, - "step": 36277 - }, - { - "epoch": 5.842304440597448, - "grad_norm": 0.001891984255053103, - "learning_rate": 0.00019998316713866273, - "loss": 46.0, - "step": 36278 - }, - { - "epoch": 5.842465477676235, - "grad_norm": 0.014637486077845097, - "learning_rate": 0.00019998316621042732, - "loss": 46.0, - "step": 36279 - }, - { - "epoch": 5.842626514755023, - "grad_norm": 0.005434535443782806, - "learning_rate": 0.00019998316528216633, - "loss": 46.0, - "step": 36280 - }, - { - "epoch": 5.84278755183381, - "grad_norm": 0.011293580755591393, - "learning_rate": 0.00019998316435387972, - "loss": 46.0, - "step": 36281 - }, - { - "epoch": 5.842948588912597, - "grad_norm": 0.001783306710422039, - "learning_rate": 0.0001999831634255675, - "loss": 46.0, - "step": 36282 - }, - { - "epoch": 5.843109625991384, - "grad_norm": 0.020579611882567406, - "learning_rate": 0.00019998316249722974, - "loss": 46.0, - "step": 36283 - }, - { - "epoch": 5.843270663070172, - "grad_norm": 0.0007040241616778076, - "learning_rate": 0.00019998316156886637, - "loss": 46.0, - "step": 36284 - }, - { - "epoch": 5.843431700148959, - "grad_norm": 0.012248336337506771, - "learning_rate": 0.0001999831606404774, - "loss": 46.0, - "step": 36285 - }, - { - "epoch": 5.8435927372277465, - "grad_norm": 0.013011324219405651, - "learning_rate": 0.00019998315971206284, - "loss": 46.0, - "step": 36286 - }, - { - "epoch": 5.843753774306534, - "grad_norm": 0.00786654930561781, - "learning_rate": 0.0001999831587836227, - "loss": 46.0, - "step": 36287 - }, - { - "epoch": 5.843914811385321, - "grad_norm": 0.0006335290381684899, - "learning_rate": 0.00019998315785515696, - "loss": 46.0, - "step": 36288 - }, - { - "epoch": 5.844075848464109, - "grad_norm": 0.0006969252717681229, - "learning_rate": 0.00019998315692666566, - "loss": 46.0, - "step": 36289 - }, - { - "epoch": 5.844236885542896, - "grad_norm": 0.006457506213337183, - "learning_rate": 0.00019998315599814874, - "loss": 46.0, - "step": 36290 - }, - { - "epoch": 5.844397922621684, - "grad_norm": 0.0030615711584687233, - "learning_rate": 0.0001999831550696062, - "loss": 46.0, - "step": 36291 - }, - { - "epoch": 5.844558959700471, - "grad_norm": 0.005610724911093712, - "learning_rate": 0.0001999831541410381, - "loss": 46.0, - "step": 36292 - }, - { - "epoch": 5.844719996779259, - "grad_norm": 0.002621626714244485, - "learning_rate": 0.00019998315321244443, - "loss": 46.0, - "step": 36293 - }, - { - "epoch": 5.844881033858046, - "grad_norm": 0.005308116786181927, - "learning_rate": 0.00019998315228382516, - "loss": 46.0, - "step": 36294 - }, - { - "epoch": 5.845042070936834, - "grad_norm": 0.0016062383074313402, - "learning_rate": 0.00019998315135518027, - "loss": 46.0, - "step": 36295 - }, - { - "epoch": 5.845203108015621, - "grad_norm": 0.002875788602977991, - "learning_rate": 0.0001999831504265098, - "loss": 46.0, - "step": 36296 - }, - { - "epoch": 5.845364145094408, - "grad_norm": 0.005451788194477558, - "learning_rate": 0.00019998314949781377, - "loss": 46.0, - "step": 36297 - }, - { - "epoch": 5.845525182173195, - "grad_norm": 0.0073241665959358215, - "learning_rate": 0.00019998314856909213, - "loss": 46.0, - "step": 36298 - }, - { - "epoch": 5.8456862192519825, - "grad_norm": 0.003614874789491296, - "learning_rate": 0.00019998314764034492, - "loss": 46.0, - "step": 36299 - }, - { - "epoch": 5.84584725633077, - "grad_norm": 0.003140663728117943, - "learning_rate": 0.00019998314671157208, - "loss": 46.0, - "step": 36300 - }, - { - "epoch": 5.846008293409557, - "grad_norm": 0.005265682470053434, - "learning_rate": 0.00019998314578277367, - "loss": 46.0, - "step": 36301 - }, - { - "epoch": 5.846169330488345, - "grad_norm": 0.005553533788770437, - "learning_rate": 0.00019998314485394968, - "loss": 46.0, - "step": 36302 - }, - { - "epoch": 5.846330367567132, - "grad_norm": 0.0012782574631273746, - "learning_rate": 0.00019998314392510007, - "loss": 46.0, - "step": 36303 - }, - { - "epoch": 5.84649140464592, - "grad_norm": 0.0017012894386425614, - "learning_rate": 0.0001999831429962249, - "loss": 46.0, - "step": 36304 - }, - { - "epoch": 5.846652441724707, - "grad_norm": 0.0031299844849854708, - "learning_rate": 0.00019998314206732412, - "loss": 46.0, - "step": 36305 - }, - { - "epoch": 5.846813478803495, - "grad_norm": 0.007620898075401783, - "learning_rate": 0.00019998314113839775, - "loss": 46.0, - "step": 36306 - }, - { - "epoch": 5.846974515882282, - "grad_norm": 0.005849129986017942, - "learning_rate": 0.0001999831402094458, - "loss": 46.0, - "step": 36307 - }, - { - "epoch": 5.84713555296107, - "grad_norm": 0.0035931537859141827, - "learning_rate": 0.00019998313928046825, - "loss": 46.0, - "step": 36308 - }, - { - "epoch": 5.847296590039857, - "grad_norm": 0.0025375315453857183, - "learning_rate": 0.00019998313835146512, - "loss": 46.0, - "step": 36309 - }, - { - "epoch": 5.847457627118644, - "grad_norm": 0.010508900508284569, - "learning_rate": 0.0001999831374224364, - "loss": 46.0, - "step": 36310 - }, - { - "epoch": 5.847618664197432, - "grad_norm": 0.0019658776000142097, - "learning_rate": 0.00019998313649338204, - "loss": 46.0, - "step": 36311 - }, - { - "epoch": 5.8477797012762185, - "grad_norm": 0.00748297106474638, - "learning_rate": 0.00019998313556430217, - "loss": 46.0, - "step": 36312 - }, - { - "epoch": 5.847940738355006, - "grad_norm": 0.005840287543833256, - "learning_rate": 0.00019998313463519667, - "loss": 46.0, - "step": 36313 - }, - { - "epoch": 5.848101775433793, - "grad_norm": 0.010568608529865742, - "learning_rate": 0.00019998313370606557, - "loss": 46.0, - "step": 36314 - }, - { - "epoch": 5.848262812512581, - "grad_norm": 0.0030749221332371235, - "learning_rate": 0.00019998313277690887, - "loss": 46.0, - "step": 36315 - }, - { - "epoch": 5.848423849591368, - "grad_norm": 0.002867255825549364, - "learning_rate": 0.00019998313184772662, - "loss": 46.0, - "step": 36316 - }, - { - "epoch": 5.848584886670156, - "grad_norm": 0.007032209541648626, - "learning_rate": 0.00019998313091851877, - "loss": 46.0, - "step": 36317 - }, - { - "epoch": 5.848745923748943, - "grad_norm": 0.007477450184524059, - "learning_rate": 0.0001999831299892853, - "loss": 46.0, - "step": 36318 - }, - { - "epoch": 5.848906960827731, - "grad_norm": 0.012234101071953773, - "learning_rate": 0.00019998312906002624, - "loss": 46.0, - "step": 36319 - }, - { - "epoch": 5.849067997906518, - "grad_norm": 0.005514687858521938, - "learning_rate": 0.00019998312813074163, - "loss": 46.0, - "step": 36320 - }, - { - "epoch": 5.8492290349853056, - "grad_norm": 0.002884516492486, - "learning_rate": 0.00019998312720143142, - "loss": 46.0, - "step": 36321 - }, - { - "epoch": 5.849390072064093, - "grad_norm": 0.019594764336943626, - "learning_rate": 0.00019998312627209558, - "loss": 46.0, - "step": 36322 - }, - { - "epoch": 5.8495511091428805, - "grad_norm": 0.004947365261614323, - "learning_rate": 0.00019998312534273417, - "loss": 46.0, - "step": 36323 - }, - { - "epoch": 5.849712146221668, - "grad_norm": 0.00443473132327199, - "learning_rate": 0.00019998312441334718, - "loss": 46.0, - "step": 36324 - }, - { - "epoch": 5.8498731833004545, - "grad_norm": 0.002974113682284951, - "learning_rate": 0.00019998312348393462, - "loss": 46.0, - "step": 36325 - }, - { - "epoch": 5.850034220379243, - "grad_norm": 0.010956228710711002, - "learning_rate": 0.00019998312255449643, - "loss": 46.0, - "step": 36326 - }, - { - "epoch": 5.850195257458029, - "grad_norm": 0.0021672870498150587, - "learning_rate": 0.00019998312162503265, - "loss": 46.0, - "step": 36327 - }, - { - "epoch": 5.850356294536817, - "grad_norm": 0.0028575530741363764, - "learning_rate": 0.0001999831206955433, - "loss": 46.0, - "step": 36328 - }, - { - "epoch": 5.850517331615604, - "grad_norm": 0.00432627834379673, - "learning_rate": 0.00019998311976602838, - "loss": 46.0, - "step": 36329 - }, - { - "epoch": 5.850678368694392, - "grad_norm": 0.0027518507558852434, - "learning_rate": 0.0001999831188364878, - "loss": 46.0, - "step": 36330 - }, - { - "epoch": 5.850839405773179, - "grad_norm": 0.004824016243219376, - "learning_rate": 0.00019998311790692168, - "loss": 46.0, - "step": 36331 - }, - { - "epoch": 5.851000442851967, - "grad_norm": 0.0047799102030694485, - "learning_rate": 0.00019998311697732996, - "loss": 46.0, - "step": 36332 - }, - { - "epoch": 5.851161479930754, - "grad_norm": 0.004516626708209515, - "learning_rate": 0.00019998311604771266, - "loss": 46.0, - "step": 36333 - }, - { - "epoch": 5.8513225170095415, - "grad_norm": 0.0026720231398940086, - "learning_rate": 0.00019998311511806977, - "loss": 46.0, - "step": 36334 - }, - { - "epoch": 5.851483554088329, - "grad_norm": 0.003077418077737093, - "learning_rate": 0.00019998311418840126, - "loss": 46.0, - "step": 36335 - }, - { - "epoch": 5.851644591167116, - "grad_norm": 0.0037507705856114626, - "learning_rate": 0.0001999831132587072, - "loss": 46.0, - "step": 36336 - }, - { - "epoch": 5.851805628245904, - "grad_norm": 0.006240047048777342, - "learning_rate": 0.00019998311232898751, - "loss": 46.0, - "step": 36337 - }, - { - "epoch": 5.851966665324691, - "grad_norm": 0.0036264536902308464, - "learning_rate": 0.00019998311139924227, - "loss": 46.0, - "step": 36338 - }, - { - "epoch": 5.852127702403479, - "grad_norm": 0.0033457425888627768, - "learning_rate": 0.0001999831104694714, - "loss": 46.0, - "step": 36339 - }, - { - "epoch": 5.852288739482265, - "grad_norm": 0.002295734593644738, - "learning_rate": 0.00019998310953967495, - "loss": 46.0, - "step": 36340 - }, - { - "epoch": 5.852449776561053, - "grad_norm": 0.014742361381649971, - "learning_rate": 0.00019998310860985292, - "loss": 46.0, - "step": 36341 - }, - { - "epoch": 5.85261081363984, - "grad_norm": 0.0023084236308932304, - "learning_rate": 0.0001999831076800053, - "loss": 46.0, - "step": 36342 - }, - { - "epoch": 5.852771850718628, - "grad_norm": 0.0025524224620312452, - "learning_rate": 0.00019998310675013208, - "loss": 46.0, - "step": 36343 - }, - { - "epoch": 5.852932887797415, - "grad_norm": 0.0047472259029746056, - "learning_rate": 0.00019998310582023329, - "loss": 46.0, - "step": 36344 - }, - { - "epoch": 5.853093924876203, - "grad_norm": 0.003503593150526285, - "learning_rate": 0.00019998310489030888, - "loss": 46.0, - "step": 36345 - }, - { - "epoch": 5.85325496195499, - "grad_norm": 0.004680312238633633, - "learning_rate": 0.0001999831039603589, - "loss": 46.0, - "step": 36346 - }, - { - "epoch": 5.8534159990337775, - "grad_norm": 0.002561883069574833, - "learning_rate": 0.0001999831030303833, - "loss": 46.0, - "step": 36347 - }, - { - "epoch": 5.853577036112565, - "grad_norm": 0.007297783624380827, - "learning_rate": 0.00019998310210038214, - "loss": 46.0, - "step": 36348 - }, - { - "epoch": 5.853738073191352, - "grad_norm": 0.003774170530959964, - "learning_rate": 0.00019998310117035536, - "loss": 46.0, - "step": 36349 - }, - { - "epoch": 5.85389911027014, - "grad_norm": 0.006983309984207153, - "learning_rate": 0.00019998310024030302, - "loss": 46.0, - "step": 36350 - }, - { - "epoch": 5.854060147348927, - "grad_norm": 0.0037038393784314394, - "learning_rate": 0.0001999830993102251, - "loss": 46.0, - "step": 36351 - }, - { - "epoch": 5.854221184427715, - "grad_norm": 0.003117919433861971, - "learning_rate": 0.00019998309838012158, - "loss": 46.0, - "step": 36352 - }, - { - "epoch": 5.854382221506502, - "grad_norm": 0.0026418811175972223, - "learning_rate": 0.00019998309744999245, - "loss": 46.0, - "step": 36353 - }, - { - "epoch": 5.85454325858529, - "grad_norm": 0.004526711534708738, - "learning_rate": 0.00019998309651983774, - "loss": 46.0, - "step": 36354 - }, - { - "epoch": 5.854704295664076, - "grad_norm": 0.002510344609618187, - "learning_rate": 0.00019998309558965743, - "loss": 46.0, - "step": 36355 - }, - { - "epoch": 5.854865332742864, - "grad_norm": 0.005148347932845354, - "learning_rate": 0.00019998309465945154, - "loss": 46.0, - "step": 36356 - }, - { - "epoch": 5.855026369821651, - "grad_norm": 0.010461927391588688, - "learning_rate": 0.00019998309372922006, - "loss": 46.0, - "step": 36357 - }, - { - "epoch": 5.855187406900439, - "grad_norm": 0.002579952822998166, - "learning_rate": 0.000199983092798963, - "loss": 46.0, - "step": 36358 - }, - { - "epoch": 5.855348443979226, - "grad_norm": 0.0033212583512067795, - "learning_rate": 0.00019998309186868032, - "loss": 46.0, - "step": 36359 - }, - { - "epoch": 5.8555094810580135, - "grad_norm": 0.0040848152711987495, - "learning_rate": 0.00019998309093837205, - "loss": 46.0, - "step": 36360 - }, - { - "epoch": 5.855670518136801, - "grad_norm": 0.007760020438581705, - "learning_rate": 0.0001999830900080382, - "loss": 46.0, - "step": 36361 - }, - { - "epoch": 5.855831555215588, - "grad_norm": 0.0017486640717834234, - "learning_rate": 0.00019998308907767879, - "loss": 46.0, - "step": 36362 - }, - { - "epoch": 5.855992592294376, - "grad_norm": 0.0010235154768452048, - "learning_rate": 0.00019998308814729376, - "loss": 46.0, - "step": 36363 - }, - { - "epoch": 5.856153629373163, - "grad_norm": 0.002583742141723633, - "learning_rate": 0.00019998308721688312, - "loss": 46.0, - "step": 36364 - }, - { - "epoch": 5.856314666451951, - "grad_norm": 0.0033911135978996754, - "learning_rate": 0.00019998308628644694, - "loss": 46.0, - "step": 36365 - }, - { - "epoch": 5.856475703530738, - "grad_norm": 0.006008876487612724, - "learning_rate": 0.00019998308535598512, - "loss": 46.0, - "step": 36366 - }, - { - "epoch": 5.856636740609526, - "grad_norm": 0.008264592848718166, - "learning_rate": 0.00019998308442549775, - "loss": 46.0, - "step": 36367 - }, - { - "epoch": 5.856797777688313, - "grad_norm": 0.0016596839996054769, - "learning_rate": 0.00019998308349498478, - "loss": 46.0, - "step": 36368 - }, - { - "epoch": 5.856958814767101, - "grad_norm": 0.0009562142658978701, - "learning_rate": 0.0001999830825644462, - "loss": 46.0, - "step": 36369 - }, - { - "epoch": 5.857119851845887, - "grad_norm": 0.00973709486424923, - "learning_rate": 0.00019998308163388204, - "loss": 46.0, - "step": 36370 - }, - { - "epoch": 5.857280888924675, - "grad_norm": 0.011221935041248798, - "learning_rate": 0.0001999830807032923, - "loss": 46.0, - "step": 36371 - }, - { - "epoch": 5.857441926003462, - "grad_norm": 0.019078610464930534, - "learning_rate": 0.00019998307977267695, - "loss": 46.0, - "step": 36372 - }, - { - "epoch": 5.8576029630822495, - "grad_norm": 0.016757983714342117, - "learning_rate": 0.000199983078842036, - "loss": 46.0, - "step": 36373 - }, - { - "epoch": 5.857764000161037, - "grad_norm": 0.008735565468668938, - "learning_rate": 0.00019998307791136948, - "loss": 46.0, - "step": 36374 - }, - { - "epoch": 5.857925037239824, - "grad_norm": 0.0015567387454211712, - "learning_rate": 0.00019998307698067738, - "loss": 46.0, - "step": 36375 - }, - { - "epoch": 5.858086074318612, - "grad_norm": 0.005044006276875734, - "learning_rate": 0.00019998307604995966, - "loss": 46.0, - "step": 36376 - }, - { - "epoch": 5.858247111397399, - "grad_norm": 0.0026960745453834534, - "learning_rate": 0.0001999830751192164, - "loss": 46.0, - "step": 36377 - }, - { - "epoch": 5.858408148476187, - "grad_norm": 0.017673548310995102, - "learning_rate": 0.0001999830741884475, - "loss": 46.0, - "step": 36378 - }, - { - "epoch": 5.858569185554974, - "grad_norm": 0.002498205518350005, - "learning_rate": 0.00019998307325765302, - "loss": 46.0, - "step": 36379 - }, - { - "epoch": 5.858730222633762, - "grad_norm": 0.0016205819556489587, - "learning_rate": 0.00019998307232683296, - "loss": 46.0, - "step": 36380 - }, - { - "epoch": 5.858891259712549, - "grad_norm": 0.0027848598547279835, - "learning_rate": 0.00019998307139598728, - "loss": 46.0, - "step": 36381 - }, - { - "epoch": 5.8590522967913365, - "grad_norm": 0.0014832587912678719, - "learning_rate": 0.00019998307046511604, - "loss": 46.0, - "step": 36382 - }, - { - "epoch": 5.859213333870123, - "grad_norm": 0.003027052618563175, - "learning_rate": 0.00019998306953421922, - "loss": 46.0, - "step": 36383 - }, - { - "epoch": 5.8593743709489114, - "grad_norm": 0.0014254313427954912, - "learning_rate": 0.00019998306860329678, - "loss": 46.0, - "step": 36384 - }, - { - "epoch": 5.859535408027698, - "grad_norm": 0.00411205505952239, - "learning_rate": 0.00019998306767234878, - "loss": 46.0, - "step": 36385 - }, - { - "epoch": 5.8596964451064855, - "grad_norm": 0.0035211164504289627, - "learning_rate": 0.00019998306674137516, - "loss": 46.0, - "step": 36386 - }, - { - "epoch": 5.859857482185273, - "grad_norm": 0.008908253163099289, - "learning_rate": 0.00019998306581037596, - "loss": 46.0, - "step": 36387 - }, - { - "epoch": 5.86001851926406, - "grad_norm": 0.011570955626666546, - "learning_rate": 0.00019998306487935117, - "loss": 46.0, - "step": 36388 - }, - { - "epoch": 5.860179556342848, - "grad_norm": 0.002055235905572772, - "learning_rate": 0.00019998306394830077, - "loss": 46.0, - "step": 36389 - }, - { - "epoch": 5.860340593421635, - "grad_norm": 0.010774900205433369, - "learning_rate": 0.0001999830630172248, - "loss": 46.0, - "step": 36390 - }, - { - "epoch": 5.860501630500423, - "grad_norm": 0.011273350566625595, - "learning_rate": 0.00019998306208612326, - "loss": 46.0, - "step": 36391 - }, - { - "epoch": 5.86066266757921, - "grad_norm": 0.008249950595200062, - "learning_rate": 0.00019998306115499612, - "loss": 46.0, - "step": 36392 - }, - { - "epoch": 5.860823704657998, - "grad_norm": 0.010889701545238495, - "learning_rate": 0.00019998306022384337, - "loss": 46.0, - "step": 36393 - }, - { - "epoch": 5.860984741736785, - "grad_norm": 0.0038173669017851353, - "learning_rate": 0.00019998305929266503, - "loss": 46.0, - "step": 36394 - }, - { - "epoch": 5.8611457788155725, - "grad_norm": 0.0039767855778336525, - "learning_rate": 0.00019998305836146113, - "loss": 46.0, - "step": 36395 - }, - { - "epoch": 5.86130681589436, - "grad_norm": 0.0031858470756560564, - "learning_rate": 0.0001999830574302316, - "loss": 46.0, - "step": 36396 - }, - { - "epoch": 5.861467852973147, - "grad_norm": 0.0016732686199247837, - "learning_rate": 0.0001999830564989765, - "loss": 46.0, - "step": 36397 - }, - { - "epoch": 5.861628890051934, - "grad_norm": 0.003488459624350071, - "learning_rate": 0.00019998305556769578, - "loss": 46.0, - "step": 36398 - }, - { - "epoch": 5.861789927130722, - "grad_norm": 0.004864083137363195, - "learning_rate": 0.0001999830546363895, - "loss": 46.0, - "step": 36399 - }, - { - "epoch": 5.861950964209509, - "grad_norm": 0.011952180415391922, - "learning_rate": 0.00019998305370505764, - "loss": 46.0, - "step": 36400 - }, - { - "epoch": 5.862112001288296, - "grad_norm": 0.009938381612300873, - "learning_rate": 0.00019998305277370016, - "loss": 46.0, - "step": 36401 - }, - { - "epoch": 5.862273038367084, - "grad_norm": 0.0026790171395987272, - "learning_rate": 0.0001999830518423171, - "loss": 46.0, - "step": 36402 - }, - { - "epoch": 5.862434075445871, - "grad_norm": 0.0014267965452745557, - "learning_rate": 0.00019998305091090845, - "loss": 46.0, - "step": 36403 - }, - { - "epoch": 5.862595112524659, - "grad_norm": 0.00914012175053358, - "learning_rate": 0.00019998304997947424, - "loss": 46.0, - "step": 36404 - }, - { - "epoch": 5.862756149603446, - "grad_norm": 0.002978983335196972, - "learning_rate": 0.0001999830490480144, - "loss": 46.0, - "step": 36405 - }, - { - "epoch": 5.862917186682234, - "grad_norm": 0.01818298175930977, - "learning_rate": 0.00019998304811652898, - "loss": 46.0, - "step": 36406 - }, - { - "epoch": 5.863078223761021, - "grad_norm": 0.002179454080760479, - "learning_rate": 0.00019998304718501798, - "loss": 46.0, - "step": 36407 - }, - { - "epoch": 5.8632392608398085, - "grad_norm": 0.008454645052552223, - "learning_rate": 0.00019998304625348136, - "loss": 46.0, - "step": 36408 - }, - { - "epoch": 5.863400297918596, - "grad_norm": 0.0026484730187803507, - "learning_rate": 0.0001999830453219192, - "loss": 46.0, - "step": 36409 - }, - { - "epoch": 5.863561334997383, - "grad_norm": 0.00836569257080555, - "learning_rate": 0.0001999830443903314, - "loss": 46.0, - "step": 36410 - }, - { - "epoch": 5.863722372076171, - "grad_norm": 0.004000257700681686, - "learning_rate": 0.000199983043458718, - "loss": 46.0, - "step": 36411 - }, - { - "epoch": 5.863883409154958, - "grad_norm": 0.0014719569589942694, - "learning_rate": 0.00019998304252707904, - "loss": 46.0, - "step": 36412 - }, - { - "epoch": 5.864044446233745, - "grad_norm": 0.009013321250677109, - "learning_rate": 0.0001999830415954145, - "loss": 46.0, - "step": 36413 - }, - { - "epoch": 5.864205483312532, - "grad_norm": 0.0021256129257380962, - "learning_rate": 0.00019998304066372435, - "loss": 46.0, - "step": 36414 - }, - { - "epoch": 5.86436652039132, - "grad_norm": 0.004540143068879843, - "learning_rate": 0.00019998303973200863, - "loss": 46.0, - "step": 36415 - }, - { - "epoch": 5.864527557470107, - "grad_norm": 0.003974375780671835, - "learning_rate": 0.0001999830388002673, - "loss": 46.0, - "step": 36416 - }, - { - "epoch": 5.864688594548895, - "grad_norm": 0.002385141095146537, - "learning_rate": 0.00019998303786850037, - "loss": 46.0, - "step": 36417 - }, - { - "epoch": 5.864849631627682, - "grad_norm": 0.010230368934571743, - "learning_rate": 0.00019998303693670788, - "loss": 46.0, - "step": 36418 - }, - { - "epoch": 5.86501066870647, - "grad_norm": 0.016091374680399895, - "learning_rate": 0.00019998303600488976, - "loss": 46.0, - "step": 36419 - }, - { - "epoch": 5.865171705785257, - "grad_norm": 0.003257193835452199, - "learning_rate": 0.0001999830350730461, - "loss": 46.0, - "step": 36420 - }, - { - "epoch": 5.8653327428640445, - "grad_norm": 0.004660800565034151, - "learning_rate": 0.0001999830341411768, - "loss": 46.0, - "step": 36421 - }, - { - "epoch": 5.865493779942832, - "grad_norm": 0.01390034519135952, - "learning_rate": 0.00019998303320928194, - "loss": 46.0, - "step": 36422 - }, - { - "epoch": 5.865654817021619, - "grad_norm": 0.005368242971599102, - "learning_rate": 0.0001999830322773615, - "loss": 46.0, - "step": 36423 - }, - { - "epoch": 5.865815854100407, - "grad_norm": 0.004282278008759022, - "learning_rate": 0.00019998303134541545, - "loss": 46.0, - "step": 36424 - }, - { - "epoch": 5.865976891179194, - "grad_norm": 0.0017107764724642038, - "learning_rate": 0.0001999830304134438, - "loss": 46.0, - "step": 36425 - }, - { - "epoch": 5.866137928257982, - "grad_norm": 0.011549933813512325, - "learning_rate": 0.00019998302948144657, - "loss": 46.0, - "step": 36426 - }, - { - "epoch": 5.866298965336769, - "grad_norm": 0.006075679790228605, - "learning_rate": 0.00019998302854942374, - "loss": 46.0, - "step": 36427 - }, - { - "epoch": 5.866460002415556, - "grad_norm": 0.00539898918941617, - "learning_rate": 0.00019998302761737536, - "loss": 46.0, - "step": 36428 - }, - { - "epoch": 5.866621039494343, - "grad_norm": 0.010720433667302132, - "learning_rate": 0.00019998302668530133, - "loss": 46.0, - "step": 36429 - }, - { - "epoch": 5.866782076573131, - "grad_norm": 0.003219319274649024, - "learning_rate": 0.00019998302575320175, - "loss": 46.0, - "step": 36430 - }, - { - "epoch": 5.866943113651918, - "grad_norm": 0.01537872664630413, - "learning_rate": 0.00019998302482107655, - "loss": 46.0, - "step": 36431 - }, - { - "epoch": 5.867104150730706, - "grad_norm": 0.003630524268373847, - "learning_rate": 0.0001999830238889258, - "loss": 46.0, - "step": 36432 - }, - { - "epoch": 5.867265187809493, - "grad_norm": 0.005842970684170723, - "learning_rate": 0.00019998302295674942, - "loss": 46.0, - "step": 36433 - }, - { - "epoch": 5.8674262248882805, - "grad_norm": 0.003947276622056961, - "learning_rate": 0.00019998302202454745, - "loss": 46.0, - "step": 36434 - }, - { - "epoch": 5.867587261967068, - "grad_norm": 0.006555921398103237, - "learning_rate": 0.00019998302109231993, - "loss": 46.0, - "step": 36435 - }, - { - "epoch": 5.867748299045855, - "grad_norm": 0.006180745083838701, - "learning_rate": 0.0001999830201600668, - "loss": 46.0, - "step": 36436 - }, - { - "epoch": 5.867909336124643, - "grad_norm": 0.024030642583966255, - "learning_rate": 0.00019998301922778805, - "loss": 46.0, - "step": 36437 - }, - { - "epoch": 5.86807037320343, - "grad_norm": 0.006887456867843866, - "learning_rate": 0.00019998301829548374, - "loss": 46.0, - "step": 36438 - }, - { - "epoch": 5.868231410282218, - "grad_norm": 0.020555047318339348, - "learning_rate": 0.00019998301736315384, - "loss": 46.0, - "step": 36439 - }, - { - "epoch": 5.868392447361005, - "grad_norm": 0.01198846660554409, - "learning_rate": 0.00019998301643079836, - "loss": 46.0, - "step": 36440 - }, - { - "epoch": 5.868553484439793, - "grad_norm": 0.010833866894245148, - "learning_rate": 0.00019998301549841726, - "loss": 46.0, - "step": 36441 - }, - { - "epoch": 5.86871452151858, - "grad_norm": 0.004562126472592354, - "learning_rate": 0.00019998301456601057, - "loss": 46.0, - "step": 36442 - }, - { - "epoch": 5.868875558597367, - "grad_norm": 0.006616507191210985, - "learning_rate": 0.0001999830136335783, - "loss": 46.0, - "step": 36443 - }, - { - "epoch": 5.869036595676154, - "grad_norm": 0.001832295092754066, - "learning_rate": 0.00019998301270112044, - "loss": 46.0, - "step": 36444 - }, - { - "epoch": 5.8691976327549416, - "grad_norm": 0.0022298151161521673, - "learning_rate": 0.000199983011768637, - "loss": 46.0, - "step": 36445 - }, - { - "epoch": 5.869358669833729, - "grad_norm": 0.007917076349258423, - "learning_rate": 0.00019998301083612796, - "loss": 46.0, - "step": 36446 - }, - { - "epoch": 5.8695197069125165, - "grad_norm": 0.011314750649034977, - "learning_rate": 0.00019998300990359334, - "loss": 46.0, - "step": 36447 - }, - { - "epoch": 5.869680743991304, - "grad_norm": 0.00800985749810934, - "learning_rate": 0.0001999830089710331, - "loss": 46.0, - "step": 36448 - }, - { - "epoch": 5.869841781070091, - "grad_norm": 0.0070522441528737545, - "learning_rate": 0.0001999830080384473, - "loss": 46.0, - "step": 36449 - }, - { - "epoch": 5.870002818148879, - "grad_norm": 0.008857863955199718, - "learning_rate": 0.0001999830071058359, - "loss": 46.0, - "step": 36450 - }, - { - "epoch": 5.870163855227666, - "grad_norm": 0.007319858763366938, - "learning_rate": 0.00019998300617319893, - "loss": 46.0, - "step": 36451 - }, - { - "epoch": 5.870324892306454, - "grad_norm": 0.002215184736996889, - "learning_rate": 0.00019998300524053632, - "loss": 46.0, - "step": 36452 - }, - { - "epoch": 5.870485929385241, - "grad_norm": 0.006677348166704178, - "learning_rate": 0.00019998300430784814, - "loss": 46.0, - "step": 36453 - }, - { - "epoch": 5.870646966464029, - "grad_norm": 0.00866650603711605, - "learning_rate": 0.00019998300337513439, - "loss": 46.0, - "step": 36454 - }, - { - "epoch": 5.870808003542816, - "grad_norm": 0.002989734522998333, - "learning_rate": 0.00019998300244239504, - "loss": 46.0, - "step": 36455 - }, - { - "epoch": 5.870969040621603, - "grad_norm": 0.014965535141527653, - "learning_rate": 0.00019998300150963008, - "loss": 46.0, - "step": 36456 - }, - { - "epoch": 5.871130077700391, - "grad_norm": 0.000794540683273226, - "learning_rate": 0.00019998300057683953, - "loss": 46.0, - "step": 36457 - }, - { - "epoch": 5.8712911147791775, - "grad_norm": 0.0042830901220440865, - "learning_rate": 0.0001999829996440234, - "loss": 46.0, - "step": 36458 - }, - { - "epoch": 5.871452151857965, - "grad_norm": 0.002588928211480379, - "learning_rate": 0.00019998299871118168, - "loss": 46.0, - "step": 36459 - }, - { - "epoch": 5.871613188936752, - "grad_norm": 0.001984506845474243, - "learning_rate": 0.0001999829977783144, - "loss": 46.0, - "step": 36460 - }, - { - "epoch": 5.87177422601554, - "grad_norm": 0.005372058600187302, - "learning_rate": 0.00019998299684542147, - "loss": 46.0, - "step": 36461 - }, - { - "epoch": 5.871935263094327, - "grad_norm": 0.00507509196177125, - "learning_rate": 0.000199982995912503, - "loss": 46.0, - "step": 36462 - }, - { - "epoch": 5.872096300173115, - "grad_norm": 0.004046759568154812, - "learning_rate": 0.00019998299497955892, - "loss": 46.0, - "step": 36463 - }, - { - "epoch": 5.872257337251902, - "grad_norm": 0.010963904671370983, - "learning_rate": 0.00019998299404658923, - "loss": 46.0, - "step": 36464 - }, - { - "epoch": 5.87241837433069, - "grad_norm": 0.012687266804277897, - "learning_rate": 0.00019998299311359402, - "loss": 46.0, - "step": 36465 - }, - { - "epoch": 5.872579411409477, - "grad_norm": 0.004060177132487297, - "learning_rate": 0.00019998299218057316, - "loss": 46.0, - "step": 36466 - }, - { - "epoch": 5.872740448488265, - "grad_norm": 0.0034510409459471703, - "learning_rate": 0.0001999829912475267, - "loss": 46.0, - "step": 36467 - }, - { - "epoch": 5.872901485567052, - "grad_norm": 0.013134202919900417, - "learning_rate": 0.00019998299031445468, - "loss": 46.0, - "step": 36468 - }, - { - "epoch": 5.8730625226458395, - "grad_norm": 0.0029541996773332357, - "learning_rate": 0.00019998298938135703, - "loss": 46.0, - "step": 36469 - }, - { - "epoch": 5.873223559724627, - "grad_norm": 0.007828726433217525, - "learning_rate": 0.00019998298844823382, - "loss": 46.0, - "step": 36470 - }, - { - "epoch": 5.8733845968034135, - "grad_norm": 0.017941521480679512, - "learning_rate": 0.00019998298751508503, - "loss": 46.0, - "step": 36471 - }, - { - "epoch": 5.873545633882202, - "grad_norm": 0.002676720265299082, - "learning_rate": 0.00019998298658191065, - "loss": 46.0, - "step": 36472 - }, - { - "epoch": 5.873706670960988, - "grad_norm": 0.004685257561504841, - "learning_rate": 0.00019998298564871065, - "loss": 46.0, - "step": 36473 - }, - { - "epoch": 5.873867708039776, - "grad_norm": 0.007790350820869207, - "learning_rate": 0.0001999829847154851, - "loss": 46.0, - "step": 36474 - }, - { - "epoch": 5.874028745118563, - "grad_norm": 0.005516765173524618, - "learning_rate": 0.00019998298378223392, - "loss": 46.0, - "step": 36475 - }, - { - "epoch": 5.874189782197351, - "grad_norm": 0.003555112751200795, - "learning_rate": 0.00019998298284895717, - "loss": 46.0, - "step": 36476 - }, - { - "epoch": 5.874350819276138, - "grad_norm": 0.005349715240299702, - "learning_rate": 0.00019998298191565482, - "loss": 46.0, - "step": 36477 - }, - { - "epoch": 5.874511856354926, - "grad_norm": 0.01286313682794571, - "learning_rate": 0.00019998298098232686, - "loss": 46.0, - "step": 36478 - }, - { - "epoch": 5.874672893433713, - "grad_norm": 0.003224862040951848, - "learning_rate": 0.00019998298004897334, - "loss": 46.0, - "step": 36479 - }, - { - "epoch": 5.874833930512501, - "grad_norm": 0.001877399394288659, - "learning_rate": 0.00019998297911559424, - "loss": 46.0, - "step": 36480 - }, - { - "epoch": 5.874994967591288, - "grad_norm": 0.0032142880372703075, - "learning_rate": 0.00019998297818218952, - "loss": 46.0, - "step": 36481 - }, - { - "epoch": 5.8751560046700755, - "grad_norm": 0.0013049393892288208, - "learning_rate": 0.00019998297724875923, - "loss": 46.0, - "step": 36482 - }, - { - "epoch": 5.875317041748863, - "grad_norm": 0.0024424907751381397, - "learning_rate": 0.00019998297631530334, - "loss": 46.0, - "step": 36483 - }, - { - "epoch": 5.87547807882765, - "grad_norm": 0.0020358997862786055, - "learning_rate": 0.00019998297538182186, - "loss": 46.0, - "step": 36484 - }, - { - "epoch": 5.875639115906438, - "grad_norm": 0.0046354178339242935, - "learning_rate": 0.0001999829744483148, - "loss": 46.0, - "step": 36485 - }, - { - "epoch": 5.875800152985224, - "grad_norm": 0.012167622335255146, - "learning_rate": 0.00019998297351478213, - "loss": 46.0, - "step": 36486 - }, - { - "epoch": 5.875961190064012, - "grad_norm": 0.0034470614045858383, - "learning_rate": 0.00019998297258122386, - "loss": 46.0, - "step": 36487 - }, - { - "epoch": 5.876122227142799, - "grad_norm": 0.001387792406603694, - "learning_rate": 0.00019998297164764003, - "loss": 46.0, - "step": 36488 - }, - { - "epoch": 5.876283264221587, - "grad_norm": 0.0025637776125222445, - "learning_rate": 0.0001999829707140306, - "loss": 46.0, - "step": 36489 - }, - { - "epoch": 5.876444301300374, - "grad_norm": 0.00794809591025114, - "learning_rate": 0.00019998296978039558, - "loss": 46.0, - "step": 36490 - }, - { - "epoch": 5.876605338379162, - "grad_norm": 0.00251396419480443, - "learning_rate": 0.00019998296884673496, - "loss": 46.0, - "step": 36491 - }, - { - "epoch": 5.876766375457949, - "grad_norm": 0.0030576088465750217, - "learning_rate": 0.00019998296791304875, - "loss": 46.0, - "step": 36492 - }, - { - "epoch": 5.876927412536737, - "grad_norm": 0.004708112683147192, - "learning_rate": 0.00019998296697933696, - "loss": 46.0, - "step": 36493 - }, - { - "epoch": 5.877088449615524, - "grad_norm": 0.002636307617649436, - "learning_rate": 0.00019998296604559958, - "loss": 46.0, - "step": 36494 - }, - { - "epoch": 5.8772494866943115, - "grad_norm": 0.011757121421396732, - "learning_rate": 0.0001999829651118366, - "loss": 46.0, - "step": 36495 - }, - { - "epoch": 5.877410523773099, - "grad_norm": 0.002685957122594118, - "learning_rate": 0.00019998296417804803, - "loss": 46.0, - "step": 36496 - }, - { - "epoch": 5.877571560851886, - "grad_norm": 0.0022678556852042675, - "learning_rate": 0.00019998296324423386, - "loss": 46.0, - "step": 36497 - }, - { - "epoch": 5.877732597930674, - "grad_norm": 0.0013451090781018138, - "learning_rate": 0.00019998296231039413, - "loss": 46.0, - "step": 36498 - }, - { - "epoch": 5.877893635009461, - "grad_norm": 0.006251571234315634, - "learning_rate": 0.00019998296137652878, - "loss": 46.0, - "step": 36499 - }, - { - "epoch": 5.878054672088249, - "grad_norm": 0.008110497146844864, - "learning_rate": 0.00019998296044263785, - "loss": 46.0, - "step": 36500 - }, - { - "epoch": 5.878215709167035, - "grad_norm": 0.0063213468529284, - "learning_rate": 0.00019998295950872133, - "loss": 46.0, - "step": 36501 - }, - { - "epoch": 5.878376746245823, - "grad_norm": 0.0010849563404917717, - "learning_rate": 0.00019998295857477923, - "loss": 46.0, - "step": 36502 - }, - { - "epoch": 5.87853778332461, - "grad_norm": 0.002168738516047597, - "learning_rate": 0.0001999829576408115, - "loss": 46.0, - "step": 36503 - }, - { - "epoch": 5.878698820403398, - "grad_norm": 0.008430509828031063, - "learning_rate": 0.00019998295670681823, - "loss": 46.0, - "step": 36504 - }, - { - "epoch": 5.878859857482185, - "grad_norm": 0.0007118963985703886, - "learning_rate": 0.00019998295577279936, - "loss": 46.0, - "step": 36505 - }, - { - "epoch": 5.8790208945609725, - "grad_norm": 0.003994202706962824, - "learning_rate": 0.00019998295483875488, - "loss": 46.0, - "step": 36506 - }, - { - "epoch": 5.87918193163976, - "grad_norm": 0.0018941318849101663, - "learning_rate": 0.0001999829539046848, - "loss": 46.0, - "step": 36507 - }, - { - "epoch": 5.8793429687185474, - "grad_norm": 0.0021580078173428774, - "learning_rate": 0.00019998295297058918, - "loss": 46.0, - "step": 36508 - }, - { - "epoch": 5.879504005797335, - "grad_norm": 0.0025449655950069427, - "learning_rate": 0.00019998295203646794, - "loss": 46.0, - "step": 36509 - }, - { - "epoch": 5.879665042876122, - "grad_norm": 0.0011550434865057468, - "learning_rate": 0.0001999829511023211, - "loss": 46.0, - "step": 36510 - }, - { - "epoch": 5.87982607995491, - "grad_norm": 0.0047856662422418594, - "learning_rate": 0.00019998295016814867, - "loss": 46.0, - "step": 36511 - }, - { - "epoch": 5.879987117033697, - "grad_norm": 0.002842686604708433, - "learning_rate": 0.00019998294923395066, - "loss": 46.0, - "step": 36512 - }, - { - "epoch": 5.880148154112485, - "grad_norm": 0.005742587614804506, - "learning_rate": 0.00019998294829972707, - "loss": 46.0, - "step": 36513 - }, - { - "epoch": 5.880309191191272, - "grad_norm": 0.004429700318723917, - "learning_rate": 0.00019998294736547787, - "loss": 46.0, - "step": 36514 - }, - { - "epoch": 5.88047022827006, - "grad_norm": 0.004376598633825779, - "learning_rate": 0.00019998294643120307, - "loss": 46.0, - "step": 36515 - }, - { - "epoch": 5.880631265348846, - "grad_norm": 0.002024495741352439, - "learning_rate": 0.00019998294549690272, - "loss": 46.0, - "step": 36516 - }, - { - "epoch": 5.880792302427634, - "grad_norm": 0.002057221019640565, - "learning_rate": 0.00019998294456257672, - "loss": 46.0, - "step": 36517 - }, - { - "epoch": 5.880953339506421, - "grad_norm": 0.006331522483378649, - "learning_rate": 0.00019998294362822517, - "loss": 46.0, - "step": 36518 - }, - { - "epoch": 5.8811143765852085, - "grad_norm": 0.0012722924584522843, - "learning_rate": 0.00019998294269384803, - "loss": 46.0, - "step": 36519 - }, - { - "epoch": 5.881275413663996, - "grad_norm": 0.0025560788344591856, - "learning_rate": 0.0001999829417594453, - "loss": 46.0, - "step": 36520 - }, - { - "epoch": 5.881436450742783, - "grad_norm": 0.0037808669731020927, - "learning_rate": 0.00019998294082501696, - "loss": 46.0, - "step": 36521 - }, - { - "epoch": 5.881597487821571, - "grad_norm": 0.006396201904863119, - "learning_rate": 0.00019998293989056305, - "loss": 46.0, - "step": 36522 - }, - { - "epoch": 5.881758524900358, - "grad_norm": 0.003589616622775793, - "learning_rate": 0.00019998293895608354, - "loss": 46.0, - "step": 36523 - }, - { - "epoch": 5.881919561979146, - "grad_norm": 0.0021946635097265244, - "learning_rate": 0.00019998293802157843, - "loss": 46.0, - "step": 36524 - }, - { - "epoch": 5.882080599057933, - "grad_norm": 0.006754905916750431, - "learning_rate": 0.00019998293708704774, - "loss": 46.0, - "step": 36525 - }, - { - "epoch": 5.882241636136721, - "grad_norm": 0.005507329013198614, - "learning_rate": 0.00019998293615249146, - "loss": 46.0, - "step": 36526 - }, - { - "epoch": 5.882402673215508, - "grad_norm": 0.001875278539955616, - "learning_rate": 0.0001999829352179096, - "loss": 46.0, - "step": 36527 - }, - { - "epoch": 5.882563710294296, - "grad_norm": 0.011401423253118992, - "learning_rate": 0.00019998293428330211, - "loss": 46.0, - "step": 36528 - }, - { - "epoch": 5.882724747373083, - "grad_norm": 0.005787578411400318, - "learning_rate": 0.0001999829333486691, - "loss": 46.0, - "step": 36529 - }, - { - "epoch": 5.8828857844518705, - "grad_norm": 0.003180295694619417, - "learning_rate": 0.00019998293241401042, - "loss": 46.0, - "step": 36530 - }, - { - "epoch": 5.883046821530657, - "grad_norm": 0.002754882210865617, - "learning_rate": 0.00019998293147932618, - "loss": 46.0, - "step": 36531 - }, - { - "epoch": 5.8832078586094445, - "grad_norm": 0.007766124326735735, - "learning_rate": 0.00019998293054461638, - "loss": 46.0, - "step": 36532 - }, - { - "epoch": 5.883368895688232, - "grad_norm": 0.0023852549493312836, - "learning_rate": 0.00019998292960988096, - "loss": 46.0, - "step": 36533 - }, - { - "epoch": 5.883529932767019, - "grad_norm": 0.002860397333279252, - "learning_rate": 0.00019998292867511993, - "loss": 46.0, - "step": 36534 - }, - { - "epoch": 5.883690969845807, - "grad_norm": 0.003329914528876543, - "learning_rate": 0.00019998292774033337, - "loss": 46.0, - "step": 36535 - }, - { - "epoch": 5.883852006924594, - "grad_norm": 0.012467294931411743, - "learning_rate": 0.00019998292680552116, - "loss": 46.0, - "step": 36536 - }, - { - "epoch": 5.884013044003382, - "grad_norm": 0.0048507400788366795, - "learning_rate": 0.0001999829258706834, - "loss": 46.0, - "step": 36537 - }, - { - "epoch": 5.884174081082169, - "grad_norm": 0.0010412726551294327, - "learning_rate": 0.00019998292493582002, - "loss": 46.0, - "step": 36538 - }, - { - "epoch": 5.884335118160957, - "grad_norm": 0.009102469310164452, - "learning_rate": 0.00019998292400093108, - "loss": 46.0, - "step": 36539 - }, - { - "epoch": 5.884496155239744, - "grad_norm": 0.009847925044596195, - "learning_rate": 0.00019998292306601652, - "loss": 46.0, - "step": 36540 - }, - { - "epoch": 5.884657192318532, - "grad_norm": 0.0020139359403401613, - "learning_rate": 0.00019998292213107636, - "loss": 46.0, - "step": 36541 - }, - { - "epoch": 5.884818229397319, - "grad_norm": 0.006518482230603695, - "learning_rate": 0.00019998292119611065, - "loss": 46.0, - "step": 36542 - }, - { - "epoch": 5.8849792664761065, - "grad_norm": 0.0015360381221398711, - "learning_rate": 0.00019998292026111934, - "loss": 46.0, - "step": 36543 - }, - { - "epoch": 5.885140303554893, - "grad_norm": 0.0035662108566612005, - "learning_rate": 0.00019998291932610244, - "loss": 46.0, - "step": 36544 - }, - { - "epoch": 5.885301340633681, - "grad_norm": 0.00753018818795681, - "learning_rate": 0.00019998291839105992, - "loss": 46.0, - "step": 36545 - }, - { - "epoch": 5.885462377712468, - "grad_norm": 0.003252539085224271, - "learning_rate": 0.00019998291745599182, - "loss": 46.0, - "step": 36546 - }, - { - "epoch": 5.885623414791255, - "grad_norm": 0.0040799705311656, - "learning_rate": 0.00019998291652089813, - "loss": 46.0, - "step": 36547 - }, - { - "epoch": 5.885784451870043, - "grad_norm": 0.0009041695157065988, - "learning_rate": 0.00019998291558577888, - "loss": 46.0, - "step": 36548 - }, - { - "epoch": 5.88594548894883, - "grad_norm": 0.01083538867533207, - "learning_rate": 0.000199982914650634, - "loss": 46.0, - "step": 36549 - }, - { - "epoch": 5.886106526027618, - "grad_norm": 0.012440397404134274, - "learning_rate": 0.00019998291371546356, - "loss": 46.0, - "step": 36550 - }, - { - "epoch": 5.886267563106405, - "grad_norm": 0.022902099415659904, - "learning_rate": 0.00019998291278026752, - "loss": 46.0, - "step": 36551 - }, - { - "epoch": 5.886428600185193, - "grad_norm": 0.003278498537838459, - "learning_rate": 0.0001999829118450459, - "loss": 46.0, - "step": 36552 - }, - { - "epoch": 5.88658963726398, - "grad_norm": 0.0028832892421633005, - "learning_rate": 0.00019998291090979865, - "loss": 46.0, - "step": 36553 - }, - { - "epoch": 5.8867506743427676, - "grad_norm": 0.007608143147081137, - "learning_rate": 0.00019998290997452585, - "loss": 46.0, - "step": 36554 - }, - { - "epoch": 5.886911711421555, - "grad_norm": 0.01458243653178215, - "learning_rate": 0.00019998290903922743, - "loss": 46.0, - "step": 36555 - }, - { - "epoch": 5.8870727485003425, - "grad_norm": 0.005875394679605961, - "learning_rate": 0.00019998290810390343, - "loss": 46.0, - "step": 36556 - }, - { - "epoch": 5.88723378557913, - "grad_norm": 0.007683324161916971, - "learning_rate": 0.00019998290716855387, - "loss": 46.0, - "step": 36557 - }, - { - "epoch": 5.887394822657917, - "grad_norm": 0.003210650524124503, - "learning_rate": 0.00019998290623317867, - "loss": 46.0, - "step": 36558 - }, - { - "epoch": 5.887555859736704, - "grad_norm": 0.008093958720564842, - "learning_rate": 0.0001999829052977779, - "loss": 46.0, - "step": 36559 - }, - { - "epoch": 5.887716896815492, - "grad_norm": 0.0030021965503692627, - "learning_rate": 0.00019998290436235155, - "loss": 46.0, - "step": 36560 - }, - { - "epoch": 5.887877933894279, - "grad_norm": 0.015265724621713161, - "learning_rate": 0.00019998290342689958, - "loss": 46.0, - "step": 36561 - }, - { - "epoch": 5.888038970973066, - "grad_norm": 0.003575454233214259, - "learning_rate": 0.00019998290249142206, - "loss": 46.0, - "step": 36562 - }, - { - "epoch": 5.888200008051854, - "grad_norm": 0.005558281671255827, - "learning_rate": 0.00019998290155591892, - "loss": 46.0, - "step": 36563 - }, - { - "epoch": 5.888361045130641, - "grad_norm": 0.0057060206308960915, - "learning_rate": 0.0001999829006203902, - "loss": 46.0, - "step": 36564 - }, - { - "epoch": 5.888522082209429, - "grad_norm": 0.018302863463759422, - "learning_rate": 0.0001999828996848359, - "loss": 46.0, - "step": 36565 - }, - { - "epoch": 5.888683119288216, - "grad_norm": 0.005606064572930336, - "learning_rate": 0.000199982898749256, - "loss": 46.0, - "step": 36566 - }, - { - "epoch": 5.8888441563670035, - "grad_norm": 0.0022443048655986786, - "learning_rate": 0.00019998289781365052, - "loss": 46.0, - "step": 36567 - }, - { - "epoch": 5.889005193445791, - "grad_norm": 0.009120387956500053, - "learning_rate": 0.00019998289687801944, - "loss": 46.0, - "step": 36568 - }, - { - "epoch": 5.889166230524578, - "grad_norm": 0.0007909274427220225, - "learning_rate": 0.00019998289594236275, - "loss": 46.0, - "step": 36569 - }, - { - "epoch": 5.889327267603366, - "grad_norm": 0.00211282423697412, - "learning_rate": 0.00019998289500668047, - "loss": 46.0, - "step": 36570 - }, - { - "epoch": 5.889488304682153, - "grad_norm": 0.0032763362396508455, - "learning_rate": 0.00019998289407097264, - "loss": 46.0, - "step": 36571 - }, - { - "epoch": 5.889649341760941, - "grad_norm": 0.006326448637992144, - "learning_rate": 0.00019998289313523918, - "loss": 46.0, - "step": 36572 - }, - { - "epoch": 5.889810378839728, - "grad_norm": 0.004175235517323017, - "learning_rate": 0.00019998289219948015, - "loss": 46.0, - "step": 36573 - }, - { - "epoch": 5.889971415918515, - "grad_norm": 0.0026226313784718513, - "learning_rate": 0.00019998289126369555, - "loss": 46.0, - "step": 36574 - }, - { - "epoch": 5.890132452997302, - "grad_norm": 0.007245667744427919, - "learning_rate": 0.0001999828903278853, - "loss": 46.0, - "step": 36575 - }, - { - "epoch": 5.89029349007609, - "grad_norm": 0.009717113338410854, - "learning_rate": 0.0001999828893920495, - "loss": 46.0, - "step": 36576 - }, - { - "epoch": 5.890454527154877, - "grad_norm": 0.010242221876978874, - "learning_rate": 0.00019998288845618812, - "loss": 46.0, - "step": 36577 - }, - { - "epoch": 5.890615564233665, - "grad_norm": 0.026576397940516472, - "learning_rate": 0.00019998288752030112, - "loss": 46.0, - "step": 36578 - }, - { - "epoch": 5.890776601312452, - "grad_norm": 0.001437407685443759, - "learning_rate": 0.00019998288658438856, - "loss": 46.0, - "step": 36579 - }, - { - "epoch": 5.8909376383912395, - "grad_norm": 0.009794313460588455, - "learning_rate": 0.00019998288564845038, - "loss": 46.0, - "step": 36580 - }, - { - "epoch": 5.891098675470027, - "grad_norm": 0.005004182457923889, - "learning_rate": 0.00019998288471248662, - "loss": 46.0, - "step": 36581 - }, - { - "epoch": 5.891259712548814, - "grad_norm": 0.01586543396115303, - "learning_rate": 0.00019998288377649727, - "loss": 46.0, - "step": 36582 - }, - { - "epoch": 5.891420749627602, - "grad_norm": 0.012091053649783134, - "learning_rate": 0.00019998288284048236, - "loss": 46.0, - "step": 36583 - }, - { - "epoch": 5.891581786706389, - "grad_norm": 0.006689284462481737, - "learning_rate": 0.0001999828819044418, - "loss": 46.0, - "step": 36584 - }, - { - "epoch": 5.891742823785177, - "grad_norm": 0.001232705544680357, - "learning_rate": 0.0001999828809683757, - "loss": 46.0, - "step": 36585 - }, - { - "epoch": 5.891903860863964, - "grad_norm": 0.003674765583127737, - "learning_rate": 0.00019998288003228397, - "loss": 46.0, - "step": 36586 - }, - { - "epoch": 5.892064897942752, - "grad_norm": 0.0030214886646717787, - "learning_rate": 0.00019998287909616665, - "loss": 46.0, - "step": 36587 - }, - { - "epoch": 5.892225935021539, - "grad_norm": 0.003527117194607854, - "learning_rate": 0.00019998287816002378, - "loss": 46.0, - "step": 36588 - }, - { - "epoch": 5.892386972100326, - "grad_norm": 0.002677063923329115, - "learning_rate": 0.0001999828772238553, - "loss": 46.0, - "step": 36589 - }, - { - "epoch": 5.892548009179113, - "grad_norm": 0.003265031846240163, - "learning_rate": 0.00019998287628766122, - "loss": 46.0, - "step": 36590 - }, - { - "epoch": 5.892709046257901, - "grad_norm": 0.00640291441231966, - "learning_rate": 0.00019998287535144156, - "loss": 46.0, - "step": 36591 - }, - { - "epoch": 5.892870083336688, - "grad_norm": 0.004319123458117247, - "learning_rate": 0.0001999828744151963, - "loss": 46.0, - "step": 36592 - }, - { - "epoch": 5.8930311204154755, - "grad_norm": 0.01202433928847313, - "learning_rate": 0.00019998287347892547, - "loss": 46.0, - "step": 36593 - }, - { - "epoch": 5.893192157494263, - "grad_norm": 0.0032576615922152996, - "learning_rate": 0.00019998287254262902, - "loss": 46.0, - "step": 36594 - }, - { - "epoch": 5.89335319457305, - "grad_norm": 0.011612916365265846, - "learning_rate": 0.000199982871606307, - "loss": 46.0, - "step": 36595 - }, - { - "epoch": 5.893514231651838, - "grad_norm": 0.00565600348636508, - "learning_rate": 0.0001999828706699594, - "loss": 46.0, - "step": 36596 - }, - { - "epoch": 5.893675268730625, - "grad_norm": 0.0027215436566621065, - "learning_rate": 0.00019998286973358617, - "loss": 46.0, - "step": 36597 - }, - { - "epoch": 5.893836305809413, - "grad_norm": 0.0009165478986687958, - "learning_rate": 0.00019998286879718737, - "loss": 46.0, - "step": 36598 - }, - { - "epoch": 5.8939973428882, - "grad_norm": 0.002467401558533311, - "learning_rate": 0.000199982867860763, - "loss": 46.0, - "step": 36599 - }, - { - "epoch": 5.894158379966988, - "grad_norm": 0.004900219384580851, - "learning_rate": 0.00019998286692431304, - "loss": 46.0, - "step": 36600 - }, - { - "epoch": 5.894319417045775, - "grad_norm": 0.01821213588118553, - "learning_rate": 0.00019998286598783745, - "loss": 46.0, - "step": 36601 - }, - { - "epoch": 5.894480454124563, - "grad_norm": 0.0020022073294967413, - "learning_rate": 0.00019998286505133628, - "loss": 46.0, - "step": 36602 - }, - { - "epoch": 5.89464149120335, - "grad_norm": 0.017472347244620323, - "learning_rate": 0.00019998286411480954, - "loss": 46.0, - "step": 36603 - }, - { - "epoch": 5.894802528282137, - "grad_norm": 0.002706969855353236, - "learning_rate": 0.0001999828631782572, - "loss": 46.0, - "step": 36604 - }, - { - "epoch": 5.894963565360924, - "grad_norm": 0.0010954283643513918, - "learning_rate": 0.00019998286224167928, - "loss": 46.0, - "step": 36605 - }, - { - "epoch": 5.8951246024397115, - "grad_norm": 0.008376320824027061, - "learning_rate": 0.00019998286130507573, - "loss": 46.0, - "step": 36606 - }, - { - "epoch": 5.895285639518499, - "grad_norm": 0.002078205579891801, - "learning_rate": 0.00019998286036844665, - "loss": 46.0, - "step": 36607 - }, - { - "epoch": 5.895446676597286, - "grad_norm": 0.00561330933123827, - "learning_rate": 0.00019998285943179192, - "loss": 46.0, - "step": 36608 - }, - { - "epoch": 5.895607713676074, - "grad_norm": 0.0016333928797394037, - "learning_rate": 0.00019998285849511164, - "loss": 46.0, - "step": 36609 - }, - { - "epoch": 5.895768750754861, - "grad_norm": 0.0012208586558699608, - "learning_rate": 0.00019998285755840577, - "loss": 46.0, - "step": 36610 - }, - { - "epoch": 5.895929787833649, - "grad_norm": 0.004889764357358217, - "learning_rate": 0.00019998285662167428, - "loss": 46.0, - "step": 36611 - }, - { - "epoch": 5.896090824912436, - "grad_norm": 0.015877557918429375, - "learning_rate": 0.0001999828556849172, - "loss": 46.0, - "step": 36612 - }, - { - "epoch": 5.896251861991224, - "grad_norm": 0.009460185654461384, - "learning_rate": 0.00019998285474813457, - "loss": 46.0, - "step": 36613 - }, - { - "epoch": 5.896412899070011, - "grad_norm": 0.0036141336895525455, - "learning_rate": 0.00019998285381132632, - "loss": 46.0, - "step": 36614 - }, - { - "epoch": 5.8965739361487985, - "grad_norm": 0.007342739962041378, - "learning_rate": 0.0001999828528744925, - "loss": 46.0, - "step": 36615 - }, - { - "epoch": 5.896734973227586, - "grad_norm": 0.006484126206487417, - "learning_rate": 0.00019998285193763307, - "loss": 46.0, - "step": 36616 - }, - { - "epoch": 5.896896010306373, - "grad_norm": 0.006829795893281698, - "learning_rate": 0.00019998285100074806, - "loss": 46.0, - "step": 36617 - }, - { - "epoch": 5.897057047385161, - "grad_norm": 0.001765860477462411, - "learning_rate": 0.00019998285006383743, - "loss": 46.0, - "step": 36618 - }, - { - "epoch": 5.8972180844639475, - "grad_norm": 0.000543889997061342, - "learning_rate": 0.00019998284912690125, - "loss": 46.0, - "step": 36619 - }, - { - "epoch": 5.897379121542735, - "grad_norm": 0.0029645240865647793, - "learning_rate": 0.00019998284818993945, - "loss": 46.0, - "step": 36620 - }, - { - "epoch": 5.897540158621522, - "grad_norm": 0.002774882595986128, - "learning_rate": 0.0001999828472529521, - "loss": 46.0, - "step": 36621 - }, - { - "epoch": 5.89770119570031, - "grad_norm": 0.0020182610023766756, - "learning_rate": 0.00019998284631593912, - "loss": 46.0, - "step": 36622 - }, - { - "epoch": 5.897862232779097, - "grad_norm": 0.00874936580657959, - "learning_rate": 0.00019998284537890056, - "loss": 46.0, - "step": 36623 - }, - { - "epoch": 5.898023269857885, - "grad_norm": 0.021953793242573738, - "learning_rate": 0.00019998284444183639, - "loss": 46.0, - "step": 36624 - }, - { - "epoch": 5.898184306936672, - "grad_norm": 0.006183766759932041, - "learning_rate": 0.00019998284350474665, - "loss": 46.0, - "step": 36625 - }, - { - "epoch": 5.89834534401546, - "grad_norm": 0.007318221498280764, - "learning_rate": 0.00019998284256763133, - "loss": 46.0, - "step": 36626 - }, - { - "epoch": 5.898506381094247, - "grad_norm": 0.007123200222849846, - "learning_rate": 0.00019998284163049042, - "loss": 46.0, - "step": 36627 - }, - { - "epoch": 5.8986674181730345, - "grad_norm": 0.003372452687472105, - "learning_rate": 0.00019998284069332393, - "loss": 46.0, - "step": 36628 - }, - { - "epoch": 5.898828455251822, - "grad_norm": 0.005449937656521797, - "learning_rate": 0.00019998283975613182, - "loss": 46.0, - "step": 36629 - }, - { - "epoch": 5.898989492330609, - "grad_norm": 0.003368463134393096, - "learning_rate": 0.00019998283881891412, - "loss": 46.0, - "step": 36630 - }, - { - "epoch": 5.899150529409397, - "grad_norm": 0.0032204920426011086, - "learning_rate": 0.00019998283788167084, - "loss": 46.0, - "step": 36631 - }, - { - "epoch": 5.8993115664881834, - "grad_norm": 0.0011918552918359637, - "learning_rate": 0.00019998283694440194, - "loss": 46.0, - "step": 36632 - }, - { - "epoch": 5.899472603566972, - "grad_norm": 0.003111568745225668, - "learning_rate": 0.00019998283600710748, - "loss": 46.0, - "step": 36633 - }, - { - "epoch": 5.899633640645758, - "grad_norm": 0.006619945168495178, - "learning_rate": 0.00019998283506978743, - "loss": 46.0, - "step": 36634 - }, - { - "epoch": 5.899794677724546, - "grad_norm": 0.005429692566394806, - "learning_rate": 0.0001999828341324418, - "loss": 46.0, - "step": 36635 - }, - { - "epoch": 5.899955714803333, - "grad_norm": 0.0039047219324856997, - "learning_rate": 0.00019998283319507055, - "loss": 46.0, - "step": 36636 - }, - { - "epoch": 5.900116751882121, - "grad_norm": 0.004827241878956556, - "learning_rate": 0.00019998283225767374, - "loss": 46.0, - "step": 36637 - }, - { - "epoch": 5.900277788960908, - "grad_norm": 0.008907639421522617, - "learning_rate": 0.0001999828313202513, - "loss": 46.0, - "step": 36638 - }, - { - "epoch": 5.900438826039696, - "grad_norm": 0.001424536225385964, - "learning_rate": 0.0001999828303828033, - "loss": 46.0, - "step": 36639 - }, - { - "epoch": 5.900599863118483, - "grad_norm": 0.003321142867207527, - "learning_rate": 0.00019998282944532972, - "loss": 46.0, - "step": 36640 - }, - { - "epoch": 5.9007609001972705, - "grad_norm": 0.002521553775295615, - "learning_rate": 0.00019998282850783054, - "loss": 46.0, - "step": 36641 - }, - { - "epoch": 5.900921937276058, - "grad_norm": 0.0020214428659528494, - "learning_rate": 0.00019998282757030574, - "loss": 46.0, - "step": 36642 - }, - { - "epoch": 5.901082974354845, - "grad_norm": 0.0028818012215197086, - "learning_rate": 0.00019998282663275538, - "loss": 46.0, - "step": 36643 - }, - { - "epoch": 5.901244011433633, - "grad_norm": 0.025667818263173103, - "learning_rate": 0.0001999828256951794, - "loss": 46.0, - "step": 36644 - }, - { - "epoch": 5.90140504851242, - "grad_norm": 0.002696142066270113, - "learning_rate": 0.00019998282475757785, - "loss": 46.0, - "step": 36645 - }, - { - "epoch": 5.901566085591208, - "grad_norm": 0.006760791875422001, - "learning_rate": 0.00019998282381995073, - "loss": 46.0, - "step": 36646 - }, - { - "epoch": 5.901727122669994, - "grad_norm": 0.0014532756758853793, - "learning_rate": 0.000199982822882298, - "loss": 46.0, - "step": 36647 - }, - { - "epoch": 5.901888159748782, - "grad_norm": 0.02876780927181244, - "learning_rate": 0.00019998282194461968, - "loss": 46.0, - "step": 36648 - }, - { - "epoch": 5.902049196827569, - "grad_norm": 0.00649761687964201, - "learning_rate": 0.00019998282100691577, - "loss": 46.0, - "step": 36649 - }, - { - "epoch": 5.902210233906357, - "grad_norm": 0.002604781650006771, - "learning_rate": 0.00019998282006918627, - "loss": 46.0, - "step": 36650 - }, - { - "epoch": 5.902371270985144, - "grad_norm": 0.0011523580178618431, - "learning_rate": 0.00019998281913143117, - "loss": 46.0, - "step": 36651 - }, - { - "epoch": 5.902532308063932, - "grad_norm": 0.007648960687220097, - "learning_rate": 0.0001999828181936505, - "loss": 46.0, - "step": 36652 - }, - { - "epoch": 5.902693345142719, - "grad_norm": 0.002044451190158725, - "learning_rate": 0.0001999828172558442, - "loss": 46.0, - "step": 36653 - }, - { - "epoch": 5.9028543822215065, - "grad_norm": 0.0013684118166565895, - "learning_rate": 0.00019998281631801234, - "loss": 46.0, - "step": 36654 - }, - { - "epoch": 5.903015419300294, - "grad_norm": 0.005389058962464333, - "learning_rate": 0.00019998281538015489, - "loss": 46.0, - "step": 36655 - }, - { - "epoch": 5.903176456379081, - "grad_norm": 0.0020386220421642065, - "learning_rate": 0.00019998281444227184, - "loss": 46.0, - "step": 36656 - }, - { - "epoch": 5.903337493457869, - "grad_norm": 0.004870891105383635, - "learning_rate": 0.00019998281350436324, - "loss": 46.0, - "step": 36657 - }, - { - "epoch": 5.903498530536656, - "grad_norm": 0.011061694473028183, - "learning_rate": 0.000199982812566429, - "loss": 46.0, - "step": 36658 - }, - { - "epoch": 5.903659567615444, - "grad_norm": 0.006413934286683798, - "learning_rate": 0.00019998281162846916, - "loss": 46.0, - "step": 36659 - }, - { - "epoch": 5.903820604694231, - "grad_norm": 0.002223755931481719, - "learning_rate": 0.00019998281069048376, - "loss": 46.0, - "step": 36660 - }, - { - "epoch": 5.903981641773019, - "grad_norm": 0.001216917997226119, - "learning_rate": 0.00019998280975247275, - "loss": 46.0, - "step": 36661 - }, - { - "epoch": 5.904142678851805, - "grad_norm": 0.0033759952057152987, - "learning_rate": 0.00019998280881443619, - "loss": 46.0, - "step": 36662 - }, - { - "epoch": 5.904303715930593, - "grad_norm": 0.009596017189323902, - "learning_rate": 0.000199982807876374, - "loss": 46.0, - "step": 36663 - }, - { - "epoch": 5.90446475300938, - "grad_norm": 0.00442937295883894, - "learning_rate": 0.00019998280693828623, - "loss": 46.0, - "step": 36664 - }, - { - "epoch": 5.904625790088168, - "grad_norm": 0.0015244719106703997, - "learning_rate": 0.0001999828060001729, - "loss": 46.0, - "step": 36665 - }, - { - "epoch": 5.904786827166955, - "grad_norm": 0.008727251552045345, - "learning_rate": 0.00019998280506203393, - "loss": 46.0, - "step": 36666 - }, - { - "epoch": 5.9049478642457425, - "grad_norm": 0.024323971942067146, - "learning_rate": 0.00019998280412386938, - "loss": 46.0, - "step": 36667 - }, - { - "epoch": 5.90510890132453, - "grad_norm": 0.0024902704171836376, - "learning_rate": 0.00019998280318567926, - "loss": 46.0, - "step": 36668 - }, - { - "epoch": 5.905269938403317, - "grad_norm": 0.0018617215100675821, - "learning_rate": 0.00019998280224746355, - "loss": 46.0, - "step": 36669 - }, - { - "epoch": 5.905430975482105, - "grad_norm": 0.002323055872693658, - "learning_rate": 0.00019998280130922223, - "loss": 46.0, - "step": 36670 - }, - { - "epoch": 5.905592012560892, - "grad_norm": 0.013949236832559109, - "learning_rate": 0.00019998280037095533, - "loss": 46.0, - "step": 36671 - }, - { - "epoch": 5.90575304963968, - "grad_norm": 0.0015396467642858624, - "learning_rate": 0.00019998279943266286, - "loss": 46.0, - "step": 36672 - }, - { - "epoch": 5.905914086718467, - "grad_norm": 0.0022673136554658413, - "learning_rate": 0.00019998279849434475, - "loss": 46.0, - "step": 36673 - }, - { - "epoch": 5.906075123797255, - "grad_norm": 0.005111353471875191, - "learning_rate": 0.00019998279755600108, - "loss": 46.0, - "step": 36674 - }, - { - "epoch": 5.906236160876042, - "grad_norm": 0.0019955916795879602, - "learning_rate": 0.00019998279661763183, - "loss": 46.0, - "step": 36675 - }, - { - "epoch": 5.9063971979548295, - "grad_norm": 0.004853942431509495, - "learning_rate": 0.00019998279567923696, - "loss": 46.0, - "step": 36676 - }, - { - "epoch": 5.906558235033616, - "grad_norm": 0.0062437900342047215, - "learning_rate": 0.0001999827947408165, - "loss": 46.0, - "step": 36677 - }, - { - "epoch": 5.9067192721124036, - "grad_norm": 0.0019858789164572954, - "learning_rate": 0.00019998279380237049, - "loss": 46.0, - "step": 36678 - }, - { - "epoch": 5.906880309191191, - "grad_norm": 0.0011909750755876303, - "learning_rate": 0.00019998279286389888, - "loss": 46.0, - "step": 36679 - }, - { - "epoch": 5.9070413462699785, - "grad_norm": 0.0011144229210913181, - "learning_rate": 0.00019998279192540164, - "loss": 46.0, - "step": 36680 - }, - { - "epoch": 5.907202383348766, - "grad_norm": 0.0036076551768928766, - "learning_rate": 0.00019998279098687883, - "loss": 46.0, - "step": 36681 - }, - { - "epoch": 5.907363420427553, - "grad_norm": 0.005558940581977367, - "learning_rate": 0.00019998279004833044, - "loss": 46.0, - "step": 36682 - }, - { - "epoch": 5.907524457506341, - "grad_norm": 0.002356515731662512, - "learning_rate": 0.00019998278910975646, - "loss": 46.0, - "step": 36683 - }, - { - "epoch": 5.907685494585128, - "grad_norm": 0.00231341365724802, - "learning_rate": 0.00019998278817115686, - "loss": 46.0, - "step": 36684 - }, - { - "epoch": 5.907846531663916, - "grad_norm": 0.001637893496081233, - "learning_rate": 0.0001999827872325317, - "loss": 46.0, - "step": 36685 - }, - { - "epoch": 5.908007568742703, - "grad_norm": 0.0010874888394027948, - "learning_rate": 0.00019998278629388097, - "loss": 46.0, - "step": 36686 - }, - { - "epoch": 5.908168605821491, - "grad_norm": 0.0027469471096992493, - "learning_rate": 0.00019998278535520459, - "loss": 46.0, - "step": 36687 - }, - { - "epoch": 5.908329642900278, - "grad_norm": 0.0027042501606047153, - "learning_rate": 0.00019998278441650267, - "loss": 46.0, - "step": 36688 - }, - { - "epoch": 5.9084906799790655, - "grad_norm": 0.006202050019055605, - "learning_rate": 0.0001999827834777751, - "loss": 46.0, - "step": 36689 - }, - { - "epoch": 5.908651717057852, - "grad_norm": 0.016852544620633125, - "learning_rate": 0.000199982782539022, - "loss": 46.0, - "step": 36690 - }, - { - "epoch": 5.90881275413664, - "grad_norm": 0.003232463961467147, - "learning_rate": 0.00019998278160024332, - "loss": 46.0, - "step": 36691 - }, - { - "epoch": 5.908973791215427, - "grad_norm": 0.0029984028078615665, - "learning_rate": 0.00019998278066143897, - "loss": 46.0, - "step": 36692 - }, - { - "epoch": 5.909134828294214, - "grad_norm": 0.002600082429125905, - "learning_rate": 0.0001999827797226091, - "loss": 46.0, - "step": 36693 - }, - { - "epoch": 5.909295865373002, - "grad_norm": 0.011613850481808186, - "learning_rate": 0.00019998277878375363, - "loss": 46.0, - "step": 36694 - }, - { - "epoch": 5.909456902451789, - "grad_norm": 0.009321661666035652, - "learning_rate": 0.00019998277784487255, - "loss": 46.0, - "step": 36695 - }, - { - "epoch": 5.909617939530577, - "grad_norm": 0.0020441017113626003, - "learning_rate": 0.0001999827769059659, - "loss": 46.0, - "step": 36696 - }, - { - "epoch": 5.909778976609364, - "grad_norm": 0.002522408729419112, - "learning_rate": 0.00019998277596703363, - "loss": 46.0, - "step": 36697 - }, - { - "epoch": 5.909940013688152, - "grad_norm": 0.019608579576015472, - "learning_rate": 0.00019998277502807579, - "loss": 46.0, - "step": 36698 - }, - { - "epoch": 5.910101050766939, - "grad_norm": 0.003051810199394822, - "learning_rate": 0.00019998277408909236, - "loss": 46.0, - "step": 36699 - }, - { - "epoch": 5.910262087845727, - "grad_norm": 0.002684236504137516, - "learning_rate": 0.0001999827731500833, - "loss": 46.0, - "step": 36700 - }, - { - "epoch": 5.910423124924514, - "grad_norm": 0.013379686512053013, - "learning_rate": 0.00019998277221104874, - "loss": 46.0, - "step": 36701 - }, - { - "epoch": 5.9105841620033015, - "grad_norm": 0.0016601013485342264, - "learning_rate": 0.00019998277127198852, - "loss": 46.0, - "step": 36702 - }, - { - "epoch": 5.910745199082089, - "grad_norm": 0.004295967984944582, - "learning_rate": 0.00019998277033290272, - "loss": 46.0, - "step": 36703 - }, - { - "epoch": 5.910906236160876, - "grad_norm": 0.002593794371932745, - "learning_rate": 0.00019998276939379132, - "loss": 46.0, - "step": 36704 - }, - { - "epoch": 5.911067273239663, - "grad_norm": 0.0030752660240978003, - "learning_rate": 0.00019998276845465434, - "loss": 46.0, - "step": 36705 - }, - { - "epoch": 5.911228310318451, - "grad_norm": 0.011294949799776077, - "learning_rate": 0.00019998276751549175, - "loss": 46.0, - "step": 36706 - }, - { - "epoch": 5.911389347397238, - "grad_norm": 0.011487973853945732, - "learning_rate": 0.00019998276657630363, - "loss": 46.0, - "step": 36707 - }, - { - "epoch": 5.911550384476025, - "grad_norm": 0.00791802629828453, - "learning_rate": 0.00019998276563708989, - "loss": 46.0, - "step": 36708 - }, - { - "epoch": 5.911711421554813, - "grad_norm": 0.0009943817276507616, - "learning_rate": 0.00019998276469785053, - "loss": 46.0, - "step": 36709 - }, - { - "epoch": 5.9118724586336, - "grad_norm": 0.005660542752593756, - "learning_rate": 0.0001999827637585856, - "loss": 46.0, - "step": 36710 - }, - { - "epoch": 5.912033495712388, - "grad_norm": 0.0018545347265899181, - "learning_rate": 0.0001999827628192951, - "loss": 46.0, - "step": 36711 - }, - { - "epoch": 5.912194532791175, - "grad_norm": 0.0020563986618071795, - "learning_rate": 0.00019998276187997897, - "loss": 46.0, - "step": 36712 - }, - { - "epoch": 5.912355569869963, - "grad_norm": 0.0018423276487737894, - "learning_rate": 0.0001999827609406373, - "loss": 46.0, - "step": 36713 - }, - { - "epoch": 5.91251660694875, - "grad_norm": 0.003212996758520603, - "learning_rate": 0.00019998276000126998, - "loss": 46.0, - "step": 36714 - }, - { - "epoch": 5.9126776440275375, - "grad_norm": 0.0018341336399316788, - "learning_rate": 0.0001999827590618771, - "loss": 46.0, - "step": 36715 - }, - { - "epoch": 5.912838681106325, - "grad_norm": 0.0019206481520086527, - "learning_rate": 0.0001999827581224586, - "loss": 46.0, - "step": 36716 - }, - { - "epoch": 5.912999718185112, - "grad_norm": 0.01229285728186369, - "learning_rate": 0.00019998275718301458, - "loss": 46.0, - "step": 36717 - }, - { - "epoch": 5.9131607552639, - "grad_norm": 0.002880864543840289, - "learning_rate": 0.00019998275624354492, - "loss": 46.0, - "step": 36718 - }, - { - "epoch": 5.913321792342687, - "grad_norm": 0.006600085645914078, - "learning_rate": 0.00019998275530404966, - "loss": 46.0, - "step": 36719 - }, - { - "epoch": 5.913482829421474, - "grad_norm": 0.004796166904270649, - "learning_rate": 0.00019998275436452882, - "loss": 46.0, - "step": 36720 - }, - { - "epoch": 5.913643866500261, - "grad_norm": 0.016062308102846146, - "learning_rate": 0.0001999827534249824, - "loss": 46.0, - "step": 36721 - }, - { - "epoch": 5.913804903579049, - "grad_norm": 0.0097150644287467, - "learning_rate": 0.0001999827524854104, - "loss": 46.0, - "step": 36722 - }, - { - "epoch": 5.913965940657836, - "grad_norm": 0.004015443846583366, - "learning_rate": 0.00019998275154581278, - "loss": 46.0, - "step": 36723 - }, - { - "epoch": 5.914126977736624, - "grad_norm": 0.0021828943863511086, - "learning_rate": 0.0001999827506061896, - "loss": 46.0, - "step": 36724 - }, - { - "epoch": 5.914288014815411, - "grad_norm": 0.005539830774068832, - "learning_rate": 0.0001999827496665408, - "loss": 46.0, - "step": 36725 - }, - { - "epoch": 5.914449051894199, - "grad_norm": 0.0038171743508428335, - "learning_rate": 0.00019998274872686645, - "loss": 46.0, - "step": 36726 - }, - { - "epoch": 5.914610088972986, - "grad_norm": 0.005330363754183054, - "learning_rate": 0.00019998274778716647, - "loss": 46.0, - "step": 36727 - }, - { - "epoch": 5.9147711260517735, - "grad_norm": 0.00901678204536438, - "learning_rate": 0.0001999827468474409, - "loss": 46.0, - "step": 36728 - }, - { - "epoch": 5.914932163130561, - "grad_norm": 0.004908686503767967, - "learning_rate": 0.00019998274590768975, - "loss": 46.0, - "step": 36729 - }, - { - "epoch": 5.915093200209348, - "grad_norm": 0.001657033571973443, - "learning_rate": 0.00019998274496791304, - "loss": 46.0, - "step": 36730 - }, - { - "epoch": 5.915254237288136, - "grad_norm": 0.0011588465422391891, - "learning_rate": 0.0001999827440281107, - "loss": 46.0, - "step": 36731 - }, - { - "epoch": 5.915415274366923, - "grad_norm": 0.0031610203441232443, - "learning_rate": 0.00019998274308828274, - "loss": 46.0, - "step": 36732 - }, - { - "epoch": 5.915576311445711, - "grad_norm": 0.0034914512652903795, - "learning_rate": 0.00019998274214842927, - "loss": 46.0, - "step": 36733 - }, - { - "epoch": 5.915737348524498, - "grad_norm": 0.006047399248927832, - "learning_rate": 0.00019998274120855015, - "loss": 46.0, - "step": 36734 - }, - { - "epoch": 5.915898385603285, - "grad_norm": 0.0038502642419189215, - "learning_rate": 0.00019998274026864548, - "loss": 46.0, - "step": 36735 - }, - { - "epoch": 5.916059422682072, - "grad_norm": 0.010237429291009903, - "learning_rate": 0.0001999827393287152, - "loss": 46.0, - "step": 36736 - }, - { - "epoch": 5.91622045976086, - "grad_norm": 0.0006746245780959725, - "learning_rate": 0.0001999827383887593, - "loss": 46.0, - "step": 36737 - }, - { - "epoch": 5.916381496839647, - "grad_norm": 0.0018468481721356511, - "learning_rate": 0.00019998273744877785, - "loss": 46.0, - "step": 36738 - }, - { - "epoch": 5.9165425339184345, - "grad_norm": 0.005950854625552893, - "learning_rate": 0.0001999827365087708, - "loss": 46.0, - "step": 36739 - }, - { - "epoch": 5.916703570997222, - "grad_norm": 0.004224453587085009, - "learning_rate": 0.00019998273556873816, - "loss": 46.0, - "step": 36740 - }, - { - "epoch": 5.9168646080760094, - "grad_norm": 0.002298058243468404, - "learning_rate": 0.0001999827346286799, - "loss": 46.0, - "step": 36741 - }, - { - "epoch": 5.917025645154797, - "grad_norm": 0.0024822931736707687, - "learning_rate": 0.00019998273368859607, - "loss": 46.0, - "step": 36742 - }, - { - "epoch": 5.917186682233584, - "grad_norm": 0.002132893307134509, - "learning_rate": 0.00019998273274848667, - "loss": 46.0, - "step": 36743 - }, - { - "epoch": 5.917347719312372, - "grad_norm": 0.0025236632209271193, - "learning_rate": 0.00019998273180835168, - "loss": 46.0, - "step": 36744 - }, - { - "epoch": 5.917508756391159, - "grad_norm": 0.008523834869265556, - "learning_rate": 0.00019998273086819108, - "loss": 46.0, - "step": 36745 - }, - { - "epoch": 5.917669793469947, - "grad_norm": 0.0021781628020107746, - "learning_rate": 0.00019998272992800492, - "loss": 46.0, - "step": 36746 - }, - { - "epoch": 5.917830830548734, - "grad_norm": 0.003650036873295903, - "learning_rate": 0.00019998272898779314, - "loss": 46.0, - "step": 36747 - }, - { - "epoch": 5.917991867627522, - "grad_norm": 0.013499385677278042, - "learning_rate": 0.00019998272804755575, - "loss": 46.0, - "step": 36748 - }, - { - "epoch": 5.918152904706309, - "grad_norm": 0.005816685501486063, - "learning_rate": 0.0001999827271072928, - "loss": 46.0, - "step": 36749 - }, - { - "epoch": 5.918313941785096, - "grad_norm": 0.019862115383148193, - "learning_rate": 0.00019998272616700424, - "loss": 46.0, - "step": 36750 - }, - { - "epoch": 5.918474978863883, - "grad_norm": 0.006577814929187298, - "learning_rate": 0.00019998272522669011, - "loss": 46.0, - "step": 36751 - }, - { - "epoch": 5.9186360159426705, - "grad_norm": 0.006244547665119171, - "learning_rate": 0.00019998272428635037, - "loss": 46.0, - "step": 36752 - }, - { - "epoch": 5.918797053021458, - "grad_norm": 0.006949910894036293, - "learning_rate": 0.00019998272334598505, - "loss": 46.0, - "step": 36753 - }, - { - "epoch": 5.918958090100245, - "grad_norm": 0.021270664408802986, - "learning_rate": 0.00019998272240559414, - "loss": 46.0, - "step": 36754 - }, - { - "epoch": 5.919119127179033, - "grad_norm": 0.007246372755616903, - "learning_rate": 0.00019998272146517764, - "loss": 46.0, - "step": 36755 - }, - { - "epoch": 5.91928016425782, - "grad_norm": 0.008000325411558151, - "learning_rate": 0.00019998272052473555, - "loss": 46.0, - "step": 36756 - }, - { - "epoch": 5.919441201336608, - "grad_norm": 0.0015457691624760628, - "learning_rate": 0.00019998271958426787, - "loss": 46.0, - "step": 36757 - }, - { - "epoch": 5.919602238415395, - "grad_norm": 0.009535088203847408, - "learning_rate": 0.0001999827186437746, - "loss": 46.0, - "step": 36758 - }, - { - "epoch": 5.919763275494183, - "grad_norm": 0.013317562639713287, - "learning_rate": 0.00019998271770325574, - "loss": 46.0, - "step": 36759 - }, - { - "epoch": 5.91992431257297, - "grad_norm": 0.008625675924122334, - "learning_rate": 0.0001999827167627113, - "loss": 46.0, - "step": 36760 - }, - { - "epoch": 5.920085349651758, - "grad_norm": 0.002901580650359392, - "learning_rate": 0.00019998271582214125, - "loss": 46.0, - "step": 36761 - }, - { - "epoch": 5.920246386730545, - "grad_norm": 0.006856206338852644, - "learning_rate": 0.00019998271488154564, - "loss": 46.0, - "step": 36762 - }, - { - "epoch": 5.920407423809332, - "grad_norm": 0.005245402920991182, - "learning_rate": 0.00019998271394092442, - "loss": 46.0, - "step": 36763 - }, - { - "epoch": 5.92056846088812, - "grad_norm": 0.002144214464351535, - "learning_rate": 0.00019998271300027758, - "loss": 46.0, - "step": 36764 - }, - { - "epoch": 5.9207294979669065, - "grad_norm": 0.008015332743525505, - "learning_rate": 0.00019998271205960518, - "loss": 46.0, - "step": 36765 - }, - { - "epoch": 5.920890535045694, - "grad_norm": 0.014490757137537003, - "learning_rate": 0.0001999827111189072, - "loss": 46.0, - "step": 36766 - }, - { - "epoch": 5.921051572124481, - "grad_norm": 0.00442791823297739, - "learning_rate": 0.0001999827101781836, - "loss": 46.0, - "step": 36767 - }, - { - "epoch": 5.921212609203269, - "grad_norm": 0.006829854566603899, - "learning_rate": 0.0001999827092374344, - "loss": 46.0, - "step": 36768 - }, - { - "epoch": 5.921373646282056, - "grad_norm": 0.008199771866202354, - "learning_rate": 0.00019998270829665966, - "loss": 46.0, - "step": 36769 - }, - { - "epoch": 5.921534683360844, - "grad_norm": 0.015690838918089867, - "learning_rate": 0.0001999827073558593, - "loss": 46.0, - "step": 36770 - }, - { - "epoch": 5.921695720439631, - "grad_norm": 0.0020068727899342775, - "learning_rate": 0.00019998270641503335, - "loss": 46.0, - "step": 36771 - }, - { - "epoch": 5.921856757518419, - "grad_norm": 0.011578435078263283, - "learning_rate": 0.0001999827054741818, - "loss": 46.0, - "step": 36772 - }, - { - "epoch": 5.922017794597206, - "grad_norm": 0.009915011003613472, - "learning_rate": 0.00019998270453330469, - "loss": 46.0, - "step": 36773 - }, - { - "epoch": 5.922178831675994, - "grad_norm": 0.00173856969922781, - "learning_rate": 0.00019998270359240198, - "loss": 46.0, - "step": 36774 - }, - { - "epoch": 5.922339868754781, - "grad_norm": 0.006825334392488003, - "learning_rate": 0.00019998270265147365, - "loss": 46.0, - "step": 36775 - }, - { - "epoch": 5.9225009058335685, - "grad_norm": 0.0027432385832071304, - "learning_rate": 0.00019998270171051977, - "loss": 46.0, - "step": 36776 - }, - { - "epoch": 5.922661942912356, - "grad_norm": 0.008600936271250248, - "learning_rate": 0.00019998270076954027, - "loss": 46.0, - "step": 36777 - }, - { - "epoch": 5.9228229799911425, - "grad_norm": 0.0015825724694877863, - "learning_rate": 0.00019998269982853518, - "loss": 46.0, - "step": 36778 - }, - { - "epoch": 5.922984017069931, - "grad_norm": 0.008640332147479057, - "learning_rate": 0.0001999826988875045, - "loss": 46.0, - "step": 36779 - }, - { - "epoch": 5.923145054148717, - "grad_norm": 0.00999928917735815, - "learning_rate": 0.00019998269794644827, - "loss": 46.0, - "step": 36780 - }, - { - "epoch": 5.923306091227505, - "grad_norm": 0.0022006940562278032, - "learning_rate": 0.0001999826970053664, - "loss": 46.0, - "step": 36781 - }, - { - "epoch": 5.923467128306292, - "grad_norm": 0.0022133244201540947, - "learning_rate": 0.00019998269606425896, - "loss": 46.0, - "step": 36782 - }, - { - "epoch": 5.92362816538508, - "grad_norm": 0.005816758144646883, - "learning_rate": 0.00019998269512312594, - "loss": 46.0, - "step": 36783 - }, - { - "epoch": 5.923789202463867, - "grad_norm": 0.010484855622053146, - "learning_rate": 0.0001999826941819673, - "loss": 46.0, - "step": 36784 - }, - { - "epoch": 5.923950239542655, - "grad_norm": 0.005769622046500444, - "learning_rate": 0.00019998269324078308, - "loss": 46.0, - "step": 36785 - }, - { - "epoch": 5.924111276621442, - "grad_norm": 0.0020913099870085716, - "learning_rate": 0.0001999826922995733, - "loss": 46.0, - "step": 36786 - }, - { - "epoch": 5.9242723137002296, - "grad_norm": 0.001544525264762342, - "learning_rate": 0.0001999826913583379, - "loss": 46.0, - "step": 36787 - }, - { - "epoch": 5.924433350779017, - "grad_norm": 0.005578508134931326, - "learning_rate": 0.00019998269041707691, - "loss": 46.0, - "step": 36788 - }, - { - "epoch": 5.9245943878578045, - "grad_norm": 0.00633410457521677, - "learning_rate": 0.00019998268947579034, - "loss": 46.0, - "step": 36789 - }, - { - "epoch": 5.924755424936592, - "grad_norm": 0.008016320876777172, - "learning_rate": 0.00019998268853447818, - "loss": 46.0, - "step": 36790 - }, - { - "epoch": 5.924916462015379, - "grad_norm": 0.0036888362374156713, - "learning_rate": 0.0001999826875931404, - "loss": 46.0, - "step": 36791 - }, - { - "epoch": 5.925077499094167, - "grad_norm": 0.004931919742375612, - "learning_rate": 0.00019998268665177707, - "loss": 46.0, - "step": 36792 - }, - { - "epoch": 5.925238536172953, - "grad_norm": 0.00144600635394454, - "learning_rate": 0.00019998268571038815, - "loss": 46.0, - "step": 36793 - }, - { - "epoch": 5.925399573251741, - "grad_norm": 0.002845043782144785, - "learning_rate": 0.0001999826847689736, - "loss": 46.0, - "step": 36794 - }, - { - "epoch": 5.925560610330528, - "grad_norm": 0.01663287729024887, - "learning_rate": 0.0001999826838275335, - "loss": 46.0, - "step": 36795 - }, - { - "epoch": 5.925721647409316, - "grad_norm": 0.005727509967982769, - "learning_rate": 0.00019998268288606779, - "loss": 46.0, - "step": 36796 - }, - { - "epoch": 5.925882684488103, - "grad_norm": 0.0028591693844646215, - "learning_rate": 0.0001999826819445765, - "loss": 46.0, - "step": 36797 - }, - { - "epoch": 5.926043721566891, - "grad_norm": 0.004529683385044336, - "learning_rate": 0.00019998268100305958, - "loss": 46.0, - "step": 36798 - }, - { - "epoch": 5.926204758645678, - "grad_norm": 0.0013791258679702878, - "learning_rate": 0.00019998268006151713, - "loss": 46.0, - "step": 36799 - }, - { - "epoch": 5.9263657957244655, - "grad_norm": 0.002873591845855117, - "learning_rate": 0.00019998267911994905, - "loss": 46.0, - "step": 36800 - }, - { - "epoch": 5.926526832803253, - "grad_norm": 0.0020084616262465715, - "learning_rate": 0.00019998267817835538, - "loss": 46.0, - "step": 36801 - }, - { - "epoch": 5.92668786988204, - "grad_norm": 0.009874220006167889, - "learning_rate": 0.00019998267723673614, - "loss": 46.0, - "step": 36802 - }, - { - "epoch": 5.926848906960828, - "grad_norm": 0.0038444348610937595, - "learning_rate": 0.0001999826762950913, - "loss": 46.0, - "step": 36803 - }, - { - "epoch": 5.927009944039615, - "grad_norm": 0.002023317152634263, - "learning_rate": 0.00019998267535342086, - "loss": 46.0, - "step": 36804 - }, - { - "epoch": 5.927170981118403, - "grad_norm": 0.0018444063607603312, - "learning_rate": 0.00019998267441172484, - "loss": 46.0, - "step": 36805 - }, - { - "epoch": 5.92733201819719, - "grad_norm": 0.009191123768687248, - "learning_rate": 0.0001999826734700032, - "loss": 46.0, - "step": 36806 - }, - { - "epoch": 5.927493055275978, - "grad_norm": 0.003837514203041792, - "learning_rate": 0.000199982672528256, - "loss": 46.0, - "step": 36807 - }, - { - "epoch": 5.927654092354764, - "grad_norm": 0.005431676749140024, - "learning_rate": 0.00019998267158648323, - "loss": 46.0, - "step": 36808 - }, - { - "epoch": 5.927815129433552, - "grad_norm": 0.005784062203019857, - "learning_rate": 0.00019998267064468483, - "loss": 46.0, - "step": 36809 - }, - { - "epoch": 5.927976166512339, - "grad_norm": 0.012924952432513237, - "learning_rate": 0.00019998266970286087, - "loss": 46.0, - "step": 36810 - }, - { - "epoch": 5.928137203591127, - "grad_norm": 0.006054857280105352, - "learning_rate": 0.0001999826687610113, - "loss": 46.0, - "step": 36811 - }, - { - "epoch": 5.928298240669914, - "grad_norm": 0.01029572170227766, - "learning_rate": 0.00019998266781913614, - "loss": 46.0, - "step": 36812 - }, - { - "epoch": 5.9284592777487015, - "grad_norm": 0.0015591337578371167, - "learning_rate": 0.00019998266687723542, - "loss": 46.0, - "step": 36813 - }, - { - "epoch": 5.928620314827489, - "grad_norm": 0.008079933002591133, - "learning_rate": 0.00019998266593530906, - "loss": 46.0, - "step": 36814 - }, - { - "epoch": 5.928781351906276, - "grad_norm": 0.005458934232592583, - "learning_rate": 0.00019998266499335717, - "loss": 46.0, - "step": 36815 - }, - { - "epoch": 5.928942388985064, - "grad_norm": 0.002612989628687501, - "learning_rate": 0.00019998266405137964, - "loss": 46.0, - "step": 36816 - }, - { - "epoch": 5.929103426063851, - "grad_norm": 0.002193618332967162, - "learning_rate": 0.00019998266310937651, - "loss": 46.0, - "step": 36817 - }, - { - "epoch": 5.929264463142639, - "grad_norm": 0.020804710686206818, - "learning_rate": 0.00019998266216734783, - "loss": 46.0, - "step": 36818 - }, - { - "epoch": 5.929425500221426, - "grad_norm": 0.0009054761030711234, - "learning_rate": 0.0001999826612252935, - "loss": 46.0, - "step": 36819 - }, - { - "epoch": 5.929586537300214, - "grad_norm": 0.003950676415115595, - "learning_rate": 0.00019998266028321365, - "loss": 46.0, - "step": 36820 - }, - { - "epoch": 5.929747574379001, - "grad_norm": 0.0007985651027411222, - "learning_rate": 0.00019998265934110818, - "loss": 46.0, - "step": 36821 - }, - { - "epoch": 5.929908611457789, - "grad_norm": 0.001724847941659391, - "learning_rate": 0.00019998265839897715, - "loss": 46.0, - "step": 36822 - }, - { - "epoch": 5.930069648536575, - "grad_norm": 0.0027348280418664217, - "learning_rate": 0.00019998265745682048, - "loss": 46.0, - "step": 36823 - }, - { - "epoch": 5.930230685615363, - "grad_norm": 0.012773178517818451, - "learning_rate": 0.00019998265651463822, - "loss": 46.0, - "step": 36824 - }, - { - "epoch": 5.93039172269415, - "grad_norm": 0.004951681476086378, - "learning_rate": 0.0001999826555724304, - "loss": 46.0, - "step": 36825 - }, - { - "epoch": 5.9305527597729375, - "grad_norm": 0.010246754623949528, - "learning_rate": 0.000199982654630197, - "loss": 46.0, - "step": 36826 - }, - { - "epoch": 5.930713796851725, - "grad_norm": 0.008923030458390713, - "learning_rate": 0.00019998265368793798, - "loss": 46.0, - "step": 36827 - }, - { - "epoch": 5.930874833930512, - "grad_norm": 0.0013812516117468476, - "learning_rate": 0.00019998265274565337, - "loss": 46.0, - "step": 36828 - }, - { - "epoch": 5.9310358710093, - "grad_norm": 0.010209945961833, - "learning_rate": 0.0001999826518033432, - "loss": 46.0, - "step": 36829 - }, - { - "epoch": 5.931196908088087, - "grad_norm": 0.0032221318688243628, - "learning_rate": 0.0001999826508610074, - "loss": 46.0, - "step": 36830 - }, - { - "epoch": 5.931357945166875, - "grad_norm": 0.00614152243360877, - "learning_rate": 0.00019998264991864602, - "loss": 46.0, - "step": 36831 - }, - { - "epoch": 5.931518982245662, - "grad_norm": 0.003793375799432397, - "learning_rate": 0.00019998264897625907, - "loss": 46.0, - "step": 36832 - }, - { - "epoch": 5.93168001932445, - "grad_norm": 0.006623846013098955, - "learning_rate": 0.00019998264803384653, - "loss": 46.0, - "step": 36833 - }, - { - "epoch": 5.931841056403237, - "grad_norm": 0.0018425935413688421, - "learning_rate": 0.00019998264709140834, - "loss": 46.0, - "step": 36834 - }, - { - "epoch": 5.932002093482025, - "grad_norm": 0.003296756185591221, - "learning_rate": 0.00019998264614894465, - "loss": 46.0, - "step": 36835 - }, - { - "epoch": 5.932163130560812, - "grad_norm": 0.0028366323094815016, - "learning_rate": 0.00019998264520645532, - "loss": 46.0, - "step": 36836 - }, - { - "epoch": 5.9323241676395995, - "grad_norm": 0.0022995176259428263, - "learning_rate": 0.0001999826442639404, - "loss": 46.0, - "step": 36837 - }, - { - "epoch": 5.932485204718386, - "grad_norm": 0.016457993537187576, - "learning_rate": 0.0001999826433213999, - "loss": 46.0, - "step": 36838 - }, - { - "epoch": 5.9326462417971735, - "grad_norm": 0.0038181720301508904, - "learning_rate": 0.0001999826423788338, - "loss": 46.0, - "step": 36839 - }, - { - "epoch": 5.932807278875961, - "grad_norm": 0.01116947177797556, - "learning_rate": 0.0001999826414362421, - "loss": 46.0, - "step": 36840 - }, - { - "epoch": 5.932968315954748, - "grad_norm": 0.010643711313605309, - "learning_rate": 0.00019998264049362482, - "loss": 46.0, - "step": 36841 - }, - { - "epoch": 5.933129353033536, - "grad_norm": 0.0033071911893785, - "learning_rate": 0.00019998263955098194, - "loss": 46.0, - "step": 36842 - }, - { - "epoch": 5.933290390112323, - "grad_norm": 0.004507561679929495, - "learning_rate": 0.0001999826386083135, - "loss": 46.0, - "step": 36843 - }, - { - "epoch": 5.933451427191111, - "grad_norm": 0.007893814705312252, - "learning_rate": 0.00019998263766561944, - "loss": 46.0, - "step": 36844 - }, - { - "epoch": 5.933612464269898, - "grad_norm": 0.005530776921659708, - "learning_rate": 0.0001999826367228998, - "loss": 46.0, - "step": 36845 - }, - { - "epoch": 5.933773501348686, - "grad_norm": 0.007638771086931229, - "learning_rate": 0.0001999826357801546, - "loss": 46.0, - "step": 36846 - }, - { - "epoch": 5.933934538427473, - "grad_norm": 0.0024144684430211782, - "learning_rate": 0.00019998263483738375, - "loss": 46.0, - "step": 36847 - }, - { - "epoch": 5.9340955755062605, - "grad_norm": 0.01121516153216362, - "learning_rate": 0.00019998263389458734, - "loss": 46.0, - "step": 36848 - }, - { - "epoch": 5.934256612585048, - "grad_norm": 0.0031699230894446373, - "learning_rate": 0.00019998263295176532, - "loss": 46.0, - "step": 36849 - }, - { - "epoch": 5.9344176496638354, - "grad_norm": 0.001713825506158173, - "learning_rate": 0.00019998263200891775, - "loss": 46.0, - "step": 36850 - }, - { - "epoch": 5.934578686742622, - "grad_norm": 0.012295176275074482, - "learning_rate": 0.00019998263106604455, - "loss": 46.0, - "step": 36851 - }, - { - "epoch": 5.93473972382141, - "grad_norm": 0.0014337149914354086, - "learning_rate": 0.0001999826301231458, - "loss": 46.0, - "step": 36852 - }, - { - "epoch": 5.934900760900197, - "grad_norm": 0.007024618797004223, - "learning_rate": 0.00019998262918022143, - "loss": 46.0, - "step": 36853 - }, - { - "epoch": 5.935061797978984, - "grad_norm": 0.00954799260944128, - "learning_rate": 0.00019998262823727147, - "loss": 46.0, - "step": 36854 - }, - { - "epoch": 5.935222835057772, - "grad_norm": 0.005930591840296984, - "learning_rate": 0.00019998262729429596, - "loss": 46.0, - "step": 36855 - }, - { - "epoch": 5.935383872136559, - "grad_norm": 0.006837198976427317, - "learning_rate": 0.0001999826263512948, - "loss": 46.0, - "step": 36856 - }, - { - "epoch": 5.935544909215347, - "grad_norm": 0.003982583526521921, - "learning_rate": 0.00019998262540826809, - "loss": 46.0, - "step": 36857 - }, - { - "epoch": 5.935705946294134, - "grad_norm": 0.0175002571195364, - "learning_rate": 0.00019998262446521578, - "loss": 46.0, - "step": 36858 - }, - { - "epoch": 5.935866983372922, - "grad_norm": 0.0042568654753267765, - "learning_rate": 0.00019998262352213786, - "loss": 46.0, - "step": 36859 - }, - { - "epoch": 5.936028020451709, - "grad_norm": 0.0033177966251969337, - "learning_rate": 0.00019998262257903438, - "loss": 46.0, - "step": 36860 - }, - { - "epoch": 5.9361890575304965, - "grad_norm": 0.0018974541453644633, - "learning_rate": 0.0001999826216359053, - "loss": 46.0, - "step": 36861 - }, - { - "epoch": 5.936350094609284, - "grad_norm": 0.005490519572049379, - "learning_rate": 0.0001999826206927506, - "loss": 46.0, - "step": 36862 - }, - { - "epoch": 5.936511131688071, - "grad_norm": 0.004988665692508221, - "learning_rate": 0.00019998261974957035, - "loss": 46.0, - "step": 36863 - }, - { - "epoch": 5.936672168766859, - "grad_norm": 0.014877923764288425, - "learning_rate": 0.0001999826188063645, - "loss": 46.0, - "step": 36864 - }, - { - "epoch": 5.936833205845646, - "grad_norm": 0.00861698854714632, - "learning_rate": 0.00019998261786313305, - "loss": 46.0, - "step": 36865 - }, - { - "epoch": 5.936994242924433, - "grad_norm": 0.002893635770305991, - "learning_rate": 0.00019998261691987602, - "loss": 46.0, - "step": 36866 - }, - { - "epoch": 5.937155280003221, - "grad_norm": 0.010076380334794521, - "learning_rate": 0.00019998261597659335, - "loss": 46.0, - "step": 36867 - }, - { - "epoch": 5.937316317082008, - "grad_norm": 0.008594764396548271, - "learning_rate": 0.00019998261503328515, - "loss": 46.0, - "step": 36868 - }, - { - "epoch": 5.937477354160795, - "grad_norm": 0.006651430856436491, - "learning_rate": 0.00019998261408995133, - "loss": 46.0, - "step": 36869 - }, - { - "epoch": 5.937638391239583, - "grad_norm": 0.025923868641257286, - "learning_rate": 0.00019998261314659193, - "loss": 46.0, - "step": 36870 - }, - { - "epoch": 5.93779942831837, - "grad_norm": 0.0025881456676870584, - "learning_rate": 0.00019998261220320696, - "loss": 46.0, - "step": 36871 - }, - { - "epoch": 5.937960465397158, - "grad_norm": 0.0168552715331316, - "learning_rate": 0.00019998261125979636, - "loss": 46.0, - "step": 36872 - }, - { - "epoch": 5.938121502475945, - "grad_norm": 0.002950269030407071, - "learning_rate": 0.0001999826103163602, - "loss": 46.0, - "step": 36873 - }, - { - "epoch": 5.9382825395547325, - "grad_norm": 0.01831338368356228, - "learning_rate": 0.00019998260937289844, - "loss": 46.0, - "step": 36874 - }, - { - "epoch": 5.93844357663352, - "grad_norm": 0.00851343385875225, - "learning_rate": 0.0001999826084294111, - "loss": 46.0, - "step": 36875 - }, - { - "epoch": 5.938604613712307, - "grad_norm": 0.0019877853337675333, - "learning_rate": 0.00019998260748589814, - "loss": 46.0, - "step": 36876 - }, - { - "epoch": 5.938765650791095, - "grad_norm": 0.0016741901636123657, - "learning_rate": 0.0001999826065423596, - "loss": 46.0, - "step": 36877 - }, - { - "epoch": 5.938926687869882, - "grad_norm": 0.0024467746261507273, - "learning_rate": 0.0001999826055987955, - "loss": 46.0, - "step": 36878 - }, - { - "epoch": 5.93908772494867, - "grad_norm": 0.002907415386289358, - "learning_rate": 0.00019998260465520578, - "loss": 46.0, - "step": 36879 - }, - { - "epoch": 5.939248762027457, - "grad_norm": 0.004205035045742989, - "learning_rate": 0.00019998260371159048, - "loss": 46.0, - "step": 36880 - }, - { - "epoch": 5.939409799106244, - "grad_norm": 0.0022435863502323627, - "learning_rate": 0.0001999826027679496, - "loss": 46.0, - "step": 36881 - }, - { - "epoch": 5.939570836185031, - "grad_norm": 0.0032009012065827847, - "learning_rate": 0.0001999826018242831, - "loss": 46.0, - "step": 36882 - }, - { - "epoch": 5.939731873263819, - "grad_norm": 0.0007960580987855792, - "learning_rate": 0.00019998260088059102, - "loss": 46.0, - "step": 36883 - }, - { - "epoch": 5.939892910342606, - "grad_norm": 0.006090050563216209, - "learning_rate": 0.00019998259993687337, - "loss": 46.0, - "step": 36884 - }, - { - "epoch": 5.940053947421394, - "grad_norm": 0.003619585884734988, - "learning_rate": 0.0001999825989931301, - "loss": 46.0, - "step": 36885 - }, - { - "epoch": 5.940214984500181, - "grad_norm": 0.006734329741448164, - "learning_rate": 0.00019998259804936125, - "loss": 46.0, - "step": 36886 - }, - { - "epoch": 5.9403760215789685, - "grad_norm": 0.003483704524114728, - "learning_rate": 0.0001999825971055668, - "loss": 46.0, - "step": 36887 - }, - { - "epoch": 5.940537058657756, - "grad_norm": 0.0039081210270524025, - "learning_rate": 0.00019998259616174678, - "loss": 46.0, - "step": 36888 - }, - { - "epoch": 5.940698095736543, - "grad_norm": 0.001193365198560059, - "learning_rate": 0.00019998259521790117, - "loss": 46.0, - "step": 36889 - }, - { - "epoch": 5.940859132815331, - "grad_norm": 0.0035276266280561686, - "learning_rate": 0.00019998259427402997, - "loss": 46.0, - "step": 36890 - }, - { - "epoch": 5.941020169894118, - "grad_norm": 0.013093777000904083, - "learning_rate": 0.00019998259333013315, - "loss": 46.0, - "step": 36891 - }, - { - "epoch": 5.941181206972906, - "grad_norm": 0.003916393965482712, - "learning_rate": 0.00019998259238621077, - "loss": 46.0, - "step": 36892 - }, - { - "epoch": 5.941342244051693, - "grad_norm": 0.004663741681724787, - "learning_rate": 0.00019998259144226278, - "loss": 46.0, - "step": 36893 - }, - { - "epoch": 5.941503281130481, - "grad_norm": 0.004135070368647575, - "learning_rate": 0.00019998259049828923, - "loss": 46.0, - "step": 36894 - }, - { - "epoch": 5.941664318209268, - "grad_norm": 0.009576123207807541, - "learning_rate": 0.00019998258955429007, - "loss": 46.0, - "step": 36895 - }, - { - "epoch": 5.941825355288055, - "grad_norm": 0.0024867786560207605, - "learning_rate": 0.00019998258861026532, - "loss": 46.0, - "step": 36896 - }, - { - "epoch": 5.941986392366842, - "grad_norm": 0.001111357705667615, - "learning_rate": 0.00019998258766621495, - "loss": 46.0, - "step": 36897 - }, - { - "epoch": 5.94214742944563, - "grad_norm": 0.0033197253942489624, - "learning_rate": 0.00019998258672213902, - "loss": 46.0, - "step": 36898 - }, - { - "epoch": 5.942308466524417, - "grad_norm": 0.0030090510845184326, - "learning_rate": 0.0001999825857780375, - "loss": 46.0, - "step": 36899 - }, - { - "epoch": 5.9424695036032045, - "grad_norm": 0.004534139297902584, - "learning_rate": 0.00019998258483391038, - "loss": 46.0, - "step": 36900 - }, - { - "epoch": 5.942630540681992, - "grad_norm": 0.0017294470453634858, - "learning_rate": 0.0001999825838897577, - "loss": 46.0, - "step": 36901 - }, - { - "epoch": 5.942791577760779, - "grad_norm": 0.001450419775210321, - "learning_rate": 0.00019998258294557942, - "loss": 46.0, - "step": 36902 - }, - { - "epoch": 5.942952614839567, - "grad_norm": 0.0018378933891654015, - "learning_rate": 0.00019998258200137553, - "loss": 46.0, - "step": 36903 - }, - { - "epoch": 5.943113651918354, - "grad_norm": 0.0013845686335116625, - "learning_rate": 0.00019998258105714605, - "loss": 46.0, - "step": 36904 - }, - { - "epoch": 5.943274688997142, - "grad_norm": 0.003157664556056261, - "learning_rate": 0.000199982580112891, - "loss": 46.0, - "step": 36905 - }, - { - "epoch": 5.943435726075929, - "grad_norm": 0.0026509626768529415, - "learning_rate": 0.00019998257916861034, - "loss": 46.0, - "step": 36906 - }, - { - "epoch": 5.943596763154717, - "grad_norm": 0.016884757205843925, - "learning_rate": 0.00019998257822430407, - "loss": 46.0, - "step": 36907 - }, - { - "epoch": 5.943757800233504, - "grad_norm": 0.0024585712235420942, - "learning_rate": 0.00019998257727997225, - "loss": 46.0, - "step": 36908 - }, - { - "epoch": 5.9439188373122915, - "grad_norm": 0.0050835562869906425, - "learning_rate": 0.0001999825763356148, - "loss": 46.0, - "step": 36909 - }, - { - "epoch": 5.944079874391079, - "grad_norm": 0.006497082766145468, - "learning_rate": 0.0001999825753912318, - "loss": 46.0, - "step": 36910 - }, - { - "epoch": 5.9442409114698656, - "grad_norm": 0.002081113401800394, - "learning_rate": 0.0001999825744468232, - "loss": 46.0, - "step": 36911 - }, - { - "epoch": 5.944401948548653, - "grad_norm": 0.0036203069612383842, - "learning_rate": 0.000199982573502389, - "loss": 46.0, - "step": 36912 - }, - { - "epoch": 5.9445629856274405, - "grad_norm": 0.003072316525503993, - "learning_rate": 0.00019998257255792923, - "loss": 46.0, - "step": 36913 - }, - { - "epoch": 5.944724022706228, - "grad_norm": 0.004336175974458456, - "learning_rate": 0.00019998257161344386, - "loss": 46.0, - "step": 36914 - }, - { - "epoch": 5.944885059785015, - "grad_norm": 0.004530665930360556, - "learning_rate": 0.00019998257066893287, - "loss": 46.0, - "step": 36915 - }, - { - "epoch": 5.945046096863803, - "grad_norm": 0.004490611143410206, - "learning_rate": 0.00019998256972439632, - "loss": 46.0, - "step": 36916 - }, - { - "epoch": 5.94520713394259, - "grad_norm": 0.009018462151288986, - "learning_rate": 0.00019998256877983416, - "loss": 46.0, - "step": 36917 - }, - { - "epoch": 5.945368171021378, - "grad_norm": 0.007181808818131685, - "learning_rate": 0.00019998256783524643, - "loss": 46.0, - "step": 36918 - }, - { - "epoch": 5.945529208100165, - "grad_norm": 0.009374646469950676, - "learning_rate": 0.0001999825668906331, - "loss": 46.0, - "step": 36919 - }, - { - "epoch": 5.945690245178953, - "grad_norm": 0.0029697518330067396, - "learning_rate": 0.0001999825659459942, - "loss": 46.0, - "step": 36920 - }, - { - "epoch": 5.94585128225774, - "grad_norm": 0.011085838079452515, - "learning_rate": 0.00019998256500132965, - "loss": 46.0, - "step": 36921 - }, - { - "epoch": 5.9460123193365275, - "grad_norm": 0.0015818714164197445, - "learning_rate": 0.00019998256405663956, - "loss": 46.0, - "step": 36922 - }, - { - "epoch": 5.946173356415315, - "grad_norm": 0.0025518531911075115, - "learning_rate": 0.00019998256311192387, - "loss": 46.0, - "step": 36923 - }, - { - "epoch": 5.9463343934941015, - "grad_norm": 0.007699660491198301, - "learning_rate": 0.0001999825621671826, - "loss": 46.0, - "step": 36924 - }, - { - "epoch": 5.94649543057289, - "grad_norm": 0.001772611285559833, - "learning_rate": 0.00019998256122241573, - "loss": 46.0, - "step": 36925 - }, - { - "epoch": 5.946656467651676, - "grad_norm": 0.006378230173140764, - "learning_rate": 0.00019998256027762326, - "loss": 46.0, - "step": 36926 - }, - { - "epoch": 5.946817504730464, - "grad_norm": 0.00555045111104846, - "learning_rate": 0.00019998255933280522, - "loss": 46.0, - "step": 36927 - }, - { - "epoch": 5.946978541809251, - "grad_norm": 0.0040244730189442635, - "learning_rate": 0.00019998255838796157, - "loss": 46.0, - "step": 36928 - }, - { - "epoch": 5.947139578888039, - "grad_norm": 0.0026228982023894787, - "learning_rate": 0.00019998255744309233, - "loss": 46.0, - "step": 36929 - }, - { - "epoch": 5.947300615966826, - "grad_norm": 0.005417807027697563, - "learning_rate": 0.0001999825564981975, - "loss": 46.0, - "step": 36930 - }, - { - "epoch": 5.947461653045614, - "grad_norm": 0.018967410549521446, - "learning_rate": 0.0001999825555532771, - "loss": 46.0, - "step": 36931 - }, - { - "epoch": 5.947622690124401, - "grad_norm": 0.001580257317982614, - "learning_rate": 0.00019998255460833107, - "loss": 46.0, - "step": 36932 - }, - { - "epoch": 5.947783727203189, - "grad_norm": 0.00166042847558856, - "learning_rate": 0.0001999825536633595, - "loss": 46.0, - "step": 36933 - }, - { - "epoch": 5.947944764281976, - "grad_norm": 0.004260808229446411, - "learning_rate": 0.00019998255271836231, - "loss": 46.0, - "step": 36934 - }, - { - "epoch": 5.9481058013607635, - "grad_norm": 0.003028763923794031, - "learning_rate": 0.00019998255177333953, - "loss": 46.0, - "step": 36935 - }, - { - "epoch": 5.948266838439551, - "grad_norm": 0.006037257146090269, - "learning_rate": 0.00019998255082829113, - "loss": 46.0, - "step": 36936 - }, - { - "epoch": 5.948427875518338, - "grad_norm": 0.007757402490824461, - "learning_rate": 0.00019998254988321722, - "loss": 46.0, - "step": 36937 - }, - { - "epoch": 5.948588912597126, - "grad_norm": 0.0016534540336579084, - "learning_rate": 0.00019998254893811764, - "loss": 46.0, - "step": 36938 - }, - { - "epoch": 5.948749949675912, - "grad_norm": 0.02142789401113987, - "learning_rate": 0.0001999825479929925, - "loss": 46.0, - "step": 36939 - }, - { - "epoch": 5.948910986754701, - "grad_norm": 0.006063102278858423, - "learning_rate": 0.00019998254704784178, - "loss": 46.0, - "step": 36940 - }, - { - "epoch": 5.949072023833487, - "grad_norm": 0.00894453190267086, - "learning_rate": 0.00019998254610266547, - "loss": 46.0, - "step": 36941 - }, - { - "epoch": 5.949233060912275, - "grad_norm": 0.008720950223505497, - "learning_rate": 0.00019998254515746355, - "loss": 46.0, - "step": 36942 - }, - { - "epoch": 5.949394097991062, - "grad_norm": 0.0016925291856750846, - "learning_rate": 0.00019998254421223606, - "loss": 46.0, - "step": 36943 - }, - { - "epoch": 5.94955513506985, - "grad_norm": 0.003567629959434271, - "learning_rate": 0.00019998254326698296, - "loss": 46.0, - "step": 36944 - }, - { - "epoch": 5.949716172148637, - "grad_norm": 0.003626785473898053, - "learning_rate": 0.00019998254232170428, - "loss": 46.0, - "step": 36945 - }, - { - "epoch": 5.949877209227425, - "grad_norm": 0.004043857101351023, - "learning_rate": 0.0001999825413764, - "loss": 46.0, - "step": 36946 - }, - { - "epoch": 5.950038246306212, - "grad_norm": 0.005927152466028929, - "learning_rate": 0.00019998254043107015, - "loss": 46.0, - "step": 36947 - }, - { - "epoch": 5.9501992833849995, - "grad_norm": 0.010291928425431252, - "learning_rate": 0.0001999825394857147, - "loss": 46.0, - "step": 36948 - }, - { - "epoch": 5.950360320463787, - "grad_norm": 0.0007531038136221468, - "learning_rate": 0.00019998253854033364, - "loss": 46.0, - "step": 36949 - }, - { - "epoch": 5.950521357542574, - "grad_norm": 0.00466937618330121, - "learning_rate": 0.00019998253759492701, - "loss": 46.0, - "step": 36950 - }, - { - "epoch": 5.950682394621362, - "grad_norm": 0.008026626892387867, - "learning_rate": 0.0001999825366494948, - "loss": 46.0, - "step": 36951 - }, - { - "epoch": 5.950843431700149, - "grad_norm": 0.003214562311768532, - "learning_rate": 0.00019998253570403698, - "loss": 46.0, - "step": 36952 - }, - { - "epoch": 5.951004468778937, - "grad_norm": 0.001777904573827982, - "learning_rate": 0.0001999825347585536, - "loss": 46.0, - "step": 36953 - }, - { - "epoch": 5.951165505857723, - "grad_norm": 0.018588803708553314, - "learning_rate": 0.0001999825338130446, - "loss": 46.0, - "step": 36954 - }, - { - "epoch": 5.951326542936511, - "grad_norm": 0.00618468364700675, - "learning_rate": 0.00019998253286751002, - "loss": 46.0, - "step": 36955 - }, - { - "epoch": 5.951487580015298, - "grad_norm": 0.002803102135658264, - "learning_rate": 0.00019998253192194984, - "loss": 46.0, - "step": 36956 - }, - { - "epoch": 5.951648617094086, - "grad_norm": 0.017444560304284096, - "learning_rate": 0.00019998253097636406, - "loss": 46.0, - "step": 36957 - }, - { - "epoch": 5.951809654172873, - "grad_norm": 0.002624739659950137, - "learning_rate": 0.0001999825300307527, - "loss": 46.0, - "step": 36958 - }, - { - "epoch": 5.951970691251661, - "grad_norm": 0.007176046725362539, - "learning_rate": 0.00019998252908511578, - "loss": 46.0, - "step": 36959 - }, - { - "epoch": 5.952131728330448, - "grad_norm": 0.006515739485621452, - "learning_rate": 0.00019998252813945323, - "loss": 46.0, - "step": 36960 - }, - { - "epoch": 5.9522927654092355, - "grad_norm": 0.002790396334603429, - "learning_rate": 0.00019998252719376512, - "loss": 46.0, - "step": 36961 - }, - { - "epoch": 5.952453802488023, - "grad_norm": 0.0010005622170865536, - "learning_rate": 0.00019998252624805137, - "loss": 46.0, - "step": 36962 - }, - { - "epoch": 5.95261483956681, - "grad_norm": 0.0025750664062798023, - "learning_rate": 0.00019998252530231206, - "loss": 46.0, - "step": 36963 - }, - { - "epoch": 5.952775876645598, - "grad_norm": 0.0009480998269282281, - "learning_rate": 0.0001999825243565472, - "loss": 46.0, - "step": 36964 - }, - { - "epoch": 5.952936913724385, - "grad_norm": 0.02478196658194065, - "learning_rate": 0.00019998252341075668, - "loss": 46.0, - "step": 36965 - }, - { - "epoch": 5.953097950803173, - "grad_norm": 0.002616604557260871, - "learning_rate": 0.0001999825224649406, - "loss": 46.0, - "step": 36966 - }, - { - "epoch": 5.95325898788196, - "grad_norm": 0.0112298633903265, - "learning_rate": 0.00019998252151909895, - "loss": 46.0, - "step": 36967 - }, - { - "epoch": 5.953420024960748, - "grad_norm": 0.004851947538554668, - "learning_rate": 0.00019998252057323168, - "loss": 46.0, - "step": 36968 - }, - { - "epoch": 5.953581062039534, - "grad_norm": 0.00239943852648139, - "learning_rate": 0.00019998251962733882, - "loss": 46.0, - "step": 36969 - }, - { - "epoch": 5.953742099118322, - "grad_norm": 0.007052809931337833, - "learning_rate": 0.0001999825186814204, - "loss": 46.0, - "step": 36970 - }, - { - "epoch": 5.953903136197109, - "grad_norm": 0.008446740917861462, - "learning_rate": 0.00019998251773547637, - "loss": 46.0, - "step": 36971 - }, - { - "epoch": 5.9540641732758965, - "grad_norm": 0.004200976341962814, - "learning_rate": 0.00019998251678950672, - "loss": 46.0, - "step": 36972 - }, - { - "epoch": 5.954225210354684, - "grad_norm": 0.024789215996861458, - "learning_rate": 0.00019998251584351154, - "loss": 46.0, - "step": 36973 - }, - { - "epoch": 5.9543862474334714, - "grad_norm": 0.006637731567025185, - "learning_rate": 0.00019998251489749072, - "loss": 46.0, - "step": 36974 - }, - { - "epoch": 5.954547284512259, - "grad_norm": 0.032918959856033325, - "learning_rate": 0.0001999825139514443, - "loss": 46.0, - "step": 36975 - }, - { - "epoch": 5.954708321591046, - "grad_norm": 0.00895368866622448, - "learning_rate": 0.00019998251300537234, - "loss": 46.0, - "step": 36976 - }, - { - "epoch": 5.954869358669834, - "grad_norm": 0.0028805043548345566, - "learning_rate": 0.00019998251205927478, - "loss": 46.0, - "step": 36977 - }, - { - "epoch": 5.955030395748621, - "grad_norm": 0.007396654691547155, - "learning_rate": 0.0001999825111131516, - "loss": 46.0, - "step": 36978 - }, - { - "epoch": 5.955191432827409, - "grad_norm": 0.0033771449234336615, - "learning_rate": 0.00019998251016700285, - "loss": 46.0, - "step": 36979 - }, - { - "epoch": 5.955352469906196, - "grad_norm": 0.0005558221018873155, - "learning_rate": 0.0001999825092208285, - "loss": 46.0, - "step": 36980 - }, - { - "epoch": 5.955513506984984, - "grad_norm": 0.00286436197347939, - "learning_rate": 0.0001999825082746286, - "loss": 46.0, - "step": 36981 - }, - { - "epoch": 5.955674544063771, - "grad_norm": 0.007379957474768162, - "learning_rate": 0.00019998250732840305, - "loss": 46.0, - "step": 36982 - }, - { - "epoch": 5.9558355811425585, - "grad_norm": 0.006225490011274815, - "learning_rate": 0.00019998250638215192, - "loss": 46.0, - "step": 36983 - }, - { - "epoch": 5.955996618221345, - "grad_norm": 0.0007428998360410333, - "learning_rate": 0.00019998250543587523, - "loss": 46.0, - "step": 36984 - }, - { - "epoch": 5.9561576553001325, - "grad_norm": 0.027918454259634018, - "learning_rate": 0.00019998250448957294, - "loss": 46.0, - "step": 36985 - }, - { - "epoch": 5.95631869237892, - "grad_norm": 0.0026623853482306004, - "learning_rate": 0.00019998250354324505, - "loss": 46.0, - "step": 36986 - }, - { - "epoch": 5.956479729457707, - "grad_norm": 0.007374635897576809, - "learning_rate": 0.00019998250259689157, - "loss": 46.0, - "step": 36987 - }, - { - "epoch": 5.956640766536495, - "grad_norm": 0.002996884984895587, - "learning_rate": 0.00019998250165051252, - "loss": 46.0, - "step": 36988 - }, - { - "epoch": 5.956801803615282, - "grad_norm": 0.004277043975889683, - "learning_rate": 0.00019998250070410784, - "loss": 46.0, - "step": 36989 - }, - { - "epoch": 5.95696284069407, - "grad_norm": 0.016431516036391258, - "learning_rate": 0.0001999824997576776, - "loss": 46.0, - "step": 36990 - }, - { - "epoch": 5.957123877772857, - "grad_norm": 0.0027599839959293604, - "learning_rate": 0.00019998249881122176, - "loss": 46.0, - "step": 36991 - }, - { - "epoch": 5.957284914851645, - "grad_norm": 0.006952356547117233, - "learning_rate": 0.00019998249786474034, - "loss": 46.0, - "step": 36992 - }, - { - "epoch": 5.957445951930432, - "grad_norm": 0.0024024515878409147, - "learning_rate": 0.00019998249691823328, - "loss": 46.0, - "step": 36993 - }, - { - "epoch": 5.95760698900922, - "grad_norm": 0.01071472093462944, - "learning_rate": 0.00019998249597170072, - "loss": 46.0, - "step": 36994 - }, - { - "epoch": 5.957768026088007, - "grad_norm": 0.007149725221097469, - "learning_rate": 0.0001999824950251425, - "loss": 46.0, - "step": 36995 - }, - { - "epoch": 5.9579290631667945, - "grad_norm": 0.00957890972495079, - "learning_rate": 0.00019998249407855872, - "loss": 46.0, - "step": 36996 - }, - { - "epoch": 5.958090100245581, - "grad_norm": 0.001530267996713519, - "learning_rate": 0.00019998249313194933, - "loss": 46.0, - "step": 36997 - }, - { - "epoch": 5.958251137324369, - "grad_norm": 0.0027556815184652805, - "learning_rate": 0.00019998249218531434, - "loss": 46.0, - "step": 36998 - }, - { - "epoch": 5.958412174403156, - "grad_norm": 0.0062414417043328285, - "learning_rate": 0.0001999824912386538, - "loss": 46.0, - "step": 36999 - }, - { - "epoch": 5.958573211481943, - "grad_norm": 0.007867907173931599, - "learning_rate": 0.00019998249029196764, - "loss": 46.0, - "step": 37000 - }, - { - "epoch": 5.958734248560731, - "grad_norm": 0.0024836256634444, - "learning_rate": 0.0001999824893452559, - "loss": 46.0, - "step": 37001 - }, - { - "epoch": 5.958895285639518, - "grad_norm": 0.0028169762808829546, - "learning_rate": 0.0001999824883985186, - "loss": 46.0, - "step": 37002 - }, - { - "epoch": 5.959056322718306, - "grad_norm": 0.004039568826556206, - "learning_rate": 0.00019998248745175566, - "loss": 46.0, - "step": 37003 - }, - { - "epoch": 5.959217359797093, - "grad_norm": 0.0026104410644620657, - "learning_rate": 0.00019998248650496714, - "loss": 46.0, - "step": 37004 - }, - { - "epoch": 5.959378396875881, - "grad_norm": 0.00978656392544508, - "learning_rate": 0.00019998248555815304, - "loss": 46.0, - "step": 37005 - }, - { - "epoch": 5.959539433954668, - "grad_norm": 0.0026953667402267456, - "learning_rate": 0.00019998248461131332, - "loss": 46.0, - "step": 37006 - }, - { - "epoch": 5.959700471033456, - "grad_norm": 0.00448788283392787, - "learning_rate": 0.00019998248366444806, - "loss": 46.0, - "step": 37007 - }, - { - "epoch": 5.959861508112243, - "grad_norm": 0.013963552191853523, - "learning_rate": 0.00019998248271755717, - "loss": 46.0, - "step": 37008 - }, - { - "epoch": 5.9600225451910305, - "grad_norm": 0.001601981115527451, - "learning_rate": 0.00019998248177064069, - "loss": 46.0, - "step": 37009 - }, - { - "epoch": 5.960183582269818, - "grad_norm": 0.003871679538860917, - "learning_rate": 0.00019998248082369867, - "loss": 46.0, - "step": 37010 - }, - { - "epoch": 5.960344619348605, - "grad_norm": 0.004306553862988949, - "learning_rate": 0.000199982479876731, - "loss": 46.0, - "step": 37011 - }, - { - "epoch": 5.960505656427392, - "grad_norm": 0.00464608846232295, - "learning_rate": 0.00019998247892973777, - "loss": 46.0, - "step": 37012 - }, - { - "epoch": 5.96066669350618, - "grad_norm": 0.009025861509144306, - "learning_rate": 0.00019998247798271894, - "loss": 46.0, - "step": 37013 - }, - { - "epoch": 5.960827730584967, - "grad_norm": 0.0028407550416886806, - "learning_rate": 0.00019998247703567452, - "loss": 46.0, - "step": 37014 - }, - { - "epoch": 5.960988767663754, - "grad_norm": 0.020244669169187546, - "learning_rate": 0.00019998247608860452, - "loss": 46.0, - "step": 37015 - }, - { - "epoch": 5.961149804742542, - "grad_norm": 0.004608507733792067, - "learning_rate": 0.00019998247514150893, - "loss": 46.0, - "step": 37016 - }, - { - "epoch": 5.961310841821329, - "grad_norm": 0.001804869738407433, - "learning_rate": 0.00019998247419438775, - "loss": 46.0, - "step": 37017 - }, - { - "epoch": 5.961471878900117, - "grad_norm": 0.003487489651888609, - "learning_rate": 0.00019998247324724095, - "loss": 46.0, - "step": 37018 - }, - { - "epoch": 5.961632915978904, - "grad_norm": 0.0036999143194407225, - "learning_rate": 0.0001999824723000686, - "loss": 46.0, - "step": 37019 - }, - { - "epoch": 5.9617939530576916, - "grad_norm": 0.001381033449433744, - "learning_rate": 0.00019998247135287063, - "loss": 46.0, - "step": 37020 - }, - { - "epoch": 5.961954990136479, - "grad_norm": 0.006018578540533781, - "learning_rate": 0.00019998247040564708, - "loss": 46.0, - "step": 37021 - }, - { - "epoch": 5.9621160272152665, - "grad_norm": 0.027820903807878494, - "learning_rate": 0.00019998246945839793, - "loss": 46.0, - "step": 37022 - }, - { - "epoch": 5.962277064294054, - "grad_norm": 0.002910071285441518, - "learning_rate": 0.0001999824685111232, - "loss": 46.0, - "step": 37023 - }, - { - "epoch": 5.962438101372841, - "grad_norm": 0.010540717281401157, - "learning_rate": 0.0001999824675638229, - "loss": 46.0, - "step": 37024 - }, - { - "epoch": 5.962599138451629, - "grad_norm": 0.017062803730368614, - "learning_rate": 0.00019998246661649698, - "loss": 46.0, - "step": 37025 - }, - { - "epoch": 5.962760175530416, - "grad_norm": 0.002759263152256608, - "learning_rate": 0.00019998246566914547, - "loss": 46.0, - "step": 37026 - }, - { - "epoch": 5.962921212609203, - "grad_norm": 0.0048307436518371105, - "learning_rate": 0.0001999824647217684, - "loss": 46.0, - "step": 37027 - }, - { - "epoch": 5.96308224968799, - "grad_norm": 0.004624025896191597, - "learning_rate": 0.0001999824637743657, - "loss": 46.0, - "step": 37028 - }, - { - "epoch": 5.963243286766778, - "grad_norm": 0.005874814465641975, - "learning_rate": 0.00019998246282693742, - "loss": 46.0, - "step": 37029 - }, - { - "epoch": 5.963404323845565, - "grad_norm": 0.004562532529234886, - "learning_rate": 0.00019998246187948355, - "loss": 46.0, - "step": 37030 - }, - { - "epoch": 5.963565360924353, - "grad_norm": 0.004294287413358688, - "learning_rate": 0.0001999824609320041, - "loss": 46.0, - "step": 37031 - }, - { - "epoch": 5.96372639800314, - "grad_norm": 0.0021024267189204693, - "learning_rate": 0.00019998245998449906, - "loss": 46.0, - "step": 37032 - }, - { - "epoch": 5.9638874350819275, - "grad_norm": 0.01449536345899105, - "learning_rate": 0.00019998245903696843, - "loss": 46.0, - "step": 37033 - }, - { - "epoch": 5.964048472160715, - "grad_norm": 0.009961480274796486, - "learning_rate": 0.0001999824580894122, - "loss": 46.0, - "step": 37034 - }, - { - "epoch": 5.964209509239502, - "grad_norm": 0.005282219965010881, - "learning_rate": 0.00019998245714183038, - "loss": 46.0, - "step": 37035 - }, - { - "epoch": 5.96437054631829, - "grad_norm": 0.00541432062163949, - "learning_rate": 0.00019998245619422297, - "loss": 46.0, - "step": 37036 - }, - { - "epoch": 5.964531583397077, - "grad_norm": 0.010922852903604507, - "learning_rate": 0.00019998245524658996, - "loss": 46.0, - "step": 37037 - }, - { - "epoch": 5.964692620475865, - "grad_norm": 0.0033942132722586393, - "learning_rate": 0.0001999824542989314, - "loss": 46.0, - "step": 37038 - }, - { - "epoch": 5.964853657554652, - "grad_norm": 0.007351012900471687, - "learning_rate": 0.00019998245335124722, - "loss": 46.0, - "step": 37039 - }, - { - "epoch": 5.96501469463344, - "grad_norm": 0.004245967138558626, - "learning_rate": 0.00019998245240353745, - "loss": 46.0, - "step": 37040 - }, - { - "epoch": 5.965175731712227, - "grad_norm": 0.01678537391126156, - "learning_rate": 0.0001999824514558021, - "loss": 46.0, - "step": 37041 - }, - { - "epoch": 5.965336768791014, - "grad_norm": 0.006228059530258179, - "learning_rate": 0.00019998245050804113, - "loss": 46.0, - "step": 37042 - }, - { - "epoch": 5.965497805869801, - "grad_norm": 0.00493715750053525, - "learning_rate": 0.0001999824495602546, - "loss": 46.0, - "step": 37043 - }, - { - "epoch": 5.965658842948589, - "grad_norm": 0.003997992258518934, - "learning_rate": 0.00019998244861244247, - "loss": 46.0, - "step": 37044 - }, - { - "epoch": 5.965819880027376, - "grad_norm": 0.0010850883554667234, - "learning_rate": 0.00019998244766460474, - "loss": 46.0, - "step": 37045 - }, - { - "epoch": 5.9659809171061635, - "grad_norm": 0.019312787801027298, - "learning_rate": 0.00019998244671674145, - "loss": 46.0, - "step": 37046 - }, - { - "epoch": 5.966141954184951, - "grad_norm": 0.008613995276391506, - "learning_rate": 0.00019998244576885255, - "loss": 46.0, - "step": 37047 - }, - { - "epoch": 5.966302991263738, - "grad_norm": 0.005646351259201765, - "learning_rate": 0.00019998244482093806, - "loss": 46.0, - "step": 37048 - }, - { - "epoch": 5.966464028342526, - "grad_norm": 0.00948098860681057, - "learning_rate": 0.00019998244387299798, - "loss": 46.0, - "step": 37049 - }, - { - "epoch": 5.966625065421313, - "grad_norm": 0.0033022197894752026, - "learning_rate": 0.0001999824429250323, - "loss": 46.0, - "step": 37050 - }, - { - "epoch": 5.966786102500101, - "grad_norm": 0.005580211989581585, - "learning_rate": 0.00019998244197704104, - "loss": 46.0, - "step": 37051 - }, - { - "epoch": 5.966947139578888, - "grad_norm": 0.0010011555859819055, - "learning_rate": 0.00019998244102902417, - "loss": 46.0, - "step": 37052 - }, - { - "epoch": 5.967108176657676, - "grad_norm": 0.0023300573229789734, - "learning_rate": 0.00019998244008098174, - "loss": 46.0, - "step": 37053 - }, - { - "epoch": 5.967269213736463, - "grad_norm": 0.007257194723933935, - "learning_rate": 0.0001999824391329137, - "loss": 46.0, - "step": 37054 - }, - { - "epoch": 5.967430250815251, - "grad_norm": 0.002618317026644945, - "learning_rate": 0.0001999824381848201, - "loss": 46.0, - "step": 37055 - }, - { - "epoch": 5.967591287894038, - "grad_norm": 0.002119618933647871, - "learning_rate": 0.00019998243723670086, - "loss": 46.0, - "step": 37056 - }, - { - "epoch": 5.967752324972825, - "grad_norm": 0.009573266841471195, - "learning_rate": 0.00019998243628855606, - "loss": 46.0, - "step": 37057 - }, - { - "epoch": 5.967913362051612, - "grad_norm": 0.020647570490837097, - "learning_rate": 0.00019998243534038567, - "loss": 46.0, - "step": 37058 - }, - { - "epoch": 5.9680743991303995, - "grad_norm": 0.008657350204885006, - "learning_rate": 0.0001999824343921897, - "loss": 46.0, - "step": 37059 - }, - { - "epoch": 5.968235436209187, - "grad_norm": 0.0030234032310545444, - "learning_rate": 0.00019998243344396813, - "loss": 46.0, - "step": 37060 - }, - { - "epoch": 5.968396473287974, - "grad_norm": 0.00368187646381557, - "learning_rate": 0.00019998243249572096, - "loss": 46.0, - "step": 37061 - }, - { - "epoch": 5.968557510366762, - "grad_norm": 0.0033871724735945463, - "learning_rate": 0.0001999824315474482, - "loss": 46.0, - "step": 37062 - }, - { - "epoch": 5.968718547445549, - "grad_norm": 0.007835880853235722, - "learning_rate": 0.00019998243059914984, - "loss": 46.0, - "step": 37063 - }, - { - "epoch": 5.968879584524337, - "grad_norm": 0.008382249623537064, - "learning_rate": 0.00019998242965082593, - "loss": 46.0, - "step": 37064 - }, - { - "epoch": 5.969040621603124, - "grad_norm": 0.0015771297039464116, - "learning_rate": 0.00019998242870247637, - "loss": 46.0, - "step": 37065 - }, - { - "epoch": 5.969201658681912, - "grad_norm": 0.001699367188848555, - "learning_rate": 0.0001999824277541013, - "loss": 46.0, - "step": 37066 - }, - { - "epoch": 5.969362695760699, - "grad_norm": 0.0034891997929662466, - "learning_rate": 0.00019998242680570056, - "loss": 46.0, - "step": 37067 - }, - { - "epoch": 5.969523732839487, - "grad_norm": 0.0017445620615035295, - "learning_rate": 0.00019998242585727427, - "loss": 46.0, - "step": 37068 - }, - { - "epoch": 5.969684769918274, - "grad_norm": 0.014812956564128399, - "learning_rate": 0.00019998242490882237, - "loss": 46.0, - "step": 37069 - }, - { - "epoch": 5.969845806997061, - "grad_norm": 0.011132532730698586, - "learning_rate": 0.0001999824239603449, - "loss": 46.0, - "step": 37070 - }, - { - "epoch": 5.970006844075849, - "grad_norm": 0.009903768077492714, - "learning_rate": 0.00019998242301184183, - "loss": 46.0, - "step": 37071 - }, - { - "epoch": 5.9701678811546355, - "grad_norm": 0.002271889941766858, - "learning_rate": 0.00019998242206331317, - "loss": 46.0, - "step": 37072 - }, - { - "epoch": 5.970328918233423, - "grad_norm": 0.009553348645567894, - "learning_rate": 0.00019998242111475892, - "loss": 46.0, - "step": 37073 - }, - { - "epoch": 5.97048995531221, - "grad_norm": 0.020357292145490646, - "learning_rate": 0.00019998242016617908, - "loss": 46.0, - "step": 37074 - }, - { - "epoch": 5.970650992390998, - "grad_norm": 0.0046440488658845425, - "learning_rate": 0.00019998241921757366, - "loss": 46.0, - "step": 37075 - }, - { - "epoch": 5.970812029469785, - "grad_norm": 0.006508314982056618, - "learning_rate": 0.00019998241826894262, - "loss": 46.0, - "step": 37076 - }, - { - "epoch": 5.970973066548573, - "grad_norm": 0.0016414884012192488, - "learning_rate": 0.00019998241732028602, - "loss": 46.0, - "step": 37077 - }, - { - "epoch": 5.97113410362736, - "grad_norm": 0.0027092830277979374, - "learning_rate": 0.0001999824163716038, - "loss": 46.0, - "step": 37078 - }, - { - "epoch": 5.971295140706148, - "grad_norm": 0.0013686578022316098, - "learning_rate": 0.00019998241542289603, - "loss": 46.0, - "step": 37079 - }, - { - "epoch": 5.971456177784935, - "grad_norm": 0.0020064592827111483, - "learning_rate": 0.00019998241447416264, - "loss": 46.0, - "step": 37080 - }, - { - "epoch": 5.9716172148637225, - "grad_norm": 0.0025119762867689133, - "learning_rate": 0.00019998241352540367, - "loss": 46.0, - "step": 37081 - }, - { - "epoch": 5.97177825194251, - "grad_norm": 0.006823559291660786, - "learning_rate": 0.0001999824125766191, - "loss": 46.0, - "step": 37082 - }, - { - "epoch": 5.9719392890212974, - "grad_norm": 0.004893995355814695, - "learning_rate": 0.00019998241162780896, - "loss": 46.0, - "step": 37083 - }, - { - "epoch": 5.972100326100085, - "grad_norm": 0.0027168591041117907, - "learning_rate": 0.00019998241067897322, - "loss": 46.0, - "step": 37084 - }, - { - "epoch": 5.9722613631788715, - "grad_norm": 0.003251564921811223, - "learning_rate": 0.00019998240973011187, - "loss": 46.0, - "step": 37085 - }, - { - "epoch": 5.97242240025766, - "grad_norm": 0.001785908592864871, - "learning_rate": 0.00019998240878122493, - "loss": 46.0, - "step": 37086 - }, - { - "epoch": 5.972583437336446, - "grad_norm": 0.006836011540144682, - "learning_rate": 0.00019998240783231243, - "loss": 46.0, - "step": 37087 - }, - { - "epoch": 5.972744474415234, - "grad_norm": 0.0011833381140604615, - "learning_rate": 0.00019998240688337432, - "loss": 46.0, - "step": 37088 - }, - { - "epoch": 5.972905511494021, - "grad_norm": 0.007628599647432566, - "learning_rate": 0.00019998240593441065, - "loss": 46.0, - "step": 37089 - }, - { - "epoch": 5.973066548572809, - "grad_norm": 0.0030606070067733526, - "learning_rate": 0.00019998240498542136, - "loss": 46.0, - "step": 37090 - }, - { - "epoch": 5.973227585651596, - "grad_norm": 0.002758630784228444, - "learning_rate": 0.00019998240403640646, - "loss": 46.0, - "step": 37091 - }, - { - "epoch": 5.973388622730384, - "grad_norm": 0.0008674160926602781, - "learning_rate": 0.000199982403087366, - "loss": 46.0, - "step": 37092 - }, - { - "epoch": 5.973549659809171, - "grad_norm": 0.003769145580008626, - "learning_rate": 0.00019998240213829992, - "loss": 46.0, - "step": 37093 - }, - { - "epoch": 5.9737106968879585, - "grad_norm": 0.011612356640398502, - "learning_rate": 0.0001999824011892083, - "loss": 46.0, - "step": 37094 - }, - { - "epoch": 5.973871733966746, - "grad_norm": 0.0016333921812474728, - "learning_rate": 0.00019998240024009104, - "loss": 46.0, - "step": 37095 - }, - { - "epoch": 5.974032771045533, - "grad_norm": 0.015395691618323326, - "learning_rate": 0.00019998239929094823, - "loss": 46.0, - "step": 37096 - }, - { - "epoch": 5.974193808124321, - "grad_norm": 0.005219616461545229, - "learning_rate": 0.0001999823983417798, - "loss": 46.0, - "step": 37097 - }, - { - "epoch": 5.974354845203108, - "grad_norm": 0.005606296006590128, - "learning_rate": 0.0001999823973925858, - "loss": 46.0, - "step": 37098 - }, - { - "epoch": 5.974515882281896, - "grad_norm": 0.001775104203261435, - "learning_rate": 0.0001999823964433662, - "loss": 46.0, - "step": 37099 - }, - { - "epoch": 5.974676919360682, - "grad_norm": 0.004449504427611828, - "learning_rate": 0.000199982395494121, - "loss": 46.0, - "step": 37100 - }, - { - "epoch": 5.974837956439471, - "grad_norm": 0.0008433775510638952, - "learning_rate": 0.00019998239454485024, - "loss": 46.0, - "step": 37101 - }, - { - "epoch": 5.974998993518257, - "grad_norm": 0.001981128705665469, - "learning_rate": 0.00019998239359555385, - "loss": 46.0, - "step": 37102 - }, - { - "epoch": 5.975160030597045, - "grad_norm": 0.01373693160712719, - "learning_rate": 0.0001999823926462319, - "loss": 46.0, - "step": 37103 - }, - { - "epoch": 5.975321067675832, - "grad_norm": 0.002771165454760194, - "learning_rate": 0.00019998239169688434, - "loss": 46.0, - "step": 37104 - }, - { - "epoch": 5.97548210475462, - "grad_norm": 0.004061555489897728, - "learning_rate": 0.00019998239074751122, - "loss": 46.0, - "step": 37105 - }, - { - "epoch": 5.975643141833407, - "grad_norm": 0.012783958576619625, - "learning_rate": 0.00019998238979811249, - "loss": 46.0, - "step": 37106 - }, - { - "epoch": 5.9758041789121945, - "grad_norm": 0.0038586093578487635, - "learning_rate": 0.00019998238884868816, - "loss": 46.0, - "step": 37107 - }, - { - "epoch": 5.975965215990982, - "grad_norm": 0.013749733567237854, - "learning_rate": 0.00019998238789923825, - "loss": 46.0, - "step": 37108 - }, - { - "epoch": 5.976126253069769, - "grad_norm": 0.0018850447377189994, - "learning_rate": 0.00019998238694976276, - "loss": 46.0, - "step": 37109 - }, - { - "epoch": 5.976287290148557, - "grad_norm": 0.005399515386670828, - "learning_rate": 0.00019998238600026164, - "loss": 46.0, - "step": 37110 - }, - { - "epoch": 5.976448327227344, - "grad_norm": 0.004808348137885332, - "learning_rate": 0.00019998238505073497, - "loss": 46.0, - "step": 37111 - }, - { - "epoch": 5.976609364306132, - "grad_norm": 0.0017414601752534509, - "learning_rate": 0.0001999823841011827, - "loss": 46.0, - "step": 37112 - }, - { - "epoch": 5.976770401384919, - "grad_norm": 0.010008828714489937, - "learning_rate": 0.00019998238315160481, - "loss": 46.0, - "step": 37113 - }, - { - "epoch": 5.976931438463707, - "grad_norm": 0.008640378713607788, - "learning_rate": 0.00019998238220200135, - "loss": 46.0, - "step": 37114 - }, - { - "epoch": 5.977092475542493, - "grad_norm": 0.0008898033411242068, - "learning_rate": 0.0001999823812523723, - "loss": 46.0, - "step": 37115 - }, - { - "epoch": 5.977253512621281, - "grad_norm": 0.005171169526875019, - "learning_rate": 0.0001999823803027177, - "loss": 46.0, - "step": 37116 - }, - { - "epoch": 5.977414549700068, - "grad_norm": 0.004170972388237715, - "learning_rate": 0.00019998237935303745, - "loss": 46.0, - "step": 37117 - }, - { - "epoch": 5.977575586778856, - "grad_norm": 0.0012544617056846619, - "learning_rate": 0.00019998237840333164, - "loss": 46.0, - "step": 37118 - }, - { - "epoch": 5.977736623857643, - "grad_norm": 0.0035998127423226833, - "learning_rate": 0.00019998237745360025, - "loss": 46.0, - "step": 37119 - }, - { - "epoch": 5.9778976609364305, - "grad_norm": 0.003074868582189083, - "learning_rate": 0.00019998237650384324, - "loss": 46.0, - "step": 37120 - }, - { - "epoch": 5.978058698015218, - "grad_norm": 0.0025523509830236435, - "learning_rate": 0.00019998237555406067, - "loss": 46.0, - "step": 37121 - }, - { - "epoch": 5.978219735094005, - "grad_norm": 0.005547038745135069, - "learning_rate": 0.00019998237460425248, - "loss": 46.0, - "step": 37122 - }, - { - "epoch": 5.978380772172793, - "grad_norm": 0.0020832493901252747, - "learning_rate": 0.0001999823736544187, - "loss": 46.0, - "step": 37123 - }, - { - "epoch": 5.97854180925158, - "grad_norm": 0.005689549725502729, - "learning_rate": 0.00019998237270455935, - "loss": 46.0, - "step": 37124 - }, - { - "epoch": 5.978702846330368, - "grad_norm": 0.0017920630052685738, - "learning_rate": 0.0001999823717546744, - "loss": 46.0, - "step": 37125 - }, - { - "epoch": 5.978863883409155, - "grad_norm": 0.017536768689751625, - "learning_rate": 0.00019998237080476384, - "loss": 46.0, - "step": 37126 - }, - { - "epoch": 5.979024920487943, - "grad_norm": 0.005700399167835712, - "learning_rate": 0.00019998236985482772, - "loss": 46.0, - "step": 37127 - }, - { - "epoch": 5.97918595756673, - "grad_norm": 0.007136261556297541, - "learning_rate": 0.00019998236890486602, - "loss": 46.0, - "step": 37128 - }, - { - "epoch": 5.9793469946455176, - "grad_norm": 0.0017615235410630703, - "learning_rate": 0.0001999823679548787, - "loss": 46.0, - "step": 37129 - }, - { - "epoch": 5.979508031724304, - "grad_norm": 0.00639650272205472, - "learning_rate": 0.0001999823670048658, - "loss": 46.0, - "step": 37130 - }, - { - "epoch": 5.979669068803092, - "grad_norm": 0.003200503997504711, - "learning_rate": 0.00019998236605482732, - "loss": 46.0, - "step": 37131 - }, - { - "epoch": 5.979830105881879, - "grad_norm": 0.002541875932365656, - "learning_rate": 0.00019998236510476326, - "loss": 46.0, - "step": 37132 - }, - { - "epoch": 5.9799911429606665, - "grad_norm": 0.0013374063419178128, - "learning_rate": 0.00019998236415467356, - "loss": 46.0, - "step": 37133 - }, - { - "epoch": 5.980152180039454, - "grad_norm": 0.0021549889352172613, - "learning_rate": 0.0001999823632045583, - "loss": 46.0, - "step": 37134 - }, - { - "epoch": 5.980313217118241, - "grad_norm": 0.0028978127520531416, - "learning_rate": 0.00019998236225441743, - "loss": 46.0, - "step": 37135 - }, - { - "epoch": 5.980474254197029, - "grad_norm": 0.003962274640798569, - "learning_rate": 0.000199982361304251, - "loss": 46.0, - "step": 37136 - }, - { - "epoch": 5.980635291275816, - "grad_norm": 0.0036189004313200712, - "learning_rate": 0.00019998236035405898, - "loss": 46.0, - "step": 37137 - }, - { - "epoch": 5.980796328354604, - "grad_norm": 0.006551253143697977, - "learning_rate": 0.00019998235940384135, - "loss": 46.0, - "step": 37138 - }, - { - "epoch": 5.980957365433391, - "grad_norm": 0.003585867816582322, - "learning_rate": 0.00019998235845359813, - "loss": 46.0, - "step": 37139 - }, - { - "epoch": 5.981118402512179, - "grad_norm": 0.006087068468332291, - "learning_rate": 0.00019998235750332932, - "loss": 46.0, - "step": 37140 - }, - { - "epoch": 5.981279439590966, - "grad_norm": 0.005042713135480881, - "learning_rate": 0.00019998235655303495, - "loss": 46.0, - "step": 37141 - }, - { - "epoch": 5.9814404766697535, - "grad_norm": 0.00267610396258533, - "learning_rate": 0.00019998235560271494, - "loss": 46.0, - "step": 37142 - }, - { - "epoch": 5.981601513748541, - "grad_norm": 0.003711590077728033, - "learning_rate": 0.00019998235465236937, - "loss": 46.0, - "step": 37143 - }, - { - "epoch": 5.981762550827328, - "grad_norm": 0.005382280331104994, - "learning_rate": 0.00019998235370199822, - "loss": 46.0, - "step": 37144 - }, - { - "epoch": 5.981923587906115, - "grad_norm": 0.0033533256500959396, - "learning_rate": 0.00019998235275160147, - "loss": 46.0, - "step": 37145 - }, - { - "epoch": 5.9820846249849025, - "grad_norm": 0.002165958285331726, - "learning_rate": 0.00019998235180117911, - "loss": 46.0, - "step": 37146 - }, - { - "epoch": 5.98224566206369, - "grad_norm": 0.0034255855716764927, - "learning_rate": 0.00019998235085073117, - "loss": 46.0, - "step": 37147 - }, - { - "epoch": 5.982406699142477, - "grad_norm": 0.0010856261942535639, - "learning_rate": 0.00019998234990025764, - "loss": 46.0, - "step": 37148 - }, - { - "epoch": 5.982567736221265, - "grad_norm": 0.003644917393103242, - "learning_rate": 0.00019998234894975854, - "loss": 46.0, - "step": 37149 - }, - { - "epoch": 5.982728773300052, - "grad_norm": 0.01214093342423439, - "learning_rate": 0.0001999823479992338, - "loss": 46.0, - "step": 37150 - }, - { - "epoch": 5.98288981037884, - "grad_norm": 0.0041720070876181126, - "learning_rate": 0.00019998234704868352, - "loss": 46.0, - "step": 37151 - }, - { - "epoch": 5.983050847457627, - "grad_norm": 0.0014115235535427928, - "learning_rate": 0.0001999823460981076, - "loss": 46.0, - "step": 37152 - }, - { - "epoch": 5.983211884536415, - "grad_norm": 0.004376129247248173, - "learning_rate": 0.00019998234514750617, - "loss": 46.0, - "step": 37153 - }, - { - "epoch": 5.983372921615202, - "grad_norm": 0.002607706468552351, - "learning_rate": 0.00019998234419687908, - "loss": 46.0, - "step": 37154 - }, - { - "epoch": 5.9835339586939895, - "grad_norm": 0.011401246301829815, - "learning_rate": 0.00019998234324622642, - "loss": 46.0, - "step": 37155 - }, - { - "epoch": 5.983694995772777, - "grad_norm": 0.0013237452367320657, - "learning_rate": 0.00019998234229554819, - "loss": 46.0, - "step": 37156 - }, - { - "epoch": 5.983856032851564, - "grad_norm": 0.002852878998965025, - "learning_rate": 0.00019998234134484431, - "loss": 46.0, - "step": 37157 - }, - { - "epoch": 5.984017069930351, - "grad_norm": 0.0027569483499974012, - "learning_rate": 0.0001999823403941149, - "loss": 46.0, - "step": 37158 - }, - { - "epoch": 5.984178107009139, - "grad_norm": 0.02459539659321308, - "learning_rate": 0.0001999823394433599, - "loss": 46.0, - "step": 37159 - }, - { - "epoch": 5.984339144087926, - "grad_norm": 0.007951420731842518, - "learning_rate": 0.0001999823384925793, - "loss": 46.0, - "step": 37160 - }, - { - "epoch": 5.984500181166713, - "grad_norm": 0.0024845865555107594, - "learning_rate": 0.0001999823375417731, - "loss": 46.0, - "step": 37161 - }, - { - "epoch": 5.984661218245501, - "grad_norm": 0.007075507193803787, - "learning_rate": 0.0001999823365909413, - "loss": 46.0, - "step": 37162 - }, - { - "epoch": 5.984822255324288, - "grad_norm": 0.0025554453022778034, - "learning_rate": 0.0001999823356400839, - "loss": 46.0, - "step": 37163 - }, - { - "epoch": 5.984983292403076, - "grad_norm": 0.001774023869074881, - "learning_rate": 0.0001999823346892009, - "loss": 46.0, - "step": 37164 - }, - { - "epoch": 5.985144329481863, - "grad_norm": 0.003452279604971409, - "learning_rate": 0.00019998233373829237, - "loss": 46.0, - "step": 37165 - }, - { - "epoch": 5.985305366560651, - "grad_norm": 0.005397127475589514, - "learning_rate": 0.00019998233278735821, - "loss": 46.0, - "step": 37166 - }, - { - "epoch": 5.985466403639438, - "grad_norm": 0.004143360070884228, - "learning_rate": 0.00019998233183639847, - "loss": 46.0, - "step": 37167 - }, - { - "epoch": 5.9856274407182255, - "grad_norm": 0.0029504646081477404, - "learning_rate": 0.00019998233088541314, - "loss": 46.0, - "step": 37168 - }, - { - "epoch": 5.985788477797013, - "grad_norm": 0.003240946214646101, - "learning_rate": 0.00019998232993440222, - "loss": 46.0, - "step": 37169 - }, - { - "epoch": 5.9859495148758, - "grad_norm": 0.0009449656354263425, - "learning_rate": 0.0001999823289833657, - "loss": 46.0, - "step": 37170 - }, - { - "epoch": 5.986110551954588, - "grad_norm": 0.010652769356966019, - "learning_rate": 0.0001999823280323036, - "loss": 46.0, - "step": 37171 - }, - { - "epoch": 5.986271589033375, - "grad_norm": 0.0018262845696881413, - "learning_rate": 0.0001999823270812159, - "loss": 46.0, - "step": 37172 - }, - { - "epoch": 5.986432626112162, - "grad_norm": 0.01686927303671837, - "learning_rate": 0.0001999823261301026, - "loss": 46.0, - "step": 37173 - }, - { - "epoch": 5.98659366319095, - "grad_norm": 0.009832118637859821, - "learning_rate": 0.00019998232517896375, - "loss": 46.0, - "step": 37174 - }, - { - "epoch": 5.986754700269737, - "grad_norm": 0.0014192365342751145, - "learning_rate": 0.00019998232422779926, - "loss": 46.0, - "step": 37175 - }, - { - "epoch": 5.986915737348524, - "grad_norm": 0.005346552934497595, - "learning_rate": 0.0001999823232766092, - "loss": 46.0, - "step": 37176 - }, - { - "epoch": 5.987076774427312, - "grad_norm": 0.009062749333679676, - "learning_rate": 0.00019998232232539356, - "loss": 46.0, - "step": 37177 - }, - { - "epoch": 5.987237811506099, - "grad_norm": 0.003989992197602987, - "learning_rate": 0.00019998232137415233, - "loss": 46.0, - "step": 37178 - }, - { - "epoch": 5.987398848584887, - "grad_norm": 0.003291300730779767, - "learning_rate": 0.0001999823204228855, - "loss": 46.0, - "step": 37179 - }, - { - "epoch": 5.987559885663674, - "grad_norm": 0.001711393124423921, - "learning_rate": 0.00019998231947159308, - "loss": 46.0, - "step": 37180 - }, - { - "epoch": 5.9877209227424615, - "grad_norm": 0.0007577283540740609, - "learning_rate": 0.00019998231852027507, - "loss": 46.0, - "step": 37181 - }, - { - "epoch": 5.987881959821249, - "grad_norm": 0.0029843361116945744, - "learning_rate": 0.0001999823175689315, - "loss": 46.0, - "step": 37182 - }, - { - "epoch": 5.988042996900036, - "grad_norm": 0.007334083318710327, - "learning_rate": 0.00019998231661756227, - "loss": 46.0, - "step": 37183 - }, - { - "epoch": 5.988204033978824, - "grad_norm": 0.0019836369901895523, - "learning_rate": 0.0001999823156661675, - "loss": 46.0, - "step": 37184 - }, - { - "epoch": 5.988365071057611, - "grad_norm": 0.003181385574862361, - "learning_rate": 0.00019998231471474713, - "loss": 46.0, - "step": 37185 - }, - { - "epoch": 5.988526108136399, - "grad_norm": 0.008767525665462017, - "learning_rate": 0.0001999823137633012, - "loss": 46.0, - "step": 37186 - }, - { - "epoch": 5.988687145215186, - "grad_norm": 0.0035331561230123043, - "learning_rate": 0.00019998231281182963, - "loss": 46.0, - "step": 37187 - }, - { - "epoch": 5.988848182293973, - "grad_norm": 0.006100935861468315, - "learning_rate": 0.00019998231186033248, - "loss": 46.0, - "step": 37188 - }, - { - "epoch": 5.98900921937276, - "grad_norm": 0.005503537133336067, - "learning_rate": 0.00019998231090880974, - "loss": 46.0, - "step": 37189 - }, - { - "epoch": 5.989170256451548, - "grad_norm": 0.01521372701972723, - "learning_rate": 0.00019998230995726143, - "loss": 46.0, - "step": 37190 - }, - { - "epoch": 5.989331293530335, - "grad_norm": 0.006318739615380764, - "learning_rate": 0.00019998230900568752, - "loss": 46.0, - "step": 37191 - }, - { - "epoch": 5.989492330609123, - "grad_norm": 0.005000052507966757, - "learning_rate": 0.00019998230805408801, - "loss": 46.0, - "step": 37192 - }, - { - "epoch": 5.98965336768791, - "grad_norm": 0.004043553490191698, - "learning_rate": 0.00019998230710246292, - "loss": 46.0, - "step": 37193 - }, - { - "epoch": 5.9898144047666975, - "grad_norm": 0.010761475190520287, - "learning_rate": 0.00019998230615081225, - "loss": 46.0, - "step": 37194 - }, - { - "epoch": 5.989975441845485, - "grad_norm": 0.01122052501887083, - "learning_rate": 0.00019998230519913595, - "loss": 46.0, - "step": 37195 - }, - { - "epoch": 5.990136478924272, - "grad_norm": 0.00972563587129116, - "learning_rate": 0.00019998230424743413, - "loss": 46.0, - "step": 37196 - }, - { - "epoch": 5.99029751600306, - "grad_norm": 0.004708815831691027, - "learning_rate": 0.00019998230329570666, - "loss": 46.0, - "step": 37197 - }, - { - "epoch": 5.990458553081847, - "grad_norm": 0.001252274145372212, - "learning_rate": 0.0001999823023439536, - "loss": 46.0, - "step": 37198 - }, - { - "epoch": 5.990619590160635, - "grad_norm": 0.0019713020883500576, - "learning_rate": 0.00019998230139217497, - "loss": 46.0, - "step": 37199 - }, - { - "epoch": 5.990780627239422, - "grad_norm": 0.0017733210697770119, - "learning_rate": 0.00019998230044037074, - "loss": 46.0, - "step": 37200 - }, - { - "epoch": 5.99094166431821, - "grad_norm": 0.003067875513806939, - "learning_rate": 0.00019998229948854092, - "loss": 46.0, - "step": 37201 - }, - { - "epoch": 5.991102701396997, - "grad_norm": 0.002472830470651388, - "learning_rate": 0.00019998229853668552, - "loss": 46.0, - "step": 37202 - }, - { - "epoch": 5.991263738475784, - "grad_norm": 0.0011308375978842378, - "learning_rate": 0.00019998229758480453, - "loss": 46.0, - "step": 37203 - }, - { - "epoch": 5.991424775554571, - "grad_norm": 0.003935275599360466, - "learning_rate": 0.00019998229663289796, - "loss": 46.0, - "step": 37204 - }, - { - "epoch": 5.9915858126333585, - "grad_norm": 0.014076292514801025, - "learning_rate": 0.0001999822956809658, - "loss": 46.0, - "step": 37205 - }, - { - "epoch": 5.991746849712146, - "grad_norm": 0.004145600367337465, - "learning_rate": 0.00019998229472900801, - "loss": 46.0, - "step": 37206 - }, - { - "epoch": 5.9919078867909334, - "grad_norm": 0.0025334605015814304, - "learning_rate": 0.00019998229377702465, - "loss": 46.0, - "step": 37207 - }, - { - "epoch": 5.992068923869721, - "grad_norm": 0.0030865399166941643, - "learning_rate": 0.00019998229282501567, - "loss": 46.0, - "step": 37208 - }, - { - "epoch": 5.992229960948508, - "grad_norm": 0.004648580215871334, - "learning_rate": 0.00019998229187298116, - "loss": 46.0, - "step": 37209 - }, - { - "epoch": 5.992390998027296, - "grad_norm": 0.02252069115638733, - "learning_rate": 0.00019998229092092103, - "loss": 46.0, - "step": 37210 - }, - { - "epoch": 5.992552035106083, - "grad_norm": 0.010720000602304935, - "learning_rate": 0.00019998228996883532, - "loss": 46.0, - "step": 37211 - }, - { - "epoch": 5.992713072184871, - "grad_norm": 0.0017160896677523851, - "learning_rate": 0.000199982289016724, - "loss": 46.0, - "step": 37212 - }, - { - "epoch": 5.992874109263658, - "grad_norm": 0.02023484744131565, - "learning_rate": 0.0001999822880645871, - "loss": 46.0, - "step": 37213 - }, - { - "epoch": 5.993035146342446, - "grad_norm": 0.0030344570986926556, - "learning_rate": 0.00019998228711242463, - "loss": 46.0, - "step": 37214 - }, - { - "epoch": 5.993196183421233, - "grad_norm": 0.0024063277523964643, - "learning_rate": 0.00019998228616023654, - "loss": 46.0, - "step": 37215 - }, - { - "epoch": 5.9933572205000205, - "grad_norm": 0.0023261159658432007, - "learning_rate": 0.00019998228520802286, - "loss": 46.0, - "step": 37216 - }, - { - "epoch": 5.993518257578808, - "grad_norm": 0.007904346100986004, - "learning_rate": 0.0001999822842557836, - "loss": 46.0, - "step": 37217 - }, - { - "epoch": 5.9936792946575945, - "grad_norm": 0.006069988943636417, - "learning_rate": 0.00019998228330351877, - "loss": 46.0, - "step": 37218 - }, - { - "epoch": 5.993840331736382, - "grad_norm": 0.0035625228192657232, - "learning_rate": 0.0001999822823512283, - "loss": 46.0, - "step": 37219 - }, - { - "epoch": 5.994001368815169, - "grad_norm": 0.0026505119167268276, - "learning_rate": 0.00019998228139891228, - "loss": 46.0, - "step": 37220 - }, - { - "epoch": 5.994162405893957, - "grad_norm": 0.009775853715837002, - "learning_rate": 0.00019998228044657067, - "loss": 46.0, - "step": 37221 - }, - { - "epoch": 5.994323442972744, - "grad_norm": 0.008968714624643326, - "learning_rate": 0.00019998227949420344, - "loss": 46.0, - "step": 37222 - }, - { - "epoch": 5.994484480051532, - "grad_norm": 0.0030200222972780466, - "learning_rate": 0.00019998227854181065, - "loss": 46.0, - "step": 37223 - }, - { - "epoch": 5.994645517130319, - "grad_norm": 0.0058414386585354805, - "learning_rate": 0.00019998227758939225, - "loss": 46.0, - "step": 37224 - }, - { - "epoch": 5.994806554209107, - "grad_norm": 0.008276640437543392, - "learning_rate": 0.00019998227663694826, - "loss": 46.0, - "step": 37225 - }, - { - "epoch": 5.994967591287894, - "grad_norm": 0.019536063075065613, - "learning_rate": 0.0001999822756844787, - "loss": 46.0, - "step": 37226 - }, - { - "epoch": 5.995128628366682, - "grad_norm": 0.004572566598653793, - "learning_rate": 0.00019998227473198355, - "loss": 46.0, - "step": 37227 - }, - { - "epoch": 5.995289665445469, - "grad_norm": 0.009302785620093346, - "learning_rate": 0.00019998227377946278, - "loss": 46.0, - "step": 37228 - }, - { - "epoch": 5.9954507025242565, - "grad_norm": 0.0041602598503232, - "learning_rate": 0.0001999822728269164, - "loss": 46.0, - "step": 37229 - }, - { - "epoch": 5.995611739603044, - "grad_norm": 0.019374659284949303, - "learning_rate": 0.00019998227187434449, - "loss": 46.0, - "step": 37230 - }, - { - "epoch": 5.9957727766818305, - "grad_norm": 0.008892161771655083, - "learning_rate": 0.00019998227092174697, - "loss": 46.0, - "step": 37231 - }, - { - "epoch": 5.995933813760619, - "grad_norm": 0.006749188527464867, - "learning_rate": 0.00019998226996912385, - "loss": 46.0, - "step": 37232 - }, - { - "epoch": 5.996094850839405, - "grad_norm": 0.003060927614569664, - "learning_rate": 0.00019998226901647516, - "loss": 46.0, - "step": 37233 - }, - { - "epoch": 5.996255887918193, - "grad_norm": 0.004948275163769722, - "learning_rate": 0.00019998226806380086, - "loss": 46.0, - "step": 37234 - }, - { - "epoch": 5.99641692499698, - "grad_norm": 0.0046185594983398914, - "learning_rate": 0.00019998226711110098, - "loss": 46.0, - "step": 37235 - }, - { - "epoch": 5.996577962075768, - "grad_norm": 0.0008127544424496591, - "learning_rate": 0.0001999822661583755, - "loss": 46.0, - "step": 37236 - }, - { - "epoch": 5.996738999154555, - "grad_norm": 0.00691266218200326, - "learning_rate": 0.0001999822652056244, - "loss": 46.0, - "step": 37237 - }, - { - "epoch": 5.996900036233343, - "grad_norm": 0.0009220805368386209, - "learning_rate": 0.00019998226425284776, - "loss": 46.0, - "step": 37238 - }, - { - "epoch": 5.99706107331213, - "grad_norm": 0.012285149656236172, - "learning_rate": 0.00019998226330004553, - "loss": 46.0, - "step": 37239 - }, - { - "epoch": 5.997222110390918, - "grad_norm": 0.002424470614641905, - "learning_rate": 0.00019998226234721768, - "loss": 46.0, - "step": 37240 - }, - { - "epoch": 5.997383147469705, - "grad_norm": 0.004506948869675398, - "learning_rate": 0.00019998226139436427, - "loss": 46.0, - "step": 37241 - }, - { - "epoch": 5.9975441845484925, - "grad_norm": 0.002975679701194167, - "learning_rate": 0.00019998226044148524, - "loss": 46.0, - "step": 37242 - }, - { - "epoch": 5.99770522162728, - "grad_norm": 0.006455249153077602, - "learning_rate": 0.00019998225948858063, - "loss": 46.0, - "step": 37243 - }, - { - "epoch": 5.997866258706067, - "grad_norm": 0.0023767005186527967, - "learning_rate": 0.00019998225853565043, - "loss": 46.0, - "step": 37244 - }, - { - "epoch": 5.998027295784855, - "grad_norm": 0.003752077929675579, - "learning_rate": 0.00019998225758269462, - "loss": 46.0, - "step": 37245 - }, - { - "epoch": 5.998188332863641, - "grad_norm": 0.0031229937449097633, - "learning_rate": 0.00019998225662971327, - "loss": 46.0, - "step": 37246 - }, - { - "epoch": 5.99834936994243, - "grad_norm": 0.002404554048553109, - "learning_rate": 0.00019998225567670629, - "loss": 46.0, - "step": 37247 - }, - { - "epoch": 5.998510407021216, - "grad_norm": 0.009118358604609966, - "learning_rate": 0.00019998225472367374, - "loss": 46.0, - "step": 37248 - }, - { - "epoch": 5.998671444100004, - "grad_norm": 0.0017667836509644985, - "learning_rate": 0.00019998225377061558, - "loss": 46.0, - "step": 37249 - }, - { - "epoch": 5.998832481178791, - "grad_norm": 0.004705061204731464, - "learning_rate": 0.00019998225281753183, - "loss": 46.0, - "step": 37250 - }, - { - "epoch": 5.998993518257579, - "grad_norm": 0.001341478549875319, - "learning_rate": 0.0001999822518644225, - "loss": 46.0, - "step": 37251 - }, - { - "epoch": 5.999154555336366, - "grad_norm": 0.006101112347096205, - "learning_rate": 0.00019998225091128757, - "loss": 46.0, - "step": 37252 - }, - { - "epoch": 5.9993155924151536, - "grad_norm": 0.004955587908625603, - "learning_rate": 0.00019998224995812706, - "loss": 46.0, - "step": 37253 - }, - { - "epoch": 5.999476629493941, - "grad_norm": 0.009894069284200668, - "learning_rate": 0.00019998224900494096, - "loss": 46.0, - "step": 37254 - }, - { - "epoch": 5.9996376665727285, - "grad_norm": 0.0034555376041680574, - "learning_rate": 0.00019998224805172927, - "loss": 46.0, - "step": 37255 - }, - { - "epoch": 5.999798703651516, - "grad_norm": 0.003928289748728275, - "learning_rate": 0.00019998224709849197, - "loss": 46.0, - "step": 37256 - }, - { - "epoch": 5.999959740730303, - "grad_norm": 0.0034505431540310383, - "learning_rate": 0.00019998224614522911, - "loss": 46.0, - "step": 37257 - }, - { - "epoch": 5.999959740730303, - "eval_loss": 11.5, - "eval_runtime": 15.137, - "eval_samples_per_second": 172.756, - "eval_steps_per_second": 86.411, - "step": 37257 - }, - { - "epoch": 6.0001610370787875, - "grad_norm": 0.001225961372256279, - "learning_rate": 0.00019998224519194064, - "loss": 46.0, - "step": 37258 - }, - { - "epoch": 6.000322074157575, - "grad_norm": 0.00869645643979311, - "learning_rate": 0.00019998224423862658, - "loss": 46.0, - "step": 37259 - }, - { - "epoch": 6.000483111236362, - "grad_norm": 0.009907534345984459, - "learning_rate": 0.00019998224328528696, - "loss": 46.0, - "step": 37260 - }, - { - "epoch": 6.00064414831515, - "grad_norm": 0.000844583788421005, - "learning_rate": 0.0001999822423319217, - "loss": 46.0, - "step": 37261 - }, - { - "epoch": 6.000805185393937, - "grad_norm": 0.005630886647850275, - "learning_rate": 0.00019998224137853088, - "loss": 46.0, - "step": 37262 - }, - { - "epoch": 6.000966222472725, - "grad_norm": 0.0012189788976684213, - "learning_rate": 0.00019998224042511447, - "loss": 46.0, - "step": 37263 - }, - { - "epoch": 6.001127259551512, - "grad_norm": 0.0024373335763812065, - "learning_rate": 0.00019998223947167247, - "loss": 46.0, - "step": 37264 - }, - { - "epoch": 6.001288296630299, - "grad_norm": 0.0018727300921455026, - "learning_rate": 0.00019998223851820486, - "loss": 46.0, - "step": 37265 - }, - { - "epoch": 6.001449333709086, - "grad_norm": 0.008964837528765202, - "learning_rate": 0.0001999822375647117, - "loss": 46.0, - "step": 37266 - }, - { - "epoch": 6.001610370787874, - "grad_norm": 0.0017164716264232993, - "learning_rate": 0.0001999822366111929, - "loss": 46.0, - "step": 37267 - }, - { - "epoch": 6.001771407866661, - "grad_norm": 0.0040796780958771706, - "learning_rate": 0.00019998223565764853, - "loss": 46.0, - "step": 37268 - }, - { - "epoch": 6.0019324449454485, - "grad_norm": 0.0067762043327093124, - "learning_rate": 0.0001999822347040786, - "loss": 46.0, - "step": 37269 - }, - { - "epoch": 6.002093482024236, - "grad_norm": 0.0016794345574453473, - "learning_rate": 0.00019998223375048305, - "loss": 46.0, - "step": 37270 - }, - { - "epoch": 6.002254519103023, - "grad_norm": 0.015847422182559967, - "learning_rate": 0.0001999822327968619, - "loss": 46.0, - "step": 37271 - }, - { - "epoch": 6.002415556181811, - "grad_norm": 0.004540848545730114, - "learning_rate": 0.00019998223184321517, - "loss": 46.0, - "step": 37272 - }, - { - "epoch": 6.002576593260598, - "grad_norm": 0.0037385665345937014, - "learning_rate": 0.00019998223088954283, - "loss": 46.0, - "step": 37273 - }, - { - "epoch": 6.002737630339386, - "grad_norm": 0.006277099717408419, - "learning_rate": 0.0001999822299358449, - "loss": 46.0, - "step": 37274 - }, - { - "epoch": 6.002898667418173, - "grad_norm": 0.0035424153320491314, - "learning_rate": 0.00019998222898212146, - "loss": 46.0, - "step": 37275 - }, - { - "epoch": 6.003059704496961, - "grad_norm": 0.009347725659608841, - "learning_rate": 0.00019998222802837233, - "loss": 46.0, - "step": 37276 - }, - { - "epoch": 6.003220741575748, - "grad_norm": 0.003090252634137869, - "learning_rate": 0.00019998222707459767, - "loss": 46.0, - "step": 37277 - }, - { - "epoch": 6.003381778654536, - "grad_norm": 0.011063123121857643, - "learning_rate": 0.0001999822261207974, - "loss": 46.0, - "step": 37278 - }, - { - "epoch": 6.003542815733322, - "grad_norm": 0.014631282538175583, - "learning_rate": 0.00019998222516697155, - "loss": 46.0, - "step": 37279 - }, - { - "epoch": 6.00370385281211, - "grad_norm": 0.002892265561968088, - "learning_rate": 0.0001999822242131201, - "loss": 46.0, - "step": 37280 - }, - { - "epoch": 6.003864889890897, - "grad_norm": 0.002990832319483161, - "learning_rate": 0.00019998222325924304, - "loss": 46.0, - "step": 37281 - }, - { - "epoch": 6.0040259269696845, - "grad_norm": 0.012151021510362625, - "learning_rate": 0.00019998222230534042, - "loss": 46.0, - "step": 37282 - }, - { - "epoch": 6.004186964048472, - "grad_norm": 0.0014796452596783638, - "learning_rate": 0.0001999822213514122, - "loss": 46.0, - "step": 37283 - }, - { - "epoch": 6.004348001127259, - "grad_norm": 0.0038918552454560995, - "learning_rate": 0.00019998222039745836, - "loss": 46.0, - "step": 37284 - }, - { - "epoch": 6.004509038206047, - "grad_norm": 0.006824355572462082, - "learning_rate": 0.00019998221944347896, - "loss": 46.0, - "step": 37285 - }, - { - "epoch": 6.004670075284834, - "grad_norm": 0.0050840736366808414, - "learning_rate": 0.000199982218489474, - "loss": 46.0, - "step": 37286 - }, - { - "epoch": 6.004831112363622, - "grad_norm": 0.004692744929343462, - "learning_rate": 0.0001999822175354434, - "loss": 46.0, - "step": 37287 - }, - { - "epoch": 6.004992149442409, - "grad_norm": 0.0027085093315690756, - "learning_rate": 0.00019998221658138723, - "loss": 46.0, - "step": 37288 - }, - { - "epoch": 6.005153186521197, - "grad_norm": 0.02194962278008461, - "learning_rate": 0.00019998221562730545, - "loss": 46.0, - "step": 37289 - }, - { - "epoch": 6.005314223599984, - "grad_norm": 0.007821271196007729, - "learning_rate": 0.0001999822146731981, - "loss": 46.0, - "step": 37290 - }, - { - "epoch": 6.005475260678772, - "grad_norm": 0.005405803211033344, - "learning_rate": 0.00019998221371906515, - "loss": 46.0, - "step": 37291 - }, - { - "epoch": 6.005636297757559, - "grad_norm": 0.008932650089263916, - "learning_rate": 0.00019998221276490663, - "loss": 46.0, - "step": 37292 - }, - { - "epoch": 6.0057973348363465, - "grad_norm": 0.005691902246326208, - "learning_rate": 0.0001999822118107225, - "loss": 46.0, - "step": 37293 - }, - { - "epoch": 6.005958371915133, - "grad_norm": 0.007117095403373241, - "learning_rate": 0.00019998221085651278, - "loss": 46.0, - "step": 37294 - }, - { - "epoch": 6.0061194089939205, - "grad_norm": 0.006958093028515577, - "learning_rate": 0.0001999822099022775, - "loss": 46.0, - "step": 37295 - }, - { - "epoch": 6.006280446072708, - "grad_norm": 0.012638378888368607, - "learning_rate": 0.00019998220894801657, - "loss": 46.0, - "step": 37296 - }, - { - "epoch": 6.006441483151495, - "grad_norm": 0.0037501745391637087, - "learning_rate": 0.0001999822079937301, - "loss": 46.0, - "step": 37297 - }, - { - "epoch": 6.006602520230283, - "grad_norm": 0.006044814828783274, - "learning_rate": 0.00019998220703941802, - "loss": 46.0, - "step": 37298 - }, - { - "epoch": 6.00676355730907, - "grad_norm": 0.009478239342570305, - "learning_rate": 0.00019998220608508037, - "loss": 46.0, - "step": 37299 - }, - { - "epoch": 6.006924594387858, - "grad_norm": 0.003994721919298172, - "learning_rate": 0.0001999822051307171, - "loss": 46.0, - "step": 37300 - }, - { - "epoch": 6.007085631466645, - "grad_norm": 0.0020560252014547586, - "learning_rate": 0.00019998220417632824, - "loss": 46.0, - "step": 37301 - }, - { - "epoch": 6.007246668545433, - "grad_norm": 0.009916729293763638, - "learning_rate": 0.00019998220322191382, - "loss": 46.0, - "step": 37302 - }, - { - "epoch": 6.00740770562422, - "grad_norm": 0.0034186893608421087, - "learning_rate": 0.00019998220226747376, - "loss": 46.0, - "step": 37303 - }, - { - "epoch": 6.007568742703008, - "grad_norm": 0.0007954526809044182, - "learning_rate": 0.00019998220131300814, - "loss": 46.0, - "step": 37304 - }, - { - "epoch": 6.007729779781795, - "grad_norm": 0.004026367329061031, - "learning_rate": 0.00019998220035851697, - "loss": 46.0, - "step": 37305 - }, - { - "epoch": 6.0078908168605825, - "grad_norm": 0.011674848385155201, - "learning_rate": 0.00019998219940400017, - "loss": 46.0, - "step": 37306 - }, - { - "epoch": 6.00805185393937, - "grad_norm": 0.003670050762593746, - "learning_rate": 0.0001999821984494578, - "loss": 46.0, - "step": 37307 - }, - { - "epoch": 6.008212891018157, - "grad_norm": 0.011337011121213436, - "learning_rate": 0.0001999821974948898, - "loss": 46.0, - "step": 37308 - }, - { - "epoch": 6.008373928096944, - "grad_norm": 0.005951997358351946, - "learning_rate": 0.00019998219654029621, - "loss": 46.0, - "step": 37309 - }, - { - "epoch": 6.008534965175731, - "grad_norm": 0.008224834688007832, - "learning_rate": 0.00019998219558567705, - "loss": 46.0, - "step": 37310 - }, - { - "epoch": 6.008696002254519, - "grad_norm": 0.004736274015158415, - "learning_rate": 0.0001999821946310323, - "loss": 46.0, - "step": 37311 - }, - { - "epoch": 6.008857039333306, - "grad_norm": 0.0023458453360944986, - "learning_rate": 0.00019998219367636195, - "loss": 46.0, - "step": 37312 - }, - { - "epoch": 6.009018076412094, - "grad_norm": 0.006603058893233538, - "learning_rate": 0.00019998219272166602, - "loss": 46.0, - "step": 37313 - }, - { - "epoch": 6.009179113490881, - "grad_norm": 0.010915643535554409, - "learning_rate": 0.0001999821917669445, - "loss": 46.0, - "step": 37314 - }, - { - "epoch": 6.009340150569669, - "grad_norm": 0.004482037387788296, - "learning_rate": 0.0001999821908121974, - "loss": 46.0, - "step": 37315 - }, - { - "epoch": 6.009501187648456, - "grad_norm": 0.015618345700204372, - "learning_rate": 0.0001999821898574247, - "loss": 46.0, - "step": 37316 - }, - { - "epoch": 6.0096622247272435, - "grad_norm": 0.01698637567460537, - "learning_rate": 0.00019998218890262642, - "loss": 46.0, - "step": 37317 - }, - { - "epoch": 6.009823261806031, - "grad_norm": 0.0017972791101783514, - "learning_rate": 0.0001999821879478025, - "loss": 46.0, - "step": 37318 - }, - { - "epoch": 6.009984298884818, - "grad_norm": 0.005839345045387745, - "learning_rate": 0.00019998218699295305, - "loss": 46.0, - "step": 37319 - }, - { - "epoch": 6.010145335963606, - "grad_norm": 0.013213645666837692, - "learning_rate": 0.00019998218603807798, - "loss": 46.0, - "step": 37320 - }, - { - "epoch": 6.010306373042393, - "grad_norm": 0.010846849530935287, - "learning_rate": 0.00019998218508317735, - "loss": 46.0, - "step": 37321 - }, - { - "epoch": 6.010467410121181, - "grad_norm": 0.002234281040728092, - "learning_rate": 0.00019998218412825108, - "loss": 46.0, - "step": 37322 - }, - { - "epoch": 6.010628447199967, - "grad_norm": 0.0065218741074204445, - "learning_rate": 0.00019998218317329925, - "loss": 46.0, - "step": 37323 - }, - { - "epoch": 6.010789484278755, - "grad_norm": 0.0009722562972456217, - "learning_rate": 0.00019998218221832184, - "loss": 46.0, - "step": 37324 - }, - { - "epoch": 6.010950521357542, - "grad_norm": 0.005423685535788536, - "learning_rate": 0.0001999821812633188, - "loss": 46.0, - "step": 37325 - }, - { - "epoch": 6.01111155843633, - "grad_norm": 0.001530548557639122, - "learning_rate": 0.0001999821803082902, - "loss": 46.0, - "step": 37326 - }, - { - "epoch": 6.011272595515117, - "grad_norm": 0.0016949146520346403, - "learning_rate": 0.000199982179353236, - "loss": 46.0, - "step": 37327 - }, - { - "epoch": 6.011433632593905, - "grad_norm": 0.008777761831879616, - "learning_rate": 0.00019998217839815622, - "loss": 46.0, - "step": 37328 - }, - { - "epoch": 6.011594669672692, - "grad_norm": 0.002952597802504897, - "learning_rate": 0.00019998217744305086, - "loss": 46.0, - "step": 37329 - }, - { - "epoch": 6.0117557067514795, - "grad_norm": 0.01073518954217434, - "learning_rate": 0.00019998217648791987, - "loss": 46.0, - "step": 37330 - }, - { - "epoch": 6.011916743830267, - "grad_norm": 0.005928761791437864, - "learning_rate": 0.00019998217553276332, - "loss": 46.0, - "step": 37331 - }, - { - "epoch": 6.012077780909054, - "grad_norm": 0.03993905708193779, - "learning_rate": 0.00019998217457758118, - "loss": 46.0, - "step": 37332 - }, - { - "epoch": 6.012238817987842, - "grad_norm": 0.0022269103210419416, - "learning_rate": 0.00019998217362237345, - "loss": 46.0, - "step": 37333 - }, - { - "epoch": 6.012399855066629, - "grad_norm": 0.0030804877169430256, - "learning_rate": 0.00019998217266714013, - "loss": 46.0, - "step": 37334 - }, - { - "epoch": 6.012560892145417, - "grad_norm": 0.0035887672565877438, - "learning_rate": 0.00019998217171188117, - "loss": 46.0, - "step": 37335 - }, - { - "epoch": 6.012721929224204, - "grad_norm": 0.0018088629003614187, - "learning_rate": 0.00019998217075659666, - "loss": 46.0, - "step": 37336 - }, - { - "epoch": 6.012882966302992, - "grad_norm": 0.004458155948668718, - "learning_rate": 0.00019998216980128655, - "loss": 46.0, - "step": 37337 - }, - { - "epoch": 6.013044003381778, - "grad_norm": 0.009427427314221859, - "learning_rate": 0.0001999821688459509, - "loss": 46.0, - "step": 37338 - }, - { - "epoch": 6.013205040460566, - "grad_norm": 0.0040281061083078384, - "learning_rate": 0.0001999821678905896, - "loss": 46.0, - "step": 37339 - }, - { - "epoch": 6.013366077539353, - "grad_norm": 0.0010320403380319476, - "learning_rate": 0.00019998216693520275, - "loss": 46.0, - "step": 37340 - }, - { - "epoch": 6.013527114618141, - "grad_norm": 0.007825557142496109, - "learning_rate": 0.0001999821659797903, - "loss": 46.0, - "step": 37341 - }, - { - "epoch": 6.013688151696928, - "grad_norm": 0.0043025859631598, - "learning_rate": 0.00019998216502435223, - "loss": 46.0, - "step": 37342 - }, - { - "epoch": 6.0138491887757155, - "grad_norm": 0.0018057033885270357, - "learning_rate": 0.0001999821640688886, - "loss": 46.0, - "step": 37343 - }, - { - "epoch": 6.014010225854503, - "grad_norm": 0.002024852205067873, - "learning_rate": 0.00019998216311339936, - "loss": 46.0, - "step": 37344 - }, - { - "epoch": 6.01417126293329, - "grad_norm": 0.007777228951454163, - "learning_rate": 0.00019998216215788453, - "loss": 46.0, - "step": 37345 - }, - { - "epoch": 6.014332300012078, - "grad_norm": 0.009013590402901173, - "learning_rate": 0.00019998216120234411, - "loss": 46.0, - "step": 37346 - }, - { - "epoch": 6.014493337090865, - "grad_norm": 0.0059803444892168045, - "learning_rate": 0.00019998216024677814, - "loss": 46.0, - "step": 37347 - }, - { - "epoch": 6.014654374169653, - "grad_norm": 0.001572663546539843, - "learning_rate": 0.00019998215929118652, - "loss": 46.0, - "step": 37348 - }, - { - "epoch": 6.01481541124844, - "grad_norm": 0.002022368600592017, - "learning_rate": 0.00019998215833556934, - "loss": 46.0, - "step": 37349 - }, - { - "epoch": 6.014976448327228, - "grad_norm": 0.006453935522586107, - "learning_rate": 0.00019998215737992658, - "loss": 46.0, - "step": 37350 - }, - { - "epoch": 6.015137485406015, - "grad_norm": 0.006935174111276865, - "learning_rate": 0.00019998215642425823, - "loss": 46.0, - "step": 37351 - }, - { - "epoch": 6.015298522484803, - "grad_norm": 0.017548134550452232, - "learning_rate": 0.00019998215546856426, - "loss": 46.0, - "step": 37352 - }, - { - "epoch": 6.015459559563589, - "grad_norm": 0.008491123095154762, - "learning_rate": 0.0001999821545128447, - "loss": 46.0, - "step": 37353 - }, - { - "epoch": 6.015620596642377, - "grad_norm": 0.0016087363474071026, - "learning_rate": 0.0001999821535570996, - "loss": 46.0, - "step": 37354 - }, - { - "epoch": 6.015781633721164, - "grad_norm": 0.0031094527803361416, - "learning_rate": 0.00019998215260132884, - "loss": 46.0, - "step": 37355 - }, - { - "epoch": 6.0159426707999515, - "grad_norm": 0.0021818201057612896, - "learning_rate": 0.00019998215164553255, - "loss": 46.0, - "step": 37356 - }, - { - "epoch": 6.016103707878739, - "grad_norm": 0.003377706278115511, - "learning_rate": 0.00019998215068971065, - "loss": 46.0, - "step": 37357 - }, - { - "epoch": 6.016264744957526, - "grad_norm": 0.00046303318231366575, - "learning_rate": 0.00019998214973386316, - "loss": 46.0, - "step": 37358 - }, - { - "epoch": 6.016425782036314, - "grad_norm": 0.006079382263123989, - "learning_rate": 0.00019998214877799003, - "loss": 46.0, - "step": 37359 - }, - { - "epoch": 6.016586819115101, - "grad_norm": 0.011122734285891056, - "learning_rate": 0.0001999821478220914, - "loss": 46.0, - "step": 37360 - }, - { - "epoch": 6.016747856193889, - "grad_norm": 0.012124674394726753, - "learning_rate": 0.00019998214686616712, - "loss": 46.0, - "step": 37361 - }, - { - "epoch": 6.016908893272676, - "grad_norm": 0.008392972871661186, - "learning_rate": 0.00019998214591021726, - "loss": 46.0, - "step": 37362 - }, - { - "epoch": 6.017069930351464, - "grad_norm": 0.010684572160243988, - "learning_rate": 0.0001999821449542418, - "loss": 46.0, - "step": 37363 - }, - { - "epoch": 6.017230967430251, - "grad_norm": 0.0042912717908620834, - "learning_rate": 0.0001999821439982408, - "loss": 46.0, - "step": 37364 - }, - { - "epoch": 6.0173920045090385, - "grad_norm": 0.016456574201583862, - "learning_rate": 0.00019998214304221414, - "loss": 46.0, - "step": 37365 - }, - { - "epoch": 6.017553041587826, - "grad_norm": 0.001738523948006332, - "learning_rate": 0.00019998214208616193, - "loss": 46.0, - "step": 37366 - }, - { - "epoch": 6.017714078666613, - "grad_norm": 0.0020229408983141184, - "learning_rate": 0.00019998214113008412, - "loss": 46.0, - "step": 37367 - }, - { - "epoch": 6.0178751157454, - "grad_norm": 0.0016072492580860853, - "learning_rate": 0.0001999821401739807, - "loss": 46.0, - "step": 37368 - }, - { - "epoch": 6.0180361528241875, - "grad_norm": 0.011316402815282345, - "learning_rate": 0.0001999821392178517, - "loss": 46.0, - "step": 37369 - }, - { - "epoch": 6.018197189902975, - "grad_norm": 0.001191994408145547, - "learning_rate": 0.00019998213826169715, - "loss": 46.0, - "step": 37370 - }, - { - "epoch": 6.018358226981762, - "grad_norm": 0.012513085268437862, - "learning_rate": 0.00019998213730551697, - "loss": 46.0, - "step": 37371 - }, - { - "epoch": 6.01851926406055, - "grad_norm": 0.0015407592291012406, - "learning_rate": 0.00019998213634931123, - "loss": 46.0, - "step": 37372 - }, - { - "epoch": 6.018680301139337, - "grad_norm": 0.0031680718529969454, - "learning_rate": 0.00019998213539307988, - "loss": 46.0, - "step": 37373 - }, - { - "epoch": 6.018841338218125, - "grad_norm": 0.01608908548951149, - "learning_rate": 0.00019998213443682294, - "loss": 46.0, - "step": 37374 - }, - { - "epoch": 6.019002375296912, - "grad_norm": 0.0027943188324570656, - "learning_rate": 0.0001999821334805404, - "loss": 46.0, - "step": 37375 - }, - { - "epoch": 6.0191634123757, - "grad_norm": 0.0034422928001731634, - "learning_rate": 0.00019998213252423228, - "loss": 46.0, - "step": 37376 - }, - { - "epoch": 6.019324449454487, - "grad_norm": 0.005031601991504431, - "learning_rate": 0.00019998213156789858, - "loss": 46.0, - "step": 37377 - }, - { - "epoch": 6.0194854865332745, - "grad_norm": 0.006252941209822893, - "learning_rate": 0.00019998213061153927, - "loss": 46.0, - "step": 37378 - }, - { - "epoch": 6.019646523612062, - "grad_norm": 0.0052984305657446384, - "learning_rate": 0.00019998212965515437, - "loss": 46.0, - "step": 37379 - }, - { - "epoch": 6.019807560690849, - "grad_norm": 0.007897310890257359, - "learning_rate": 0.0001999821286987439, - "loss": 46.0, - "step": 37380 - }, - { - "epoch": 6.019968597769637, - "grad_norm": 0.0014272683765739202, - "learning_rate": 0.00019998212774230783, - "loss": 46.0, - "step": 37381 - }, - { - "epoch": 6.0201296348484234, - "grad_norm": 0.005481986328959465, - "learning_rate": 0.00019998212678584617, - "loss": 46.0, - "step": 37382 - }, - { - "epoch": 6.020290671927211, - "grad_norm": 0.001721731387078762, - "learning_rate": 0.00019998212582935892, - "loss": 46.0, - "step": 37383 - }, - { - "epoch": 6.020451709005998, - "grad_norm": 0.000984185840934515, - "learning_rate": 0.00019998212487284606, - "loss": 46.0, - "step": 37384 - }, - { - "epoch": 6.020612746084786, - "grad_norm": 0.0015970334643498063, - "learning_rate": 0.00019998212391630763, - "loss": 46.0, - "step": 37385 - }, - { - "epoch": 6.020773783163573, - "grad_norm": 0.0028565407264977694, - "learning_rate": 0.00019998212295974362, - "loss": 46.0, - "step": 37386 - }, - { - "epoch": 6.020934820242361, - "grad_norm": 0.0026087737642228603, - "learning_rate": 0.000199982122003154, - "loss": 46.0, - "step": 37387 - }, - { - "epoch": 6.021095857321148, - "grad_norm": 0.0014479245292022824, - "learning_rate": 0.00019998212104653878, - "loss": 46.0, - "step": 37388 - }, - { - "epoch": 6.021256894399936, - "grad_norm": 0.002667824039235711, - "learning_rate": 0.000199982120089898, - "loss": 46.0, - "step": 37389 - }, - { - "epoch": 6.021417931478723, - "grad_norm": 0.006317758467048407, - "learning_rate": 0.00019998211913323162, - "loss": 46.0, - "step": 37390 - }, - { - "epoch": 6.0215789685575105, - "grad_norm": 0.0035046732518821955, - "learning_rate": 0.00019998211817653965, - "loss": 46.0, - "step": 37391 - }, - { - "epoch": 6.021740005636298, - "grad_norm": 0.0036133877001702785, - "learning_rate": 0.00019998211721982206, - "loss": 46.0, - "step": 37392 - }, - { - "epoch": 6.021901042715085, - "grad_norm": 0.0047370134852826595, - "learning_rate": 0.00019998211626307891, - "loss": 46.0, - "step": 37393 - }, - { - "epoch": 6.022062079793873, - "grad_norm": 0.0017004030523821712, - "learning_rate": 0.00019998211530631018, - "loss": 46.0, - "step": 37394 - }, - { - "epoch": 6.02222311687266, - "grad_norm": 0.007283463142812252, - "learning_rate": 0.00019998211434951583, - "loss": 46.0, - "step": 37395 - }, - { - "epoch": 6.022384153951447, - "grad_norm": 0.002013358287513256, - "learning_rate": 0.00019998211339269592, - "loss": 46.0, - "step": 37396 - }, - { - "epoch": 6.022545191030234, - "grad_norm": 0.004235814791172743, - "learning_rate": 0.0001999821124358504, - "loss": 46.0, - "step": 37397 - }, - { - "epoch": 6.022706228109022, - "grad_norm": 0.014187032356858253, - "learning_rate": 0.00019998211147897928, - "loss": 46.0, - "step": 37398 - }, - { - "epoch": 6.022867265187809, - "grad_norm": 0.0019778998102992773, - "learning_rate": 0.00019998211052208259, - "loss": 46.0, - "step": 37399 - }, - { - "epoch": 6.023028302266597, - "grad_norm": 0.021550370380282402, - "learning_rate": 0.0001999821095651603, - "loss": 46.0, - "step": 37400 - }, - { - "epoch": 6.023189339345384, - "grad_norm": 0.00854356400668621, - "learning_rate": 0.00019998210860821243, - "loss": 46.0, - "step": 37401 - }, - { - "epoch": 6.023350376424172, - "grad_norm": 0.0035862955264747143, - "learning_rate": 0.00019998210765123894, - "loss": 46.0, - "step": 37402 - }, - { - "epoch": 6.023511413502959, - "grad_norm": 0.006736627779901028, - "learning_rate": 0.0001999821066942399, - "loss": 46.0, - "step": 37403 - }, - { - "epoch": 6.0236724505817465, - "grad_norm": 0.008893050253391266, - "learning_rate": 0.00019998210573721523, - "loss": 46.0, - "step": 37404 - }, - { - "epoch": 6.023833487660534, - "grad_norm": 0.00908861868083477, - "learning_rate": 0.000199982104780165, - "loss": 46.0, - "step": 37405 - }, - { - "epoch": 6.023994524739321, - "grad_norm": 0.01650627702474594, - "learning_rate": 0.00019998210382308917, - "loss": 46.0, - "step": 37406 - }, - { - "epoch": 6.024155561818109, - "grad_norm": 0.006853667553514242, - "learning_rate": 0.00019998210286598775, - "loss": 46.0, - "step": 37407 - }, - { - "epoch": 6.024316598896896, - "grad_norm": 0.002855593804270029, - "learning_rate": 0.00019998210190886074, - "loss": 46.0, - "step": 37408 - }, - { - "epoch": 6.024477635975684, - "grad_norm": 0.006694091949611902, - "learning_rate": 0.00019998210095170814, - "loss": 46.0, - "step": 37409 - }, - { - "epoch": 6.024638673054471, - "grad_norm": 0.0018880751449614763, - "learning_rate": 0.00019998209999452996, - "loss": 46.0, - "step": 37410 - }, - { - "epoch": 6.024799710133258, - "grad_norm": 0.0077400715090334415, - "learning_rate": 0.0001999820990373262, - "loss": 46.0, - "step": 37411 - }, - { - "epoch": 6.024960747212045, - "grad_norm": 0.01441923901438713, - "learning_rate": 0.0001999820980800968, - "loss": 46.0, - "step": 37412 - }, - { - "epoch": 6.025121784290833, - "grad_norm": 0.005400239489972591, - "learning_rate": 0.00019998209712284183, - "loss": 46.0, - "step": 37413 - }, - { - "epoch": 6.02528282136962, - "grad_norm": 0.0047841304913163185, - "learning_rate": 0.0001999820961655613, - "loss": 46.0, - "step": 37414 - }, - { - "epoch": 6.025443858448408, - "grad_norm": 0.00750749371945858, - "learning_rate": 0.00019998209520825515, - "loss": 46.0, - "step": 37415 - }, - { - "epoch": 6.025604895527195, - "grad_norm": 0.006577526684850454, - "learning_rate": 0.00019998209425092342, - "loss": 46.0, - "step": 37416 - }, - { - "epoch": 6.0257659326059825, - "grad_norm": 0.002485182136297226, - "learning_rate": 0.0001999820932935661, - "loss": 46.0, - "step": 37417 - }, - { - "epoch": 6.02592696968477, - "grad_norm": 0.0026798786129802465, - "learning_rate": 0.00019998209233618316, - "loss": 46.0, - "step": 37418 - }, - { - "epoch": 6.026088006763557, - "grad_norm": 0.003676310181617737, - "learning_rate": 0.00019998209137877466, - "loss": 46.0, - "step": 37419 - }, - { - "epoch": 6.026249043842345, - "grad_norm": 0.0023098753299564123, - "learning_rate": 0.00019998209042134058, - "loss": 46.0, - "step": 37420 - }, - { - "epoch": 6.026410080921132, - "grad_norm": 0.012121430598199368, - "learning_rate": 0.0001999820894638809, - "loss": 46.0, - "step": 37421 - }, - { - "epoch": 6.02657111799992, - "grad_norm": 0.0013131614541634917, - "learning_rate": 0.00019998208850639562, - "loss": 46.0, - "step": 37422 - }, - { - "epoch": 6.026732155078707, - "grad_norm": 0.00855423603206873, - "learning_rate": 0.00019998208754888478, - "loss": 46.0, - "step": 37423 - }, - { - "epoch": 6.026893192157495, - "grad_norm": 0.0023163682781159878, - "learning_rate": 0.0001999820865913483, - "loss": 46.0, - "step": 37424 - }, - { - "epoch": 6.027054229236282, - "grad_norm": 0.009444854222238064, - "learning_rate": 0.00019998208563378627, - "loss": 46.0, - "step": 37425 - }, - { - "epoch": 6.027215266315069, - "grad_norm": 0.001725554233416915, - "learning_rate": 0.0001999820846761986, - "loss": 46.0, - "step": 37426 - }, - { - "epoch": 6.027376303393856, - "grad_norm": 0.0016803030157461762, - "learning_rate": 0.0001999820837185854, - "loss": 46.0, - "step": 37427 - }, - { - "epoch": 6.027537340472644, - "grad_norm": 0.006890660617500544, - "learning_rate": 0.00019998208276094658, - "loss": 46.0, - "step": 37428 - }, - { - "epoch": 6.027698377551431, - "grad_norm": 0.014234675094485283, - "learning_rate": 0.00019998208180328216, - "loss": 46.0, - "step": 37429 - }, - { - "epoch": 6.0278594146302185, - "grad_norm": 0.002693300601094961, - "learning_rate": 0.00019998208084559218, - "loss": 46.0, - "step": 37430 - }, - { - "epoch": 6.028020451709006, - "grad_norm": 0.00880126841366291, - "learning_rate": 0.0001999820798878766, - "loss": 46.0, - "step": 37431 - }, - { - "epoch": 6.028181488787793, - "grad_norm": 0.011463573202490807, - "learning_rate": 0.00019998207893013543, - "loss": 46.0, - "step": 37432 - }, - { - "epoch": 6.028342525866581, - "grad_norm": 0.010493949986994267, - "learning_rate": 0.00019998207797236866, - "loss": 46.0, - "step": 37433 - }, - { - "epoch": 6.028503562945368, - "grad_norm": 0.002940594684332609, - "learning_rate": 0.0001999820770145763, - "loss": 46.0, - "step": 37434 - }, - { - "epoch": 6.028664600024156, - "grad_norm": 0.0030396052170544863, - "learning_rate": 0.00019998207605675833, - "loss": 46.0, - "step": 37435 - }, - { - "epoch": 6.028825637102943, - "grad_norm": 0.002695484086871147, - "learning_rate": 0.0001999820750989148, - "loss": 46.0, - "step": 37436 - }, - { - "epoch": 6.028986674181731, - "grad_norm": 0.013518051244318485, - "learning_rate": 0.00019998207414104568, - "loss": 46.0, - "step": 37437 - }, - { - "epoch": 6.029147711260518, - "grad_norm": 0.002834593178704381, - "learning_rate": 0.00019998207318315097, - "loss": 46.0, - "step": 37438 - }, - { - "epoch": 6.0293087483393055, - "grad_norm": 0.00237145833671093, - "learning_rate": 0.00019998207222523068, - "loss": 46.0, - "step": 37439 - }, - { - "epoch": 6.029469785418092, - "grad_norm": 0.011666586622595787, - "learning_rate": 0.00019998207126728477, - "loss": 46.0, - "step": 37440 - }, - { - "epoch": 6.0296308224968795, - "grad_norm": 0.0017858650535345078, - "learning_rate": 0.00019998207030931327, - "loss": 46.0, - "step": 37441 - }, - { - "epoch": 6.029791859575667, - "grad_norm": 0.005706263706088066, - "learning_rate": 0.0001999820693513162, - "loss": 46.0, - "step": 37442 - }, - { - "epoch": 6.029952896654454, - "grad_norm": 0.00561361201107502, - "learning_rate": 0.00019998206839329352, - "loss": 46.0, - "step": 37443 - }, - { - "epoch": 6.030113933733242, - "grad_norm": 0.005115280393511057, - "learning_rate": 0.00019998206743524526, - "loss": 46.0, - "step": 37444 - }, - { - "epoch": 6.030274970812029, - "grad_norm": 0.0019947404507547617, - "learning_rate": 0.0001999820664771714, - "loss": 46.0, - "step": 37445 - }, - { - "epoch": 6.030436007890817, - "grad_norm": 0.006450181361287832, - "learning_rate": 0.00019998206551907196, - "loss": 46.0, - "step": 37446 - }, - { - "epoch": 6.030597044969604, - "grad_norm": 0.009363310411572456, - "learning_rate": 0.00019998206456094692, - "loss": 46.0, - "step": 37447 - }, - { - "epoch": 6.030758082048392, - "grad_norm": 0.00960224587470293, - "learning_rate": 0.0001999820636027963, - "loss": 46.0, - "step": 37448 - }, - { - "epoch": 6.030919119127179, - "grad_norm": 0.0018297643400728703, - "learning_rate": 0.0001999820626446201, - "loss": 46.0, - "step": 37449 - }, - { - "epoch": 6.031080156205967, - "grad_norm": 0.006229907739907503, - "learning_rate": 0.0001999820616864183, - "loss": 46.0, - "step": 37450 - }, - { - "epoch": 6.031241193284754, - "grad_norm": 0.010183049365878105, - "learning_rate": 0.0001999820607281909, - "loss": 46.0, - "step": 37451 - }, - { - "epoch": 6.0314022303635415, - "grad_norm": 0.014703231863677502, - "learning_rate": 0.0001999820597699379, - "loss": 46.0, - "step": 37452 - }, - { - "epoch": 6.031563267442329, - "grad_norm": 0.0016461131162941456, - "learning_rate": 0.00019998205881165934, - "loss": 46.0, - "step": 37453 - }, - { - "epoch": 6.031724304521116, - "grad_norm": 0.0014284674543887377, - "learning_rate": 0.0001999820578533552, - "loss": 46.0, - "step": 37454 - }, - { - "epoch": 6.031885341599903, - "grad_norm": 0.005886481609195471, - "learning_rate": 0.00019998205689502542, - "loss": 46.0, - "step": 37455 - }, - { - "epoch": 6.03204637867869, - "grad_norm": 0.01851947233080864, - "learning_rate": 0.0001999820559366701, - "loss": 46.0, - "step": 37456 - }, - { - "epoch": 6.032207415757478, - "grad_norm": 0.00692877359688282, - "learning_rate": 0.00019998205497828914, - "loss": 46.0, - "step": 37457 - }, - { - "epoch": 6.032368452836265, - "grad_norm": 0.0025417099241167307, - "learning_rate": 0.00019998205401988264, - "loss": 46.0, - "step": 37458 - }, - { - "epoch": 6.032529489915053, - "grad_norm": 0.007743894588202238, - "learning_rate": 0.0001999820530614505, - "loss": 46.0, - "step": 37459 - }, - { - "epoch": 6.03269052699384, - "grad_norm": 0.004280034452676773, - "learning_rate": 0.00019998205210299282, - "loss": 46.0, - "step": 37460 - }, - { - "epoch": 6.032851564072628, - "grad_norm": 0.004860276356339455, - "learning_rate": 0.00019998205114450952, - "loss": 46.0, - "step": 37461 - }, - { - "epoch": 6.033012601151415, - "grad_norm": 0.010296354070305824, - "learning_rate": 0.00019998205018600062, - "loss": 46.0, - "step": 37462 - }, - { - "epoch": 6.033173638230203, - "grad_norm": 0.004620649851858616, - "learning_rate": 0.00019998204922746618, - "loss": 46.0, - "step": 37463 - }, - { - "epoch": 6.03333467530899, - "grad_norm": 0.0031424169428646564, - "learning_rate": 0.0001999820482689061, - "loss": 46.0, - "step": 37464 - }, - { - "epoch": 6.0334957123877775, - "grad_norm": 0.0029387453105300665, - "learning_rate": 0.00019998204731032045, - "loss": 46.0, - "step": 37465 - }, - { - "epoch": 6.033656749466565, - "grad_norm": 0.0012476368574425578, - "learning_rate": 0.0001999820463517092, - "loss": 46.0, - "step": 37466 - }, - { - "epoch": 6.033817786545352, - "grad_norm": 0.0011241765459999442, - "learning_rate": 0.00019998204539307235, - "loss": 46.0, - "step": 37467 - }, - { - "epoch": 6.03397882362414, - "grad_norm": 0.005471441429108381, - "learning_rate": 0.0001999820444344099, - "loss": 46.0, - "step": 37468 - }, - { - "epoch": 6.034139860702927, - "grad_norm": 0.003955712541937828, - "learning_rate": 0.00019998204347572188, - "loss": 46.0, - "step": 37469 - }, - { - "epoch": 6.034300897781714, - "grad_norm": 0.001570369116961956, - "learning_rate": 0.0001999820425170083, - "loss": 46.0, - "step": 37470 - }, - { - "epoch": 6.034461934860501, - "grad_norm": 0.0023459214717149734, - "learning_rate": 0.0001999820415582691, - "loss": 46.0, - "step": 37471 - }, - { - "epoch": 6.034622971939289, - "grad_norm": 0.0036348237190395594, - "learning_rate": 0.0001999820405995043, - "loss": 46.0, - "step": 37472 - }, - { - "epoch": 6.034784009018076, - "grad_norm": 0.003946497570723295, - "learning_rate": 0.00019998203964071394, - "loss": 46.0, - "step": 37473 - }, - { - "epoch": 6.034945046096864, - "grad_norm": 0.009683527983725071, - "learning_rate": 0.000199982038681898, - "loss": 46.0, - "step": 37474 - }, - { - "epoch": 6.035106083175651, - "grad_norm": 0.008714226074516773, - "learning_rate": 0.00019998203772305642, - "loss": 46.0, - "step": 37475 - }, - { - "epoch": 6.035267120254439, - "grad_norm": 0.004451780114322901, - "learning_rate": 0.00019998203676418926, - "loss": 46.0, - "step": 37476 - }, - { - "epoch": 6.035428157333226, - "grad_norm": 0.004312592092901468, - "learning_rate": 0.00019998203580529652, - "loss": 46.0, - "step": 37477 - }, - { - "epoch": 6.0355891944120135, - "grad_norm": 0.0028346767649054527, - "learning_rate": 0.0001999820348463782, - "loss": 46.0, - "step": 37478 - }, - { - "epoch": 6.035750231490801, - "grad_norm": 0.010172472335398197, - "learning_rate": 0.00019998203388743425, - "loss": 46.0, - "step": 37479 - }, - { - "epoch": 6.035911268569588, - "grad_norm": 0.019719993695616722, - "learning_rate": 0.00019998203292846475, - "loss": 46.0, - "step": 37480 - }, - { - "epoch": 6.036072305648376, - "grad_norm": 0.0034994955640286207, - "learning_rate": 0.00019998203196946968, - "loss": 46.0, - "step": 37481 - }, - { - "epoch": 6.036233342727163, - "grad_norm": 0.009868305176496506, - "learning_rate": 0.00019998203101044898, - "loss": 46.0, - "step": 37482 - }, - { - "epoch": 6.036394379805951, - "grad_norm": 0.0043615433387458324, - "learning_rate": 0.0001999820300514027, - "loss": 46.0, - "step": 37483 - }, - { - "epoch": 6.036555416884737, - "grad_norm": 0.010640202090144157, - "learning_rate": 0.0001999820290923308, - "loss": 46.0, - "step": 37484 - }, - { - "epoch": 6.036716453963525, - "grad_norm": 0.0031366292387247086, - "learning_rate": 0.00019998202813323337, - "loss": 46.0, - "step": 37485 - }, - { - "epoch": 6.036877491042312, - "grad_norm": 0.001333284075371921, - "learning_rate": 0.00019998202717411032, - "loss": 46.0, - "step": 37486 - }, - { - "epoch": 6.0370385281211, - "grad_norm": 0.004389138892292976, - "learning_rate": 0.00019998202621496165, - "loss": 46.0, - "step": 37487 - }, - { - "epoch": 6.037199565199887, - "grad_norm": 0.0037388508208096027, - "learning_rate": 0.00019998202525578745, - "loss": 46.0, - "step": 37488 - }, - { - "epoch": 6.0373606022786745, - "grad_norm": 0.0016469616675749421, - "learning_rate": 0.0001999820242965876, - "loss": 46.0, - "step": 37489 - }, - { - "epoch": 6.037521639357462, - "grad_norm": 0.0033098775893449783, - "learning_rate": 0.0001999820233373622, - "loss": 46.0, - "step": 37490 - }, - { - "epoch": 6.0376826764362495, - "grad_norm": 0.021230148151516914, - "learning_rate": 0.0001999820223781112, - "loss": 46.0, - "step": 37491 - }, - { - "epoch": 6.037843713515037, - "grad_norm": 0.0025015573482960463, - "learning_rate": 0.00019998202141883459, - "loss": 46.0, - "step": 37492 - }, - { - "epoch": 6.038004750593824, - "grad_norm": 0.00358877913095057, - "learning_rate": 0.0001999820204595324, - "loss": 46.0, - "step": 37493 - }, - { - "epoch": 6.038165787672612, - "grad_norm": 0.006047093775123358, - "learning_rate": 0.00019998201950020464, - "loss": 46.0, - "step": 37494 - }, - { - "epoch": 6.038326824751399, - "grad_norm": 0.0024844740983098745, - "learning_rate": 0.00019998201854085128, - "loss": 46.0, - "step": 37495 - }, - { - "epoch": 6.038487861830187, - "grad_norm": 0.0014714018907397985, - "learning_rate": 0.00019998201758147233, - "loss": 46.0, - "step": 37496 - }, - { - "epoch": 6.038648898908974, - "grad_norm": 0.0034114255104213953, - "learning_rate": 0.0001999820166220678, - "loss": 46.0, - "step": 37497 - }, - { - "epoch": 6.038809935987762, - "grad_norm": 0.0029032016173005104, - "learning_rate": 0.00019998201566263766, - "loss": 46.0, - "step": 37498 - }, - { - "epoch": 6.038970973066548, - "grad_norm": 0.002625077962875366, - "learning_rate": 0.00019998201470318192, - "loss": 46.0, - "step": 37499 - }, - { - "epoch": 6.039132010145336, - "grad_norm": 0.001698369043879211, - "learning_rate": 0.0001999820137437006, - "loss": 46.0, - "step": 37500 - }, - { - "epoch": 6.039293047224123, - "grad_norm": 0.0052072396501898766, - "learning_rate": 0.00019998201278419368, - "loss": 46.0, - "step": 37501 - }, - { - "epoch": 6.0394540843029105, - "grad_norm": 0.003848006948828697, - "learning_rate": 0.00019998201182466117, - "loss": 46.0, - "step": 37502 - }, - { - "epoch": 6.039615121381698, - "grad_norm": 0.010110526345670223, - "learning_rate": 0.0001999820108651031, - "loss": 46.0, - "step": 37503 - }, - { - "epoch": 6.039776158460485, - "grad_norm": 0.006718708202242851, - "learning_rate": 0.00019998200990551946, - "loss": 46.0, - "step": 37504 - }, - { - "epoch": 6.039937195539273, - "grad_norm": 0.001223905710503459, - "learning_rate": 0.00019998200894591017, - "loss": 46.0, - "step": 37505 - }, - { - "epoch": 6.04009823261806, - "grad_norm": 0.022507235407829285, - "learning_rate": 0.00019998200798627532, - "loss": 46.0, - "step": 37506 - }, - { - "epoch": 6.040259269696848, - "grad_norm": 0.004028702154755592, - "learning_rate": 0.00019998200702661488, - "loss": 46.0, - "step": 37507 - }, - { - "epoch": 6.040420306775635, - "grad_norm": 0.0016088886186480522, - "learning_rate": 0.00019998200606692883, - "loss": 46.0, - "step": 37508 - }, - { - "epoch": 6.040581343854423, - "grad_norm": 0.01807459071278572, - "learning_rate": 0.00019998200510721722, - "loss": 46.0, - "step": 37509 - }, - { - "epoch": 6.04074238093321, - "grad_norm": 0.01649654284119606, - "learning_rate": 0.00019998200414748, - "loss": 46.0, - "step": 37510 - }, - { - "epoch": 6.040903418011998, - "grad_norm": 0.004719622898846865, - "learning_rate": 0.00019998200318771718, - "loss": 46.0, - "step": 37511 - }, - { - "epoch": 6.041064455090785, - "grad_norm": 0.0028916921000927687, - "learning_rate": 0.00019998200222792878, - "loss": 46.0, - "step": 37512 - }, - { - "epoch": 6.041225492169572, - "grad_norm": 0.0018338811350986362, - "learning_rate": 0.0001999820012681148, - "loss": 46.0, - "step": 37513 - }, - { - "epoch": 6.041386529248359, - "grad_norm": 0.0019286073511466384, - "learning_rate": 0.0001999820003082752, - "loss": 46.0, - "step": 37514 - }, - { - "epoch": 6.0415475663271465, - "grad_norm": 0.0032685145270079374, - "learning_rate": 0.00019998199934841005, - "loss": 46.0, - "step": 37515 - }, - { - "epoch": 6.041708603405934, - "grad_norm": 0.0031629721634089947, - "learning_rate": 0.00019998199838851927, - "loss": 46.0, - "step": 37516 - }, - { - "epoch": 6.041869640484721, - "grad_norm": 0.002542752306908369, - "learning_rate": 0.00019998199742860294, - "loss": 46.0, - "step": 37517 - }, - { - "epoch": 6.042030677563509, - "grad_norm": 0.003851477289572358, - "learning_rate": 0.00019998199646866098, - "loss": 46.0, - "step": 37518 - }, - { - "epoch": 6.042191714642296, - "grad_norm": 0.007247148547321558, - "learning_rate": 0.00019998199550869345, - "loss": 46.0, - "step": 37519 - }, - { - "epoch": 6.042352751721084, - "grad_norm": 0.004933567717671394, - "learning_rate": 0.00019998199454870035, - "loss": 46.0, - "step": 37520 - }, - { - "epoch": 6.042513788799871, - "grad_norm": 0.01611747033894062, - "learning_rate": 0.0001999819935886816, - "loss": 46.0, - "step": 37521 - }, - { - "epoch": 6.042674825878659, - "grad_norm": 0.004654871765524149, - "learning_rate": 0.00019998199262863734, - "loss": 46.0, - "step": 37522 - }, - { - "epoch": 6.042835862957446, - "grad_norm": 0.010619490407407284, - "learning_rate": 0.00019998199166856745, - "loss": 46.0, - "step": 37523 - }, - { - "epoch": 6.042996900036234, - "grad_norm": 0.0028765748720616102, - "learning_rate": 0.00019998199070847195, - "loss": 46.0, - "step": 37524 - }, - { - "epoch": 6.043157937115021, - "grad_norm": 0.010387152433395386, - "learning_rate": 0.00019998198974835089, - "loss": 46.0, - "step": 37525 - }, - { - "epoch": 6.0433189741938085, - "grad_norm": 0.0020469878800213337, - "learning_rate": 0.0001999819887882042, - "loss": 46.0, - "step": 37526 - }, - { - "epoch": 6.043480011272596, - "grad_norm": 0.010006871074438095, - "learning_rate": 0.00019998198782803197, - "loss": 46.0, - "step": 37527 - }, - { - "epoch": 6.0436410483513825, - "grad_norm": 0.0013885344378650188, - "learning_rate": 0.00019998198686783412, - "loss": 46.0, - "step": 37528 - }, - { - "epoch": 6.04380208543017, - "grad_norm": 0.0037922165356576443, - "learning_rate": 0.0001999819859076107, - "loss": 46.0, - "step": 37529 - }, - { - "epoch": 6.043963122508957, - "grad_norm": 0.008302551694214344, - "learning_rate": 0.00019998198494736166, - "loss": 46.0, - "step": 37530 - }, - { - "epoch": 6.044124159587745, - "grad_norm": 0.006932658609002829, - "learning_rate": 0.00019998198398708705, - "loss": 46.0, - "step": 37531 - }, - { - "epoch": 6.044285196666532, - "grad_norm": 0.003112620674073696, - "learning_rate": 0.00019998198302678682, - "loss": 46.0, - "step": 37532 - }, - { - "epoch": 6.04444623374532, - "grad_norm": 0.009254147298634052, - "learning_rate": 0.00019998198206646107, - "loss": 46.0, - "step": 37533 - }, - { - "epoch": 6.044607270824107, - "grad_norm": 0.001310255378484726, - "learning_rate": 0.00019998198110610967, - "loss": 46.0, - "step": 37534 - }, - { - "epoch": 6.044768307902895, - "grad_norm": 0.0075689353980124, - "learning_rate": 0.00019998198014573268, - "loss": 46.0, - "step": 37535 - }, - { - "epoch": 6.044929344981682, - "grad_norm": 0.004640243481844664, - "learning_rate": 0.00019998197918533013, - "loss": 46.0, - "step": 37536 - }, - { - "epoch": 6.04509038206047, - "grad_norm": 0.0066511379554867744, - "learning_rate": 0.00019998197822490197, - "loss": 46.0, - "step": 37537 - }, - { - "epoch": 6.045251419139257, - "grad_norm": 0.004504088778048754, - "learning_rate": 0.00019998197726444822, - "loss": 46.0, - "step": 37538 - }, - { - "epoch": 6.0454124562180445, - "grad_norm": 0.0021557160653173923, - "learning_rate": 0.00019998197630396888, - "loss": 46.0, - "step": 37539 - }, - { - "epoch": 6.045573493296832, - "grad_norm": 0.006787695921957493, - "learning_rate": 0.00019998197534346396, - "loss": 46.0, - "step": 37540 - }, - { - "epoch": 6.045734530375619, - "grad_norm": 0.0007721335859969258, - "learning_rate": 0.00019998197438293345, - "loss": 46.0, - "step": 37541 - }, - { - "epoch": 6.045895567454407, - "grad_norm": 0.0205103550106287, - "learning_rate": 0.00019998197342237735, - "loss": 46.0, - "step": 37542 - }, - { - "epoch": 6.046056604533193, - "grad_norm": 0.008283515460789204, - "learning_rate": 0.00019998197246179564, - "loss": 46.0, - "step": 37543 - }, - { - "epoch": 6.046217641611981, - "grad_norm": 0.0037953618448227644, - "learning_rate": 0.00019998197150118837, - "loss": 46.0, - "step": 37544 - }, - { - "epoch": 6.046378678690768, - "grad_norm": 0.01417340524494648, - "learning_rate": 0.00019998197054055548, - "loss": 46.0, - "step": 37545 - }, - { - "epoch": 6.046539715769556, - "grad_norm": 0.00409560464322567, - "learning_rate": 0.000199981969579897, - "loss": 46.0, - "step": 37546 - }, - { - "epoch": 6.046700752848343, - "grad_norm": 0.0052768150344491005, - "learning_rate": 0.00019998196861921295, - "loss": 46.0, - "step": 37547 - }, - { - "epoch": 6.046861789927131, - "grad_norm": 0.0019367403583601117, - "learning_rate": 0.0001999819676585033, - "loss": 46.0, - "step": 37548 - }, - { - "epoch": 6.047022827005918, - "grad_norm": 0.003626108868047595, - "learning_rate": 0.00019998196669776807, - "loss": 46.0, - "step": 37549 - }, - { - "epoch": 6.0471838640847055, - "grad_norm": 0.008510523475706577, - "learning_rate": 0.00019998196573700722, - "loss": 46.0, - "step": 37550 - }, - { - "epoch": 6.047344901163493, - "grad_norm": 0.0049221389926970005, - "learning_rate": 0.0001999819647762208, - "loss": 46.0, - "step": 37551 - }, - { - "epoch": 6.04750593824228, - "grad_norm": 0.0011162991868332028, - "learning_rate": 0.0001999819638154088, - "loss": 46.0, - "step": 37552 - }, - { - "epoch": 6.047666975321068, - "grad_norm": 0.003094008192420006, - "learning_rate": 0.0001999819628545712, - "loss": 46.0, - "step": 37553 - }, - { - "epoch": 6.047828012399855, - "grad_norm": 0.006014491431415081, - "learning_rate": 0.00019998196189370803, - "loss": 46.0, - "step": 37554 - }, - { - "epoch": 6.047989049478643, - "grad_norm": 0.007106460630893707, - "learning_rate": 0.00019998196093281925, - "loss": 46.0, - "step": 37555 - }, - { - "epoch": 6.04815008655743, - "grad_norm": 0.0056846062652766705, - "learning_rate": 0.00019998195997190487, - "loss": 46.0, - "step": 37556 - }, - { - "epoch": 6.048311123636217, - "grad_norm": 0.0012455809628590941, - "learning_rate": 0.00019998195901096491, - "loss": 46.0, - "step": 37557 - }, - { - "epoch": 6.048472160715004, - "grad_norm": 0.0029519947711378336, - "learning_rate": 0.00019998195804999934, - "loss": 46.0, - "step": 37558 - }, - { - "epoch": 6.048633197793792, - "grad_norm": 0.02495420351624489, - "learning_rate": 0.0001999819570890082, - "loss": 46.0, - "step": 37559 - }, - { - "epoch": 6.048794234872579, - "grad_norm": 0.00945096742361784, - "learning_rate": 0.00019998195612799149, - "loss": 46.0, - "step": 37560 - }, - { - "epoch": 6.048955271951367, - "grad_norm": 0.007004577666521072, - "learning_rate": 0.00019998195516694918, - "loss": 46.0, - "step": 37561 - }, - { - "epoch": 6.049116309030154, - "grad_norm": 0.006936834659427404, - "learning_rate": 0.00019998195420588126, - "loss": 46.0, - "step": 37562 - }, - { - "epoch": 6.0492773461089415, - "grad_norm": 0.005849672947078943, - "learning_rate": 0.00019998195324478775, - "loss": 46.0, - "step": 37563 - }, - { - "epoch": 6.049438383187729, - "grad_norm": 0.0059710959903895855, - "learning_rate": 0.00019998195228366865, - "loss": 46.0, - "step": 37564 - }, - { - "epoch": 6.049599420266516, - "grad_norm": 0.0028126032557338476, - "learning_rate": 0.00019998195132252397, - "loss": 46.0, - "step": 37565 - }, - { - "epoch": 6.049760457345304, - "grad_norm": 0.00363634480163455, - "learning_rate": 0.00019998195036135372, - "loss": 46.0, - "step": 37566 - }, - { - "epoch": 6.049921494424091, - "grad_norm": 0.0015998026356101036, - "learning_rate": 0.00019998194940015784, - "loss": 46.0, - "step": 37567 - }, - { - "epoch": 6.050082531502879, - "grad_norm": 0.0032095424830913544, - "learning_rate": 0.0001999819484389364, - "loss": 46.0, - "step": 37568 - }, - { - "epoch": 6.050243568581666, - "grad_norm": 0.0014049247838556767, - "learning_rate": 0.00019998194747768936, - "loss": 46.0, - "step": 37569 - }, - { - "epoch": 6.050404605660454, - "grad_norm": 0.001702752080745995, - "learning_rate": 0.00019998194651641674, - "loss": 46.0, - "step": 37570 - }, - { - "epoch": 6.050565642739241, - "grad_norm": 0.010074181482195854, - "learning_rate": 0.00019998194555511848, - "loss": 46.0, - "step": 37571 - }, - { - "epoch": 6.050726679818028, - "grad_norm": 0.0052128564566373825, - "learning_rate": 0.00019998194459379466, - "loss": 46.0, - "step": 37572 - }, - { - "epoch": 6.050887716896815, - "grad_norm": 0.002733284141868353, - "learning_rate": 0.00019998194363244528, - "loss": 46.0, - "step": 37573 - }, - { - "epoch": 6.051048753975603, - "grad_norm": 0.0018818244570866227, - "learning_rate": 0.00019998194267107028, - "loss": 46.0, - "step": 37574 - }, - { - "epoch": 6.05120979105439, - "grad_norm": 0.007615710608661175, - "learning_rate": 0.0001999819417096697, - "loss": 46.0, - "step": 37575 - }, - { - "epoch": 6.0513708281331775, - "grad_norm": 0.008013997226953506, - "learning_rate": 0.00019998194074824353, - "loss": 46.0, - "step": 37576 - }, - { - "epoch": 6.051531865211965, - "grad_norm": 0.003439794760197401, - "learning_rate": 0.00019998193978679177, - "loss": 46.0, - "step": 37577 - }, - { - "epoch": 6.051692902290752, - "grad_norm": 0.0008201716700568795, - "learning_rate": 0.0001999819388253144, - "loss": 46.0, - "step": 37578 - }, - { - "epoch": 6.05185393936954, - "grad_norm": 0.013601507991552353, - "learning_rate": 0.00019998193786381144, - "loss": 46.0, - "step": 37579 - }, - { - "epoch": 6.052014976448327, - "grad_norm": 0.017344433814287186, - "learning_rate": 0.00019998193690228292, - "loss": 46.0, - "step": 37580 - }, - { - "epoch": 6.052176013527115, - "grad_norm": 0.017316481098532677, - "learning_rate": 0.00019998193594072882, - "loss": 46.0, - "step": 37581 - }, - { - "epoch": 6.052337050605902, - "grad_norm": 0.00462355138733983, - "learning_rate": 0.00019998193497914907, - "loss": 46.0, - "step": 37582 - }, - { - "epoch": 6.05249808768469, - "grad_norm": 0.010756057687103748, - "learning_rate": 0.00019998193401754376, - "loss": 46.0, - "step": 37583 - }, - { - "epoch": 6.052659124763477, - "grad_norm": 0.0032620029523968697, - "learning_rate": 0.00019998193305591287, - "loss": 46.0, - "step": 37584 - }, - { - "epoch": 6.052820161842265, - "grad_norm": 0.0014358529588207603, - "learning_rate": 0.0001999819320942564, - "loss": 46.0, - "step": 37585 - }, - { - "epoch": 6.052981198921051, - "grad_norm": 0.01641983352601528, - "learning_rate": 0.00019998193113257432, - "loss": 46.0, - "step": 37586 - }, - { - "epoch": 6.053142235999839, - "grad_norm": 0.001161521882750094, - "learning_rate": 0.00019998193017086664, - "loss": 46.0, - "step": 37587 - }, - { - "epoch": 6.053303273078626, - "grad_norm": 0.00905858725309372, - "learning_rate": 0.0001999819292091334, - "loss": 46.0, - "step": 37588 - }, - { - "epoch": 6.0534643101574135, - "grad_norm": 0.004667037632316351, - "learning_rate": 0.00019998192824737454, - "loss": 46.0, - "step": 37589 - }, - { - "epoch": 6.053625347236201, - "grad_norm": 0.005883590783923864, - "learning_rate": 0.00019998192728559012, - "loss": 46.0, - "step": 37590 - }, - { - "epoch": 6.053786384314988, - "grad_norm": 0.019329693168401718, - "learning_rate": 0.00019998192632378006, - "loss": 46.0, - "step": 37591 - }, - { - "epoch": 6.053947421393776, - "grad_norm": 0.003590578678995371, - "learning_rate": 0.00019998192536194444, - "loss": 46.0, - "step": 37592 - }, - { - "epoch": 6.054108458472563, - "grad_norm": 0.0036193940322846174, - "learning_rate": 0.00019998192440008326, - "loss": 46.0, - "step": 37593 - }, - { - "epoch": 6.054269495551351, - "grad_norm": 0.002063893247395754, - "learning_rate": 0.00019998192343819644, - "loss": 46.0, - "step": 37594 - }, - { - "epoch": 6.054430532630138, - "grad_norm": 0.003849130356684327, - "learning_rate": 0.00019998192247628403, - "loss": 46.0, - "step": 37595 - }, - { - "epoch": 6.054591569708926, - "grad_norm": 0.007457686122506857, - "learning_rate": 0.00019998192151434607, - "loss": 46.0, - "step": 37596 - }, - { - "epoch": 6.054752606787713, - "grad_norm": 0.010821538977324963, - "learning_rate": 0.00019998192055238249, - "loss": 46.0, - "step": 37597 - }, - { - "epoch": 6.0549136438665005, - "grad_norm": 0.009901958517730236, - "learning_rate": 0.00019998191959039334, - "loss": 46.0, - "step": 37598 - }, - { - "epoch": 6.055074680945288, - "grad_norm": 0.004473851528018713, - "learning_rate": 0.0001999819186283786, - "loss": 46.0, - "step": 37599 - }, - { - "epoch": 6.0552357180240755, - "grad_norm": 0.0010103661334142089, - "learning_rate": 0.00019998191766633824, - "loss": 46.0, - "step": 37600 - }, - { - "epoch": 6.055396755102862, - "grad_norm": 0.00481271930038929, - "learning_rate": 0.00019998191670427234, - "loss": 46.0, - "step": 37601 - }, - { - "epoch": 6.0555577921816495, - "grad_norm": 0.005023716017603874, - "learning_rate": 0.0001999819157421808, - "loss": 46.0, - "step": 37602 - }, - { - "epoch": 6.055718829260437, - "grad_norm": 0.006178389303386211, - "learning_rate": 0.00019998191478006366, - "loss": 46.0, - "step": 37603 - }, - { - "epoch": 6.055879866339224, - "grad_norm": 0.005437683314085007, - "learning_rate": 0.000199981913817921, - "loss": 46.0, - "step": 37604 - }, - { - "epoch": 6.056040903418012, - "grad_norm": 0.011029353365302086, - "learning_rate": 0.0001999819128557527, - "loss": 46.0, - "step": 37605 - }, - { - "epoch": 6.056201940496799, - "grad_norm": 0.008159259334206581, - "learning_rate": 0.00019998191189355883, - "loss": 46.0, - "step": 37606 - }, - { - "epoch": 6.056362977575587, - "grad_norm": 0.012174089439213276, - "learning_rate": 0.00019998191093133932, - "loss": 46.0, - "step": 37607 - }, - { - "epoch": 6.056524014654374, - "grad_norm": 0.0017896260833367705, - "learning_rate": 0.00019998190996909428, - "loss": 46.0, - "step": 37608 - }, - { - "epoch": 6.056685051733162, - "grad_norm": 0.005163069348782301, - "learning_rate": 0.0001999819090068236, - "loss": 46.0, - "step": 37609 - }, - { - "epoch": 6.056846088811949, - "grad_norm": 0.004615572281181812, - "learning_rate": 0.00019998190804452736, - "loss": 46.0, - "step": 37610 - }, - { - "epoch": 6.0570071258907365, - "grad_norm": 0.010767842642962933, - "learning_rate": 0.00019998190708220555, - "loss": 46.0, - "step": 37611 - }, - { - "epoch": 6.057168162969524, - "grad_norm": 0.003302559722214937, - "learning_rate": 0.0001999819061198581, - "loss": 46.0, - "step": 37612 - }, - { - "epoch": 6.057329200048311, - "grad_norm": 0.0014210636727511883, - "learning_rate": 0.0001999819051574851, - "loss": 46.0, - "step": 37613 - }, - { - "epoch": 6.057490237127099, - "grad_norm": 0.004590318538248539, - "learning_rate": 0.00019998190419508652, - "loss": 46.0, - "step": 37614 - }, - { - "epoch": 6.057651274205886, - "grad_norm": 0.0017021981766447425, - "learning_rate": 0.0001999819032326623, - "loss": 46.0, - "step": 37615 - }, - { - "epoch": 6.057812311284673, - "grad_norm": 0.0070489319041371346, - "learning_rate": 0.00019998190227021252, - "loss": 46.0, - "step": 37616 - }, - { - "epoch": 6.05797334836346, - "grad_norm": 0.005635182838886976, - "learning_rate": 0.00019998190130773714, - "loss": 46.0, - "step": 37617 - }, - { - "epoch": 6.058134385442248, - "grad_norm": 0.011269645765423775, - "learning_rate": 0.00019998190034523617, - "loss": 46.0, - "step": 37618 - }, - { - "epoch": 6.058295422521035, - "grad_norm": 0.007693962659686804, - "learning_rate": 0.00019998189938270962, - "loss": 46.0, - "step": 37619 - }, - { - "epoch": 6.058456459599823, - "grad_norm": 0.003759726881980896, - "learning_rate": 0.00019998189842015748, - "loss": 46.0, - "step": 37620 - }, - { - "epoch": 6.05861749667861, - "grad_norm": 0.0019457685993984342, - "learning_rate": 0.00019998189745757975, - "loss": 46.0, - "step": 37621 - }, - { - "epoch": 6.058778533757398, - "grad_norm": 0.004904123488813639, - "learning_rate": 0.0001999818964949764, - "loss": 46.0, - "step": 37622 - }, - { - "epoch": 6.058939570836185, - "grad_norm": 0.006958994548767805, - "learning_rate": 0.0001999818955323475, - "loss": 46.0, - "step": 37623 - }, - { - "epoch": 6.0591006079149725, - "grad_norm": 0.0025455632712692022, - "learning_rate": 0.00019998189456969302, - "loss": 46.0, - "step": 37624 - }, - { - "epoch": 6.05926164499376, - "grad_norm": 0.004838835448026657, - "learning_rate": 0.00019998189360701288, - "loss": 46.0, - "step": 37625 - }, - { - "epoch": 6.059422682072547, - "grad_norm": 0.003193289041519165, - "learning_rate": 0.0001999818926443072, - "loss": 46.0, - "step": 37626 - }, - { - "epoch": 6.059583719151335, - "grad_norm": 0.009384668432176113, - "learning_rate": 0.00019998189168157592, - "loss": 46.0, - "step": 37627 - }, - { - "epoch": 6.059744756230122, - "grad_norm": 0.001866149134002626, - "learning_rate": 0.00019998189071881908, - "loss": 46.0, - "step": 37628 - }, - { - "epoch": 6.05990579330891, - "grad_norm": 0.009719938039779663, - "learning_rate": 0.0001999818897560366, - "loss": 46.0, - "step": 37629 - }, - { - "epoch": 6.060066830387696, - "grad_norm": 0.003988360054790974, - "learning_rate": 0.00019998188879322856, - "loss": 46.0, - "step": 37630 - }, - { - "epoch": 6.060227867466484, - "grad_norm": 0.0032057813368737698, - "learning_rate": 0.0001999818878303949, - "loss": 46.0, - "step": 37631 - }, - { - "epoch": 6.060388904545271, - "grad_norm": 0.0011375918984413147, - "learning_rate": 0.0001999818868675357, - "loss": 46.0, - "step": 37632 - }, - { - "epoch": 6.060549941624059, - "grad_norm": 0.004053414333611727, - "learning_rate": 0.0001999818859046509, - "loss": 46.0, - "step": 37633 - }, - { - "epoch": 6.060710978702846, - "grad_norm": 0.013996666297316551, - "learning_rate": 0.00019998188494174047, - "loss": 46.0, - "step": 37634 - }, - { - "epoch": 6.060872015781634, - "grad_norm": 0.006290460005402565, - "learning_rate": 0.0001999818839788045, - "loss": 46.0, - "step": 37635 - }, - { - "epoch": 6.061033052860421, - "grad_norm": 0.013546953909099102, - "learning_rate": 0.00019998188301584285, - "loss": 46.0, - "step": 37636 - }, - { - "epoch": 6.0611940899392085, - "grad_norm": 0.0071923574432730675, - "learning_rate": 0.0001999818820528557, - "loss": 46.0, - "step": 37637 - }, - { - "epoch": 6.061355127017996, - "grad_norm": 0.008371626026928425, - "learning_rate": 0.0001999818810898429, - "loss": 46.0, - "step": 37638 - }, - { - "epoch": 6.061516164096783, - "grad_norm": 0.0030042158905416727, - "learning_rate": 0.00019998188012680456, - "loss": 46.0, - "step": 37639 - }, - { - "epoch": 6.061677201175571, - "grad_norm": 0.009203464724123478, - "learning_rate": 0.00019998187916374062, - "loss": 46.0, - "step": 37640 - }, - { - "epoch": 6.061838238254358, - "grad_norm": 0.0011970112100243568, - "learning_rate": 0.00019998187820065107, - "loss": 46.0, - "step": 37641 - }, - { - "epoch": 6.061999275333146, - "grad_norm": 0.010034559294581413, - "learning_rate": 0.00019998187723753596, - "loss": 46.0, - "step": 37642 - }, - { - "epoch": 6.062160312411933, - "grad_norm": 0.0034387779887765646, - "learning_rate": 0.00019998187627439523, - "loss": 46.0, - "step": 37643 - }, - { - "epoch": 6.062321349490721, - "grad_norm": 0.002076078439131379, - "learning_rate": 0.00019998187531122892, - "loss": 46.0, - "step": 37644 - }, - { - "epoch": 6.062482386569507, - "grad_norm": 0.0006307397852651775, - "learning_rate": 0.00019998187434803701, - "loss": 46.0, - "step": 37645 - }, - { - "epoch": 6.062643423648295, - "grad_norm": 0.002080549718812108, - "learning_rate": 0.0001999818733848195, - "loss": 46.0, - "step": 37646 - }, - { - "epoch": 6.062804460727082, - "grad_norm": 0.006836682092398405, - "learning_rate": 0.00019998187242157645, - "loss": 46.0, - "step": 37647 - }, - { - "epoch": 6.06296549780587, - "grad_norm": 0.0017179992282763124, - "learning_rate": 0.00019998187145830776, - "loss": 46.0, - "step": 37648 - }, - { - "epoch": 6.063126534884657, - "grad_norm": 0.0013875626027584076, - "learning_rate": 0.0001999818704950135, - "loss": 46.0, - "step": 37649 - }, - { - "epoch": 6.0632875719634445, - "grad_norm": 0.0037537780590355396, - "learning_rate": 0.00019998186953169365, - "loss": 46.0, - "step": 37650 - }, - { - "epoch": 6.063448609042232, - "grad_norm": 0.0062209502793848515, - "learning_rate": 0.0001999818685683482, - "loss": 46.0, - "step": 37651 - }, - { - "epoch": 6.063609646121019, - "grad_norm": 0.014454977586865425, - "learning_rate": 0.00019998186760497718, - "loss": 46.0, - "step": 37652 - }, - { - "epoch": 6.063770683199807, - "grad_norm": 0.002053608885034919, - "learning_rate": 0.00019998186664158056, - "loss": 46.0, - "step": 37653 - }, - { - "epoch": 6.063931720278594, - "grad_norm": 0.0029052826575934887, - "learning_rate": 0.00019998186567815834, - "loss": 46.0, - "step": 37654 - }, - { - "epoch": 6.064092757357382, - "grad_norm": 0.010470651090145111, - "learning_rate": 0.00019998186471471054, - "loss": 46.0, - "step": 37655 - }, - { - "epoch": 6.064253794436169, - "grad_norm": 0.004357074853032827, - "learning_rate": 0.00019998186375123713, - "loss": 46.0, - "step": 37656 - }, - { - "epoch": 6.064414831514957, - "grad_norm": 0.02475530281662941, - "learning_rate": 0.00019998186278773815, - "loss": 46.0, - "step": 37657 - }, - { - "epoch": 6.064575868593744, - "grad_norm": 0.00314020412042737, - "learning_rate": 0.0001999818618242136, - "loss": 46.0, - "step": 37658 - }, - { - "epoch": 6.064736905672531, - "grad_norm": 0.005822691135108471, - "learning_rate": 0.00019998186086066342, - "loss": 46.0, - "step": 37659 - }, - { - "epoch": 6.064897942751318, - "grad_norm": 0.002460055286064744, - "learning_rate": 0.00019998185989708765, - "loss": 46.0, - "step": 37660 - }, - { - "epoch": 6.065058979830106, - "grad_norm": 0.0021120214369148016, - "learning_rate": 0.0001999818589334863, - "loss": 46.0, - "step": 37661 - }, - { - "epoch": 6.065220016908893, - "grad_norm": 0.007058229297399521, - "learning_rate": 0.0001999818579698594, - "loss": 46.0, - "step": 37662 - }, - { - "epoch": 6.0653810539876805, - "grad_norm": 0.008226689882576466, - "learning_rate": 0.00019998185700620687, - "loss": 46.0, - "step": 37663 - }, - { - "epoch": 6.065542091066468, - "grad_norm": 0.0018811551854014397, - "learning_rate": 0.00019998185604252876, - "loss": 46.0, - "step": 37664 - }, - { - "epoch": 6.065703128145255, - "grad_norm": 0.007397475652396679, - "learning_rate": 0.00019998185507882506, - "loss": 46.0, - "step": 37665 - }, - { - "epoch": 6.065864165224043, - "grad_norm": 0.0032358323223888874, - "learning_rate": 0.00019998185411509574, - "loss": 46.0, - "step": 37666 - }, - { - "epoch": 6.06602520230283, - "grad_norm": 0.014195479452610016, - "learning_rate": 0.00019998185315134087, - "loss": 46.0, - "step": 37667 - }, - { - "epoch": 6.066186239381618, - "grad_norm": 0.0020239283330738544, - "learning_rate": 0.00019998185218756038, - "loss": 46.0, - "step": 37668 - }, - { - "epoch": 6.066347276460405, - "grad_norm": 0.0025001477915793657, - "learning_rate": 0.0001999818512237543, - "loss": 46.0, - "step": 37669 - }, - { - "epoch": 6.066508313539193, - "grad_norm": 0.005226124078035355, - "learning_rate": 0.00019998185025992267, - "loss": 46.0, - "step": 37670 - }, - { - "epoch": 6.06666935061798, - "grad_norm": 0.008876755833625793, - "learning_rate": 0.0001999818492960654, - "loss": 46.0, - "step": 37671 - }, - { - "epoch": 6.0668303876967675, - "grad_norm": 0.0037230125162750483, - "learning_rate": 0.00019998184833218256, - "loss": 46.0, - "step": 37672 - }, - { - "epoch": 6.066991424775555, - "grad_norm": 0.009621241129934788, - "learning_rate": 0.00019998184736827414, - "loss": 46.0, - "step": 37673 - }, - { - "epoch": 6.0671524618543415, - "grad_norm": 0.000965080747846514, - "learning_rate": 0.00019998184640434012, - "loss": 46.0, - "step": 37674 - }, - { - "epoch": 6.067313498933129, - "grad_norm": 0.0017883339896798134, - "learning_rate": 0.00019998184544038053, - "loss": 46.0, - "step": 37675 - }, - { - "epoch": 6.067474536011916, - "grad_norm": 0.010907691903412342, - "learning_rate": 0.00019998184447639531, - "loss": 46.0, - "step": 37676 - }, - { - "epoch": 6.067635573090704, - "grad_norm": 0.00790534820407629, - "learning_rate": 0.00019998184351238454, - "loss": 46.0, - "step": 37677 - }, - { - "epoch": 6.067796610169491, - "grad_norm": 0.013911493122577667, - "learning_rate": 0.00019998184254834816, - "loss": 46.0, - "step": 37678 - }, - { - "epoch": 6.067957647248279, - "grad_norm": 0.0084577901288867, - "learning_rate": 0.00019998184158428618, - "loss": 46.0, - "step": 37679 - }, - { - "epoch": 6.068118684327066, - "grad_norm": 0.005075663328170776, - "learning_rate": 0.00019998184062019862, - "loss": 46.0, - "step": 37680 - }, - { - "epoch": 6.068279721405854, - "grad_norm": 0.002144649624824524, - "learning_rate": 0.00019998183965608547, - "loss": 46.0, - "step": 37681 - }, - { - "epoch": 6.068440758484641, - "grad_norm": 0.0026876521296799183, - "learning_rate": 0.00019998183869194674, - "loss": 46.0, - "step": 37682 - }, - { - "epoch": 6.068601795563429, - "grad_norm": 0.0033802534453570843, - "learning_rate": 0.00019998183772778242, - "loss": 46.0, - "step": 37683 - }, - { - "epoch": 6.068762832642216, - "grad_norm": 0.017919015139341354, - "learning_rate": 0.00019998183676359248, - "loss": 46.0, - "step": 37684 - }, - { - "epoch": 6.0689238697210035, - "grad_norm": 0.004853111691772938, - "learning_rate": 0.00019998183579937698, - "loss": 46.0, - "step": 37685 - }, - { - "epoch": 6.069084906799791, - "grad_norm": 0.011810258962213993, - "learning_rate": 0.00019998183483513587, - "loss": 46.0, - "step": 37686 - }, - { - "epoch": 6.069245943878578, - "grad_norm": 0.024830302223563194, - "learning_rate": 0.00019998183387086917, - "loss": 46.0, - "step": 37687 - }, - { - "epoch": 6.069406980957366, - "grad_norm": 0.006081919185817242, - "learning_rate": 0.0001999818329065769, - "loss": 46.0, - "step": 37688 - }, - { - "epoch": 6.069568018036152, - "grad_norm": 0.001091701677069068, - "learning_rate": 0.00019998183194225902, - "loss": 46.0, - "step": 37689 - }, - { - "epoch": 6.06972905511494, - "grad_norm": 0.0021871202625334263, - "learning_rate": 0.00019998183097791556, - "loss": 46.0, - "step": 37690 - }, - { - "epoch": 6.069890092193727, - "grad_norm": 0.0007331203669309616, - "learning_rate": 0.0001999818300135465, - "loss": 46.0, - "step": 37691 - }, - { - "epoch": 6.070051129272515, - "grad_norm": 0.0017588506452739239, - "learning_rate": 0.00019998182904915187, - "loss": 46.0, - "step": 37692 - }, - { - "epoch": 6.070212166351302, - "grad_norm": 0.007409881334751844, - "learning_rate": 0.00019998182808473165, - "loss": 46.0, - "step": 37693 - }, - { - "epoch": 6.07037320343009, - "grad_norm": 0.0028296681120991707, - "learning_rate": 0.00019998182712028582, - "loss": 46.0, - "step": 37694 - }, - { - "epoch": 6.070534240508877, - "grad_norm": 0.003882122691720724, - "learning_rate": 0.0001999818261558144, - "loss": 46.0, - "step": 37695 - }, - { - "epoch": 6.070695277587665, - "grad_norm": 0.015723485499620438, - "learning_rate": 0.00019998182519131739, - "loss": 46.0, - "step": 37696 - }, - { - "epoch": 6.070856314666452, - "grad_norm": 0.007811098359525204, - "learning_rate": 0.00019998182422679482, - "loss": 46.0, - "step": 37697 - }, - { - "epoch": 6.0710173517452395, - "grad_norm": 0.00732022151350975, - "learning_rate": 0.0001999818232622466, - "loss": 46.0, - "step": 37698 - }, - { - "epoch": 6.071178388824027, - "grad_norm": 0.005656701512634754, - "learning_rate": 0.00019998182229767286, - "loss": 46.0, - "step": 37699 - }, - { - "epoch": 6.071339425902814, - "grad_norm": 0.0034385446924716234, - "learning_rate": 0.00019998182133307347, - "loss": 46.0, - "step": 37700 - }, - { - "epoch": 6.071500462981602, - "grad_norm": 0.008008560165762901, - "learning_rate": 0.00019998182036844853, - "loss": 46.0, - "step": 37701 - }, - { - "epoch": 6.071661500060389, - "grad_norm": 0.00456809950992465, - "learning_rate": 0.000199981819403798, - "loss": 46.0, - "step": 37702 - }, - { - "epoch": 6.071822537139177, - "grad_norm": 0.007869865745306015, - "learning_rate": 0.00019998181843912188, - "loss": 46.0, - "step": 37703 - }, - { - "epoch": 6.071983574217963, - "grad_norm": 0.001352488063275814, - "learning_rate": 0.00019998181747442014, - "loss": 46.0, - "step": 37704 - }, - { - "epoch": 6.072144611296751, - "grad_norm": 0.005654243752360344, - "learning_rate": 0.00019998181650969282, - "loss": 46.0, - "step": 37705 - }, - { - "epoch": 6.072305648375538, - "grad_norm": 0.0026084836572408676, - "learning_rate": 0.0001999818155449399, - "loss": 46.0, - "step": 37706 - }, - { - "epoch": 6.072466685454326, - "grad_norm": 0.0034040119498968124, - "learning_rate": 0.00019998181458016144, - "loss": 46.0, - "step": 37707 - }, - { - "epoch": 6.072627722533113, - "grad_norm": 0.007144168950617313, - "learning_rate": 0.00019998181361535733, - "loss": 46.0, - "step": 37708 - }, - { - "epoch": 6.072788759611901, - "grad_norm": 0.0042244018986821175, - "learning_rate": 0.00019998181265052764, - "loss": 46.0, - "step": 37709 - }, - { - "epoch": 6.072949796690688, - "grad_norm": 0.0025879214517772198, - "learning_rate": 0.00019998181168567238, - "loss": 46.0, - "step": 37710 - }, - { - "epoch": 6.0731108337694755, - "grad_norm": 0.00336763565428555, - "learning_rate": 0.00019998181072079153, - "loss": 46.0, - "step": 37711 - }, - { - "epoch": 6.073271870848263, - "grad_norm": 0.008521047420799732, - "learning_rate": 0.00019998180975588507, - "loss": 46.0, - "step": 37712 - }, - { - "epoch": 6.07343290792705, - "grad_norm": 0.007442219648510218, - "learning_rate": 0.00019998180879095306, - "loss": 46.0, - "step": 37713 - }, - { - "epoch": 6.073593945005838, - "grad_norm": 0.01680023968219757, - "learning_rate": 0.00019998180782599542, - "loss": 46.0, - "step": 37714 - }, - { - "epoch": 6.073754982084625, - "grad_norm": 0.016661720350384712, - "learning_rate": 0.0001999818068610122, - "loss": 46.0, - "step": 37715 - }, - { - "epoch": 6.073916019163413, - "grad_norm": 0.003566613420844078, - "learning_rate": 0.0001999818058960034, - "loss": 46.0, - "step": 37716 - }, - { - "epoch": 6.0740770562422, - "grad_norm": 0.002581510692834854, - "learning_rate": 0.000199981804930969, - "loss": 46.0, - "step": 37717 - }, - { - "epoch": 6.074238093320987, - "grad_norm": 0.0032831125427037477, - "learning_rate": 0.00019998180396590902, - "loss": 46.0, - "step": 37718 - }, - { - "epoch": 6.074399130399774, - "grad_norm": 0.0018455300014466047, - "learning_rate": 0.00019998180300082342, - "loss": 46.0, - "step": 37719 - }, - { - "epoch": 6.074560167478562, - "grad_norm": 0.0010420371545478702, - "learning_rate": 0.00019998180203571227, - "loss": 46.0, - "step": 37720 - }, - { - "epoch": 6.074721204557349, - "grad_norm": 0.00391122093424201, - "learning_rate": 0.00019998180107057552, - "loss": 46.0, - "step": 37721 - }, - { - "epoch": 6.0748822416361365, - "grad_norm": 0.01678929477930069, - "learning_rate": 0.00019998180010541317, - "loss": 46.0, - "step": 37722 - }, - { - "epoch": 6.075043278714924, - "grad_norm": 0.0030893736984580755, - "learning_rate": 0.00019998179914022522, - "loss": 46.0, - "step": 37723 - }, - { - "epoch": 6.0752043157937115, - "grad_norm": 0.0034665707498788834, - "learning_rate": 0.00019998179817501172, - "loss": 46.0, - "step": 37724 - }, - { - "epoch": 6.075365352872499, - "grad_norm": 0.010268663056194782, - "learning_rate": 0.00019998179720977257, - "loss": 46.0, - "step": 37725 - }, - { - "epoch": 6.075526389951286, - "grad_norm": 0.006135433446615934, - "learning_rate": 0.00019998179624450786, - "loss": 46.0, - "step": 37726 - }, - { - "epoch": 6.075687427030074, - "grad_norm": 0.0077285682782530785, - "learning_rate": 0.00019998179527921757, - "loss": 46.0, - "step": 37727 - }, - { - "epoch": 6.075848464108861, - "grad_norm": 0.014268687926232815, - "learning_rate": 0.00019998179431390166, - "loss": 46.0, - "step": 37728 - }, - { - "epoch": 6.076009501187649, - "grad_norm": 0.006600626278668642, - "learning_rate": 0.0001999817933485602, - "loss": 46.0, - "step": 37729 - }, - { - "epoch": 6.076170538266436, - "grad_norm": 0.007557873148471117, - "learning_rate": 0.0001999817923831931, - "loss": 46.0, - "step": 37730 - }, - { - "epoch": 6.076331575345224, - "grad_norm": 0.003869457636028528, - "learning_rate": 0.00019998179141780047, - "loss": 46.0, - "step": 37731 - }, - { - "epoch": 6.076492612424011, - "grad_norm": 0.004075285978615284, - "learning_rate": 0.00019998179045238221, - "loss": 46.0, - "step": 37732 - }, - { - "epoch": 6.076653649502798, - "grad_norm": 0.006278911139816046, - "learning_rate": 0.00019998178948693837, - "loss": 46.0, - "step": 37733 - }, - { - "epoch": 6.076814686581585, - "grad_norm": 0.01255898829549551, - "learning_rate": 0.00019998178852146894, - "loss": 46.0, - "step": 37734 - }, - { - "epoch": 6.0769757236603725, - "grad_norm": 0.0029725776985287666, - "learning_rate": 0.00019998178755597392, - "loss": 46.0, - "step": 37735 - }, - { - "epoch": 6.07713676073916, - "grad_norm": 0.0010472597787156701, - "learning_rate": 0.00019998178659045332, - "loss": 46.0, - "step": 37736 - }, - { - "epoch": 6.077297797817947, - "grad_norm": 0.0033789826557040215, - "learning_rate": 0.0001999817856249071, - "loss": 46.0, - "step": 37737 - }, - { - "epoch": 6.077458834896735, - "grad_norm": 0.003187508787959814, - "learning_rate": 0.0001999817846593353, - "loss": 46.0, - "step": 37738 - }, - { - "epoch": 6.077619871975522, - "grad_norm": 0.010816927067935467, - "learning_rate": 0.00019998178369373792, - "loss": 46.0, - "step": 37739 - }, - { - "epoch": 6.07778090905431, - "grad_norm": 0.002791301580145955, - "learning_rate": 0.00019998178272811497, - "loss": 46.0, - "step": 37740 - }, - { - "epoch": 6.077941946133097, - "grad_norm": 0.005094537511467934, - "learning_rate": 0.0001999817817624664, - "loss": 46.0, - "step": 37741 - }, - { - "epoch": 6.078102983211885, - "grad_norm": 0.006306060589849949, - "learning_rate": 0.00019998178079679227, - "loss": 46.0, - "step": 37742 - }, - { - "epoch": 6.078264020290672, - "grad_norm": 0.0030474953819066286, - "learning_rate": 0.00019998177983109253, - "loss": 46.0, - "step": 37743 - }, - { - "epoch": 6.07842505736946, - "grad_norm": 0.0017909746384248137, - "learning_rate": 0.00019998177886536718, - "loss": 46.0, - "step": 37744 - }, - { - "epoch": 6.078586094448247, - "grad_norm": 0.008043472655117512, - "learning_rate": 0.00019998177789961626, - "loss": 46.0, - "step": 37745 - }, - { - "epoch": 6.0787471315270345, - "grad_norm": 0.005664821248501539, - "learning_rate": 0.00019998177693383973, - "loss": 46.0, - "step": 37746 - }, - { - "epoch": 6.078908168605821, - "grad_norm": 0.001391304307617247, - "learning_rate": 0.0001999817759680376, - "loss": 46.0, - "step": 37747 - }, - { - "epoch": 6.0790692056846085, - "grad_norm": 0.0021558445878326893, - "learning_rate": 0.00019998177500220993, - "loss": 46.0, - "step": 37748 - }, - { - "epoch": 6.079230242763396, - "grad_norm": 0.0063851396553218365, - "learning_rate": 0.00019998177403635664, - "loss": 46.0, - "step": 37749 - }, - { - "epoch": 6.079391279842183, - "grad_norm": 0.010128156282007694, - "learning_rate": 0.00019998177307047779, - "loss": 46.0, - "step": 37750 - }, - { - "epoch": 6.079552316920971, - "grad_norm": 0.005564019083976746, - "learning_rate": 0.00019998177210457332, - "loss": 46.0, - "step": 37751 - }, - { - "epoch": 6.079713353999758, - "grad_norm": 0.0013689086772501469, - "learning_rate": 0.00019998177113864327, - "loss": 46.0, - "step": 37752 - }, - { - "epoch": 6.079874391078546, - "grad_norm": 0.010868123732507229, - "learning_rate": 0.00019998177017268762, - "loss": 46.0, - "step": 37753 - }, - { - "epoch": 6.080035428157333, - "grad_norm": 0.00566981453448534, - "learning_rate": 0.0001999817692067064, - "loss": 46.0, - "step": 37754 - }, - { - "epoch": 6.080196465236121, - "grad_norm": 0.00451606186106801, - "learning_rate": 0.00019998176824069955, - "loss": 46.0, - "step": 37755 - }, - { - "epoch": 6.080357502314908, - "grad_norm": 0.0017864335095509887, - "learning_rate": 0.00019998176727466715, - "loss": 46.0, - "step": 37756 - }, - { - "epoch": 6.080518539393696, - "grad_norm": 0.00178652280010283, - "learning_rate": 0.00019998176630860913, - "loss": 46.0, - "step": 37757 - }, - { - "epoch": 6.080679576472483, - "grad_norm": 0.0012568294769153, - "learning_rate": 0.00019998176534252556, - "loss": 46.0, - "step": 37758 - }, - { - "epoch": 6.0808406135512705, - "grad_norm": 0.0038692532107234, - "learning_rate": 0.00019998176437641636, - "loss": 46.0, - "step": 37759 - }, - { - "epoch": 6.081001650630058, - "grad_norm": 0.01501297764480114, - "learning_rate": 0.00019998176341028159, - "loss": 46.0, - "step": 37760 - }, - { - "epoch": 6.081162687708845, - "grad_norm": 0.0016528075793758035, - "learning_rate": 0.00019998176244412122, - "loss": 46.0, - "step": 37761 - }, - { - "epoch": 6.081323724787632, - "grad_norm": 0.015143604017794132, - "learning_rate": 0.00019998176147793527, - "loss": 46.0, - "step": 37762 - }, - { - "epoch": 6.081484761866419, - "grad_norm": 0.004088354762643576, - "learning_rate": 0.0001999817605117237, - "loss": 46.0, - "step": 37763 - }, - { - "epoch": 6.081645798945207, - "grad_norm": 0.0016328999772667885, - "learning_rate": 0.00019998175954548657, - "loss": 46.0, - "step": 37764 - }, - { - "epoch": 6.081806836023994, - "grad_norm": 0.001362946117296815, - "learning_rate": 0.00019998175857922386, - "loss": 46.0, - "step": 37765 - }, - { - "epoch": 6.081967873102782, - "grad_norm": 0.005000079050660133, - "learning_rate": 0.0001999817576129355, - "loss": 46.0, - "step": 37766 - }, - { - "epoch": 6.082128910181569, - "grad_norm": 0.0035223346203565598, - "learning_rate": 0.00019998175664662161, - "loss": 46.0, - "step": 37767 - }, - { - "epoch": 6.082289947260357, - "grad_norm": 0.002147382590919733, - "learning_rate": 0.0001999817556802821, - "loss": 46.0, - "step": 37768 - }, - { - "epoch": 6.082450984339144, - "grad_norm": 0.0032049333676695824, - "learning_rate": 0.00019998175471391702, - "loss": 46.0, - "step": 37769 - }, - { - "epoch": 6.082612021417932, - "grad_norm": 0.002670903457328677, - "learning_rate": 0.00019998175374752637, - "loss": 46.0, - "step": 37770 - }, - { - "epoch": 6.082773058496719, - "grad_norm": 0.002350566443055868, - "learning_rate": 0.00019998175278111008, - "loss": 46.0, - "step": 37771 - }, - { - "epoch": 6.0829340955755065, - "grad_norm": 0.010421018116176128, - "learning_rate": 0.00019998175181466823, - "loss": 46.0, - "step": 37772 - }, - { - "epoch": 6.083095132654294, - "grad_norm": 0.009646703489124775, - "learning_rate": 0.0001999817508482008, - "loss": 46.0, - "step": 37773 - }, - { - "epoch": 6.083256169733081, - "grad_norm": 0.0058188787661492825, - "learning_rate": 0.00019998174988170774, - "loss": 46.0, - "step": 37774 - }, - { - "epoch": 6.083417206811869, - "grad_norm": 0.007185832131654024, - "learning_rate": 0.0001999817489151891, - "loss": 46.0, - "step": 37775 - }, - { - "epoch": 6.083578243890656, - "grad_norm": 0.0030367723666131496, - "learning_rate": 0.0001999817479486449, - "loss": 46.0, - "step": 37776 - }, - { - "epoch": 6.083739280969443, - "grad_norm": 0.008109228685498238, - "learning_rate": 0.00019998174698207508, - "loss": 46.0, - "step": 37777 - }, - { - "epoch": 6.08390031804823, - "grad_norm": 0.002837846055626869, - "learning_rate": 0.00019998174601547968, - "loss": 46.0, - "step": 37778 - }, - { - "epoch": 6.084061355127018, - "grad_norm": 0.0011027230648323894, - "learning_rate": 0.0001999817450488587, - "loss": 46.0, - "step": 37779 - }, - { - "epoch": 6.084222392205805, - "grad_norm": 0.00266228336840868, - "learning_rate": 0.0001999817440822121, - "loss": 46.0, - "step": 37780 - }, - { - "epoch": 6.084383429284593, - "grad_norm": 0.0011304491199553013, - "learning_rate": 0.00019998174311553995, - "loss": 46.0, - "step": 37781 - }, - { - "epoch": 6.08454446636338, - "grad_norm": 0.0015038179699331522, - "learning_rate": 0.00019998174214884217, - "loss": 46.0, - "step": 37782 - }, - { - "epoch": 6.0847055034421675, - "grad_norm": 0.006239565555006266, - "learning_rate": 0.00019998174118211883, - "loss": 46.0, - "step": 37783 - }, - { - "epoch": 6.084866540520955, - "grad_norm": 0.005426181945949793, - "learning_rate": 0.00019998174021536988, - "loss": 46.0, - "step": 37784 - }, - { - "epoch": 6.085027577599742, - "grad_norm": 0.011212630197405815, - "learning_rate": 0.00019998173924859534, - "loss": 46.0, - "step": 37785 - }, - { - "epoch": 6.08518861467853, - "grad_norm": 0.007488547824323177, - "learning_rate": 0.00019998173828179522, - "loss": 46.0, - "step": 37786 - }, - { - "epoch": 6.085349651757317, - "grad_norm": 0.0012200287310406566, - "learning_rate": 0.00019998173731496953, - "loss": 46.0, - "step": 37787 - }, - { - "epoch": 6.085510688836105, - "grad_norm": 0.0040877568535506725, - "learning_rate": 0.00019998173634811823, - "loss": 46.0, - "step": 37788 - }, - { - "epoch": 6.085671725914892, - "grad_norm": 0.0018878281116485596, - "learning_rate": 0.00019998173538124134, - "loss": 46.0, - "step": 37789 - }, - { - "epoch": 6.08583276299368, - "grad_norm": 0.00496339239180088, - "learning_rate": 0.00019998173441433884, - "loss": 46.0, - "step": 37790 - }, - { - "epoch": 6.085993800072466, - "grad_norm": 0.0024393887724727392, - "learning_rate": 0.00019998173344741078, - "loss": 46.0, - "step": 37791 - }, - { - "epoch": 6.086154837151254, - "grad_norm": 0.008046883158385754, - "learning_rate": 0.0001999817324804571, - "loss": 46.0, - "step": 37792 - }, - { - "epoch": 6.086315874230041, - "grad_norm": 0.0015929220244288445, - "learning_rate": 0.00019998173151347787, - "loss": 46.0, - "step": 37793 - }, - { - "epoch": 6.086476911308829, - "grad_norm": 0.017275795340538025, - "learning_rate": 0.00019998173054647301, - "loss": 46.0, - "step": 37794 - }, - { - "epoch": 6.086637948387616, - "grad_norm": 0.002468364778906107, - "learning_rate": 0.00019998172957944258, - "loss": 46.0, - "step": 37795 - }, - { - "epoch": 6.0867989854664035, - "grad_norm": 0.006003603804856539, - "learning_rate": 0.00019998172861238655, - "loss": 46.0, - "step": 37796 - }, - { - "epoch": 6.086960022545191, - "grad_norm": 0.0015118923038244247, - "learning_rate": 0.00019998172764530497, - "loss": 46.0, - "step": 37797 - }, - { - "epoch": 6.087121059623978, - "grad_norm": 0.004045466426759958, - "learning_rate": 0.00019998172667819774, - "loss": 46.0, - "step": 37798 - }, - { - "epoch": 6.087282096702766, - "grad_norm": 0.010266249068081379, - "learning_rate": 0.00019998172571106495, - "loss": 46.0, - "step": 37799 - }, - { - "epoch": 6.087443133781553, - "grad_norm": 0.0012398279504850507, - "learning_rate": 0.00019998172474390655, - "loss": 46.0, - "step": 37800 - }, - { - "epoch": 6.087604170860341, - "grad_norm": 0.0013336322735995054, - "learning_rate": 0.00019998172377672256, - "loss": 46.0, - "step": 37801 - }, - { - "epoch": 6.087765207939128, - "grad_norm": 0.00229376507923007, - "learning_rate": 0.00019998172280951302, - "loss": 46.0, - "step": 37802 - }, - { - "epoch": 6.087926245017916, - "grad_norm": 0.0028291502967476845, - "learning_rate": 0.00019998172184227788, - "loss": 46.0, - "step": 37803 - }, - { - "epoch": 6.088087282096703, - "grad_norm": 0.010053534992039204, - "learning_rate": 0.00019998172087501713, - "loss": 46.0, - "step": 37804 - }, - { - "epoch": 6.088248319175491, - "grad_norm": 0.0007450601551681757, - "learning_rate": 0.00019998171990773077, - "loss": 46.0, - "step": 37805 - }, - { - "epoch": 6.088409356254277, - "grad_norm": 0.0018764289561659098, - "learning_rate": 0.00019998171894041887, - "loss": 46.0, - "step": 37806 - }, - { - "epoch": 6.088570393333065, - "grad_norm": 0.0032143276184797287, - "learning_rate": 0.00019998171797308136, - "loss": 46.0, - "step": 37807 - }, - { - "epoch": 6.088731430411852, - "grad_norm": 0.0026210385840386152, - "learning_rate": 0.00019998171700571823, - "loss": 46.0, - "step": 37808 - }, - { - "epoch": 6.0888924674906395, - "grad_norm": 0.001016295631416142, - "learning_rate": 0.00019998171603832955, - "loss": 46.0, - "step": 37809 - }, - { - "epoch": 6.089053504569427, - "grad_norm": 0.0027335411868989468, - "learning_rate": 0.00019998171507091525, - "loss": 46.0, - "step": 37810 - }, - { - "epoch": 6.089214541648214, - "grad_norm": 0.004869597498327494, - "learning_rate": 0.0001999817141034754, - "loss": 46.0, - "step": 37811 - }, - { - "epoch": 6.089375578727002, - "grad_norm": 0.0014778736513108015, - "learning_rate": 0.00019998171313600992, - "loss": 46.0, - "step": 37812 - }, - { - "epoch": 6.089536615805789, - "grad_norm": 0.0025663163978606462, - "learning_rate": 0.00019998171216851885, - "loss": 46.0, - "step": 37813 - }, - { - "epoch": 6.089697652884577, - "grad_norm": 0.00807054340839386, - "learning_rate": 0.00019998171120100223, - "loss": 46.0, - "step": 37814 - }, - { - "epoch": 6.089858689963364, - "grad_norm": 0.0015795142389833927, - "learning_rate": 0.00019998171023345997, - "loss": 46.0, - "step": 37815 - }, - { - "epoch": 6.090019727042152, - "grad_norm": 0.0025860497262328863, - "learning_rate": 0.00019998170926589215, - "loss": 46.0, - "step": 37816 - }, - { - "epoch": 6.090180764120939, - "grad_norm": 0.001016231719404459, - "learning_rate": 0.00019998170829829874, - "loss": 46.0, - "step": 37817 - }, - { - "epoch": 6.090341801199727, - "grad_norm": 0.003120674518868327, - "learning_rate": 0.00019998170733067974, - "loss": 46.0, - "step": 37818 - }, - { - "epoch": 6.090502838278514, - "grad_norm": 0.009864549152553082, - "learning_rate": 0.00019998170636303513, - "loss": 46.0, - "step": 37819 - }, - { - "epoch": 6.090663875357301, - "grad_norm": 0.0027071491349488497, - "learning_rate": 0.00019998170539536493, - "loss": 46.0, - "step": 37820 - }, - { - "epoch": 6.090824912436088, - "grad_norm": 0.003821437945589423, - "learning_rate": 0.00019998170442766915, - "loss": 46.0, - "step": 37821 - }, - { - "epoch": 6.0909859495148755, - "grad_norm": 0.005640721879899502, - "learning_rate": 0.0001999817034599478, - "loss": 46.0, - "step": 37822 - }, - { - "epoch": 6.091146986593663, - "grad_norm": 0.008255884982645512, - "learning_rate": 0.00019998170249220084, - "loss": 46.0, - "step": 37823 - }, - { - "epoch": 6.09130802367245, - "grad_norm": 0.0023857494816184044, - "learning_rate": 0.0001999817015244283, - "loss": 46.0, - "step": 37824 - }, - { - "epoch": 6.091469060751238, - "grad_norm": 0.0019795005209743977, - "learning_rate": 0.00019998170055663016, - "loss": 46.0, - "step": 37825 - }, - { - "epoch": 6.091630097830025, - "grad_norm": 0.009732520207762718, - "learning_rate": 0.0001999816995888064, - "loss": 46.0, - "step": 37826 - }, - { - "epoch": 6.091791134908813, - "grad_norm": 0.007448509335517883, - "learning_rate": 0.0001999816986209571, - "loss": 46.0, - "step": 37827 - }, - { - "epoch": 6.0919521719876, - "grad_norm": 0.001298447954468429, - "learning_rate": 0.00019998169765308218, - "loss": 46.0, - "step": 37828 - }, - { - "epoch": 6.092113209066388, - "grad_norm": 0.006628180388361216, - "learning_rate": 0.0001999816966851817, - "loss": 46.0, - "step": 37829 - }, - { - "epoch": 6.092274246145175, - "grad_norm": 0.001536199008114636, - "learning_rate": 0.0001999816957172556, - "loss": 46.0, - "step": 37830 - }, - { - "epoch": 6.0924352832239625, - "grad_norm": 0.00138020608574152, - "learning_rate": 0.00019998169474930394, - "loss": 46.0, - "step": 37831 - }, - { - "epoch": 6.09259632030275, - "grad_norm": 0.006614008452743292, - "learning_rate": 0.00019998169378132664, - "loss": 46.0, - "step": 37832 - }, - { - "epoch": 6.0927573573815375, - "grad_norm": 0.003222718834877014, - "learning_rate": 0.0001999816928133238, - "loss": 46.0, - "step": 37833 - }, - { - "epoch": 6.092918394460325, - "grad_norm": 0.008102389052510262, - "learning_rate": 0.00019998169184529534, - "loss": 46.0, - "step": 37834 - }, - { - "epoch": 6.0930794315391115, - "grad_norm": 0.005741816014051437, - "learning_rate": 0.0001999816908772413, - "loss": 46.0, - "step": 37835 - }, - { - "epoch": 6.093240468617899, - "grad_norm": 0.005830100737512112, - "learning_rate": 0.00019998168990916168, - "loss": 46.0, - "step": 37836 - }, - { - "epoch": 6.093401505696686, - "grad_norm": 0.002829157281666994, - "learning_rate": 0.00019998168894105645, - "loss": 46.0, - "step": 37837 - }, - { - "epoch": 6.093562542775474, - "grad_norm": 0.004213452804833651, - "learning_rate": 0.00019998168797292563, - "loss": 46.0, - "step": 37838 - }, - { - "epoch": 6.093723579854261, - "grad_norm": 0.002485365839675069, - "learning_rate": 0.00019998168700476922, - "loss": 46.0, - "step": 37839 - }, - { - "epoch": 6.093884616933049, - "grad_norm": 0.0038056441117078066, - "learning_rate": 0.00019998168603658725, - "loss": 46.0, - "step": 37840 - }, - { - "epoch": 6.094045654011836, - "grad_norm": 0.00824055913835764, - "learning_rate": 0.00019998168506837966, - "loss": 46.0, - "step": 37841 - }, - { - "epoch": 6.094206691090624, - "grad_norm": 0.0017792948056012392, - "learning_rate": 0.0001999816841001465, - "loss": 46.0, - "step": 37842 - }, - { - "epoch": 6.094367728169411, - "grad_norm": 0.004388670902699232, - "learning_rate": 0.00019998168313188774, - "loss": 46.0, - "step": 37843 - }, - { - "epoch": 6.0945287652481985, - "grad_norm": 0.017673298716545105, - "learning_rate": 0.0001999816821636034, - "loss": 46.0, - "step": 37844 - }, - { - "epoch": 6.094689802326986, - "grad_norm": 0.0008767392719164491, - "learning_rate": 0.00019998168119529343, - "loss": 46.0, - "step": 37845 - }, - { - "epoch": 6.094850839405773, - "grad_norm": 0.002628319663926959, - "learning_rate": 0.00019998168022695789, - "loss": 46.0, - "step": 37846 - }, - { - "epoch": 6.095011876484561, - "grad_norm": 0.003136137267574668, - "learning_rate": 0.00019998167925859675, - "loss": 46.0, - "step": 37847 - }, - { - "epoch": 6.095172913563348, - "grad_norm": 0.007929238490760326, - "learning_rate": 0.00019998167829021006, - "loss": 46.0, - "step": 37848 - }, - { - "epoch": 6.095333950642136, - "grad_norm": 0.0013684015721082687, - "learning_rate": 0.00019998167732179778, - "loss": 46.0, - "step": 37849 - }, - { - "epoch": 6.095494987720922, - "grad_norm": 0.007586033549159765, - "learning_rate": 0.00019998167635335988, - "loss": 46.0, - "step": 37850 - }, - { - "epoch": 6.09565602479971, - "grad_norm": 0.007154947612434626, - "learning_rate": 0.00019998167538489637, - "loss": 46.0, - "step": 37851 - }, - { - "epoch": 6.095817061878497, - "grad_norm": 0.0011864241678267717, - "learning_rate": 0.0001999816744164073, - "loss": 46.0, - "step": 37852 - }, - { - "epoch": 6.095978098957285, - "grad_norm": 0.005761478561908007, - "learning_rate": 0.00019998167344789265, - "loss": 46.0, - "step": 37853 - }, - { - "epoch": 6.096139136036072, - "grad_norm": 0.029655126854777336, - "learning_rate": 0.0001999816724793524, - "loss": 46.0, - "step": 37854 - }, - { - "epoch": 6.09630017311486, - "grad_norm": 0.0015494056278839707, - "learning_rate": 0.00019998167151078655, - "loss": 46.0, - "step": 37855 - }, - { - "epoch": 6.096461210193647, - "grad_norm": 0.004338191822171211, - "learning_rate": 0.00019998167054219513, - "loss": 46.0, - "step": 37856 - }, - { - "epoch": 6.0966222472724345, - "grad_norm": 0.001258274307474494, - "learning_rate": 0.0001999816695735781, - "loss": 46.0, - "step": 37857 - }, - { - "epoch": 6.096783284351222, - "grad_norm": 0.0016012617852538824, - "learning_rate": 0.00019998166860493548, - "loss": 46.0, - "step": 37858 - }, - { - "epoch": 6.096944321430009, - "grad_norm": 0.0020765161607414484, - "learning_rate": 0.00019998166763626727, - "loss": 46.0, - "step": 37859 - }, - { - "epoch": 6.097105358508797, - "grad_norm": 0.0010396366706117988, - "learning_rate": 0.00019998166666757348, - "loss": 46.0, - "step": 37860 - }, - { - "epoch": 6.097266395587584, - "grad_norm": 0.0017461135284975171, - "learning_rate": 0.0001999816656988541, - "loss": 46.0, - "step": 37861 - }, - { - "epoch": 6.097427432666372, - "grad_norm": 0.0015936428681015968, - "learning_rate": 0.0001999816647301091, - "loss": 46.0, - "step": 37862 - }, - { - "epoch": 6.097588469745159, - "grad_norm": 0.007107043173164129, - "learning_rate": 0.00019998166376133854, - "loss": 46.0, - "step": 37863 - }, - { - "epoch": 6.097749506823946, - "grad_norm": 0.009870864450931549, - "learning_rate": 0.00019998166279254238, - "loss": 46.0, - "step": 37864 - }, - { - "epoch": 6.097910543902733, - "grad_norm": 0.00346820754930377, - "learning_rate": 0.00019998166182372065, - "loss": 46.0, - "step": 37865 - }, - { - "epoch": 6.098071580981521, - "grad_norm": 0.0031371782533824444, - "learning_rate": 0.0001999816608548733, - "loss": 46.0, - "step": 37866 - }, - { - "epoch": 6.098232618060308, - "grad_norm": 0.013296880759298801, - "learning_rate": 0.0001999816598860004, - "loss": 46.0, - "step": 37867 - }, - { - "epoch": 6.098393655139096, - "grad_norm": 0.005946441553533077, - "learning_rate": 0.00019998165891710188, - "loss": 46.0, - "step": 37868 - }, - { - "epoch": 6.098554692217883, - "grad_norm": 0.011022618040442467, - "learning_rate": 0.00019998165794817775, - "loss": 46.0, - "step": 37869 - }, - { - "epoch": 6.0987157292966705, - "grad_norm": 0.0025238527450710535, - "learning_rate": 0.00019998165697922805, - "loss": 46.0, - "step": 37870 - }, - { - "epoch": 6.098876766375458, - "grad_norm": 0.0042534396052360535, - "learning_rate": 0.00019998165601025275, - "loss": 46.0, - "step": 37871 - }, - { - "epoch": 6.099037803454245, - "grad_norm": 0.0019194354536011815, - "learning_rate": 0.0001999816550412519, - "loss": 46.0, - "step": 37872 - }, - { - "epoch": 6.099198840533033, - "grad_norm": 0.006690456531941891, - "learning_rate": 0.00019998165407222543, - "loss": 46.0, - "step": 37873 - }, - { - "epoch": 6.09935987761182, - "grad_norm": 0.0009905594633892179, - "learning_rate": 0.00019998165310317338, - "loss": 46.0, - "step": 37874 - }, - { - "epoch": 6.099520914690608, - "grad_norm": 0.003276173258200288, - "learning_rate": 0.00019998165213409573, - "loss": 46.0, - "step": 37875 - }, - { - "epoch": 6.099681951769395, - "grad_norm": 0.0015897850971668959, - "learning_rate": 0.0001999816511649925, - "loss": 46.0, - "step": 37876 - }, - { - "epoch": 6.099842988848183, - "grad_norm": 0.00707613630220294, - "learning_rate": 0.00019998165019586366, - "loss": 46.0, - "step": 37877 - }, - { - "epoch": 6.10000402592697, - "grad_norm": 0.0074333129450678825, - "learning_rate": 0.00019998164922670927, - "loss": 46.0, - "step": 37878 - }, - { - "epoch": 6.100165063005757, - "grad_norm": 0.008302539587020874, - "learning_rate": 0.00019998164825752924, - "loss": 46.0, - "step": 37879 - }, - { - "epoch": 6.100326100084544, - "grad_norm": 0.001487349858507514, - "learning_rate": 0.00019998164728832362, - "loss": 46.0, - "step": 37880 - }, - { - "epoch": 6.100487137163332, - "grad_norm": 0.006328992545604706, - "learning_rate": 0.00019998164631909244, - "loss": 46.0, - "step": 37881 - }, - { - "epoch": 6.100648174242119, - "grad_norm": 0.005400301888585091, - "learning_rate": 0.00019998164534983567, - "loss": 46.0, - "step": 37882 - }, - { - "epoch": 6.1008092113209065, - "grad_norm": 0.008416292257606983, - "learning_rate": 0.0001999816443805533, - "loss": 46.0, - "step": 37883 - }, - { - "epoch": 6.100970248399694, - "grad_norm": 0.0014171391958370805, - "learning_rate": 0.00019998164341124533, - "loss": 46.0, - "step": 37884 - }, - { - "epoch": 6.101131285478481, - "grad_norm": 0.01370865199714899, - "learning_rate": 0.00019998164244191177, - "loss": 46.0, - "step": 37885 - }, - { - "epoch": 6.101292322557269, - "grad_norm": 0.0027628070674836636, - "learning_rate": 0.00019998164147255266, - "loss": 46.0, - "step": 37886 - }, - { - "epoch": 6.101453359636056, - "grad_norm": 0.008990590460598469, - "learning_rate": 0.00019998164050316793, - "loss": 46.0, - "step": 37887 - }, - { - "epoch": 6.101614396714844, - "grad_norm": 0.011891982518136501, - "learning_rate": 0.0001999816395337576, - "loss": 46.0, - "step": 37888 - }, - { - "epoch": 6.101775433793631, - "grad_norm": 0.017663370817899704, - "learning_rate": 0.00019998163856432168, - "loss": 46.0, - "step": 37889 - }, - { - "epoch": 6.101936470872419, - "grad_norm": 0.002410690300166607, - "learning_rate": 0.00019998163759486016, - "loss": 46.0, - "step": 37890 - }, - { - "epoch": 6.102097507951206, - "grad_norm": 0.0016873523127287626, - "learning_rate": 0.00019998163662537311, - "loss": 46.0, - "step": 37891 - }, - { - "epoch": 6.1022585450299935, - "grad_norm": 0.005430019460618496, - "learning_rate": 0.00019998163565586042, - "loss": 46.0, - "step": 37892 - }, - { - "epoch": 6.10241958210878, - "grad_norm": 0.005881018005311489, - "learning_rate": 0.00019998163468632214, - "loss": 46.0, - "step": 37893 - }, - { - "epoch": 6.102580619187568, - "grad_norm": 0.00956530962139368, - "learning_rate": 0.00019998163371675828, - "loss": 46.0, - "step": 37894 - }, - { - "epoch": 6.102741656266355, - "grad_norm": 0.019478866830468178, - "learning_rate": 0.00019998163274716882, - "loss": 46.0, - "step": 37895 - }, - { - "epoch": 6.1029026933451425, - "grad_norm": 0.0012692884774878621, - "learning_rate": 0.0001999816317775538, - "loss": 46.0, - "step": 37896 - }, - { - "epoch": 6.10306373042393, - "grad_norm": 0.01564978063106537, - "learning_rate": 0.00019998163080791316, - "loss": 46.0, - "step": 37897 - }, - { - "epoch": 6.103224767502717, - "grad_norm": 0.003363940631970763, - "learning_rate": 0.00019998162983824694, - "loss": 46.0, - "step": 37898 - }, - { - "epoch": 6.103385804581505, - "grad_norm": 0.007944186218082905, - "learning_rate": 0.0001999816288685551, - "loss": 46.0, - "step": 37899 - }, - { - "epoch": 6.103546841660292, - "grad_norm": 0.013608613982796669, - "learning_rate": 0.00019998162789883772, - "loss": 46.0, - "step": 37900 - }, - { - "epoch": 6.10370787873908, - "grad_norm": 0.0018901459407061338, - "learning_rate": 0.00019998162692909475, - "loss": 46.0, - "step": 37901 - }, - { - "epoch": 6.103868915817867, - "grad_norm": 0.002593145938590169, - "learning_rate": 0.00019998162595932616, - "loss": 46.0, - "step": 37902 - }, - { - "epoch": 6.104029952896655, - "grad_norm": 0.006367926951497793, - "learning_rate": 0.00019998162498953198, - "loss": 46.0, - "step": 37903 - }, - { - "epoch": 6.104190989975442, - "grad_norm": 0.005814204923808575, - "learning_rate": 0.00019998162401971221, - "loss": 46.0, - "step": 37904 - }, - { - "epoch": 6.1043520270542295, - "grad_norm": 0.009547271765768528, - "learning_rate": 0.00019998162304986686, - "loss": 46.0, - "step": 37905 - }, - { - "epoch": 6.104513064133017, - "grad_norm": 0.004241904243826866, - "learning_rate": 0.00019998162207999592, - "loss": 46.0, - "step": 37906 - }, - { - "epoch": 6.104674101211804, - "grad_norm": 0.0032758968882262707, - "learning_rate": 0.00019998162111009937, - "loss": 46.0, - "step": 37907 - }, - { - "epoch": 6.104835138290591, - "grad_norm": 0.003191495081409812, - "learning_rate": 0.00019998162014017728, - "loss": 46.0, - "step": 37908 - }, - { - "epoch": 6.104996175369378, - "grad_norm": 0.0013137803180143237, - "learning_rate": 0.00019998161917022955, - "loss": 46.0, - "step": 37909 - }, - { - "epoch": 6.105157212448166, - "grad_norm": 0.0024282927624881268, - "learning_rate": 0.00019998161820025624, - "loss": 46.0, - "step": 37910 - }, - { - "epoch": 6.105318249526953, - "grad_norm": 0.0022701954003423452, - "learning_rate": 0.00019998161723025736, - "loss": 46.0, - "step": 37911 - }, - { - "epoch": 6.105479286605741, - "grad_norm": 0.017151150852441788, - "learning_rate": 0.00019998161626023288, - "loss": 46.0, - "step": 37912 - }, - { - "epoch": 6.105640323684528, - "grad_norm": 0.0027103072497993708, - "learning_rate": 0.00019998161529018277, - "loss": 46.0, - "step": 37913 - }, - { - "epoch": 6.105801360763316, - "grad_norm": 0.0013941014185547829, - "learning_rate": 0.0001999816143201071, - "loss": 46.0, - "step": 37914 - }, - { - "epoch": 6.105962397842103, - "grad_norm": 0.002070285379886627, - "learning_rate": 0.00019998161335000586, - "loss": 46.0, - "step": 37915 - }, - { - "epoch": 6.106123434920891, - "grad_norm": 0.0026621525175869465, - "learning_rate": 0.00019998161237987902, - "loss": 46.0, - "step": 37916 - }, - { - "epoch": 6.106284471999678, - "grad_norm": 0.01080483477562666, - "learning_rate": 0.0001999816114097266, - "loss": 46.0, - "step": 37917 - }, - { - "epoch": 6.1064455090784655, - "grad_norm": 0.006349895615130663, - "learning_rate": 0.00019998161043954858, - "loss": 46.0, - "step": 37918 - }, - { - "epoch": 6.106606546157253, - "grad_norm": 0.0034800455905497074, - "learning_rate": 0.00019998160946934496, - "loss": 46.0, - "step": 37919 - }, - { - "epoch": 6.10676758323604, - "grad_norm": 0.010056184604763985, - "learning_rate": 0.00019998160849911574, - "loss": 46.0, - "step": 37920 - }, - { - "epoch": 6.106928620314828, - "grad_norm": 0.009464414790272713, - "learning_rate": 0.00019998160752886094, - "loss": 46.0, - "step": 37921 - }, - { - "epoch": 6.107089657393615, - "grad_norm": 0.003048659535124898, - "learning_rate": 0.00019998160655858058, - "loss": 46.0, - "step": 37922 - }, - { - "epoch": 6.107250694472402, - "grad_norm": 0.0032577086240053177, - "learning_rate": 0.00019998160558827458, - "loss": 46.0, - "step": 37923 - }, - { - "epoch": 6.107411731551189, - "grad_norm": 0.0035832293797284365, - "learning_rate": 0.00019998160461794301, - "loss": 46.0, - "step": 37924 - }, - { - "epoch": 6.107572768629977, - "grad_norm": 0.0037689015734940767, - "learning_rate": 0.00019998160364758586, - "loss": 46.0, - "step": 37925 - }, - { - "epoch": 6.107733805708764, - "grad_norm": 0.002485231263563037, - "learning_rate": 0.00019998160267720313, - "loss": 46.0, - "step": 37926 - }, - { - "epoch": 6.107894842787552, - "grad_norm": 0.002768041333183646, - "learning_rate": 0.00019998160170679478, - "loss": 46.0, - "step": 37927 - }, - { - "epoch": 6.108055879866339, - "grad_norm": 0.017291713505983353, - "learning_rate": 0.00019998160073636087, - "loss": 46.0, - "step": 37928 - }, - { - "epoch": 6.108216916945127, - "grad_norm": 0.007592449430376291, - "learning_rate": 0.00019998159976590134, - "loss": 46.0, - "step": 37929 - }, - { - "epoch": 6.108377954023914, - "grad_norm": 0.002873070538043976, - "learning_rate": 0.00019998159879541625, - "loss": 46.0, - "step": 37930 - }, - { - "epoch": 6.1085389911027015, - "grad_norm": 0.010722826234996319, - "learning_rate": 0.00019998159782490555, - "loss": 46.0, - "step": 37931 - }, - { - "epoch": 6.108700028181489, - "grad_norm": 0.014389226213097572, - "learning_rate": 0.00019998159685436927, - "loss": 46.0, - "step": 37932 - }, - { - "epoch": 6.108861065260276, - "grad_norm": 0.02572016417980194, - "learning_rate": 0.0001999815958838074, - "loss": 46.0, - "step": 37933 - }, - { - "epoch": 6.109022102339064, - "grad_norm": 0.0034549778793007135, - "learning_rate": 0.0001999815949132199, - "loss": 46.0, - "step": 37934 - }, - { - "epoch": 6.109183139417851, - "grad_norm": 0.0053646741434931755, - "learning_rate": 0.00019998159394260686, - "loss": 46.0, - "step": 37935 - }, - { - "epoch": 6.109344176496639, - "grad_norm": 0.005037750583142042, - "learning_rate": 0.00019998159297196822, - "loss": 46.0, - "step": 37936 - }, - { - "epoch": 6.109505213575425, - "grad_norm": 0.002898403676226735, - "learning_rate": 0.00019998159200130394, - "loss": 46.0, - "step": 37937 - }, - { - "epoch": 6.109666250654213, - "grad_norm": 0.008576362393796444, - "learning_rate": 0.00019998159103061413, - "loss": 46.0, - "step": 37938 - }, - { - "epoch": 6.109827287733, - "grad_norm": 0.009034862741827965, - "learning_rate": 0.0001999815900598987, - "loss": 46.0, - "step": 37939 - }, - { - "epoch": 6.109988324811788, - "grad_norm": 0.010549517348408699, - "learning_rate": 0.0001999815890891577, - "loss": 46.0, - "step": 37940 - }, - { - "epoch": 6.110149361890575, - "grad_norm": 0.0023834872990846634, - "learning_rate": 0.00019998158811839113, - "loss": 46.0, - "step": 37941 - }, - { - "epoch": 6.110310398969363, - "grad_norm": 0.005191650707274675, - "learning_rate": 0.00019998158714759891, - "loss": 46.0, - "step": 37942 - }, - { - "epoch": 6.11047143604815, - "grad_norm": 0.0058679780922830105, - "learning_rate": 0.00019998158617678114, - "loss": 46.0, - "step": 37943 - }, - { - "epoch": 6.1106324731269375, - "grad_norm": 0.005831942893564701, - "learning_rate": 0.00019998158520593778, - "loss": 46.0, - "step": 37944 - }, - { - "epoch": 6.110793510205725, - "grad_norm": 0.0051593054085969925, - "learning_rate": 0.0001999815842350688, - "loss": 46.0, - "step": 37945 - }, - { - "epoch": 6.110954547284512, - "grad_norm": 0.003682398470118642, - "learning_rate": 0.00019998158326417427, - "loss": 46.0, - "step": 37946 - }, - { - "epoch": 6.1111155843633, - "grad_norm": 0.003645791672170162, - "learning_rate": 0.00019998158229325412, - "loss": 46.0, - "step": 37947 - }, - { - "epoch": 6.111276621442087, - "grad_norm": 0.006710568908601999, - "learning_rate": 0.0001999815813223084, - "loss": 46.0, - "step": 37948 - }, - { - "epoch": 6.111437658520875, - "grad_norm": 0.033028069883584976, - "learning_rate": 0.00019998158035133706, - "loss": 46.0, - "step": 37949 - }, - { - "epoch": 6.111598695599662, - "grad_norm": 0.0022661553230136633, - "learning_rate": 0.00019998157938034015, - "loss": 46.0, - "step": 37950 - }, - { - "epoch": 6.11175973267845, - "grad_norm": 0.0008625874179415405, - "learning_rate": 0.00019998157840931766, - "loss": 46.0, - "step": 37951 - }, - { - "epoch": 6.111920769757236, - "grad_norm": 0.0030962605960667133, - "learning_rate": 0.00019998157743826957, - "loss": 46.0, - "step": 37952 - }, - { - "epoch": 6.112081806836024, - "grad_norm": 0.003905230201780796, - "learning_rate": 0.0001999815764671959, - "loss": 46.0, - "step": 37953 - }, - { - "epoch": 6.112242843914811, - "grad_norm": 0.006993682123720646, - "learning_rate": 0.00019998157549609661, - "loss": 46.0, - "step": 37954 - }, - { - "epoch": 6.1124038809935985, - "grad_norm": 0.0074691143818199635, - "learning_rate": 0.00019998157452497174, - "loss": 46.0, - "step": 37955 - }, - { - "epoch": 6.112564918072386, - "grad_norm": 0.006067709997296333, - "learning_rate": 0.0001999815735538213, - "loss": 46.0, - "step": 37956 - }, - { - "epoch": 6.1127259551511735, - "grad_norm": 0.0025282027199864388, - "learning_rate": 0.00019998157258264526, - "loss": 46.0, - "step": 37957 - }, - { - "epoch": 6.112886992229961, - "grad_norm": 0.0023204137105494738, - "learning_rate": 0.00019998157161144365, - "loss": 46.0, - "step": 37958 - }, - { - "epoch": 6.113048029308748, - "grad_norm": 0.008157733827829361, - "learning_rate": 0.00019998157064021643, - "loss": 46.0, - "step": 37959 - }, - { - "epoch": 6.113209066387536, - "grad_norm": 0.0015601470367982984, - "learning_rate": 0.00019998156966896362, - "loss": 46.0, - "step": 37960 - }, - { - "epoch": 6.113370103466323, - "grad_norm": 0.01031606923788786, - "learning_rate": 0.0001999815686976852, - "loss": 46.0, - "step": 37961 - }, - { - "epoch": 6.113531140545111, - "grad_norm": 0.004116305150091648, - "learning_rate": 0.00019998156772638121, - "loss": 46.0, - "step": 37962 - }, - { - "epoch": 6.113692177623898, - "grad_norm": 0.005379362031817436, - "learning_rate": 0.00019998156675505162, - "loss": 46.0, - "step": 37963 - }, - { - "epoch": 6.113853214702686, - "grad_norm": 0.0032583598513156176, - "learning_rate": 0.00019998156578369643, - "loss": 46.0, - "step": 37964 - }, - { - "epoch": 6.114014251781473, - "grad_norm": 0.008800827898085117, - "learning_rate": 0.0001999815648123157, - "loss": 46.0, - "step": 37965 - }, - { - "epoch": 6.11417528886026, - "grad_norm": 0.0035629295744001865, - "learning_rate": 0.00019998156384090935, - "loss": 46.0, - "step": 37966 - }, - { - "epoch": 6.114336325939047, - "grad_norm": 0.0010251258499920368, - "learning_rate": 0.0001999815628694774, - "loss": 46.0, - "step": 37967 - }, - { - "epoch": 6.1144973630178345, - "grad_norm": 0.006093880161643028, - "learning_rate": 0.00019998156189801987, - "loss": 46.0, - "step": 37968 - }, - { - "epoch": 6.114658400096622, - "grad_norm": 0.0019650463946163654, - "learning_rate": 0.00019998156092653675, - "loss": 46.0, - "step": 37969 - }, - { - "epoch": 6.114819437175409, - "grad_norm": 0.002851185156032443, - "learning_rate": 0.00019998155995502802, - "loss": 46.0, - "step": 37970 - }, - { - "epoch": 6.114980474254197, - "grad_norm": 0.004890704061836004, - "learning_rate": 0.00019998155898349372, - "loss": 46.0, - "step": 37971 - }, - { - "epoch": 6.115141511332984, - "grad_norm": 0.029467392712831497, - "learning_rate": 0.00019998155801193384, - "loss": 46.0, - "step": 37972 - }, - { - "epoch": 6.115302548411772, - "grad_norm": 0.001138051156885922, - "learning_rate": 0.00019998155704034835, - "loss": 46.0, - "step": 37973 - }, - { - "epoch": 6.115463585490559, - "grad_norm": 0.002168205101042986, - "learning_rate": 0.00019998155606873726, - "loss": 46.0, - "step": 37974 - }, - { - "epoch": 6.115624622569347, - "grad_norm": 0.0041888016276061535, - "learning_rate": 0.00019998155509710062, - "loss": 46.0, - "step": 37975 - }, - { - "epoch": 6.115785659648134, - "grad_norm": 0.021223602816462517, - "learning_rate": 0.00019998155412543836, - "loss": 46.0, - "step": 37976 - }, - { - "epoch": 6.115946696726922, - "grad_norm": 0.0009306090651080012, - "learning_rate": 0.00019998155315375051, - "loss": 46.0, - "step": 37977 - }, - { - "epoch": 6.116107733805709, - "grad_norm": 0.0015372381312772632, - "learning_rate": 0.00019998155218203708, - "loss": 46.0, - "step": 37978 - }, - { - "epoch": 6.1162687708844965, - "grad_norm": 0.017244672402739525, - "learning_rate": 0.00019998155121029804, - "loss": 46.0, - "step": 37979 - }, - { - "epoch": 6.116429807963284, - "grad_norm": 0.003151554614305496, - "learning_rate": 0.00019998155023853346, - "loss": 46.0, - "step": 37980 - }, - { - "epoch": 6.1165908450420705, - "grad_norm": 0.004102529492229223, - "learning_rate": 0.00019998154926674324, - "loss": 46.0, - "step": 37981 - }, - { - "epoch": 6.116751882120858, - "grad_norm": 0.002577023347839713, - "learning_rate": 0.00019998154829492743, - "loss": 46.0, - "step": 37982 - }, - { - "epoch": 6.116912919199645, - "grad_norm": 0.005130618344992399, - "learning_rate": 0.00019998154732308606, - "loss": 46.0, - "step": 37983 - }, - { - "epoch": 6.117073956278433, - "grad_norm": 0.002902435604482889, - "learning_rate": 0.00019998154635121908, - "loss": 46.0, - "step": 37984 - }, - { - "epoch": 6.11723499335722, - "grad_norm": 0.0036257412284612656, - "learning_rate": 0.0001999815453793265, - "loss": 46.0, - "step": 37985 - }, - { - "epoch": 6.117396030436008, - "grad_norm": 0.0008864043047651649, - "learning_rate": 0.00019998154440740838, - "loss": 46.0, - "step": 37986 - }, - { - "epoch": 6.117557067514795, - "grad_norm": 0.0033855861984193325, - "learning_rate": 0.0001999815434354646, - "loss": 46.0, - "step": 37987 - }, - { - "epoch": 6.117718104593583, - "grad_norm": 0.013077301904559135, - "learning_rate": 0.00019998154246349527, - "loss": 46.0, - "step": 37988 - }, - { - "epoch": 6.11787914167237, - "grad_norm": 0.0028082553762942553, - "learning_rate": 0.00019998154149150036, - "loss": 46.0, - "step": 37989 - }, - { - "epoch": 6.118040178751158, - "grad_norm": 0.005463637411594391, - "learning_rate": 0.00019998154051947985, - "loss": 46.0, - "step": 37990 - }, - { - "epoch": 6.118201215829945, - "grad_norm": 0.02385544776916504, - "learning_rate": 0.00019998153954743373, - "loss": 46.0, - "step": 37991 - }, - { - "epoch": 6.1183622529087325, - "grad_norm": 0.01385748852044344, - "learning_rate": 0.00019998153857536205, - "loss": 46.0, - "step": 37992 - }, - { - "epoch": 6.11852328998752, - "grad_norm": 0.007933791726827621, - "learning_rate": 0.00019998153760326476, - "loss": 46.0, - "step": 37993 - }, - { - "epoch": 6.118684327066307, - "grad_norm": 0.0036477777175605297, - "learning_rate": 0.00019998153663114188, - "loss": 46.0, - "step": 37994 - }, - { - "epoch": 6.118845364145095, - "grad_norm": 0.010108874179422855, - "learning_rate": 0.0001999815356589934, - "loss": 46.0, - "step": 37995 - }, - { - "epoch": 6.119006401223881, - "grad_norm": 0.012677617371082306, - "learning_rate": 0.00019998153468681935, - "loss": 46.0, - "step": 37996 - }, - { - "epoch": 6.119167438302669, - "grad_norm": 0.011315323412418365, - "learning_rate": 0.0001999815337146197, - "loss": 46.0, - "step": 37997 - }, - { - "epoch": 6.119328475381456, - "grad_norm": 0.003727993695065379, - "learning_rate": 0.00019998153274239445, - "loss": 46.0, - "step": 37998 - }, - { - "epoch": 6.119489512460244, - "grad_norm": 0.005289892200380564, - "learning_rate": 0.00019998153177014363, - "loss": 46.0, - "step": 37999 - }, - { - "epoch": 6.119650549539031, - "grad_norm": 0.01043575257062912, - "learning_rate": 0.00019998153079786723, - "loss": 46.0, - "step": 38000 - }, - { - "epoch": 6.119811586617819, - "grad_norm": 0.013002244755625725, - "learning_rate": 0.0001999815298255652, - "loss": 46.0, - "step": 38001 - }, - { - "epoch": 6.119972623696606, - "grad_norm": 0.0046134768053889275, - "learning_rate": 0.00019998152885323763, - "loss": 46.0, - "step": 38002 - }, - { - "epoch": 6.120133660775394, - "grad_norm": 0.01637146808207035, - "learning_rate": 0.0001999815278808844, - "loss": 46.0, - "step": 38003 - }, - { - "epoch": 6.120294697854181, - "grad_norm": 0.002225615317001939, - "learning_rate": 0.00019998152690850563, - "loss": 46.0, - "step": 38004 - }, - { - "epoch": 6.1204557349329685, - "grad_norm": 0.002993962960317731, - "learning_rate": 0.00019998152593610126, - "loss": 46.0, - "step": 38005 - }, - { - "epoch": 6.120616772011756, - "grad_norm": 0.002434214809909463, - "learning_rate": 0.0001999815249636713, - "loss": 46.0, - "step": 38006 - }, - { - "epoch": 6.120777809090543, - "grad_norm": 0.0043135639280080795, - "learning_rate": 0.00019998152399121577, - "loss": 46.0, - "step": 38007 - }, - { - "epoch": 6.120938846169331, - "grad_norm": 0.0027210870757699013, - "learning_rate": 0.00019998152301873464, - "loss": 46.0, - "step": 38008 - }, - { - "epoch": 6.121099883248118, - "grad_norm": 0.012294287793338299, - "learning_rate": 0.0001999815220462279, - "loss": 46.0, - "step": 38009 - }, - { - "epoch": 6.121260920326906, - "grad_norm": 0.0053977444767951965, - "learning_rate": 0.0001999815210736956, - "loss": 46.0, - "step": 38010 - }, - { - "epoch": 6.121421957405692, - "grad_norm": 0.007198524661362171, - "learning_rate": 0.00019998152010113768, - "loss": 46.0, - "step": 38011 - }, - { - "epoch": 6.12158299448448, - "grad_norm": 0.0027359111700206995, - "learning_rate": 0.00019998151912855417, - "loss": 46.0, - "step": 38012 - }, - { - "epoch": 6.121744031563267, - "grad_norm": 0.00808628648519516, - "learning_rate": 0.0001999815181559451, - "loss": 46.0, - "step": 38013 - }, - { - "epoch": 6.121905068642055, - "grad_norm": 0.0009916462004184723, - "learning_rate": 0.00019998151718331043, - "loss": 46.0, - "step": 38014 - }, - { - "epoch": 6.122066105720842, - "grad_norm": 0.003694161307066679, - "learning_rate": 0.00019998151621065016, - "loss": 46.0, - "step": 38015 - }, - { - "epoch": 6.1222271427996295, - "grad_norm": 0.0021007994655519724, - "learning_rate": 0.00019998151523796425, - "loss": 46.0, - "step": 38016 - }, - { - "epoch": 6.122388179878417, - "grad_norm": 0.002220150548964739, - "learning_rate": 0.00019998151426525284, - "loss": 46.0, - "step": 38017 - }, - { - "epoch": 6.122549216957204, - "grad_norm": 0.015249320305883884, - "learning_rate": 0.00019998151329251579, - "loss": 46.0, - "step": 38018 - }, - { - "epoch": 6.122710254035992, - "grad_norm": 0.009781640022993088, - "learning_rate": 0.00019998151231975317, - "loss": 46.0, - "step": 38019 - }, - { - "epoch": 6.122871291114779, - "grad_norm": 0.016867421567440033, - "learning_rate": 0.00019998151134696494, - "loss": 46.0, - "step": 38020 - }, - { - "epoch": 6.123032328193567, - "grad_norm": 0.00385505729354918, - "learning_rate": 0.00019998151037415113, - "loss": 46.0, - "step": 38021 - }, - { - "epoch": 6.123193365272354, - "grad_norm": 0.0023131840862333775, - "learning_rate": 0.00019998150940131172, - "loss": 46.0, - "step": 38022 - }, - { - "epoch": 6.123354402351142, - "grad_norm": 0.0061761774122715, - "learning_rate": 0.00019998150842844673, - "loss": 46.0, - "step": 38023 - }, - { - "epoch": 6.123515439429929, - "grad_norm": 0.007513138931244612, - "learning_rate": 0.00019998150745555618, - "loss": 46.0, - "step": 38024 - }, - { - "epoch": 6.123676476508716, - "grad_norm": 0.012906387448310852, - "learning_rate": 0.00019998150648264, - "loss": 46.0, - "step": 38025 - }, - { - "epoch": 6.123837513587503, - "grad_norm": 0.0005248477682471275, - "learning_rate": 0.0001999815055096982, - "loss": 46.0, - "step": 38026 - }, - { - "epoch": 6.123998550666291, - "grad_norm": 0.002935335971415043, - "learning_rate": 0.00019998150453673084, - "loss": 46.0, - "step": 38027 - }, - { - "epoch": 6.124159587745078, - "grad_norm": 0.002595342695713043, - "learning_rate": 0.00019998150356373792, - "loss": 46.0, - "step": 38028 - }, - { - "epoch": 6.1243206248238655, - "grad_norm": 0.012992041185498238, - "learning_rate": 0.0001999815025907194, - "loss": 46.0, - "step": 38029 - }, - { - "epoch": 6.124481661902653, - "grad_norm": 0.002713417401537299, - "learning_rate": 0.00019998150161767528, - "loss": 46.0, - "step": 38030 - }, - { - "epoch": 6.12464269898144, - "grad_norm": 0.026003146544098854, - "learning_rate": 0.00019998150064460556, - "loss": 46.0, - "step": 38031 - }, - { - "epoch": 6.124803736060228, - "grad_norm": 0.0016140779480338097, - "learning_rate": 0.00019998149967151029, - "loss": 46.0, - "step": 38032 - }, - { - "epoch": 6.124964773139015, - "grad_norm": 0.002056755591183901, - "learning_rate": 0.00019998149869838937, - "loss": 46.0, - "step": 38033 - }, - { - "epoch": 6.125125810217803, - "grad_norm": 0.0034807310439646244, - "learning_rate": 0.0001999814977252429, - "loss": 46.0, - "step": 38034 - }, - { - "epoch": 6.12528684729659, - "grad_norm": 0.003984707407653332, - "learning_rate": 0.00019998149675207083, - "loss": 46.0, - "step": 38035 - }, - { - "epoch": 6.125447884375378, - "grad_norm": 0.0059023951180279255, - "learning_rate": 0.00019998149577887315, - "loss": 46.0, - "step": 38036 - }, - { - "epoch": 6.125608921454165, - "grad_norm": 0.00479265907779336, - "learning_rate": 0.00019998149480564989, - "loss": 46.0, - "step": 38037 - }, - { - "epoch": 6.125769958532953, - "grad_norm": 0.0049417889676988125, - "learning_rate": 0.00019998149383240106, - "loss": 46.0, - "step": 38038 - }, - { - "epoch": 6.125930995611739, - "grad_norm": 0.009702890180051327, - "learning_rate": 0.00019998149285912665, - "loss": 46.0, - "step": 38039 - }, - { - "epoch": 6.126092032690527, - "grad_norm": 0.003543219994753599, - "learning_rate": 0.00019998149188582662, - "loss": 46.0, - "step": 38040 - }, - { - "epoch": 6.126253069769314, - "grad_norm": 0.004731372464448214, - "learning_rate": 0.00019998149091250098, - "loss": 46.0, - "step": 38041 - }, - { - "epoch": 6.1264141068481015, - "grad_norm": 0.002965356456115842, - "learning_rate": 0.0001999814899391498, - "loss": 46.0, - "step": 38042 - }, - { - "epoch": 6.126575143926889, - "grad_norm": 0.00771909998729825, - "learning_rate": 0.000199981488965773, - "loss": 46.0, - "step": 38043 - }, - { - "epoch": 6.126736181005676, - "grad_norm": 0.0014029525918886065, - "learning_rate": 0.00019998148799237062, - "loss": 46.0, - "step": 38044 - }, - { - "epoch": 6.126897218084464, - "grad_norm": 0.008475450798869133, - "learning_rate": 0.00019998148701894263, - "loss": 46.0, - "step": 38045 - }, - { - "epoch": 6.127058255163251, - "grad_norm": 0.010319489054381847, - "learning_rate": 0.00019998148604548908, - "loss": 46.0, - "step": 38046 - }, - { - "epoch": 6.127219292242039, - "grad_norm": 0.008587724529206753, - "learning_rate": 0.0001999814850720099, - "loss": 46.0, - "step": 38047 - }, - { - "epoch": 6.127380329320826, - "grad_norm": 0.002170801628381014, - "learning_rate": 0.00019998148409850516, - "loss": 46.0, - "step": 38048 - }, - { - "epoch": 6.127541366399614, - "grad_norm": 0.003980133216828108, - "learning_rate": 0.00019998148312497485, - "loss": 46.0, - "step": 38049 - }, - { - "epoch": 6.127702403478401, - "grad_norm": 0.003032906446605921, - "learning_rate": 0.00019998148215141892, - "loss": 46.0, - "step": 38050 - }, - { - "epoch": 6.127863440557189, - "grad_norm": 0.005040084943175316, - "learning_rate": 0.0001999814811778374, - "loss": 46.0, - "step": 38051 - }, - { - "epoch": 6.128024477635976, - "grad_norm": 0.0038813622668385506, - "learning_rate": 0.0001999814802042303, - "loss": 46.0, - "step": 38052 - }, - { - "epoch": 6.1281855147147635, - "grad_norm": 0.03539165109395981, - "learning_rate": 0.0001999814792305976, - "loss": 46.0, - "step": 38053 - }, - { - "epoch": 6.12834655179355, - "grad_norm": 0.0008202372118830681, - "learning_rate": 0.00019998147825693932, - "loss": 46.0, - "step": 38054 - }, - { - "epoch": 6.1285075888723375, - "grad_norm": 0.00880894809961319, - "learning_rate": 0.00019998147728325543, - "loss": 46.0, - "step": 38055 - }, - { - "epoch": 6.128668625951125, - "grad_norm": 0.001970510231330991, - "learning_rate": 0.00019998147630954599, - "loss": 46.0, - "step": 38056 - }, - { - "epoch": 6.128829663029912, - "grad_norm": 0.0038770800456404686, - "learning_rate": 0.0001999814753358109, - "loss": 46.0, - "step": 38057 - }, - { - "epoch": 6.1289907001087, - "grad_norm": 0.0012005168246105313, - "learning_rate": 0.00019998147436205027, - "loss": 46.0, - "step": 38058 - }, - { - "epoch": 6.129151737187487, - "grad_norm": 0.006247636396437883, - "learning_rate": 0.00019998147338826404, - "loss": 46.0, - "step": 38059 - }, - { - "epoch": 6.129312774266275, - "grad_norm": 0.002642413368448615, - "learning_rate": 0.0001999814724144522, - "loss": 46.0, - "step": 38060 - }, - { - "epoch": 6.129473811345062, - "grad_norm": 0.0021298436913639307, - "learning_rate": 0.0001999814714406148, - "loss": 46.0, - "step": 38061 - }, - { - "epoch": 6.12963484842385, - "grad_norm": 0.002107681706547737, - "learning_rate": 0.0001999814704667518, - "loss": 46.0, - "step": 38062 - }, - { - "epoch": 6.129795885502637, - "grad_norm": 0.004172795917838812, - "learning_rate": 0.00019998146949286321, - "loss": 46.0, - "step": 38063 - }, - { - "epoch": 6.1299569225814245, - "grad_norm": 0.003007931634783745, - "learning_rate": 0.00019998146851894901, - "loss": 46.0, - "step": 38064 - }, - { - "epoch": 6.130117959660212, - "grad_norm": 0.01600167155265808, - "learning_rate": 0.00019998146754500923, - "loss": 46.0, - "step": 38065 - }, - { - "epoch": 6.1302789967389995, - "grad_norm": 0.008139573968946934, - "learning_rate": 0.00019998146657104385, - "loss": 46.0, - "step": 38066 - }, - { - "epoch": 6.130440033817787, - "grad_norm": 0.002752548549324274, - "learning_rate": 0.00019998146559705292, - "loss": 46.0, - "step": 38067 - }, - { - "epoch": 6.130601070896574, - "grad_norm": 0.008915587328374386, - "learning_rate": 0.00019998146462303637, - "loss": 46.0, - "step": 38068 - }, - { - "epoch": 6.130762107975361, - "grad_norm": 0.006481254007667303, - "learning_rate": 0.00019998146364899423, - "loss": 46.0, - "step": 38069 - }, - { - "epoch": 6.130923145054148, - "grad_norm": 0.0029557249508798122, - "learning_rate": 0.0001999814626749265, - "loss": 46.0, - "step": 38070 - }, - { - "epoch": 6.131084182132936, - "grad_norm": 0.005246481858193874, - "learning_rate": 0.00019998146170083317, - "loss": 46.0, - "step": 38071 - }, - { - "epoch": 6.131245219211723, - "grad_norm": 0.020238157361745834, - "learning_rate": 0.00019998146072671428, - "loss": 46.0, - "step": 38072 - }, - { - "epoch": 6.131406256290511, - "grad_norm": 0.0030776364728808403, - "learning_rate": 0.0001999814597525698, - "loss": 46.0, - "step": 38073 - }, - { - "epoch": 6.131567293369298, - "grad_norm": 0.005505464971065521, - "learning_rate": 0.0001999814587783997, - "loss": 46.0, - "step": 38074 - }, - { - "epoch": 6.131728330448086, - "grad_norm": 0.008040811866521835, - "learning_rate": 0.00019998145780420403, - "loss": 46.0, - "step": 38075 - }, - { - "epoch": 6.131889367526873, - "grad_norm": 0.005472471471875906, - "learning_rate": 0.00019998145682998276, - "loss": 46.0, - "step": 38076 - }, - { - "epoch": 6.1320504046056605, - "grad_norm": 0.007615697104483843, - "learning_rate": 0.0001999814558557359, - "loss": 46.0, - "step": 38077 - }, - { - "epoch": 6.132211441684448, - "grad_norm": 0.002209107857197523, - "learning_rate": 0.00019998145488146345, - "loss": 46.0, - "step": 38078 - }, - { - "epoch": 6.132372478763235, - "grad_norm": 0.00337052159011364, - "learning_rate": 0.00019998145390716542, - "loss": 46.0, - "step": 38079 - }, - { - "epoch": 6.132533515842023, - "grad_norm": 0.002775853266939521, - "learning_rate": 0.0001999814529328418, - "loss": 46.0, - "step": 38080 - }, - { - "epoch": 6.13269455292081, - "grad_norm": 0.01040705107152462, - "learning_rate": 0.00019998145195849256, - "loss": 46.0, - "step": 38081 - }, - { - "epoch": 6.132855589999598, - "grad_norm": 0.002455855254083872, - "learning_rate": 0.00019998145098411776, - "loss": 46.0, - "step": 38082 - }, - { - "epoch": 6.133016627078385, - "grad_norm": 0.002750950865447521, - "learning_rate": 0.00019998145000971735, - "loss": 46.0, - "step": 38083 - }, - { - "epoch": 6.133177664157172, - "grad_norm": 0.0018378561362624168, - "learning_rate": 0.00019998144903529138, - "loss": 46.0, - "step": 38084 - }, - { - "epoch": 6.133338701235959, - "grad_norm": 0.018560731783509254, - "learning_rate": 0.0001999814480608398, - "loss": 46.0, - "step": 38085 - }, - { - "epoch": 6.133499738314747, - "grad_norm": 0.011354668997228146, - "learning_rate": 0.00019998144708636262, - "loss": 46.0, - "step": 38086 - }, - { - "epoch": 6.133660775393534, - "grad_norm": 0.002525091404095292, - "learning_rate": 0.00019998144611185986, - "loss": 46.0, - "step": 38087 - }, - { - "epoch": 6.133821812472322, - "grad_norm": 0.004146402236074209, - "learning_rate": 0.0001999814451373315, - "loss": 46.0, - "step": 38088 - }, - { - "epoch": 6.133982849551109, - "grad_norm": 0.0017427769489586353, - "learning_rate": 0.00019998144416277758, - "loss": 46.0, - "step": 38089 - }, - { - "epoch": 6.1341438866298965, - "grad_norm": 0.0190053079277277, - "learning_rate": 0.00019998144318819804, - "loss": 46.0, - "step": 38090 - }, - { - "epoch": 6.134304923708684, - "grad_norm": 0.003641800954937935, - "learning_rate": 0.00019998144221359293, - "loss": 46.0, - "step": 38091 - }, - { - "epoch": 6.134465960787471, - "grad_norm": 0.0028951247222721577, - "learning_rate": 0.00019998144123896223, - "loss": 46.0, - "step": 38092 - }, - { - "epoch": 6.134626997866259, - "grad_norm": 0.008201111108064651, - "learning_rate": 0.00019998144026430592, - "loss": 46.0, - "step": 38093 - }, - { - "epoch": 6.134788034945046, - "grad_norm": 0.006603895220905542, - "learning_rate": 0.00019998143928962403, - "loss": 46.0, - "step": 38094 - }, - { - "epoch": 6.134949072023834, - "grad_norm": 0.01317819207906723, - "learning_rate": 0.00019998143831491657, - "loss": 46.0, - "step": 38095 - }, - { - "epoch": 6.135110109102621, - "grad_norm": 0.00565582700073719, - "learning_rate": 0.00019998143734018347, - "loss": 46.0, - "step": 38096 - }, - { - "epoch": 6.135271146181409, - "grad_norm": 0.004093275871127844, - "learning_rate": 0.00019998143636542481, - "loss": 46.0, - "step": 38097 - }, - { - "epoch": 6.135432183260195, - "grad_norm": 0.016649968922138214, - "learning_rate": 0.00019998143539064057, - "loss": 46.0, - "step": 38098 - }, - { - "epoch": 6.135593220338983, - "grad_norm": 0.0034655649214982986, - "learning_rate": 0.00019998143441583074, - "loss": 46.0, - "step": 38099 - }, - { - "epoch": 6.13575425741777, - "grad_norm": 0.0032079655211418867, - "learning_rate": 0.0001999814334409953, - "loss": 46.0, - "step": 38100 - }, - { - "epoch": 6.135915294496558, - "grad_norm": 0.002937356708571315, - "learning_rate": 0.00019998143246613428, - "loss": 46.0, - "step": 38101 - }, - { - "epoch": 6.136076331575345, - "grad_norm": 0.005748485215008259, - "learning_rate": 0.00019998143149124766, - "loss": 46.0, - "step": 38102 - }, - { - "epoch": 6.1362373686541325, - "grad_norm": 0.0035205793101340532, - "learning_rate": 0.00019998143051633545, - "loss": 46.0, - "step": 38103 - }, - { - "epoch": 6.13639840573292, - "grad_norm": 0.01791226677596569, - "learning_rate": 0.00019998142954139766, - "loss": 46.0, - "step": 38104 - }, - { - "epoch": 6.136559442811707, - "grad_norm": 0.0010290952632203698, - "learning_rate": 0.0001999814285664343, - "loss": 46.0, - "step": 38105 - }, - { - "epoch": 6.136720479890495, - "grad_norm": 0.005278922617435455, - "learning_rate": 0.0001999814275914453, - "loss": 46.0, - "step": 38106 - }, - { - "epoch": 6.136881516969282, - "grad_norm": 0.0071410625241696835, - "learning_rate": 0.00019998142661643075, - "loss": 46.0, - "step": 38107 - }, - { - "epoch": 6.13704255404807, - "grad_norm": 0.005684057250618935, - "learning_rate": 0.0001999814256413906, - "loss": 46.0, - "step": 38108 - }, - { - "epoch": 6.137203591126857, - "grad_norm": 0.010827167890965939, - "learning_rate": 0.00019998142466632487, - "loss": 46.0, - "step": 38109 - }, - { - "epoch": 6.137364628205645, - "grad_norm": 0.0033071995712816715, - "learning_rate": 0.00019998142369123353, - "loss": 46.0, - "step": 38110 - }, - { - "epoch": 6.137525665284432, - "grad_norm": 0.001631106366403401, - "learning_rate": 0.00019998142271611662, - "loss": 46.0, - "step": 38111 - }, - { - "epoch": 6.13768670236322, - "grad_norm": 0.004384367261081934, - "learning_rate": 0.0001999814217409741, - "loss": 46.0, - "step": 38112 - }, - { - "epoch": 6.137847739442006, - "grad_norm": 0.0061812326312065125, - "learning_rate": 0.000199981420765806, - "loss": 46.0, - "step": 38113 - }, - { - "epoch": 6.138008776520794, - "grad_norm": 0.005505761131644249, - "learning_rate": 0.0001999814197906123, - "loss": 46.0, - "step": 38114 - }, - { - "epoch": 6.138169813599581, - "grad_norm": 0.0031309358309954405, - "learning_rate": 0.00019998141881539302, - "loss": 46.0, - "step": 38115 - }, - { - "epoch": 6.1383308506783685, - "grad_norm": 0.015083485282957554, - "learning_rate": 0.00019998141784014815, - "loss": 46.0, - "step": 38116 - }, - { - "epoch": 6.138491887757156, - "grad_norm": 0.010768556967377663, - "learning_rate": 0.0001999814168648777, - "loss": 46.0, - "step": 38117 - }, - { - "epoch": 6.138652924835943, - "grad_norm": 0.016163639724254608, - "learning_rate": 0.00019998141588958162, - "loss": 46.0, - "step": 38118 - }, - { - "epoch": 6.138813961914731, - "grad_norm": 0.0023822453804314137, - "learning_rate": 0.00019998141491426, - "loss": 46.0, - "step": 38119 - }, - { - "epoch": 6.138974998993518, - "grad_norm": 0.007294551003724337, - "learning_rate": 0.00019998141393891278, - "loss": 46.0, - "step": 38120 - }, - { - "epoch": 6.139136036072306, - "grad_norm": 0.0016139112412929535, - "learning_rate": 0.00019998141296353995, - "loss": 46.0, - "step": 38121 - }, - { - "epoch": 6.139297073151093, - "grad_norm": 0.005083042196929455, - "learning_rate": 0.00019998141198814153, - "loss": 46.0, - "step": 38122 - }, - { - "epoch": 6.139458110229881, - "grad_norm": 0.008838369511067867, - "learning_rate": 0.00019998141101271752, - "loss": 46.0, - "step": 38123 - }, - { - "epoch": 6.139619147308668, - "grad_norm": 0.002758219838142395, - "learning_rate": 0.00019998141003726793, - "loss": 46.0, - "step": 38124 - }, - { - "epoch": 6.1397801843874555, - "grad_norm": 0.0021110151428729296, - "learning_rate": 0.00019998140906179275, - "loss": 46.0, - "step": 38125 - }, - { - "epoch": 6.139941221466243, - "grad_norm": 0.003262303536757827, - "learning_rate": 0.00019998140808629198, - "loss": 46.0, - "step": 38126 - }, - { - "epoch": 6.14010225854503, - "grad_norm": 0.010376634076237679, - "learning_rate": 0.0001999814071107656, - "loss": 46.0, - "step": 38127 - }, - { - "epoch": 6.140263295623817, - "grad_norm": 0.008289509452879429, - "learning_rate": 0.00019998140613521366, - "loss": 46.0, - "step": 38128 - }, - { - "epoch": 6.1404243327026045, - "grad_norm": 0.002768499543890357, - "learning_rate": 0.00019998140515963613, - "loss": 46.0, - "step": 38129 - }, - { - "epoch": 6.140585369781392, - "grad_norm": 0.0010839044116437435, - "learning_rate": 0.00019998140418403299, - "loss": 46.0, - "step": 38130 - }, - { - "epoch": 6.140746406860179, - "grad_norm": 0.009540265426039696, - "learning_rate": 0.00019998140320840423, - "loss": 46.0, - "step": 38131 - }, - { - "epoch": 6.140907443938967, - "grad_norm": 0.022692784667015076, - "learning_rate": 0.00019998140223274994, - "loss": 46.0, - "step": 38132 - }, - { - "epoch": 6.141068481017754, - "grad_norm": 0.003273993730545044, - "learning_rate": 0.00019998140125707003, - "loss": 46.0, - "step": 38133 - }, - { - "epoch": 6.141229518096542, - "grad_norm": 0.009144116193056107, - "learning_rate": 0.00019998140028136454, - "loss": 46.0, - "step": 38134 - }, - { - "epoch": 6.141390555175329, - "grad_norm": 0.005073470063507557, - "learning_rate": 0.00019998139930563346, - "loss": 46.0, - "step": 38135 - }, - { - "epoch": 6.141551592254117, - "grad_norm": 0.02522943541407585, - "learning_rate": 0.0001999813983298768, - "loss": 46.0, - "step": 38136 - }, - { - "epoch": 6.141712629332904, - "grad_norm": 0.01788550801575184, - "learning_rate": 0.00019998139735409451, - "loss": 46.0, - "step": 38137 - }, - { - "epoch": 6.1418736664116915, - "grad_norm": 0.0052560726180672646, - "learning_rate": 0.00019998139637828665, - "loss": 46.0, - "step": 38138 - }, - { - "epoch": 6.142034703490479, - "grad_norm": 0.005746690556406975, - "learning_rate": 0.00019998139540245322, - "loss": 46.0, - "step": 38139 - }, - { - "epoch": 6.142195740569266, - "grad_norm": 0.00949172955006361, - "learning_rate": 0.00019998139442659418, - "loss": 46.0, - "step": 38140 - }, - { - "epoch": 6.142356777648054, - "grad_norm": 0.0020084786228835583, - "learning_rate": 0.00019998139345070955, - "loss": 46.0, - "step": 38141 - }, - { - "epoch": 6.14251781472684, - "grad_norm": 0.010748251341283321, - "learning_rate": 0.00019998139247479933, - "loss": 46.0, - "step": 38142 - }, - { - "epoch": 6.142678851805628, - "grad_norm": 0.0010066042887046933, - "learning_rate": 0.00019998139149886353, - "loss": 46.0, - "step": 38143 - }, - { - "epoch": 6.142839888884415, - "grad_norm": 0.005377229768782854, - "learning_rate": 0.00019998139052290214, - "loss": 46.0, - "step": 38144 - }, - { - "epoch": 6.143000925963203, - "grad_norm": 0.00917661003768444, - "learning_rate": 0.00019998138954691513, - "loss": 46.0, - "step": 38145 - }, - { - "epoch": 6.14316196304199, - "grad_norm": 0.0022325965110212564, - "learning_rate": 0.0001999813885709026, - "loss": 46.0, - "step": 38146 - }, - { - "epoch": 6.143323000120778, - "grad_norm": 0.016158388927578926, - "learning_rate": 0.0001999813875948644, - "loss": 46.0, - "step": 38147 - }, - { - "epoch": 6.143484037199565, - "grad_norm": 0.007938831113278866, - "learning_rate": 0.00019998138661880065, - "loss": 46.0, - "step": 38148 - }, - { - "epoch": 6.143645074278353, - "grad_norm": 0.005012881942093372, - "learning_rate": 0.00019998138564271132, - "loss": 46.0, - "step": 38149 - }, - { - "epoch": 6.14380611135714, - "grad_norm": 0.007556302472949028, - "learning_rate": 0.00019998138466659638, - "loss": 46.0, - "step": 38150 - }, - { - "epoch": 6.1439671484359275, - "grad_norm": 0.0013723070733249187, - "learning_rate": 0.00019998138369045585, - "loss": 46.0, - "step": 38151 - }, - { - "epoch": 6.144128185514715, - "grad_norm": 0.006414035335183144, - "learning_rate": 0.00019998138271428974, - "loss": 46.0, - "step": 38152 - }, - { - "epoch": 6.144289222593502, - "grad_norm": 0.0014599489513784647, - "learning_rate": 0.00019998138173809804, - "loss": 46.0, - "step": 38153 - }, - { - "epoch": 6.14445025967229, - "grad_norm": 0.009400549344718456, - "learning_rate": 0.00019998138076188072, - "loss": 46.0, - "step": 38154 - }, - { - "epoch": 6.144611296751077, - "grad_norm": 0.0049391454085707664, - "learning_rate": 0.00019998137978563784, - "loss": 46.0, - "step": 38155 - }, - { - "epoch": 6.144772333829865, - "grad_norm": 0.02154109999537468, - "learning_rate": 0.00019998137880936935, - "loss": 46.0, - "step": 38156 - }, - { - "epoch": 6.144933370908651, - "grad_norm": 0.010342485271394253, - "learning_rate": 0.0001999813778330753, - "loss": 46.0, - "step": 38157 - }, - { - "epoch": 6.145094407987439, - "grad_norm": 0.0028692493215203285, - "learning_rate": 0.00019998137685675564, - "loss": 46.0, - "step": 38158 - }, - { - "epoch": 6.145255445066226, - "grad_norm": 0.012828550301492214, - "learning_rate": 0.0001999813758804104, - "loss": 46.0, - "step": 38159 - }, - { - "epoch": 6.145416482145014, - "grad_norm": 0.0009911274537444115, - "learning_rate": 0.00019998137490403957, - "loss": 46.0, - "step": 38160 - }, - { - "epoch": 6.145577519223801, - "grad_norm": 0.0066237300634384155, - "learning_rate": 0.00019998137392764314, - "loss": 46.0, - "step": 38161 - }, - { - "epoch": 6.145738556302589, - "grad_norm": 0.003932784777134657, - "learning_rate": 0.00019998137295122113, - "loss": 46.0, - "step": 38162 - }, - { - "epoch": 6.145899593381376, - "grad_norm": 0.0034112115390598774, - "learning_rate": 0.0001999813719747735, - "loss": 46.0, - "step": 38163 - }, - { - "epoch": 6.1460606304601635, - "grad_norm": 0.0019163420656695962, - "learning_rate": 0.0001999813709983003, - "loss": 46.0, - "step": 38164 - }, - { - "epoch": 6.146221667538951, - "grad_norm": 0.0012701962841674685, - "learning_rate": 0.00019998137002180154, - "loss": 46.0, - "step": 38165 - }, - { - "epoch": 6.146382704617738, - "grad_norm": 0.007408374920487404, - "learning_rate": 0.00019998136904527715, - "loss": 46.0, - "step": 38166 - }, - { - "epoch": 6.146543741696526, - "grad_norm": 0.00444489112123847, - "learning_rate": 0.00019998136806872717, - "loss": 46.0, - "step": 38167 - }, - { - "epoch": 6.146704778775313, - "grad_norm": 0.009543496184051037, - "learning_rate": 0.00019998136709215163, - "loss": 46.0, - "step": 38168 - }, - { - "epoch": 6.146865815854101, - "grad_norm": 0.0064465063624084, - "learning_rate": 0.00019998136611555048, - "loss": 46.0, - "step": 38169 - }, - { - "epoch": 6.147026852932888, - "grad_norm": 0.004339747130870819, - "learning_rate": 0.00019998136513892374, - "loss": 46.0, - "step": 38170 - }, - { - "epoch": 6.147187890011675, - "grad_norm": 0.008034233003854752, - "learning_rate": 0.00019998136416227142, - "loss": 46.0, - "step": 38171 - }, - { - "epoch": 6.147348927090462, - "grad_norm": 0.007363523822277784, - "learning_rate": 0.00019998136318559348, - "loss": 46.0, - "step": 38172 - }, - { - "epoch": 6.14750996416925, - "grad_norm": 0.013469934463500977, - "learning_rate": 0.00019998136220888998, - "loss": 46.0, - "step": 38173 - }, - { - "epoch": 6.147671001248037, - "grad_norm": 0.013557642698287964, - "learning_rate": 0.00019998136123216086, - "loss": 46.0, - "step": 38174 - }, - { - "epoch": 6.147832038326825, - "grad_norm": 0.004900000989437103, - "learning_rate": 0.0001999813602554062, - "loss": 46.0, - "step": 38175 - }, - { - "epoch": 6.147993075405612, - "grad_norm": 0.0017434413312003016, - "learning_rate": 0.0001999813592786259, - "loss": 46.0, - "step": 38176 - }, - { - "epoch": 6.1481541124843995, - "grad_norm": 0.0008235702407546341, - "learning_rate": 0.00019998135830182005, - "loss": 46.0, - "step": 38177 - }, - { - "epoch": 6.148315149563187, - "grad_norm": 0.006556585896760225, - "learning_rate": 0.0001999813573249886, - "loss": 46.0, - "step": 38178 - }, - { - "epoch": 6.148476186641974, - "grad_norm": 0.017589643597602844, - "learning_rate": 0.00019998135634813157, - "loss": 46.0, - "step": 38179 - }, - { - "epoch": 6.148637223720762, - "grad_norm": 0.0028431490063667297, - "learning_rate": 0.0001999813553712489, - "loss": 46.0, - "step": 38180 - }, - { - "epoch": 6.148798260799549, - "grad_norm": 0.0018631118582561612, - "learning_rate": 0.00019998135439434068, - "loss": 46.0, - "step": 38181 - }, - { - "epoch": 6.148959297878337, - "grad_norm": 0.00662678899243474, - "learning_rate": 0.00019998135341740684, - "loss": 46.0, - "step": 38182 - }, - { - "epoch": 6.149120334957124, - "grad_norm": 0.012830279767513275, - "learning_rate": 0.00019998135244044744, - "loss": 46.0, - "step": 38183 - }, - { - "epoch": 6.149281372035912, - "grad_norm": 0.004706101026386023, - "learning_rate": 0.00019998135146346243, - "loss": 46.0, - "step": 38184 - }, - { - "epoch": 6.149442409114699, - "grad_norm": 0.008113808929920197, - "learning_rate": 0.00019998135048645185, - "loss": 46.0, - "step": 38185 - }, - { - "epoch": 6.149603446193486, - "grad_norm": 0.018631508573889732, - "learning_rate": 0.00019998134950941567, - "loss": 46.0, - "step": 38186 - }, - { - "epoch": 6.149764483272273, - "grad_norm": 0.0024354676716029644, - "learning_rate": 0.00019998134853235392, - "loss": 46.0, - "step": 38187 - }, - { - "epoch": 6.1499255203510605, - "grad_norm": 0.0007428895914927125, - "learning_rate": 0.00019998134755526653, - "loss": 46.0, - "step": 38188 - }, - { - "epoch": 6.150086557429848, - "grad_norm": 0.017941290512681007, - "learning_rate": 0.00019998134657815358, - "loss": 46.0, - "step": 38189 - }, - { - "epoch": 6.1502475945086355, - "grad_norm": 0.004254871513694525, - "learning_rate": 0.00019998134560101507, - "loss": 46.0, - "step": 38190 - }, - { - "epoch": 6.150408631587423, - "grad_norm": 0.002839587163180113, - "learning_rate": 0.00019998134462385092, - "loss": 46.0, - "step": 38191 - }, - { - "epoch": 6.15056966866621, - "grad_norm": 0.005300450138747692, - "learning_rate": 0.00019998134364666118, - "loss": 46.0, - "step": 38192 - }, - { - "epoch": 6.150730705744998, - "grad_norm": 0.0015842207940295339, - "learning_rate": 0.00019998134266944591, - "loss": 46.0, - "step": 38193 - }, - { - "epoch": 6.150891742823785, - "grad_norm": 0.006529216188937426, - "learning_rate": 0.000199981341692205, - "loss": 46.0, - "step": 38194 - }, - { - "epoch": 6.151052779902573, - "grad_norm": 0.0021917361300438643, - "learning_rate": 0.0001999813407149385, - "loss": 46.0, - "step": 38195 - }, - { - "epoch": 6.15121381698136, - "grad_norm": 0.015276730060577393, - "learning_rate": 0.00019998133973764644, - "loss": 46.0, - "step": 38196 - }, - { - "epoch": 6.151374854060148, - "grad_norm": 0.0172035600990057, - "learning_rate": 0.00019998133876032874, - "loss": 46.0, - "step": 38197 - }, - { - "epoch": 6.151535891138935, - "grad_norm": 0.008346655406057835, - "learning_rate": 0.00019998133778298548, - "loss": 46.0, - "step": 38198 - }, - { - "epoch": 6.1516969282177225, - "grad_norm": 0.011320813558995724, - "learning_rate": 0.00019998133680561664, - "loss": 46.0, - "step": 38199 - }, - { - "epoch": 6.151857965296509, - "grad_norm": 0.004653043579310179, - "learning_rate": 0.0001999813358282222, - "loss": 46.0, - "step": 38200 - }, - { - "epoch": 6.1520190023752965, - "grad_norm": 0.0027833282947540283, - "learning_rate": 0.00019998133485080218, - "loss": 46.0, - "step": 38201 - }, - { - "epoch": 6.152180039454084, - "grad_norm": 0.007442052476108074, - "learning_rate": 0.00019998133387335654, - "loss": 46.0, - "step": 38202 - }, - { - "epoch": 6.152341076532871, - "grad_norm": 0.0020572366192936897, - "learning_rate": 0.00019998133289588532, - "loss": 46.0, - "step": 38203 - }, - { - "epoch": 6.152502113611659, - "grad_norm": 0.005303728394210339, - "learning_rate": 0.00019998133191838853, - "loss": 46.0, - "step": 38204 - }, - { - "epoch": 6.152663150690446, - "grad_norm": 0.02952386625111103, - "learning_rate": 0.00019998133094086616, - "loss": 46.0, - "step": 38205 - }, - { - "epoch": 6.152824187769234, - "grad_norm": 0.0012008358025923371, - "learning_rate": 0.00019998132996331815, - "loss": 46.0, - "step": 38206 - }, - { - "epoch": 6.152985224848021, - "grad_norm": 0.01046083401888609, - "learning_rate": 0.00019998132898574458, - "loss": 46.0, - "step": 38207 - }, - { - "epoch": 6.153146261926809, - "grad_norm": 0.004147620871663094, - "learning_rate": 0.00019998132800814542, - "loss": 46.0, - "step": 38208 - }, - { - "epoch": 6.153307299005596, - "grad_norm": 0.002657199278473854, - "learning_rate": 0.00019998132703052067, - "loss": 46.0, - "step": 38209 - }, - { - "epoch": 6.153468336084384, - "grad_norm": 0.010750516317784786, - "learning_rate": 0.0001999813260528703, - "loss": 46.0, - "step": 38210 - }, - { - "epoch": 6.153629373163171, - "grad_norm": 0.00933627039194107, - "learning_rate": 0.0001999813250751944, - "loss": 46.0, - "step": 38211 - }, - { - "epoch": 6.1537904102419585, - "grad_norm": 0.0017052008770406246, - "learning_rate": 0.00019998132409749288, - "loss": 46.0, - "step": 38212 - }, - { - "epoch": 6.153951447320746, - "grad_norm": 0.0023985765874385834, - "learning_rate": 0.00019998132311976576, - "loss": 46.0, - "step": 38213 - }, - { - "epoch": 6.154112484399533, - "grad_norm": 0.0024245830718427896, - "learning_rate": 0.00019998132214201308, - "loss": 46.0, - "step": 38214 - }, - { - "epoch": 6.15427352147832, - "grad_norm": 0.004561722278594971, - "learning_rate": 0.00019998132116423478, - "loss": 46.0, - "step": 38215 - }, - { - "epoch": 6.154434558557107, - "grad_norm": 0.004155233968049288, - "learning_rate": 0.0001999813201864309, - "loss": 46.0, - "step": 38216 - }, - { - "epoch": 6.154595595635895, - "grad_norm": 0.017093494534492493, - "learning_rate": 0.00019998131920860143, - "loss": 46.0, - "step": 38217 - }, - { - "epoch": 6.154756632714682, - "grad_norm": 0.003951186314225197, - "learning_rate": 0.00019998131823074634, - "loss": 46.0, - "step": 38218 - }, - { - "epoch": 6.15491766979347, - "grad_norm": 0.008156186901032925, - "learning_rate": 0.0001999813172528657, - "loss": 46.0, - "step": 38219 - }, - { - "epoch": 6.155078706872257, - "grad_norm": 0.0031261479016393423, - "learning_rate": 0.00019998131627495946, - "loss": 46.0, - "step": 38220 - }, - { - "epoch": 6.155239743951045, - "grad_norm": 0.005263959988951683, - "learning_rate": 0.00019998131529702762, - "loss": 46.0, - "step": 38221 - }, - { - "epoch": 6.155400781029832, - "grad_norm": 0.014790375716984272, - "learning_rate": 0.0001999813143190702, - "loss": 46.0, - "step": 38222 - }, - { - "epoch": 6.15556181810862, - "grad_norm": 0.0013469933765009046, - "learning_rate": 0.0001999813133410872, - "loss": 46.0, - "step": 38223 - }, - { - "epoch": 6.155722855187407, - "grad_norm": 0.0006598061299882829, - "learning_rate": 0.00019998131236307858, - "loss": 46.0, - "step": 38224 - }, - { - "epoch": 6.1558838922661945, - "grad_norm": 0.0046853115782141685, - "learning_rate": 0.00019998131138504439, - "loss": 46.0, - "step": 38225 - }, - { - "epoch": 6.156044929344982, - "grad_norm": 0.011621733196079731, - "learning_rate": 0.00019998131040698463, - "loss": 46.0, - "step": 38226 - }, - { - "epoch": 6.156205966423769, - "grad_norm": 0.02308325655758381, - "learning_rate": 0.00019998130942889923, - "loss": 46.0, - "step": 38227 - }, - { - "epoch": 6.156367003502557, - "grad_norm": 0.004738691728562117, - "learning_rate": 0.00019998130845078828, - "loss": 46.0, - "step": 38228 - }, - { - "epoch": 6.156528040581344, - "grad_norm": 0.008279988542199135, - "learning_rate": 0.0001999813074726517, - "loss": 46.0, - "step": 38229 - }, - { - "epoch": 6.156689077660131, - "grad_norm": 0.001596157904714346, - "learning_rate": 0.00019998130649448957, - "loss": 46.0, - "step": 38230 - }, - { - "epoch": 6.156850114738918, - "grad_norm": 0.014751922339200974, - "learning_rate": 0.00019998130551630183, - "loss": 46.0, - "step": 38231 - }, - { - "epoch": 6.157011151817706, - "grad_norm": 0.014020483009517193, - "learning_rate": 0.00019998130453808852, - "loss": 46.0, - "step": 38232 - }, - { - "epoch": 6.157172188896493, - "grad_norm": 0.0033833379857242107, - "learning_rate": 0.0001999813035598496, - "loss": 46.0, - "step": 38233 - }, - { - "epoch": 6.157333225975281, - "grad_norm": 0.0010672836797311902, - "learning_rate": 0.0001999813025815851, - "loss": 46.0, - "step": 38234 - }, - { - "epoch": 6.157494263054068, - "grad_norm": 0.0030781817622482777, - "learning_rate": 0.00019998130160329503, - "loss": 46.0, - "step": 38235 - }, - { - "epoch": 6.157655300132856, - "grad_norm": 0.01249680481851101, - "learning_rate": 0.00019998130062497932, - "loss": 46.0, - "step": 38236 - }, - { - "epoch": 6.157816337211643, - "grad_norm": 0.018243996426463127, - "learning_rate": 0.00019998129964663805, - "loss": 46.0, - "step": 38237 - }, - { - "epoch": 6.1579773742904305, - "grad_norm": 0.009122656658291817, - "learning_rate": 0.0001999812986682712, - "loss": 46.0, - "step": 38238 - }, - { - "epoch": 6.158138411369218, - "grad_norm": 0.007188413292169571, - "learning_rate": 0.00019998129768987872, - "loss": 46.0, - "step": 38239 - }, - { - "epoch": 6.158299448448005, - "grad_norm": 0.007149084471166134, - "learning_rate": 0.0001999812967114607, - "loss": 46.0, - "step": 38240 - }, - { - "epoch": 6.158460485526793, - "grad_norm": 0.0060903532430529594, - "learning_rate": 0.00019998129573301708, - "loss": 46.0, - "step": 38241 - }, - { - "epoch": 6.15862152260558, - "grad_norm": 0.006014631595462561, - "learning_rate": 0.00019998129475454784, - "loss": 46.0, - "step": 38242 - }, - { - "epoch": 6.158782559684368, - "grad_norm": 0.00504056503996253, - "learning_rate": 0.00019998129377605303, - "loss": 46.0, - "step": 38243 - }, - { - "epoch": 6.158943596763155, - "grad_norm": 0.0018291643355041742, - "learning_rate": 0.00019998129279753262, - "loss": 46.0, - "step": 38244 - }, - { - "epoch": 6.159104633841942, - "grad_norm": 0.001553820795379579, - "learning_rate": 0.00019998129181898663, - "loss": 46.0, - "step": 38245 - }, - { - "epoch": 6.159265670920729, - "grad_norm": 0.004942046478390694, - "learning_rate": 0.00019998129084041505, - "loss": 46.0, - "step": 38246 - }, - { - "epoch": 6.159426707999517, - "grad_norm": 0.003221700666472316, - "learning_rate": 0.00019998128986181785, - "loss": 46.0, - "step": 38247 - }, - { - "epoch": 6.159587745078304, - "grad_norm": 0.0010458199540153146, - "learning_rate": 0.0001999812888831951, - "loss": 46.0, - "step": 38248 - }, - { - "epoch": 6.1597487821570915, - "grad_norm": 0.009848294779658318, - "learning_rate": 0.00019998128790454673, - "loss": 46.0, - "step": 38249 - }, - { - "epoch": 6.159909819235879, - "grad_norm": 0.0064690001308918, - "learning_rate": 0.0001999812869258728, - "loss": 46.0, - "step": 38250 - }, - { - "epoch": 6.160070856314666, - "grad_norm": 0.0045645879581570625, - "learning_rate": 0.00019998128594717326, - "loss": 46.0, - "step": 38251 - }, - { - "epoch": 6.160231893393454, - "grad_norm": 0.005330066196620464, - "learning_rate": 0.00019998128496844815, - "loss": 46.0, - "step": 38252 - }, - { - "epoch": 6.160392930472241, - "grad_norm": 0.006402960047125816, - "learning_rate": 0.0001999812839896974, - "loss": 46.0, - "step": 38253 - }, - { - "epoch": 6.160553967551029, - "grad_norm": 0.01867598108947277, - "learning_rate": 0.0001999812830109211, - "loss": 46.0, - "step": 38254 - }, - { - "epoch": 6.160715004629816, - "grad_norm": 0.0015556865837424994, - "learning_rate": 0.0001999812820321192, - "loss": 46.0, - "step": 38255 - }, - { - "epoch": 6.160876041708604, - "grad_norm": 0.002903979504480958, - "learning_rate": 0.00019998128105329173, - "loss": 46.0, - "step": 38256 - }, - { - "epoch": 6.161037078787391, - "grad_norm": 0.00995730608701706, - "learning_rate": 0.00019998128007443864, - "loss": 46.0, - "step": 38257 - }, - { - "epoch": 6.161198115866179, - "grad_norm": 0.001089159632101655, - "learning_rate": 0.00019998127909555999, - "loss": 46.0, - "step": 38258 - }, - { - "epoch": 6.161359152944965, - "grad_norm": 0.0021435206290334463, - "learning_rate": 0.00019998127811665572, - "loss": 46.0, - "step": 38259 - }, - { - "epoch": 6.161520190023753, - "grad_norm": 0.006833336781710386, - "learning_rate": 0.0001999812771377259, - "loss": 46.0, - "step": 38260 - }, - { - "epoch": 6.16168122710254, - "grad_norm": 0.0033831140026450157, - "learning_rate": 0.00019998127615877045, - "loss": 46.0, - "step": 38261 - }, - { - "epoch": 6.1618422641813275, - "grad_norm": 0.002358735539019108, - "learning_rate": 0.00019998127517978942, - "loss": 46.0, - "step": 38262 - }, - { - "epoch": 6.162003301260115, - "grad_norm": 0.002337033860385418, - "learning_rate": 0.0001999812742007828, - "loss": 46.0, - "step": 38263 - }, - { - "epoch": 6.162164338338902, - "grad_norm": 0.009295708499848843, - "learning_rate": 0.0001999812732217506, - "loss": 46.0, - "step": 38264 - }, - { - "epoch": 6.16232537541769, - "grad_norm": 0.011148395016789436, - "learning_rate": 0.0001999812722426928, - "loss": 46.0, - "step": 38265 - }, - { - "epoch": 6.162486412496477, - "grad_norm": 0.004165736027061939, - "learning_rate": 0.0001999812712636094, - "loss": 46.0, - "step": 38266 - }, - { - "epoch": 6.162647449575265, - "grad_norm": 0.0015171512495726347, - "learning_rate": 0.00019998127028450044, - "loss": 46.0, - "step": 38267 - }, - { - "epoch": 6.162808486654052, - "grad_norm": 0.001808301080018282, - "learning_rate": 0.00019998126930536586, - "loss": 46.0, - "step": 38268 - }, - { - "epoch": 6.16296952373284, - "grad_norm": 0.002995187882333994, - "learning_rate": 0.00019998126832620572, - "loss": 46.0, - "step": 38269 - }, - { - "epoch": 6.163130560811627, - "grad_norm": 0.005926748272031546, - "learning_rate": 0.00019998126734701996, - "loss": 46.0, - "step": 38270 - }, - { - "epoch": 6.163291597890415, - "grad_norm": 0.002057367004454136, - "learning_rate": 0.00019998126636780862, - "loss": 46.0, - "step": 38271 - }, - { - "epoch": 6.163452634969202, - "grad_norm": 0.0014211698435246944, - "learning_rate": 0.00019998126538857172, - "loss": 46.0, - "step": 38272 - }, - { - "epoch": 6.163613672047989, - "grad_norm": 0.0036514876410365105, - "learning_rate": 0.00019998126440930918, - "loss": 46.0, - "step": 38273 - }, - { - "epoch": 6.163774709126776, - "grad_norm": 0.006783165503293276, - "learning_rate": 0.00019998126343002108, - "loss": 46.0, - "step": 38274 - }, - { - "epoch": 6.1639357462055635, - "grad_norm": 0.007316098548471928, - "learning_rate": 0.0001999812624507074, - "loss": 46.0, - "step": 38275 - }, - { - "epoch": 6.164096783284351, - "grad_norm": 0.0031602047383785248, - "learning_rate": 0.00019998126147136808, - "loss": 46.0, - "step": 38276 - }, - { - "epoch": 6.164257820363138, - "grad_norm": 0.00608556205406785, - "learning_rate": 0.00019998126049200322, - "loss": 46.0, - "step": 38277 - }, - { - "epoch": 6.164418857441926, - "grad_norm": 0.006487439386546612, - "learning_rate": 0.00019998125951261274, - "loss": 46.0, - "step": 38278 - }, - { - "epoch": 6.164579894520713, - "grad_norm": 0.0024090849328786135, - "learning_rate": 0.0001999812585331967, - "loss": 46.0, - "step": 38279 - }, - { - "epoch": 6.164740931599501, - "grad_norm": 0.0014213065151125193, - "learning_rate": 0.00019998125755375505, - "loss": 46.0, - "step": 38280 - }, - { - "epoch": 6.164901968678288, - "grad_norm": 0.0029977899976074696, - "learning_rate": 0.0001999812565742878, - "loss": 46.0, - "step": 38281 - }, - { - "epoch": 6.165063005757076, - "grad_norm": 0.0020964420400559902, - "learning_rate": 0.00019998125559479498, - "loss": 46.0, - "step": 38282 - }, - { - "epoch": 6.165224042835863, - "grad_norm": 0.006059358827769756, - "learning_rate": 0.00019998125461527657, - "loss": 46.0, - "step": 38283 - }, - { - "epoch": 6.165385079914651, - "grad_norm": 0.003123324830085039, - "learning_rate": 0.00019998125363573257, - "loss": 46.0, - "step": 38284 - }, - { - "epoch": 6.165546116993438, - "grad_norm": 0.002225477248430252, - "learning_rate": 0.00019998125265616295, - "loss": 46.0, - "step": 38285 - }, - { - "epoch": 6.1657071540722255, - "grad_norm": 0.008417419157922268, - "learning_rate": 0.00019998125167656778, - "loss": 46.0, - "step": 38286 - }, - { - "epoch": 6.165868191151013, - "grad_norm": 0.003415049985051155, - "learning_rate": 0.000199981250696947, - "loss": 46.0, - "step": 38287 - }, - { - "epoch": 6.1660292282297995, - "grad_norm": 0.0019182567484676838, - "learning_rate": 0.00019998124971730064, - "loss": 46.0, - "step": 38288 - }, - { - "epoch": 6.166190265308587, - "grad_norm": 0.0014812789158895612, - "learning_rate": 0.00019998124873762867, - "loss": 46.0, - "step": 38289 - }, - { - "epoch": 6.166351302387374, - "grad_norm": 0.005335068330168724, - "learning_rate": 0.00019998124775793112, - "loss": 46.0, - "step": 38290 - }, - { - "epoch": 6.166512339466162, - "grad_norm": 0.0017525829607620835, - "learning_rate": 0.00019998124677820798, - "loss": 46.0, - "step": 38291 - }, - { - "epoch": 6.166673376544949, - "grad_norm": 0.0102470638230443, - "learning_rate": 0.00019998124579845926, - "loss": 46.0, - "step": 38292 - }, - { - "epoch": 6.166834413623737, - "grad_norm": 0.004462293814867735, - "learning_rate": 0.00019998124481868495, - "loss": 46.0, - "step": 38293 - }, - { - "epoch": 6.166995450702524, - "grad_norm": 0.011110853403806686, - "learning_rate": 0.00019998124383888502, - "loss": 46.0, - "step": 38294 - }, - { - "epoch": 6.167156487781312, - "grad_norm": 0.00249012210406363, - "learning_rate": 0.00019998124285905953, - "loss": 46.0, - "step": 38295 - }, - { - "epoch": 6.167317524860099, - "grad_norm": 0.00370098277926445, - "learning_rate": 0.00019998124187920843, - "loss": 46.0, - "step": 38296 - }, - { - "epoch": 6.1674785619388865, - "grad_norm": 0.00599149102345109, - "learning_rate": 0.00019998124089933177, - "loss": 46.0, - "step": 38297 - }, - { - "epoch": 6.167639599017674, - "grad_norm": 0.007958956994116306, - "learning_rate": 0.0001999812399194295, - "loss": 46.0, - "step": 38298 - }, - { - "epoch": 6.1678006360964615, - "grad_norm": 0.0014243399491533637, - "learning_rate": 0.00019998123893950163, - "loss": 46.0, - "step": 38299 - }, - { - "epoch": 6.167961673175249, - "grad_norm": 0.010591837577521801, - "learning_rate": 0.00019998123795954818, - "loss": 46.0, - "step": 38300 - }, - { - "epoch": 6.168122710254036, - "grad_norm": 0.007675534579902887, - "learning_rate": 0.00019998123697956914, - "loss": 46.0, - "step": 38301 - }, - { - "epoch": 6.168283747332824, - "grad_norm": 0.0066420529037714005, - "learning_rate": 0.00019998123599956454, - "loss": 46.0, - "step": 38302 - }, - { - "epoch": 6.16844478441161, - "grad_norm": 0.0013729004422202706, - "learning_rate": 0.0001999812350195343, - "loss": 46.0, - "step": 38303 - }, - { - "epoch": 6.168605821490398, - "grad_norm": 0.0026360023766756058, - "learning_rate": 0.00019998123403947848, - "loss": 46.0, - "step": 38304 - }, - { - "epoch": 6.168766858569185, - "grad_norm": 0.001710473676212132, - "learning_rate": 0.0001999812330593971, - "loss": 46.0, - "step": 38305 - }, - { - "epoch": 6.168927895647973, - "grad_norm": 0.00616045854985714, - "learning_rate": 0.00019998123207929012, - "loss": 46.0, - "step": 38306 - }, - { - "epoch": 6.16908893272676, - "grad_norm": 0.0023090096656233072, - "learning_rate": 0.0001999812310991575, - "loss": 46.0, - "step": 38307 - }, - { - "epoch": 6.169249969805548, - "grad_norm": 0.007716451771557331, - "learning_rate": 0.00019998123011899933, - "loss": 46.0, - "step": 38308 - }, - { - "epoch": 6.169411006884335, - "grad_norm": 0.002946835244074464, - "learning_rate": 0.0001999812291388156, - "loss": 46.0, - "step": 38309 - }, - { - "epoch": 6.1695720439631225, - "grad_norm": 0.005844874307513237, - "learning_rate": 0.00019998122815860625, - "loss": 46.0, - "step": 38310 - }, - { - "epoch": 6.16973308104191, - "grad_norm": 0.005737279541790485, - "learning_rate": 0.00019998122717837128, - "loss": 46.0, - "step": 38311 - }, - { - "epoch": 6.169894118120697, - "grad_norm": 0.01215299405157566, - "learning_rate": 0.00019998122619811076, - "loss": 46.0, - "step": 38312 - }, - { - "epoch": 6.170055155199485, - "grad_norm": 0.012855935841798782, - "learning_rate": 0.00019998122521782465, - "loss": 46.0, - "step": 38313 - }, - { - "epoch": 6.170216192278272, - "grad_norm": 0.0019637797959148884, - "learning_rate": 0.00019998122423751293, - "loss": 46.0, - "step": 38314 - }, - { - "epoch": 6.17037722935706, - "grad_norm": 0.011320100165903568, - "learning_rate": 0.00019998122325717564, - "loss": 46.0, - "step": 38315 - }, - { - "epoch": 6.170538266435847, - "grad_norm": 0.0019749843049794436, - "learning_rate": 0.00019998122227681274, - "loss": 46.0, - "step": 38316 - }, - { - "epoch": 6.170699303514635, - "grad_norm": 0.0028116346802562475, - "learning_rate": 0.00019998122129642428, - "loss": 46.0, - "step": 38317 - }, - { - "epoch": 6.170860340593421, - "grad_norm": 0.0025257417000830173, - "learning_rate": 0.0001999812203160102, - "loss": 46.0, - "step": 38318 - }, - { - "epoch": 6.171021377672209, - "grad_norm": 0.007057895418256521, - "learning_rate": 0.00019998121933557055, - "loss": 46.0, - "step": 38319 - }, - { - "epoch": 6.171182414750996, - "grad_norm": 0.0011920987162739038, - "learning_rate": 0.0001999812183551053, - "loss": 46.0, - "step": 38320 - }, - { - "epoch": 6.171343451829784, - "grad_norm": 0.0020475653000175953, - "learning_rate": 0.00019998121737461447, - "loss": 46.0, - "step": 38321 - }, - { - "epoch": 6.171504488908571, - "grad_norm": 0.0026233065873384476, - "learning_rate": 0.00019998121639409804, - "loss": 46.0, - "step": 38322 - }, - { - "epoch": 6.1716655259873585, - "grad_norm": 0.006268750876188278, - "learning_rate": 0.00019998121541355598, - "loss": 46.0, - "step": 38323 - }, - { - "epoch": 6.171826563066146, - "grad_norm": 0.00309157301671803, - "learning_rate": 0.0001999812144329884, - "loss": 46.0, - "step": 38324 - }, - { - "epoch": 6.171987600144933, - "grad_norm": 0.0013755416730418801, - "learning_rate": 0.0001999812134523952, - "loss": 46.0, - "step": 38325 - }, - { - "epoch": 6.172148637223721, - "grad_norm": 0.0016376717248931527, - "learning_rate": 0.0001999812124717764, - "loss": 46.0, - "step": 38326 - }, - { - "epoch": 6.172309674302508, - "grad_norm": 0.008242752403020859, - "learning_rate": 0.00019998121149113202, - "loss": 46.0, - "step": 38327 - }, - { - "epoch": 6.172470711381296, - "grad_norm": 0.0016679811524227262, - "learning_rate": 0.00019998121051046207, - "loss": 46.0, - "step": 38328 - }, - { - "epoch": 6.172631748460083, - "grad_norm": 0.006396996323019266, - "learning_rate": 0.00019998120952976648, - "loss": 46.0, - "step": 38329 - }, - { - "epoch": 6.172792785538871, - "grad_norm": 0.00371205760166049, - "learning_rate": 0.00019998120854904534, - "loss": 46.0, - "step": 38330 - }, - { - "epoch": 6.172953822617658, - "grad_norm": 0.0054909847676754, - "learning_rate": 0.0001999812075682986, - "loss": 46.0, - "step": 38331 - }, - { - "epoch": 6.173114859696445, - "grad_norm": 0.004582086578011513, - "learning_rate": 0.00019998120658752626, - "loss": 46.0, - "step": 38332 - }, - { - "epoch": 6.173275896775232, - "grad_norm": 0.0013593441108241677, - "learning_rate": 0.00019998120560672835, - "loss": 46.0, - "step": 38333 - }, - { - "epoch": 6.17343693385402, - "grad_norm": 0.01412205770611763, - "learning_rate": 0.00019998120462590485, - "loss": 46.0, - "step": 38334 - }, - { - "epoch": 6.173597970932807, - "grad_norm": 0.007406009826809168, - "learning_rate": 0.00019998120364505572, - "loss": 46.0, - "step": 38335 - }, - { - "epoch": 6.1737590080115945, - "grad_norm": 0.002195616252720356, - "learning_rate": 0.00019998120266418105, - "loss": 46.0, - "step": 38336 - }, - { - "epoch": 6.173920045090382, - "grad_norm": 0.00959303230047226, - "learning_rate": 0.00019998120168328076, - "loss": 46.0, - "step": 38337 - }, - { - "epoch": 6.174081082169169, - "grad_norm": 0.003236561780795455, - "learning_rate": 0.0001999812007023549, - "loss": 46.0, - "step": 38338 - }, - { - "epoch": 6.174242119247957, - "grad_norm": 0.006811128929257393, - "learning_rate": 0.00019998119972140343, - "loss": 46.0, - "step": 38339 - }, - { - "epoch": 6.174403156326744, - "grad_norm": 0.008931980468332767, - "learning_rate": 0.0001999811987404264, - "loss": 46.0, - "step": 38340 - }, - { - "epoch": 6.174564193405532, - "grad_norm": 0.007095687557011843, - "learning_rate": 0.00019998119775942375, - "loss": 46.0, - "step": 38341 - }, - { - "epoch": 6.174725230484319, - "grad_norm": 0.002567967865616083, - "learning_rate": 0.0001999811967783955, - "loss": 46.0, - "step": 38342 - }, - { - "epoch": 6.174886267563107, - "grad_norm": 0.002297481754794717, - "learning_rate": 0.0001999811957973417, - "loss": 46.0, - "step": 38343 - }, - { - "epoch": 6.175047304641894, - "grad_norm": 0.0025986151304095984, - "learning_rate": 0.00019998119481626228, - "loss": 46.0, - "step": 38344 - }, - { - "epoch": 6.175208341720682, - "grad_norm": 0.0014254905981943011, - "learning_rate": 0.00019998119383515727, - "loss": 46.0, - "step": 38345 - }, - { - "epoch": 6.175369378799468, - "grad_norm": 0.006875585298985243, - "learning_rate": 0.00019998119285402668, - "loss": 46.0, - "step": 38346 - }, - { - "epoch": 6.175530415878256, - "grad_norm": 0.003062509698793292, - "learning_rate": 0.0001999811918728705, - "loss": 46.0, - "step": 38347 - }, - { - "epoch": 6.175691452957043, - "grad_norm": 0.0026650105137377977, - "learning_rate": 0.00019998119089168875, - "loss": 46.0, - "step": 38348 - }, - { - "epoch": 6.1758524900358305, - "grad_norm": 0.00251230550929904, - "learning_rate": 0.00019998118991048137, - "loss": 46.0, - "step": 38349 - }, - { - "epoch": 6.176013527114618, - "grad_norm": 0.00517770042642951, - "learning_rate": 0.0001999811889292484, - "loss": 46.0, - "step": 38350 - }, - { - "epoch": 6.176174564193405, - "grad_norm": 0.01145362202078104, - "learning_rate": 0.00019998118794798986, - "loss": 46.0, - "step": 38351 - }, - { - "epoch": 6.176335601272193, - "grad_norm": 0.007486908230930567, - "learning_rate": 0.00019998118696670574, - "loss": 46.0, - "step": 38352 - }, - { - "epoch": 6.17649663835098, - "grad_norm": 0.002026529749855399, - "learning_rate": 0.000199981185985396, - "loss": 46.0, - "step": 38353 - }, - { - "epoch": 6.176657675429768, - "grad_norm": 0.016162317246198654, - "learning_rate": 0.00019998118500406072, - "loss": 46.0, - "step": 38354 - }, - { - "epoch": 6.176818712508555, - "grad_norm": 0.0048370095901191235, - "learning_rate": 0.0001999811840226998, - "loss": 46.0, - "step": 38355 - }, - { - "epoch": 6.176979749587343, - "grad_norm": 0.0014495229115709662, - "learning_rate": 0.00019998118304131332, - "loss": 46.0, - "step": 38356 - }, - { - "epoch": 6.17714078666613, - "grad_norm": 0.020785022526979446, - "learning_rate": 0.0001999811820599012, - "loss": 46.0, - "step": 38357 - }, - { - "epoch": 6.1773018237449175, - "grad_norm": 0.006474633235484362, - "learning_rate": 0.00019998118107846357, - "loss": 46.0, - "step": 38358 - }, - { - "epoch": 6.177462860823705, - "grad_norm": 0.002027590526267886, - "learning_rate": 0.0001999811800970003, - "loss": 46.0, - "step": 38359 - }, - { - "epoch": 6.1776238979024924, - "grad_norm": 0.002121510449796915, - "learning_rate": 0.00019998117911551144, - "loss": 46.0, - "step": 38360 - }, - { - "epoch": 6.177784934981279, - "grad_norm": 0.0012588793179020286, - "learning_rate": 0.00019998117813399698, - "loss": 46.0, - "step": 38361 - }, - { - "epoch": 6.1779459720600665, - "grad_norm": 0.00857162568718195, - "learning_rate": 0.00019998117715245696, - "loss": 46.0, - "step": 38362 - }, - { - "epoch": 6.178107009138854, - "grad_norm": 0.00472683971747756, - "learning_rate": 0.00019998117617089133, - "loss": 46.0, - "step": 38363 - }, - { - "epoch": 6.178268046217641, - "grad_norm": 0.00392599543556571, - "learning_rate": 0.00019998117518930014, - "loss": 46.0, - "step": 38364 - }, - { - "epoch": 6.178429083296429, - "grad_norm": 0.01352886762470007, - "learning_rate": 0.0001999811742076833, - "loss": 46.0, - "step": 38365 - }, - { - "epoch": 6.178590120375216, - "grad_norm": 0.0017806334653869271, - "learning_rate": 0.00019998117322604094, - "loss": 46.0, - "step": 38366 - }, - { - "epoch": 6.178751157454004, - "grad_norm": 0.0024008865002542734, - "learning_rate": 0.00019998117224437296, - "loss": 46.0, - "step": 38367 - }, - { - "epoch": 6.178912194532791, - "grad_norm": 0.002225179923698306, - "learning_rate": 0.0001999811712626794, - "loss": 46.0, - "step": 38368 - }, - { - "epoch": 6.179073231611579, - "grad_norm": 0.003912065178155899, - "learning_rate": 0.0001999811702809602, - "loss": 46.0, - "step": 38369 - }, - { - "epoch": 6.179234268690366, - "grad_norm": 0.01413965318351984, - "learning_rate": 0.00019998116929921547, - "loss": 46.0, - "step": 38370 - }, - { - "epoch": 6.1793953057691535, - "grad_norm": 0.003520033787935972, - "learning_rate": 0.00019998116831744511, - "loss": 46.0, - "step": 38371 - }, - { - "epoch": 6.179556342847941, - "grad_norm": 0.0037289022002369165, - "learning_rate": 0.00019998116733564917, - "loss": 46.0, - "step": 38372 - }, - { - "epoch": 6.179717379926728, - "grad_norm": 0.003721510758623481, - "learning_rate": 0.00019998116635382767, - "loss": 46.0, - "step": 38373 - }, - { - "epoch": 6.179878417005516, - "grad_norm": 0.005485042463988066, - "learning_rate": 0.00019998116537198055, - "loss": 46.0, - "step": 38374 - }, - { - "epoch": 6.180039454084303, - "grad_norm": 0.004296879284083843, - "learning_rate": 0.00019998116439010784, - "loss": 46.0, - "step": 38375 - }, - { - "epoch": 6.18020049116309, - "grad_norm": 0.0034149251878261566, - "learning_rate": 0.00019998116340820955, - "loss": 46.0, - "step": 38376 - }, - { - "epoch": 6.180361528241877, - "grad_norm": 0.018693335354328156, - "learning_rate": 0.00019998116242628567, - "loss": 46.0, - "step": 38377 - }, - { - "epoch": 6.180522565320665, - "grad_norm": 0.004584385547786951, - "learning_rate": 0.00019998116144433618, - "loss": 46.0, - "step": 38378 - }, - { - "epoch": 6.180683602399452, - "grad_norm": 0.011237303726375103, - "learning_rate": 0.00019998116046236115, - "loss": 46.0, - "step": 38379 - }, - { - "epoch": 6.18084463947824, - "grad_norm": 0.006295708008110523, - "learning_rate": 0.00019998115948036049, - "loss": 46.0, - "step": 38380 - }, - { - "epoch": 6.181005676557027, - "grad_norm": 0.005004039034247398, - "learning_rate": 0.00019998115849833426, - "loss": 46.0, - "step": 38381 - }, - { - "epoch": 6.181166713635815, - "grad_norm": 0.002871039556339383, - "learning_rate": 0.0001999811575162824, - "loss": 46.0, - "step": 38382 - }, - { - "epoch": 6.181327750714602, - "grad_norm": 0.00879332609474659, - "learning_rate": 0.00019998115653420499, - "loss": 46.0, - "step": 38383 - }, - { - "epoch": 6.1814887877933895, - "grad_norm": 0.003851083805784583, - "learning_rate": 0.00019998115555210197, - "loss": 46.0, - "step": 38384 - }, - { - "epoch": 6.181649824872177, - "grad_norm": 0.005037369206547737, - "learning_rate": 0.00019998115456997337, - "loss": 46.0, - "step": 38385 - }, - { - "epoch": 6.181810861950964, - "grad_norm": 0.004178883507847786, - "learning_rate": 0.00019998115358781917, - "loss": 46.0, - "step": 38386 - }, - { - "epoch": 6.181971899029752, - "grad_norm": 0.008065703324973583, - "learning_rate": 0.0001999811526056394, - "loss": 46.0, - "step": 38387 - }, - { - "epoch": 6.182132936108539, - "grad_norm": 0.0018702278612181544, - "learning_rate": 0.000199981151623434, - "loss": 46.0, - "step": 38388 - }, - { - "epoch": 6.182293973187327, - "grad_norm": 0.0022260923869907856, - "learning_rate": 0.00019998115064120305, - "loss": 46.0, - "step": 38389 - }, - { - "epoch": 6.182455010266114, - "grad_norm": 0.008190924301743507, - "learning_rate": 0.00019998114965894649, - "loss": 46.0, - "step": 38390 - }, - { - "epoch": 6.182616047344901, - "grad_norm": 0.015463664196431637, - "learning_rate": 0.00019998114867666436, - "loss": 46.0, - "step": 38391 - }, - { - "epoch": 6.182777084423688, - "grad_norm": 0.0030463470611721277, - "learning_rate": 0.00019998114769435662, - "loss": 46.0, - "step": 38392 - }, - { - "epoch": 6.182938121502476, - "grad_norm": 0.004487467464059591, - "learning_rate": 0.00019998114671202332, - "loss": 46.0, - "step": 38393 - }, - { - "epoch": 6.183099158581263, - "grad_norm": 0.004162515047937632, - "learning_rate": 0.00019998114572966437, - "loss": 46.0, - "step": 38394 - }, - { - "epoch": 6.183260195660051, - "grad_norm": 0.009179658256471157, - "learning_rate": 0.00019998114474727987, - "loss": 46.0, - "step": 38395 - }, - { - "epoch": 6.183421232738838, - "grad_norm": 0.008634859696030617, - "learning_rate": 0.00019998114376486978, - "loss": 46.0, - "step": 38396 - }, - { - "epoch": 6.1835822698176255, - "grad_norm": 0.0035388132091611624, - "learning_rate": 0.0001999811427824341, - "loss": 46.0, - "step": 38397 - }, - { - "epoch": 6.183743306896413, - "grad_norm": 0.0043466719798743725, - "learning_rate": 0.00019998114179997281, - "loss": 46.0, - "step": 38398 - }, - { - "epoch": 6.1839043439752, - "grad_norm": 0.0103925671428442, - "learning_rate": 0.00019998114081748596, - "loss": 46.0, - "step": 38399 - }, - { - "epoch": 6.184065381053988, - "grad_norm": 0.007809177972376347, - "learning_rate": 0.0001999811398349735, - "loss": 46.0, - "step": 38400 - }, - { - "epoch": 6.184226418132775, - "grad_norm": 0.009312433190643787, - "learning_rate": 0.00019998113885243544, - "loss": 46.0, - "step": 38401 - }, - { - "epoch": 6.184387455211563, - "grad_norm": 0.005295824259519577, - "learning_rate": 0.00019998113786987183, - "loss": 46.0, - "step": 38402 - }, - { - "epoch": 6.18454849229035, - "grad_norm": 0.018275827169418335, - "learning_rate": 0.0001999811368872826, - "loss": 46.0, - "step": 38403 - }, - { - "epoch": 6.184709529369138, - "grad_norm": 0.013024595566093922, - "learning_rate": 0.0001999811359046678, - "loss": 46.0, - "step": 38404 - }, - { - "epoch": 6.184870566447924, - "grad_norm": 0.005584705621004105, - "learning_rate": 0.0001999811349220274, - "loss": 46.0, - "step": 38405 - }, - { - "epoch": 6.185031603526712, - "grad_norm": 0.006218624766916037, - "learning_rate": 0.0001999811339393614, - "loss": 46.0, - "step": 38406 - }, - { - "epoch": 6.185192640605499, - "grad_norm": 0.007102572359144688, - "learning_rate": 0.0001999811329566698, - "loss": 46.0, - "step": 38407 - }, - { - "epoch": 6.185353677684287, - "grad_norm": 0.0029884851537644863, - "learning_rate": 0.00019998113197395264, - "loss": 46.0, - "step": 38408 - }, - { - "epoch": 6.185514714763074, - "grad_norm": 0.004121674224734306, - "learning_rate": 0.00019998113099120986, - "loss": 46.0, - "step": 38409 - }, - { - "epoch": 6.1856757518418615, - "grad_norm": 0.0016099418280646205, - "learning_rate": 0.0001999811300084415, - "loss": 46.0, - "step": 38410 - }, - { - "epoch": 6.185836788920649, - "grad_norm": 0.005504402797669172, - "learning_rate": 0.00019998112902564757, - "loss": 46.0, - "step": 38411 - }, - { - "epoch": 6.185997825999436, - "grad_norm": 0.009299728088080883, - "learning_rate": 0.00019998112804282803, - "loss": 46.0, - "step": 38412 - }, - { - "epoch": 6.186158863078224, - "grad_norm": 0.00526196975260973, - "learning_rate": 0.0001999811270599829, - "loss": 46.0, - "step": 38413 - }, - { - "epoch": 6.186319900157011, - "grad_norm": 0.004188033752143383, - "learning_rate": 0.0001999811260771122, - "loss": 46.0, - "step": 38414 - }, - { - "epoch": 6.186480937235799, - "grad_norm": 0.002466693054884672, - "learning_rate": 0.0001999811250942159, - "loss": 46.0, - "step": 38415 - }, - { - "epoch": 6.186641974314586, - "grad_norm": 0.0014106532325968146, - "learning_rate": 0.00019998112411129398, - "loss": 46.0, - "step": 38416 - }, - { - "epoch": 6.186803011393374, - "grad_norm": 0.009123711846768856, - "learning_rate": 0.0001999811231283465, - "loss": 46.0, - "step": 38417 - }, - { - "epoch": 6.186964048472161, - "grad_norm": 0.0063522448763251305, - "learning_rate": 0.00019998112214537344, - "loss": 46.0, - "step": 38418 - }, - { - "epoch": 6.1871250855509485, - "grad_norm": 0.0025015079881995916, - "learning_rate": 0.00019998112116237477, - "loss": 46.0, - "step": 38419 - }, - { - "epoch": 6.187286122629735, - "grad_norm": 0.011544137261807919, - "learning_rate": 0.0001999811201793505, - "loss": 46.0, - "step": 38420 - }, - { - "epoch": 6.1874471597085225, - "grad_norm": 0.004128925036638975, - "learning_rate": 0.00019998111919630068, - "loss": 46.0, - "step": 38421 - }, - { - "epoch": 6.18760819678731, - "grad_norm": 0.009844529442489147, - "learning_rate": 0.00019998111821322521, - "loss": 46.0, - "step": 38422 - }, - { - "epoch": 6.1877692338660975, - "grad_norm": 0.004908531438559294, - "learning_rate": 0.00019998111723012422, - "loss": 46.0, - "step": 38423 - }, - { - "epoch": 6.187930270944885, - "grad_norm": 0.0027955572586506605, - "learning_rate": 0.0001999811162469976, - "loss": 46.0, - "step": 38424 - }, - { - "epoch": 6.188091308023672, - "grad_norm": 0.011891791597008705, - "learning_rate": 0.00019998111526384538, - "loss": 46.0, - "step": 38425 - }, - { - "epoch": 6.18825234510246, - "grad_norm": 0.00636469479650259, - "learning_rate": 0.0001999811142806676, - "loss": 46.0, - "step": 38426 - }, - { - "epoch": 6.188413382181247, - "grad_norm": 0.003043847158551216, - "learning_rate": 0.00019998111329746422, - "loss": 46.0, - "step": 38427 - }, - { - "epoch": 6.188574419260035, - "grad_norm": 0.0017767632380127907, - "learning_rate": 0.00019998111231423523, - "loss": 46.0, - "step": 38428 - }, - { - "epoch": 6.188735456338822, - "grad_norm": 0.012975356541574001, - "learning_rate": 0.00019998111133098068, - "loss": 46.0, - "step": 38429 - }, - { - "epoch": 6.18889649341761, - "grad_norm": 0.002072844421491027, - "learning_rate": 0.0001999811103477005, - "loss": 46.0, - "step": 38430 - }, - { - "epoch": 6.189057530496397, - "grad_norm": 0.003157053841277957, - "learning_rate": 0.00019998110936439475, - "loss": 46.0, - "step": 38431 - }, - { - "epoch": 6.1892185675751845, - "grad_norm": 0.0028113776352256536, - "learning_rate": 0.00019998110838106344, - "loss": 46.0, - "step": 38432 - }, - { - "epoch": 6.189379604653972, - "grad_norm": 0.0035995894577354193, - "learning_rate": 0.0001999811073977065, - "loss": 46.0, - "step": 38433 - }, - { - "epoch": 6.1895406417327585, - "grad_norm": 0.0022093728184700012, - "learning_rate": 0.000199981106414324, - "loss": 46.0, - "step": 38434 - }, - { - "epoch": 6.189701678811546, - "grad_norm": 0.0016370511148124933, - "learning_rate": 0.00019998110543091588, - "loss": 46.0, - "step": 38435 - }, - { - "epoch": 6.189862715890333, - "grad_norm": 0.0027529362123459578, - "learning_rate": 0.0001999811044474822, - "loss": 46.0, - "step": 38436 - }, - { - "epoch": 6.190023752969121, - "grad_norm": 0.00694660097360611, - "learning_rate": 0.0001999811034640229, - "loss": 46.0, - "step": 38437 - }, - { - "epoch": 6.190184790047908, - "grad_norm": 0.013354429043829441, - "learning_rate": 0.00019998110248053803, - "loss": 46.0, - "step": 38438 - }, - { - "epoch": 6.190345827126696, - "grad_norm": 0.012662060558795929, - "learning_rate": 0.00019998110149702756, - "loss": 46.0, - "step": 38439 - }, - { - "epoch": 6.190506864205483, - "grad_norm": 0.020456044003367424, - "learning_rate": 0.0001999811005134915, - "loss": 46.0, - "step": 38440 - }, - { - "epoch": 6.190667901284271, - "grad_norm": 0.002556009916588664, - "learning_rate": 0.00019998109952992985, - "loss": 46.0, - "step": 38441 - }, - { - "epoch": 6.190828938363058, - "grad_norm": 0.0012579491594806314, - "learning_rate": 0.00019998109854634262, - "loss": 46.0, - "step": 38442 - }, - { - "epoch": 6.190989975441846, - "grad_norm": 0.0013837008737027645, - "learning_rate": 0.00019998109756272977, - "loss": 46.0, - "step": 38443 - }, - { - "epoch": 6.191151012520633, - "grad_norm": 0.004684407729655504, - "learning_rate": 0.00019998109657909136, - "loss": 46.0, - "step": 38444 - }, - { - "epoch": 6.1913120495994205, - "grad_norm": 0.003011419204995036, - "learning_rate": 0.00019998109559542736, - "loss": 46.0, - "step": 38445 - }, - { - "epoch": 6.191473086678208, - "grad_norm": 0.0025610574521124363, - "learning_rate": 0.00019998109461173778, - "loss": 46.0, - "step": 38446 - }, - { - "epoch": 6.191634123756995, - "grad_norm": 0.005681574810296297, - "learning_rate": 0.0001999810936280226, - "loss": 46.0, - "step": 38447 - }, - { - "epoch": 6.191795160835783, - "grad_norm": 0.0061127422377467155, - "learning_rate": 0.00019998109264428182, - "loss": 46.0, - "step": 38448 - }, - { - "epoch": 6.191956197914569, - "grad_norm": 0.006921328604221344, - "learning_rate": 0.00019998109166051545, - "loss": 46.0, - "step": 38449 - }, - { - "epoch": 6.192117234993357, - "grad_norm": 0.0032270397059619427, - "learning_rate": 0.0001999810906767235, - "loss": 46.0, - "step": 38450 - }, - { - "epoch": 6.192278272072144, - "grad_norm": 0.0013061941135674715, - "learning_rate": 0.00019998108969290594, - "loss": 46.0, - "step": 38451 - }, - { - "epoch": 6.192439309150932, - "grad_norm": 0.011984949931502342, - "learning_rate": 0.0001999810887090628, - "loss": 46.0, - "step": 38452 - }, - { - "epoch": 6.192600346229719, - "grad_norm": 0.010736663825809956, - "learning_rate": 0.0001999810877251941, - "loss": 46.0, - "step": 38453 - }, - { - "epoch": 6.192761383308507, - "grad_norm": 0.0023626014590263367, - "learning_rate": 0.00019998108674129975, - "loss": 46.0, - "step": 38454 - }, - { - "epoch": 6.192922420387294, - "grad_norm": 0.0017383425729349256, - "learning_rate": 0.00019998108575737986, - "loss": 46.0, - "step": 38455 - }, - { - "epoch": 6.193083457466082, - "grad_norm": 0.008774983696639538, - "learning_rate": 0.00019998108477343435, - "loss": 46.0, - "step": 38456 - }, - { - "epoch": 6.193244494544869, - "grad_norm": 0.004909793380647898, - "learning_rate": 0.00019998108378946328, - "loss": 46.0, - "step": 38457 - }, - { - "epoch": 6.1934055316236565, - "grad_norm": 0.0026797510217875242, - "learning_rate": 0.0001999810828054666, - "loss": 46.0, - "step": 38458 - }, - { - "epoch": 6.193566568702444, - "grad_norm": 0.0020990953780710697, - "learning_rate": 0.00019998108182144432, - "loss": 46.0, - "step": 38459 - }, - { - "epoch": 6.193727605781231, - "grad_norm": 0.00247512711212039, - "learning_rate": 0.0001999810808373965, - "loss": 46.0, - "step": 38460 - }, - { - "epoch": 6.193888642860019, - "grad_norm": 0.006691396702080965, - "learning_rate": 0.00019998107985332305, - "loss": 46.0, - "step": 38461 - }, - { - "epoch": 6.194049679938806, - "grad_norm": 0.0046486384235322475, - "learning_rate": 0.00019998107886922399, - "loss": 46.0, - "step": 38462 - }, - { - "epoch": 6.194210717017594, - "grad_norm": 0.004219725262373686, - "learning_rate": 0.00019998107788509937, - "loss": 46.0, - "step": 38463 - }, - { - "epoch": 6.19437175409638, - "grad_norm": 0.0061407554894685745, - "learning_rate": 0.00019998107690094913, - "loss": 46.0, - "step": 38464 - }, - { - "epoch": 6.194532791175168, - "grad_norm": 0.004228383302688599, - "learning_rate": 0.00019998107591677334, - "loss": 46.0, - "step": 38465 - }, - { - "epoch": 6.194693828253955, - "grad_norm": 0.0012823771685361862, - "learning_rate": 0.00019998107493257196, - "loss": 46.0, - "step": 38466 - }, - { - "epoch": 6.194854865332743, - "grad_norm": 0.013044308871030807, - "learning_rate": 0.000199981073948345, - "loss": 46.0, - "step": 38467 - }, - { - "epoch": 6.19501590241153, - "grad_norm": 0.005286875646561384, - "learning_rate": 0.00019998107296409238, - "loss": 46.0, - "step": 38468 - }, - { - "epoch": 6.195176939490318, - "grad_norm": 0.0065002660267055035, - "learning_rate": 0.0001999810719798142, - "loss": 46.0, - "step": 38469 - }, - { - "epoch": 6.195337976569105, - "grad_norm": 0.007993276230990887, - "learning_rate": 0.00019998107099551048, - "loss": 46.0, - "step": 38470 - }, - { - "epoch": 6.1954990136478925, - "grad_norm": 0.002286696108058095, - "learning_rate": 0.0001999810700111811, - "loss": 46.0, - "step": 38471 - }, - { - "epoch": 6.19566005072668, - "grad_norm": 0.0018333852058276534, - "learning_rate": 0.00019998106902682617, - "loss": 46.0, - "step": 38472 - }, - { - "epoch": 6.195821087805467, - "grad_norm": 0.0015674241585657, - "learning_rate": 0.00019998106804244566, - "loss": 46.0, - "step": 38473 - }, - { - "epoch": 6.195982124884255, - "grad_norm": 0.0017585881287232041, - "learning_rate": 0.00019998106705803955, - "loss": 46.0, - "step": 38474 - }, - { - "epoch": 6.196143161963042, - "grad_norm": 0.003924446180462837, - "learning_rate": 0.00019998106607360783, - "loss": 46.0, - "step": 38475 - }, - { - "epoch": 6.19630419904183, - "grad_norm": 0.008062554523348808, - "learning_rate": 0.00019998106508915052, - "loss": 46.0, - "step": 38476 - }, - { - "epoch": 6.196465236120617, - "grad_norm": 0.0027739075012505054, - "learning_rate": 0.00019998106410466763, - "loss": 46.0, - "step": 38477 - }, - { - "epoch": 6.196626273199404, - "grad_norm": 0.0020640636794269085, - "learning_rate": 0.00019998106312015917, - "loss": 46.0, - "step": 38478 - }, - { - "epoch": 6.196787310278191, - "grad_norm": 0.004370938055217266, - "learning_rate": 0.0001999810621356251, - "loss": 46.0, - "step": 38479 - }, - { - "epoch": 6.196948347356979, - "grad_norm": 0.002564073074609041, - "learning_rate": 0.00019998106115106542, - "loss": 46.0, - "step": 38480 - }, - { - "epoch": 6.197109384435766, - "grad_norm": 0.002289981348440051, - "learning_rate": 0.0001999810601664802, - "loss": 46.0, - "step": 38481 - }, - { - "epoch": 6.1972704215145535, - "grad_norm": 0.008379199542105198, - "learning_rate": 0.00019998105918186935, - "loss": 46.0, - "step": 38482 - }, - { - "epoch": 6.197431458593341, - "grad_norm": 0.007253784220665693, - "learning_rate": 0.00019998105819723293, - "loss": 46.0, - "step": 38483 - }, - { - "epoch": 6.197592495672128, - "grad_norm": 0.002788061508908868, - "learning_rate": 0.00019998105721257093, - "loss": 46.0, - "step": 38484 - }, - { - "epoch": 6.197753532750916, - "grad_norm": 0.0016887930687516928, - "learning_rate": 0.0001999810562278833, - "loss": 46.0, - "step": 38485 - }, - { - "epoch": 6.197914569829703, - "grad_norm": 0.007803408429026604, - "learning_rate": 0.0001999810552431701, - "loss": 46.0, - "step": 38486 - }, - { - "epoch": 6.198075606908491, - "grad_norm": 0.0014974446967244148, - "learning_rate": 0.00019998105425843133, - "loss": 46.0, - "step": 38487 - }, - { - "epoch": 6.198236643987278, - "grad_norm": 0.003115519415587187, - "learning_rate": 0.00019998105327366695, - "loss": 46.0, - "step": 38488 - }, - { - "epoch": 6.198397681066066, - "grad_norm": 0.002230840502306819, - "learning_rate": 0.00019998105228887699, - "loss": 46.0, - "step": 38489 - }, - { - "epoch": 6.198558718144853, - "grad_norm": 0.002150146523490548, - "learning_rate": 0.00019998105130406143, - "loss": 46.0, - "step": 38490 - }, - { - "epoch": 6.198719755223641, - "grad_norm": 0.024488558992743492, - "learning_rate": 0.0001999810503192203, - "loss": 46.0, - "step": 38491 - }, - { - "epoch": 6.198880792302428, - "grad_norm": 0.006840047426521778, - "learning_rate": 0.00019998104933435356, - "loss": 46.0, - "step": 38492 - }, - { - "epoch": 6.199041829381215, - "grad_norm": 0.00801506545394659, - "learning_rate": 0.00019998104834946122, - "loss": 46.0, - "step": 38493 - }, - { - "epoch": 6.199202866460002, - "grad_norm": 0.005811032839119434, - "learning_rate": 0.00019998104736454329, - "loss": 46.0, - "step": 38494 - }, - { - "epoch": 6.1993639035387895, - "grad_norm": 0.003501306753605604, - "learning_rate": 0.00019998104637959977, - "loss": 46.0, - "step": 38495 - }, - { - "epoch": 6.199524940617577, - "grad_norm": 0.00610383041203022, - "learning_rate": 0.0001999810453946307, - "loss": 46.0, - "step": 38496 - }, - { - "epoch": 6.199685977696364, - "grad_norm": 0.0011243073968216777, - "learning_rate": 0.000199981044409636, - "loss": 46.0, - "step": 38497 - }, - { - "epoch": 6.199847014775152, - "grad_norm": 0.0010643979767337441, - "learning_rate": 0.00019998104342461572, - "loss": 46.0, - "step": 38498 - }, - { - "epoch": 6.200008051853939, - "grad_norm": 0.0012480011209845543, - "learning_rate": 0.00019998104243956985, - "loss": 46.0, - "step": 38499 - }, - { - "epoch": 6.200169088932727, - "grad_norm": 0.007588703650981188, - "learning_rate": 0.0001999810414544984, - "loss": 46.0, - "step": 38500 - }, - { - "epoch": 6.200330126011514, - "grad_norm": 0.021159600466489792, - "learning_rate": 0.00019998104046940136, - "loss": 46.0, - "step": 38501 - }, - { - "epoch": 6.200491163090302, - "grad_norm": 0.0031840221490710974, - "learning_rate": 0.0001999810394842787, - "loss": 46.0, - "step": 38502 - }, - { - "epoch": 6.200652200169089, - "grad_norm": 0.006811012048274279, - "learning_rate": 0.0001999810384991305, - "loss": 46.0, - "step": 38503 - }, - { - "epoch": 6.200813237247877, - "grad_norm": 0.0030008992180228233, - "learning_rate": 0.00019998103751395666, - "loss": 46.0, - "step": 38504 - }, - { - "epoch": 6.200974274326664, - "grad_norm": 0.0083366883918643, - "learning_rate": 0.00019998103652875727, - "loss": 46.0, - "step": 38505 - }, - { - "epoch": 6.2011353114054515, - "grad_norm": 0.011031761765480042, - "learning_rate": 0.00019998103554353226, - "loss": 46.0, - "step": 38506 - }, - { - "epoch": 6.201296348484238, - "grad_norm": 0.008430404588580132, - "learning_rate": 0.00019998103455828167, - "loss": 46.0, - "step": 38507 - }, - { - "epoch": 6.2014573855630255, - "grad_norm": 0.004892644938081503, - "learning_rate": 0.0001999810335730055, - "loss": 46.0, - "step": 38508 - }, - { - "epoch": 6.201618422641813, - "grad_norm": 0.00163461163174361, - "learning_rate": 0.00019998103258770373, - "loss": 46.0, - "step": 38509 - }, - { - "epoch": 6.2017794597206, - "grad_norm": 0.0021717629861086607, - "learning_rate": 0.00019998103160237638, - "loss": 46.0, - "step": 38510 - }, - { - "epoch": 6.201940496799388, - "grad_norm": 0.0017033193726092577, - "learning_rate": 0.0001999810306170234, - "loss": 46.0, - "step": 38511 - }, - { - "epoch": 6.202101533878175, - "grad_norm": 0.0027894889935851097, - "learning_rate": 0.00019998102963164488, - "loss": 46.0, - "step": 38512 - }, - { - "epoch": 6.202262570956963, - "grad_norm": 0.0014997628750279546, - "learning_rate": 0.00019998102864624077, - "loss": 46.0, - "step": 38513 - }, - { - "epoch": 6.20242360803575, - "grad_norm": 0.004330724012106657, - "learning_rate": 0.00019998102766081104, - "loss": 46.0, - "step": 38514 - }, - { - "epoch": 6.202584645114538, - "grad_norm": 0.008996432647109032, - "learning_rate": 0.00019998102667535575, - "loss": 46.0, - "step": 38515 - }, - { - "epoch": 6.202745682193325, - "grad_norm": 0.0032568250317126513, - "learning_rate": 0.00019998102568987485, - "loss": 46.0, - "step": 38516 - }, - { - "epoch": 6.202906719272113, - "grad_norm": 0.008566852658987045, - "learning_rate": 0.00019998102470436833, - "loss": 46.0, - "step": 38517 - }, - { - "epoch": 6.2030677563509, - "grad_norm": 0.01192668080329895, - "learning_rate": 0.00019998102371883628, - "loss": 46.0, - "step": 38518 - }, - { - "epoch": 6.2032287934296875, - "grad_norm": 0.003461219370365143, - "learning_rate": 0.00019998102273327862, - "loss": 46.0, - "step": 38519 - }, - { - "epoch": 6.203389830508475, - "grad_norm": 0.012207056395709515, - "learning_rate": 0.00019998102174769534, - "loss": 46.0, - "step": 38520 - }, - { - "epoch": 6.203550867587262, - "grad_norm": 0.002491791034117341, - "learning_rate": 0.0001999810207620865, - "loss": 46.0, - "step": 38521 - }, - { - "epoch": 6.203711904666049, - "grad_norm": 0.006606719456613064, - "learning_rate": 0.00019998101977645208, - "loss": 46.0, - "step": 38522 - }, - { - "epoch": 6.203872941744836, - "grad_norm": 0.006626618094742298, - "learning_rate": 0.00019998101879079206, - "loss": 46.0, - "step": 38523 - }, - { - "epoch": 6.204033978823624, - "grad_norm": 0.009684987366199493, - "learning_rate": 0.00019998101780510644, - "loss": 46.0, - "step": 38524 - }, - { - "epoch": 6.204195015902411, - "grad_norm": 0.017381057143211365, - "learning_rate": 0.00019998101681939522, - "loss": 46.0, - "step": 38525 - }, - { - "epoch": 6.204356052981199, - "grad_norm": 0.01152043417096138, - "learning_rate": 0.0001999810158336584, - "loss": 46.0, - "step": 38526 - }, - { - "epoch": 6.204517090059986, - "grad_norm": 0.0025890732649713755, - "learning_rate": 0.00019998101484789603, - "loss": 46.0, - "step": 38527 - }, - { - "epoch": 6.204678127138774, - "grad_norm": 0.0017062181141227484, - "learning_rate": 0.00019998101386210806, - "loss": 46.0, - "step": 38528 - }, - { - "epoch": 6.204839164217561, - "grad_norm": 0.0017474015476182103, - "learning_rate": 0.0001999810128762945, - "loss": 46.0, - "step": 38529 - }, - { - "epoch": 6.2050002012963485, - "grad_norm": 0.0033452387433499098, - "learning_rate": 0.00019998101189045532, - "loss": 46.0, - "step": 38530 - }, - { - "epoch": 6.205161238375136, - "grad_norm": 0.0010951875010505319, - "learning_rate": 0.00019998101090459058, - "loss": 46.0, - "step": 38531 - }, - { - "epoch": 6.2053222754539235, - "grad_norm": 0.010053624399006367, - "learning_rate": 0.00019998100991870023, - "loss": 46.0, - "step": 38532 - }, - { - "epoch": 6.205483312532711, - "grad_norm": 0.005480147432535887, - "learning_rate": 0.00019998100893278432, - "loss": 46.0, - "step": 38533 - }, - { - "epoch": 6.205644349611498, - "grad_norm": 0.003845823463052511, - "learning_rate": 0.0001999810079468428, - "loss": 46.0, - "step": 38534 - }, - { - "epoch": 6.205805386690286, - "grad_norm": 0.002510937163606286, - "learning_rate": 0.0001999810069608757, - "loss": 46.0, - "step": 38535 - }, - { - "epoch": 6.205966423769073, - "grad_norm": 0.005096937529742718, - "learning_rate": 0.000199981005974883, - "loss": 46.0, - "step": 38536 - }, - { - "epoch": 6.20612746084786, - "grad_norm": 0.004791811108589172, - "learning_rate": 0.00019998100498886472, - "loss": 46.0, - "step": 38537 - }, - { - "epoch": 6.206288497926647, - "grad_norm": 0.0035096872597932816, - "learning_rate": 0.00019998100400282082, - "loss": 46.0, - "step": 38538 - }, - { - "epoch": 6.206449535005435, - "grad_norm": 0.009524703957140446, - "learning_rate": 0.00019998100301675135, - "loss": 46.0, - "step": 38539 - }, - { - "epoch": 6.206610572084222, - "grad_norm": 0.010632744058966637, - "learning_rate": 0.00019998100203065628, - "loss": 46.0, - "step": 38540 - }, - { - "epoch": 6.20677160916301, - "grad_norm": 0.003830992616713047, - "learning_rate": 0.00019998100104453564, - "loss": 46.0, - "step": 38541 - }, - { - "epoch": 6.206932646241797, - "grad_norm": 0.0018759763333946466, - "learning_rate": 0.0001999810000583894, - "loss": 46.0, - "step": 38542 - }, - { - "epoch": 6.2070936833205845, - "grad_norm": 0.02378779463469982, - "learning_rate": 0.00019998099907221758, - "loss": 46.0, - "step": 38543 - }, - { - "epoch": 6.207254720399372, - "grad_norm": 0.011196010746061802, - "learning_rate": 0.00019998099808602016, - "loss": 46.0, - "step": 38544 - }, - { - "epoch": 6.207415757478159, - "grad_norm": 0.006778963375836611, - "learning_rate": 0.00019998099709979717, - "loss": 46.0, - "step": 38545 - }, - { - "epoch": 6.207576794556947, - "grad_norm": 0.0018767776200547814, - "learning_rate": 0.00019998099611354857, - "loss": 46.0, - "step": 38546 - }, - { - "epoch": 6.207737831635734, - "grad_norm": 0.006590351928025484, - "learning_rate": 0.00019998099512727439, - "loss": 46.0, - "step": 38547 - }, - { - "epoch": 6.207898868714522, - "grad_norm": 0.0036499181296676397, - "learning_rate": 0.00019998099414097459, - "loss": 46.0, - "step": 38548 - }, - { - "epoch": 6.208059905793309, - "grad_norm": 0.013853631913661957, - "learning_rate": 0.00019998099315464925, - "loss": 46.0, - "step": 38549 - }, - { - "epoch": 6.208220942872097, - "grad_norm": 0.003738798899576068, - "learning_rate": 0.00019998099216829828, - "loss": 46.0, - "step": 38550 - }, - { - "epoch": 6.208381979950884, - "grad_norm": 0.005554336588829756, - "learning_rate": 0.00019998099118192172, - "loss": 46.0, - "step": 38551 - }, - { - "epoch": 6.208543017029671, - "grad_norm": 0.002461962401866913, - "learning_rate": 0.0001999809901955196, - "loss": 46.0, - "step": 38552 - }, - { - "epoch": 6.208704054108458, - "grad_norm": 0.002006252994760871, - "learning_rate": 0.00019998098920909186, - "loss": 46.0, - "step": 38553 - }, - { - "epoch": 6.208865091187246, - "grad_norm": 0.0008995594689622521, - "learning_rate": 0.00019998098822263853, - "loss": 46.0, - "step": 38554 - }, - { - "epoch": 6.209026128266033, - "grad_norm": 0.0034110061824321747, - "learning_rate": 0.00019998098723615962, - "loss": 46.0, - "step": 38555 - }, - { - "epoch": 6.2091871653448205, - "grad_norm": 0.0011905698338523507, - "learning_rate": 0.00019998098624965512, - "loss": 46.0, - "step": 38556 - }, - { - "epoch": 6.209348202423608, - "grad_norm": 0.002167128724977374, - "learning_rate": 0.00019998098526312507, - "loss": 46.0, - "step": 38557 - }, - { - "epoch": 6.209509239502395, - "grad_norm": 0.00407895864918828, - "learning_rate": 0.00019998098427656937, - "loss": 46.0, - "step": 38558 - }, - { - "epoch": 6.209670276581183, - "grad_norm": 0.005671153776347637, - "learning_rate": 0.0001999809832899881, - "loss": 46.0, - "step": 38559 - }, - { - "epoch": 6.20983131365997, - "grad_norm": 0.0096208481118083, - "learning_rate": 0.00019998098230338126, - "loss": 46.0, - "step": 38560 - }, - { - "epoch": 6.209992350738758, - "grad_norm": 0.008138995617628098, - "learning_rate": 0.0001999809813167488, - "loss": 46.0, - "step": 38561 - }, - { - "epoch": 6.210153387817545, - "grad_norm": 0.006476081442087889, - "learning_rate": 0.00019998098033009075, - "loss": 46.0, - "step": 38562 - }, - { - "epoch": 6.210314424896333, - "grad_norm": 0.005781737621873617, - "learning_rate": 0.00019998097934340714, - "loss": 46.0, - "step": 38563 - }, - { - "epoch": 6.21047546197512, - "grad_norm": 0.0018086661584675312, - "learning_rate": 0.00019998097835669792, - "loss": 46.0, - "step": 38564 - }, - { - "epoch": 6.210636499053908, - "grad_norm": 0.0038158376701176167, - "learning_rate": 0.0001999809773699631, - "loss": 46.0, - "step": 38565 - }, - { - "epoch": 6.210797536132694, - "grad_norm": 0.0017599390121176839, - "learning_rate": 0.0001999809763832027, - "loss": 46.0, - "step": 38566 - }, - { - "epoch": 6.210958573211482, - "grad_norm": 0.005088081583380699, - "learning_rate": 0.0001999809753964167, - "loss": 46.0, - "step": 38567 - }, - { - "epoch": 6.211119610290269, - "grad_norm": 0.00866482499986887, - "learning_rate": 0.00019998097440960516, - "loss": 46.0, - "step": 38568 - }, - { - "epoch": 6.2112806473690565, - "grad_norm": 0.003718878608196974, - "learning_rate": 0.00019998097342276795, - "loss": 46.0, - "step": 38569 - }, - { - "epoch": 6.211441684447844, - "grad_norm": 0.0012801260454580188, - "learning_rate": 0.0001999809724359052, - "loss": 46.0, - "step": 38570 - }, - { - "epoch": 6.211602721526631, - "grad_norm": 0.008715521544218063, - "learning_rate": 0.0001999809714490169, - "loss": 46.0, - "step": 38571 - }, - { - "epoch": 6.211763758605419, - "grad_norm": 0.002537169260904193, - "learning_rate": 0.00019998097046210292, - "loss": 46.0, - "step": 38572 - }, - { - "epoch": 6.211924795684206, - "grad_norm": 0.0015906566986814141, - "learning_rate": 0.00019998096947516342, - "loss": 46.0, - "step": 38573 - }, - { - "epoch": 6.212085832762994, - "grad_norm": 0.0034709463361650705, - "learning_rate": 0.00019998096848819827, - "loss": 46.0, - "step": 38574 - }, - { - "epoch": 6.212246869841781, - "grad_norm": 0.003196131670847535, - "learning_rate": 0.00019998096750120756, - "loss": 46.0, - "step": 38575 - }, - { - "epoch": 6.212407906920569, - "grad_norm": 0.0008856742642819881, - "learning_rate": 0.00019998096651419126, - "loss": 46.0, - "step": 38576 - }, - { - "epoch": 6.212568943999356, - "grad_norm": 0.011310459114611149, - "learning_rate": 0.0001999809655271494, - "loss": 46.0, - "step": 38577 - }, - { - "epoch": 6.212729981078144, - "grad_norm": 0.0015778564848005772, - "learning_rate": 0.0001999809645400819, - "loss": 46.0, - "step": 38578 - }, - { - "epoch": 6.212891018156931, - "grad_norm": 0.004972171038389206, - "learning_rate": 0.00019998096355298883, - "loss": 46.0, - "step": 38579 - }, - { - "epoch": 6.213052055235718, - "grad_norm": 0.0064807613380253315, - "learning_rate": 0.00019998096256587018, - "loss": 46.0, - "step": 38580 - }, - { - "epoch": 6.213213092314505, - "grad_norm": 0.002693760907277465, - "learning_rate": 0.00019998096157872595, - "loss": 46.0, - "step": 38581 - }, - { - "epoch": 6.2133741293932925, - "grad_norm": 0.007887729443609715, - "learning_rate": 0.00019998096059155608, - "loss": 46.0, - "step": 38582 - }, - { - "epoch": 6.21353516647208, - "grad_norm": 0.00348364794626832, - "learning_rate": 0.00019998095960436067, - "loss": 46.0, - "step": 38583 - }, - { - "epoch": 6.213696203550867, - "grad_norm": 0.005970843601971865, - "learning_rate": 0.00019998095861713965, - "loss": 46.0, - "step": 38584 - }, - { - "epoch": 6.213857240629655, - "grad_norm": 0.0008491827175021172, - "learning_rate": 0.00019998095762989305, - "loss": 46.0, - "step": 38585 - }, - { - "epoch": 6.214018277708442, - "grad_norm": 0.01671157404780388, - "learning_rate": 0.00019998095664262085, - "loss": 46.0, - "step": 38586 - }, - { - "epoch": 6.21417931478723, - "grad_norm": 0.010379582643508911, - "learning_rate": 0.00019998095565532307, - "loss": 46.0, - "step": 38587 - }, - { - "epoch": 6.214340351866017, - "grad_norm": 0.009952492080628872, - "learning_rate": 0.00019998095466799968, - "loss": 46.0, - "step": 38588 - }, - { - "epoch": 6.214501388944805, - "grad_norm": 0.004560786299407482, - "learning_rate": 0.00019998095368065072, - "loss": 46.0, - "step": 38589 - }, - { - "epoch": 6.214662426023592, - "grad_norm": 0.007111283019185066, - "learning_rate": 0.00019998095269327615, - "loss": 46.0, - "step": 38590 - }, - { - "epoch": 6.2148234631023795, - "grad_norm": 0.007550255861133337, - "learning_rate": 0.00019998095170587602, - "loss": 46.0, - "step": 38591 - }, - { - "epoch": 6.214984500181167, - "grad_norm": 0.0056282998993992805, - "learning_rate": 0.0001999809507184503, - "loss": 46.0, - "step": 38592 - }, - { - "epoch": 6.2151455372599544, - "grad_norm": 0.001895827241241932, - "learning_rate": 0.00019998094973099894, - "loss": 46.0, - "step": 38593 - }, - { - "epoch": 6.215306574338742, - "grad_norm": 0.002043043030425906, - "learning_rate": 0.00019998094874352202, - "loss": 46.0, - "step": 38594 - }, - { - "epoch": 6.2154676114175285, - "grad_norm": 0.0031424269545823336, - "learning_rate": 0.00019998094775601952, - "loss": 46.0, - "step": 38595 - }, - { - "epoch": 6.215628648496316, - "grad_norm": 0.0006763574783690274, - "learning_rate": 0.00019998094676849145, - "loss": 46.0, - "step": 38596 - }, - { - "epoch": 6.215789685575103, - "grad_norm": 0.0025461832992732525, - "learning_rate": 0.00019998094578093774, - "loss": 46.0, - "step": 38597 - }, - { - "epoch": 6.215950722653891, - "grad_norm": 0.0018364378483965993, - "learning_rate": 0.00019998094479335845, - "loss": 46.0, - "step": 38598 - }, - { - "epoch": 6.216111759732678, - "grad_norm": 0.0012988555245101452, - "learning_rate": 0.0001999809438057536, - "loss": 46.0, - "step": 38599 - }, - { - "epoch": 6.216272796811466, - "grad_norm": 0.006263650022447109, - "learning_rate": 0.00019998094281812315, - "loss": 46.0, - "step": 38600 - }, - { - "epoch": 6.216433833890253, - "grad_norm": 0.0025493402499705553, - "learning_rate": 0.0001999809418304671, - "loss": 46.0, - "step": 38601 - }, - { - "epoch": 6.216594870969041, - "grad_norm": 0.004002938512712717, - "learning_rate": 0.00019998094084278545, - "loss": 46.0, - "step": 38602 - }, - { - "epoch": 6.216755908047828, - "grad_norm": 0.00786288920789957, - "learning_rate": 0.00019998093985507822, - "loss": 46.0, - "step": 38603 - }, - { - "epoch": 6.2169169451266155, - "grad_norm": 0.002239077817648649, - "learning_rate": 0.00019998093886734538, - "loss": 46.0, - "step": 38604 - }, - { - "epoch": 6.217077982205403, - "grad_norm": 0.013029851019382477, - "learning_rate": 0.00019998093787958697, - "loss": 46.0, - "step": 38605 - }, - { - "epoch": 6.21723901928419, - "grad_norm": 0.0038467231206595898, - "learning_rate": 0.00019998093689180298, - "loss": 46.0, - "step": 38606 - }, - { - "epoch": 6.217400056362978, - "grad_norm": 0.0027490751817822456, - "learning_rate": 0.00019998093590399343, - "loss": 46.0, - "step": 38607 - }, - { - "epoch": 6.217561093441765, - "grad_norm": 0.0025106542743742466, - "learning_rate": 0.00019998093491615824, - "loss": 46.0, - "step": 38608 - }, - { - "epoch": 6.217722130520553, - "grad_norm": 0.0023332072887569666, - "learning_rate": 0.00019998093392829746, - "loss": 46.0, - "step": 38609 - }, - { - "epoch": 6.217883167599339, - "grad_norm": 0.010310209356248379, - "learning_rate": 0.00019998093294041112, - "loss": 46.0, - "step": 38610 - }, - { - "epoch": 6.218044204678127, - "grad_norm": 0.005291286390274763, - "learning_rate": 0.00019998093195249916, - "loss": 46.0, - "step": 38611 - }, - { - "epoch": 6.218205241756914, - "grad_norm": 0.006393376272171736, - "learning_rate": 0.00019998093096456162, - "loss": 46.0, - "step": 38612 - }, - { - "epoch": 6.218366278835702, - "grad_norm": 0.0024698018096387386, - "learning_rate": 0.0001999809299765985, - "loss": 46.0, - "step": 38613 - }, - { - "epoch": 6.218527315914489, - "grad_norm": 0.002322337357327342, - "learning_rate": 0.00019998092898860977, - "loss": 46.0, - "step": 38614 - }, - { - "epoch": 6.218688352993277, - "grad_norm": 0.004036011174321175, - "learning_rate": 0.00019998092800059547, - "loss": 46.0, - "step": 38615 - }, - { - "epoch": 6.218849390072064, - "grad_norm": 0.002421214710921049, - "learning_rate": 0.00019998092701255558, - "loss": 46.0, - "step": 38616 - }, - { - "epoch": 6.2190104271508515, - "grad_norm": 0.010165990330278873, - "learning_rate": 0.00019998092602449007, - "loss": 46.0, - "step": 38617 - }, - { - "epoch": 6.219171464229639, - "grad_norm": 0.0024456074461340904, - "learning_rate": 0.000199980925036399, - "loss": 46.0, - "step": 38618 - }, - { - "epoch": 6.219332501308426, - "grad_norm": 0.01061293762177229, - "learning_rate": 0.00019998092404828236, - "loss": 46.0, - "step": 38619 - }, - { - "epoch": 6.219493538387214, - "grad_norm": 0.0008300011395476758, - "learning_rate": 0.00019998092306014006, - "loss": 46.0, - "step": 38620 - }, - { - "epoch": 6.219654575466001, - "grad_norm": 0.003805123269557953, - "learning_rate": 0.00019998092207197224, - "loss": 46.0, - "step": 38621 - }, - { - "epoch": 6.219815612544789, - "grad_norm": 0.013111239299178123, - "learning_rate": 0.0001999809210837788, - "loss": 46.0, - "step": 38622 - }, - { - "epoch": 6.219976649623576, - "grad_norm": 0.0020245127379894257, - "learning_rate": 0.00019998092009555977, - "loss": 46.0, - "step": 38623 - }, - { - "epoch": 6.220137686702364, - "grad_norm": 0.006429504137486219, - "learning_rate": 0.00019998091910731515, - "loss": 46.0, - "step": 38624 - }, - { - "epoch": 6.22029872378115, - "grad_norm": 0.01914132386445999, - "learning_rate": 0.00019998091811904495, - "loss": 46.0, - "step": 38625 - }, - { - "epoch": 6.220459760859938, - "grad_norm": 0.02236207015812397, - "learning_rate": 0.00019998091713074913, - "loss": 46.0, - "step": 38626 - }, - { - "epoch": 6.220620797938725, - "grad_norm": 0.02192418836057186, - "learning_rate": 0.00019998091614242776, - "loss": 46.0, - "step": 38627 - }, - { - "epoch": 6.220781835017513, - "grad_norm": 0.006403478793799877, - "learning_rate": 0.0001999809151540808, - "loss": 46.0, - "step": 38628 - }, - { - "epoch": 6.2209428720963, - "grad_norm": 0.0026490618474781513, - "learning_rate": 0.0001999809141657082, - "loss": 46.0, - "step": 38629 - }, - { - "epoch": 6.2211039091750875, - "grad_norm": 0.008898897096514702, - "learning_rate": 0.00019998091317731005, - "loss": 46.0, - "step": 38630 - }, - { - "epoch": 6.221264946253875, - "grad_norm": 0.006643269676715136, - "learning_rate": 0.0001999809121888863, - "loss": 46.0, - "step": 38631 - }, - { - "epoch": 6.221425983332662, - "grad_norm": 0.002111181616783142, - "learning_rate": 0.00019998091120043698, - "loss": 46.0, - "step": 38632 - }, - { - "epoch": 6.22158702041145, - "grad_norm": 0.004600047133862972, - "learning_rate": 0.00019998091021196203, - "loss": 46.0, - "step": 38633 - }, - { - "epoch": 6.221748057490237, - "grad_norm": 0.000806512136477977, - "learning_rate": 0.00019998090922346154, - "loss": 46.0, - "step": 38634 - }, - { - "epoch": 6.221909094569025, - "grad_norm": 0.018649008125066757, - "learning_rate": 0.00019998090823493544, - "loss": 46.0, - "step": 38635 - }, - { - "epoch": 6.222070131647812, - "grad_norm": 0.010169747285544872, - "learning_rate": 0.00019998090724638372, - "loss": 46.0, - "step": 38636 - }, - { - "epoch": 6.2222311687266, - "grad_norm": 0.0023480055388063192, - "learning_rate": 0.00019998090625780642, - "loss": 46.0, - "step": 38637 - }, - { - "epoch": 6.222392205805387, - "grad_norm": 0.007084401324391365, - "learning_rate": 0.00019998090526920356, - "loss": 46.0, - "step": 38638 - }, - { - "epoch": 6.222553242884174, - "grad_norm": 0.00897884089499712, - "learning_rate": 0.0001999809042805751, - "loss": 46.0, - "step": 38639 - }, - { - "epoch": 6.222714279962961, - "grad_norm": 0.009737041778862476, - "learning_rate": 0.00019998090329192105, - "loss": 46.0, - "step": 38640 - }, - { - "epoch": 6.222875317041749, - "grad_norm": 0.002869783900678158, - "learning_rate": 0.00019998090230324137, - "loss": 46.0, - "step": 38641 - }, - { - "epoch": 6.223036354120536, - "grad_norm": 0.0036697534378618, - "learning_rate": 0.00019998090131453616, - "loss": 46.0, - "step": 38642 - }, - { - "epoch": 6.2231973911993235, - "grad_norm": 0.004103336948901415, - "learning_rate": 0.00019998090032580533, - "loss": 46.0, - "step": 38643 - }, - { - "epoch": 6.223358428278111, - "grad_norm": 0.011155062355101109, - "learning_rate": 0.00019998089933704892, - "loss": 46.0, - "step": 38644 - }, - { - "epoch": 6.223519465356898, - "grad_norm": 0.005551514215767384, - "learning_rate": 0.0001999808983482669, - "loss": 46.0, - "step": 38645 - }, - { - "epoch": 6.223680502435686, - "grad_norm": 0.00575565779581666, - "learning_rate": 0.0001999808973594593, - "loss": 46.0, - "step": 38646 - }, - { - "epoch": 6.223841539514473, - "grad_norm": 0.008430094458162785, - "learning_rate": 0.0001999808963706261, - "loss": 46.0, - "step": 38647 - }, - { - "epoch": 6.224002576593261, - "grad_norm": 0.015129799023270607, - "learning_rate": 0.00019998089538176734, - "loss": 46.0, - "step": 38648 - }, - { - "epoch": 6.224163613672048, - "grad_norm": 0.004835840314626694, - "learning_rate": 0.00019998089439288296, - "loss": 46.0, - "step": 38649 - }, - { - "epoch": 6.224324650750836, - "grad_norm": 0.0026272109244018793, - "learning_rate": 0.000199980893403973, - "loss": 46.0, - "step": 38650 - }, - { - "epoch": 6.224485687829623, - "grad_norm": 0.009347965009510517, - "learning_rate": 0.00019998089241503748, - "loss": 46.0, - "step": 38651 - }, - { - "epoch": 6.2246467249084105, - "grad_norm": 0.023988954722881317, - "learning_rate": 0.00019998089142607634, - "loss": 46.0, - "step": 38652 - }, - { - "epoch": 6.224807761987197, - "grad_norm": 0.0048380945809185505, - "learning_rate": 0.00019998089043708962, - "loss": 46.0, - "step": 38653 - }, - { - "epoch": 6.2249687990659845, - "grad_norm": 0.0059645334258675575, - "learning_rate": 0.0001999808894480773, - "loss": 46.0, - "step": 38654 - }, - { - "epoch": 6.225129836144772, - "grad_norm": 0.01953182741999626, - "learning_rate": 0.00019998088845903938, - "loss": 46.0, - "step": 38655 - }, - { - "epoch": 6.2252908732235595, - "grad_norm": 0.009990277700126171, - "learning_rate": 0.00019998088746997586, - "loss": 46.0, - "step": 38656 - }, - { - "epoch": 6.225451910302347, - "grad_norm": 0.0029023620299994946, - "learning_rate": 0.0001999808864808868, - "loss": 46.0, - "step": 38657 - }, - { - "epoch": 6.225612947381134, - "grad_norm": 0.0032614143565297127, - "learning_rate": 0.00019998088549177213, - "loss": 46.0, - "step": 38658 - }, - { - "epoch": 6.225773984459922, - "grad_norm": 0.001424444722943008, - "learning_rate": 0.00019998088450263186, - "loss": 46.0, - "step": 38659 - }, - { - "epoch": 6.225935021538709, - "grad_norm": 0.0017049384769052267, - "learning_rate": 0.000199980883513466, - "loss": 46.0, - "step": 38660 - }, - { - "epoch": 6.226096058617497, - "grad_norm": 0.008877153508365154, - "learning_rate": 0.00019998088252427457, - "loss": 46.0, - "step": 38661 - }, - { - "epoch": 6.226257095696284, - "grad_norm": 0.0033927957993000746, - "learning_rate": 0.0001999808815350575, - "loss": 46.0, - "step": 38662 - }, - { - "epoch": 6.226418132775072, - "grad_norm": 0.01044750027358532, - "learning_rate": 0.00019998088054581489, - "loss": 46.0, - "step": 38663 - }, - { - "epoch": 6.226579169853859, - "grad_norm": 0.0124536557123065, - "learning_rate": 0.00019998087955654667, - "loss": 46.0, - "step": 38664 - }, - { - "epoch": 6.2267402069326465, - "grad_norm": 0.002921351930126548, - "learning_rate": 0.00019998087856725288, - "loss": 46.0, - "step": 38665 - }, - { - "epoch": 6.226901244011434, - "grad_norm": 0.00365428626537323, - "learning_rate": 0.00019998087757793347, - "loss": 46.0, - "step": 38666 - }, - { - "epoch": 6.227062281090221, - "grad_norm": 0.007008520420640707, - "learning_rate": 0.0001999808765885885, - "loss": 46.0, - "step": 38667 - }, - { - "epoch": 6.227223318169008, - "grad_norm": 0.004778375383466482, - "learning_rate": 0.0001999808755992179, - "loss": 46.0, - "step": 38668 - }, - { - "epoch": 6.227384355247795, - "grad_norm": 0.006018646992743015, - "learning_rate": 0.00019998087460982173, - "loss": 46.0, - "step": 38669 - }, - { - "epoch": 6.227545392326583, - "grad_norm": 0.004066370893269777, - "learning_rate": 0.0001999808736204, - "loss": 46.0, - "step": 38670 - }, - { - "epoch": 6.22770642940537, - "grad_norm": 0.0016448433743789792, - "learning_rate": 0.00019998087263095262, - "loss": 46.0, - "step": 38671 - }, - { - "epoch": 6.227867466484158, - "grad_norm": 0.0017839446663856506, - "learning_rate": 0.00019998087164147972, - "loss": 46.0, - "step": 38672 - }, - { - "epoch": 6.228028503562945, - "grad_norm": 0.014859459362924099, - "learning_rate": 0.0001999808706519812, - "loss": 46.0, - "step": 38673 - }, - { - "epoch": 6.228189540641733, - "grad_norm": 0.002171889878809452, - "learning_rate": 0.00019998086966245706, - "loss": 46.0, - "step": 38674 - }, - { - "epoch": 6.22835057772052, - "grad_norm": 0.012175188399851322, - "learning_rate": 0.00019998086867290736, - "loss": 46.0, - "step": 38675 - }, - { - "epoch": 6.228511614799308, - "grad_norm": 0.012284522876143456, - "learning_rate": 0.00019998086768333205, - "loss": 46.0, - "step": 38676 - }, - { - "epoch": 6.228672651878095, - "grad_norm": 0.0025695443619042635, - "learning_rate": 0.00019998086669373118, - "loss": 46.0, - "step": 38677 - }, - { - "epoch": 6.2288336889568825, - "grad_norm": 0.009514286182820797, - "learning_rate": 0.00019998086570410467, - "loss": 46.0, - "step": 38678 - }, - { - "epoch": 6.22899472603567, - "grad_norm": 0.0032513632904738188, - "learning_rate": 0.00019998086471445262, - "loss": 46.0, - "step": 38679 - }, - { - "epoch": 6.229155763114457, - "grad_norm": 0.002580565633252263, - "learning_rate": 0.00019998086372477496, - "loss": 46.0, - "step": 38680 - }, - { - "epoch": 6.229316800193245, - "grad_norm": 0.007044296711683273, - "learning_rate": 0.00019998086273507172, - "loss": 46.0, - "step": 38681 - }, - { - "epoch": 6.229477837272032, - "grad_norm": 0.004722347483038902, - "learning_rate": 0.00019998086174534288, - "loss": 46.0, - "step": 38682 - }, - { - "epoch": 6.229638874350819, - "grad_norm": 0.01471523568034172, - "learning_rate": 0.00019998086075558846, - "loss": 46.0, - "step": 38683 - }, - { - "epoch": 6.229799911429606, - "grad_norm": 0.002049632603302598, - "learning_rate": 0.00019998085976580845, - "loss": 46.0, - "step": 38684 - }, - { - "epoch": 6.229960948508394, - "grad_norm": 0.007146105635911226, - "learning_rate": 0.00019998085877600286, - "loss": 46.0, - "step": 38685 - }, - { - "epoch": 6.230121985587181, - "grad_norm": 0.006557625252753496, - "learning_rate": 0.00019998085778617165, - "loss": 46.0, - "step": 38686 - }, - { - "epoch": 6.230283022665969, - "grad_norm": 0.0206775963306427, - "learning_rate": 0.00019998085679631485, - "loss": 46.0, - "step": 38687 - }, - { - "epoch": 6.230444059744756, - "grad_norm": 0.00934943649917841, - "learning_rate": 0.0001999808558064325, - "loss": 46.0, - "step": 38688 - }, - { - "epoch": 6.230605096823544, - "grad_norm": 0.02179321087896824, - "learning_rate": 0.0001999808548165245, - "loss": 46.0, - "step": 38689 - }, - { - "epoch": 6.230766133902331, - "grad_norm": 0.0011299765901640058, - "learning_rate": 0.00019998085382659097, - "loss": 46.0, - "step": 38690 - }, - { - "epoch": 6.2309271709811185, - "grad_norm": 0.012291212566196918, - "learning_rate": 0.00019998085283663182, - "loss": 46.0, - "step": 38691 - }, - { - "epoch": 6.231088208059906, - "grad_norm": 0.005116683896631002, - "learning_rate": 0.0001999808518466471, - "loss": 46.0, - "step": 38692 - }, - { - "epoch": 6.231249245138693, - "grad_norm": 0.020011739805340767, - "learning_rate": 0.00019998085085663677, - "loss": 46.0, - "step": 38693 - }, - { - "epoch": 6.231410282217481, - "grad_norm": 0.010386831127107143, - "learning_rate": 0.00019998084986660083, - "loss": 46.0, - "step": 38694 - }, - { - "epoch": 6.231571319296268, - "grad_norm": 0.0034894703421741724, - "learning_rate": 0.00019998084887653934, - "loss": 46.0, - "step": 38695 - }, - { - "epoch": 6.231732356375056, - "grad_norm": 0.005942290183156729, - "learning_rate": 0.00019998084788645223, - "loss": 46.0, - "step": 38696 - }, - { - "epoch": 6.231893393453843, - "grad_norm": 0.00561217125505209, - "learning_rate": 0.00019998084689633956, - "loss": 46.0, - "step": 38697 - }, - { - "epoch": 6.23205443053263, - "grad_norm": 0.01627948507666588, - "learning_rate": 0.00019998084590620128, - "loss": 46.0, - "step": 38698 - }, - { - "epoch": 6.232215467611417, - "grad_norm": 0.009304588660597801, - "learning_rate": 0.0001999808449160374, - "loss": 46.0, - "step": 38699 - }, - { - "epoch": 6.232376504690205, - "grad_norm": 0.003932025283575058, - "learning_rate": 0.00019998084392584795, - "loss": 46.0, - "step": 38700 - }, - { - "epoch": 6.232537541768992, - "grad_norm": 0.0026350137777626514, - "learning_rate": 0.0001999808429356329, - "loss": 46.0, - "step": 38701 - }, - { - "epoch": 6.23269857884778, - "grad_norm": 0.0015443023294210434, - "learning_rate": 0.00019998084194539225, - "loss": 46.0, - "step": 38702 - }, - { - "epoch": 6.232859615926567, - "grad_norm": 0.0015425975434482098, - "learning_rate": 0.00019998084095512603, - "loss": 46.0, - "step": 38703 - }, - { - "epoch": 6.2330206530053545, - "grad_norm": 0.006231886334717274, - "learning_rate": 0.00019998083996483423, - "loss": 46.0, - "step": 38704 - }, - { - "epoch": 6.233181690084142, - "grad_norm": 0.0019597583450376987, - "learning_rate": 0.00019998083897451683, - "loss": 46.0, - "step": 38705 - }, - { - "epoch": 6.233342727162929, - "grad_norm": 0.004525311291217804, - "learning_rate": 0.00019998083798417385, - "loss": 46.0, - "step": 38706 - }, - { - "epoch": 6.233503764241717, - "grad_norm": 0.006644236855208874, - "learning_rate": 0.00019998083699380526, - "loss": 46.0, - "step": 38707 - }, - { - "epoch": 6.233664801320504, - "grad_norm": 0.0036862134002149105, - "learning_rate": 0.00019998083600341108, - "loss": 46.0, - "step": 38708 - }, - { - "epoch": 6.233825838399292, - "grad_norm": 0.0038347700610756874, - "learning_rate": 0.0001999808350129913, - "loss": 46.0, - "step": 38709 - }, - { - "epoch": 6.233986875478079, - "grad_norm": 0.0073648057878017426, - "learning_rate": 0.00019998083402254595, - "loss": 46.0, - "step": 38710 - }, - { - "epoch": 6.234147912556867, - "grad_norm": 0.00634078960865736, - "learning_rate": 0.000199980833032075, - "loss": 46.0, - "step": 38711 - }, - { - "epoch": 6.234308949635653, - "grad_norm": 0.00382929272018373, - "learning_rate": 0.00019998083204157848, - "loss": 46.0, - "step": 38712 - }, - { - "epoch": 6.234469986714441, - "grad_norm": 0.001045657554641366, - "learning_rate": 0.00019998083105105633, - "loss": 46.0, - "step": 38713 - }, - { - "epoch": 6.234631023793228, - "grad_norm": 0.0015196145977824926, - "learning_rate": 0.00019998083006050863, - "loss": 46.0, - "step": 38714 - }, - { - "epoch": 6.2347920608720155, - "grad_norm": 0.003375179599970579, - "learning_rate": 0.00019998082906993534, - "loss": 46.0, - "step": 38715 - }, - { - "epoch": 6.234953097950803, - "grad_norm": 0.001859193667769432, - "learning_rate": 0.00019998082807933643, - "loss": 46.0, - "step": 38716 - }, - { - "epoch": 6.23511413502959, - "grad_norm": 0.0022404841147363186, - "learning_rate": 0.00019998082708871194, - "loss": 46.0, - "step": 38717 - }, - { - "epoch": 6.235275172108378, - "grad_norm": 0.005487280897796154, - "learning_rate": 0.00019998082609806188, - "loss": 46.0, - "step": 38718 - }, - { - "epoch": 6.235436209187165, - "grad_norm": 0.0018628500401973724, - "learning_rate": 0.00019998082510738622, - "loss": 46.0, - "step": 38719 - }, - { - "epoch": 6.235597246265953, - "grad_norm": 0.0027922329027205706, - "learning_rate": 0.00019998082411668496, - "loss": 46.0, - "step": 38720 - }, - { - "epoch": 6.23575828334474, - "grad_norm": 0.0011255184654146433, - "learning_rate": 0.00019998082312595812, - "loss": 46.0, - "step": 38721 - }, - { - "epoch": 6.235919320423528, - "grad_norm": 0.014780014753341675, - "learning_rate": 0.00019998082213520566, - "loss": 46.0, - "step": 38722 - }, - { - "epoch": 6.236080357502315, - "grad_norm": 0.002269803313538432, - "learning_rate": 0.00019998082114442765, - "loss": 46.0, - "step": 38723 - }, - { - "epoch": 6.236241394581103, - "grad_norm": 0.0055558690801262856, - "learning_rate": 0.00019998082015362402, - "loss": 46.0, - "step": 38724 - }, - { - "epoch": 6.23640243165989, - "grad_norm": 0.007153920829296112, - "learning_rate": 0.00019998081916279483, - "loss": 46.0, - "step": 38725 - }, - { - "epoch": 6.2365634687386775, - "grad_norm": 0.004320100415498018, - "learning_rate": 0.00019998081817194002, - "loss": 46.0, - "step": 38726 - }, - { - "epoch": 6.236724505817464, - "grad_norm": 0.0030976238194853067, - "learning_rate": 0.00019998081718105963, - "loss": 46.0, - "step": 38727 - }, - { - "epoch": 6.2368855428962515, - "grad_norm": 0.0020188416820019484, - "learning_rate": 0.00019998081619015368, - "loss": 46.0, - "step": 38728 - }, - { - "epoch": 6.237046579975039, - "grad_norm": 0.0012540711322799325, - "learning_rate": 0.00019998081519922208, - "loss": 46.0, - "step": 38729 - }, - { - "epoch": 6.237207617053826, - "grad_norm": 0.004225165583193302, - "learning_rate": 0.00019998081420826496, - "loss": 46.0, - "step": 38730 - }, - { - "epoch": 6.237368654132614, - "grad_norm": 0.0022625618148595095, - "learning_rate": 0.0001999808132172822, - "loss": 46.0, - "step": 38731 - }, - { - "epoch": 6.237529691211401, - "grad_norm": 0.006432642694562674, - "learning_rate": 0.00019998081222627386, - "loss": 46.0, - "step": 38732 - }, - { - "epoch": 6.237690728290189, - "grad_norm": 0.006834803149104118, - "learning_rate": 0.00019998081123523995, - "loss": 46.0, - "step": 38733 - }, - { - "epoch": 6.237851765368976, - "grad_norm": 0.008049764670431614, - "learning_rate": 0.00019998081024418042, - "loss": 46.0, - "step": 38734 - }, - { - "epoch": 6.238012802447764, - "grad_norm": 0.003339129965752363, - "learning_rate": 0.00019998080925309535, - "loss": 46.0, - "step": 38735 - }, - { - "epoch": 6.238173839526551, - "grad_norm": 0.005171710625290871, - "learning_rate": 0.00019998080826198465, - "loss": 46.0, - "step": 38736 - }, - { - "epoch": 6.238334876605339, - "grad_norm": 0.0026569857727736235, - "learning_rate": 0.00019998080727084836, - "loss": 46.0, - "step": 38737 - }, - { - "epoch": 6.238495913684126, - "grad_norm": 0.003168070688843727, - "learning_rate": 0.00019998080627968648, - "loss": 46.0, - "step": 38738 - }, - { - "epoch": 6.2386569507629135, - "grad_norm": 0.005243937484920025, - "learning_rate": 0.00019998080528849902, - "loss": 46.0, - "step": 38739 - }, - { - "epoch": 6.238817987841701, - "grad_norm": 0.0015928170178085566, - "learning_rate": 0.00019998080429728596, - "loss": 46.0, - "step": 38740 - }, - { - "epoch": 6.2389790249204875, - "grad_norm": 0.00266423006542027, - "learning_rate": 0.00019998080330604733, - "loss": 46.0, - "step": 38741 - }, - { - "epoch": 6.239140061999275, - "grad_norm": 0.0020855246111750603, - "learning_rate": 0.00019998080231478307, - "loss": 46.0, - "step": 38742 - }, - { - "epoch": 6.239301099078062, - "grad_norm": 0.009634270332753658, - "learning_rate": 0.00019998080132349326, - "loss": 46.0, - "step": 38743 - }, - { - "epoch": 6.23946213615685, - "grad_norm": 0.005434541497379541, - "learning_rate": 0.00019998080033217783, - "loss": 46.0, - "step": 38744 - }, - { - "epoch": 6.239623173235637, - "grad_norm": 0.0017689937958493829, - "learning_rate": 0.00019998079934083684, - "loss": 46.0, - "step": 38745 - }, - { - "epoch": 6.239784210314425, - "grad_norm": 0.015064570121467113, - "learning_rate": 0.00019998079834947024, - "loss": 46.0, - "step": 38746 - }, - { - "epoch": 6.239945247393212, - "grad_norm": 0.00765609135851264, - "learning_rate": 0.00019998079735807808, - "loss": 46.0, - "step": 38747 - }, - { - "epoch": 6.240106284472, - "grad_norm": 0.003262442070990801, - "learning_rate": 0.0001999807963666603, - "loss": 46.0, - "step": 38748 - }, - { - "epoch": 6.240267321550787, - "grad_norm": 0.0124131478369236, - "learning_rate": 0.00019998079537521693, - "loss": 46.0, - "step": 38749 - }, - { - "epoch": 6.240428358629575, - "grad_norm": 0.008744933642446995, - "learning_rate": 0.00019998079438374798, - "loss": 46.0, - "step": 38750 - }, - { - "epoch": 6.240589395708362, - "grad_norm": 0.002951371716335416, - "learning_rate": 0.00019998079339225342, - "loss": 46.0, - "step": 38751 - }, - { - "epoch": 6.2407504327871495, - "grad_norm": 0.0077109141275286674, - "learning_rate": 0.00019998079240073332, - "loss": 46.0, - "step": 38752 - }, - { - "epoch": 6.240911469865937, - "grad_norm": 0.0024209748953580856, - "learning_rate": 0.00019998079140918758, - "loss": 46.0, - "step": 38753 - }, - { - "epoch": 6.241072506944724, - "grad_norm": 0.0029037443455308676, - "learning_rate": 0.00019998079041761625, - "loss": 46.0, - "step": 38754 - }, - { - "epoch": 6.241233544023512, - "grad_norm": 0.0021141646429896355, - "learning_rate": 0.00019998078942601937, - "loss": 46.0, - "step": 38755 - }, - { - "epoch": 6.241394581102298, - "grad_norm": 0.0038508321158587933, - "learning_rate": 0.00019998078843439684, - "loss": 46.0, - "step": 38756 - }, - { - "epoch": 6.241555618181086, - "grad_norm": 0.02414633333683014, - "learning_rate": 0.0001999807874427488, - "loss": 46.0, - "step": 38757 - }, - { - "epoch": 6.241716655259873, - "grad_norm": 0.010605452582240105, - "learning_rate": 0.00019998078645107513, - "loss": 46.0, - "step": 38758 - }, - { - "epoch": 6.241877692338661, - "grad_norm": 0.003035190049558878, - "learning_rate": 0.00019998078545937584, - "loss": 46.0, - "step": 38759 - }, - { - "epoch": 6.242038729417448, - "grad_norm": 0.0018731292802840471, - "learning_rate": 0.000199980784467651, - "loss": 46.0, - "step": 38760 - }, - { - "epoch": 6.242199766496236, - "grad_norm": 0.0007259367848746479, - "learning_rate": 0.00019998078347590055, - "loss": 46.0, - "step": 38761 - }, - { - "epoch": 6.242360803575023, - "grad_norm": 0.0016311903018504381, - "learning_rate": 0.00019998078248412453, - "loss": 46.0, - "step": 38762 - }, - { - "epoch": 6.2425218406538105, - "grad_norm": 0.00248343450948596, - "learning_rate": 0.0001999807814923229, - "loss": 46.0, - "step": 38763 - }, - { - "epoch": 6.242682877732598, - "grad_norm": 0.0009264177642762661, - "learning_rate": 0.00019998078050049566, - "loss": 46.0, - "step": 38764 - }, - { - "epoch": 6.2428439148113855, - "grad_norm": 0.009981558658182621, - "learning_rate": 0.0001999807795086429, - "loss": 46.0, - "step": 38765 - }, - { - "epoch": 6.243004951890173, - "grad_norm": 0.013788151554763317, - "learning_rate": 0.00019998077851676448, - "loss": 46.0, - "step": 38766 - }, - { - "epoch": 6.24316598896896, - "grad_norm": 0.003375322325155139, - "learning_rate": 0.00019998077752486052, - "loss": 46.0, - "step": 38767 - }, - { - "epoch": 6.243327026047748, - "grad_norm": 0.014961320906877518, - "learning_rate": 0.0001999807765329309, - "loss": 46.0, - "step": 38768 - }, - { - "epoch": 6.243488063126535, - "grad_norm": 0.0045212144032120705, - "learning_rate": 0.00019998077554097575, - "loss": 46.0, - "step": 38769 - }, - { - "epoch": 6.243649100205323, - "grad_norm": 0.009469741024076939, - "learning_rate": 0.000199980774548995, - "loss": 46.0, - "step": 38770 - }, - { - "epoch": 6.243810137284109, - "grad_norm": 0.014732648618519306, - "learning_rate": 0.00019998077355698867, - "loss": 46.0, - "step": 38771 - }, - { - "epoch": 6.243971174362897, - "grad_norm": 0.00553680257871747, - "learning_rate": 0.00019998077256495674, - "loss": 46.0, - "step": 38772 - }, - { - "epoch": 6.244132211441684, - "grad_norm": 0.011354890652000904, - "learning_rate": 0.00019998077157289923, - "loss": 46.0, - "step": 38773 - }, - { - "epoch": 6.244293248520472, - "grad_norm": 0.002924802014604211, - "learning_rate": 0.0001999807705808161, - "loss": 46.0, - "step": 38774 - }, - { - "epoch": 6.244454285599259, - "grad_norm": 0.012061532586812973, - "learning_rate": 0.0001999807695887074, - "loss": 46.0, - "step": 38775 - }, - { - "epoch": 6.2446153226780465, - "grad_norm": 0.0025552804581820965, - "learning_rate": 0.0001999807685965731, - "loss": 46.0, - "step": 38776 - }, - { - "epoch": 6.244776359756834, - "grad_norm": 0.00809148233383894, - "learning_rate": 0.0001999807676044132, - "loss": 46.0, - "step": 38777 - }, - { - "epoch": 6.244937396835621, - "grad_norm": 0.004028466064482927, - "learning_rate": 0.00019998076661222777, - "loss": 46.0, - "step": 38778 - }, - { - "epoch": 6.245098433914409, - "grad_norm": 0.004658353049308062, - "learning_rate": 0.00019998076562001668, - "loss": 46.0, - "step": 38779 - }, - { - "epoch": 6.245259470993196, - "grad_norm": 0.012473132461309433, - "learning_rate": 0.00019998076462778003, - "loss": 46.0, - "step": 38780 - }, - { - "epoch": 6.245420508071984, - "grad_norm": 0.008844288997352123, - "learning_rate": 0.00019998076363551782, - "loss": 46.0, - "step": 38781 - }, - { - "epoch": 6.245581545150771, - "grad_norm": 0.006389549933373928, - "learning_rate": 0.00019998076264322995, - "loss": 46.0, - "step": 38782 - }, - { - "epoch": 6.245742582229559, - "grad_norm": 0.0013734856620430946, - "learning_rate": 0.00019998076165091654, - "loss": 46.0, - "step": 38783 - }, - { - "epoch": 6.245903619308346, - "grad_norm": 0.007066596299409866, - "learning_rate": 0.00019998076065857754, - "loss": 46.0, - "step": 38784 - }, - { - "epoch": 6.246064656387133, - "grad_norm": 0.00653229421004653, - "learning_rate": 0.00019998075966621293, - "loss": 46.0, - "step": 38785 - }, - { - "epoch": 6.24622569346592, - "grad_norm": 0.0075106387957930565, - "learning_rate": 0.00019998075867382276, - "loss": 46.0, - "step": 38786 - }, - { - "epoch": 6.246386730544708, - "grad_norm": 0.008485783822834492, - "learning_rate": 0.00019998075768140695, - "loss": 46.0, - "step": 38787 - }, - { - "epoch": 6.246547767623495, - "grad_norm": 0.005509641487151384, - "learning_rate": 0.00019998075668896558, - "loss": 46.0, - "step": 38788 - }, - { - "epoch": 6.2467088047022825, - "grad_norm": 0.005409873090684414, - "learning_rate": 0.00019998075569649864, - "loss": 46.0, - "step": 38789 - }, - { - "epoch": 6.24686984178107, - "grad_norm": 0.009433098137378693, - "learning_rate": 0.00019998075470400607, - "loss": 46.0, - "step": 38790 - }, - { - "epoch": 6.247030878859857, - "grad_norm": 0.013554275035858154, - "learning_rate": 0.00019998075371148794, - "loss": 46.0, - "step": 38791 - }, - { - "epoch": 6.247191915938645, - "grad_norm": 0.006548671051859856, - "learning_rate": 0.00019998075271894422, - "loss": 46.0, - "step": 38792 - }, - { - "epoch": 6.247352953017432, - "grad_norm": 0.008764144964516163, - "learning_rate": 0.00019998075172637488, - "loss": 46.0, - "step": 38793 - }, - { - "epoch": 6.24751399009622, - "grad_norm": 0.023204488679766655, - "learning_rate": 0.00019998075073377996, - "loss": 46.0, - "step": 38794 - }, - { - "epoch": 6.247675027175007, - "grad_norm": 0.022843537852168083, - "learning_rate": 0.00019998074974115948, - "loss": 46.0, - "step": 38795 - }, - { - "epoch": 6.247836064253795, - "grad_norm": 0.005780502688139677, - "learning_rate": 0.00019998074874851338, - "loss": 46.0, - "step": 38796 - }, - { - "epoch": 6.247997101332582, - "grad_norm": 0.007769168820232153, - "learning_rate": 0.0001999807477558417, - "loss": 46.0, - "step": 38797 - }, - { - "epoch": 6.24815813841137, - "grad_norm": 0.0015627213288098574, - "learning_rate": 0.00019998074676314442, - "loss": 46.0, - "step": 38798 - }, - { - "epoch": 6.248319175490157, - "grad_norm": 0.0024867020547389984, - "learning_rate": 0.00019998074577042157, - "loss": 46.0, - "step": 38799 - }, - { - "epoch": 6.248480212568944, - "grad_norm": 0.004033821634948254, - "learning_rate": 0.0001999807447776731, - "loss": 46.0, - "step": 38800 - }, - { - "epoch": 6.248641249647731, - "grad_norm": 0.005027530714869499, - "learning_rate": 0.00019998074378489906, - "loss": 46.0, - "step": 38801 - }, - { - "epoch": 6.2488022867265185, - "grad_norm": 0.0021902115549892187, - "learning_rate": 0.00019998074279209944, - "loss": 46.0, - "step": 38802 - }, - { - "epoch": 6.248963323805306, - "grad_norm": 0.003065049648284912, - "learning_rate": 0.0001999807417992742, - "loss": 46.0, - "step": 38803 - }, - { - "epoch": 6.249124360884093, - "grad_norm": 0.010861421003937721, - "learning_rate": 0.0001999807408064234, - "loss": 46.0, - "step": 38804 - }, - { - "epoch": 6.249285397962881, - "grad_norm": 0.003233975963667035, - "learning_rate": 0.000199980739813547, - "loss": 46.0, - "step": 38805 - }, - { - "epoch": 6.249446435041668, - "grad_norm": 0.005381418392062187, - "learning_rate": 0.00019998073882064504, - "loss": 46.0, - "step": 38806 - }, - { - "epoch": 6.249607472120456, - "grad_norm": 0.001557415584102273, - "learning_rate": 0.00019998073782771743, - "loss": 46.0, - "step": 38807 - }, - { - "epoch": 6.249768509199243, - "grad_norm": 0.015568535774946213, - "learning_rate": 0.00019998073683476428, - "loss": 46.0, - "step": 38808 - }, - { - "epoch": 6.249929546278031, - "grad_norm": 0.002914362819865346, - "learning_rate": 0.00019998073584178553, - "loss": 46.0, - "step": 38809 - }, - { - "epoch": 6.250090583356818, - "grad_norm": 0.007278406526893377, - "learning_rate": 0.00019998073484878118, - "loss": 46.0, - "step": 38810 - }, - { - "epoch": 6.250251620435606, - "grad_norm": 0.000835315091535449, - "learning_rate": 0.0001999807338557512, - "loss": 46.0, - "step": 38811 - }, - { - "epoch": 6.250412657514393, - "grad_norm": 0.004674816038459539, - "learning_rate": 0.0001999807328626957, - "loss": 46.0, - "step": 38812 - }, - { - "epoch": 6.2505736945931805, - "grad_norm": 0.004328897222876549, - "learning_rate": 0.00019998073186961457, - "loss": 46.0, - "step": 38813 - }, - { - "epoch": 6.250734731671967, - "grad_norm": 0.0014253858244046569, - "learning_rate": 0.00019998073087650785, - "loss": 46.0, - "step": 38814 - }, - { - "epoch": 6.2508957687507545, - "grad_norm": 0.004605847876518965, - "learning_rate": 0.00019998072988337555, - "loss": 46.0, - "step": 38815 - }, - { - "epoch": 6.251056805829542, - "grad_norm": 0.004812745843082666, - "learning_rate": 0.00019998072889021765, - "loss": 46.0, - "step": 38816 - }, - { - "epoch": 6.251217842908329, - "grad_norm": 0.005110305733978748, - "learning_rate": 0.00019998072789703417, - "loss": 46.0, - "step": 38817 - }, - { - "epoch": 6.251378879987117, - "grad_norm": 0.0016644394490867853, - "learning_rate": 0.0001999807269038251, - "loss": 46.0, - "step": 38818 - }, - { - "epoch": 6.251539917065904, - "grad_norm": 0.009690712206065655, - "learning_rate": 0.00019998072591059045, - "loss": 46.0, - "step": 38819 - }, - { - "epoch": 6.251700954144692, - "grad_norm": 0.006749164313077927, - "learning_rate": 0.00019998072491733023, - "loss": 46.0, - "step": 38820 - }, - { - "epoch": 6.251861991223479, - "grad_norm": 0.009052845649421215, - "learning_rate": 0.00019998072392404437, - "loss": 46.0, - "step": 38821 - }, - { - "epoch": 6.252023028302267, - "grad_norm": 0.016530882567167282, - "learning_rate": 0.00019998072293073293, - "loss": 46.0, - "step": 38822 - }, - { - "epoch": 6.252184065381054, - "grad_norm": 0.00537888752296567, - "learning_rate": 0.00019998072193739592, - "loss": 46.0, - "step": 38823 - }, - { - "epoch": 6.2523451024598415, - "grad_norm": 0.005701381713151932, - "learning_rate": 0.0001999807209440333, - "loss": 46.0, - "step": 38824 - }, - { - "epoch": 6.252506139538629, - "grad_norm": 0.0031267041340470314, - "learning_rate": 0.0001999807199506451, - "loss": 46.0, - "step": 38825 - }, - { - "epoch": 6.2526671766174164, - "grad_norm": 0.004520677030086517, - "learning_rate": 0.0001999807189572313, - "loss": 46.0, - "step": 38826 - }, - { - "epoch": 6.252828213696204, - "grad_norm": 0.0032734551932662725, - "learning_rate": 0.00019998071796379195, - "loss": 46.0, - "step": 38827 - }, - { - "epoch": 6.252989250774991, - "grad_norm": 0.0007657860987819731, - "learning_rate": 0.000199980716970327, - "loss": 46.0, - "step": 38828 - }, - { - "epoch": 6.253150287853778, - "grad_norm": 0.006113671697676182, - "learning_rate": 0.0001999807159768364, - "loss": 46.0, - "step": 38829 - }, - { - "epoch": 6.253311324932565, - "grad_norm": 0.0017102460842579603, - "learning_rate": 0.00019998071498332026, - "loss": 46.0, - "step": 38830 - }, - { - "epoch": 6.253472362011353, - "grad_norm": 0.002060038037598133, - "learning_rate": 0.0001999807139897785, - "loss": 46.0, - "step": 38831 - }, - { - "epoch": 6.25363339909014, - "grad_norm": 0.0160819161683321, - "learning_rate": 0.0001999807129962112, - "loss": 46.0, - "step": 38832 - }, - { - "epoch": 6.253794436168928, - "grad_norm": 0.0016725462628528476, - "learning_rate": 0.0001999807120026183, - "loss": 46.0, - "step": 38833 - }, - { - "epoch": 6.253955473247715, - "grad_norm": 0.0012638554908335209, - "learning_rate": 0.00019998071100899977, - "loss": 46.0, - "step": 38834 - }, - { - "epoch": 6.254116510326503, - "grad_norm": 0.005010468885302544, - "learning_rate": 0.00019998071001535567, - "loss": 46.0, - "step": 38835 - }, - { - "epoch": 6.25427754740529, - "grad_norm": 0.008706933818757534, - "learning_rate": 0.00019998070902168597, - "loss": 46.0, - "step": 38836 - }, - { - "epoch": 6.2544385844840775, - "grad_norm": 0.0068745906464755535, - "learning_rate": 0.0001999807080279907, - "loss": 46.0, - "step": 38837 - }, - { - "epoch": 6.254599621562865, - "grad_norm": 0.002690599299967289, - "learning_rate": 0.00019998070703426983, - "loss": 46.0, - "step": 38838 - }, - { - "epoch": 6.254760658641652, - "grad_norm": 0.004016812890768051, - "learning_rate": 0.00019998070604052338, - "loss": 46.0, - "step": 38839 - }, - { - "epoch": 6.25492169572044, - "grad_norm": 0.0023593066725879908, - "learning_rate": 0.00019998070504675133, - "loss": 46.0, - "step": 38840 - }, - { - "epoch": 6.255082732799227, - "grad_norm": 0.0017565255984663963, - "learning_rate": 0.00019998070405295368, - "loss": 46.0, - "step": 38841 - }, - { - "epoch": 6.255243769878015, - "grad_norm": 0.002819111803546548, - "learning_rate": 0.00019998070305913046, - "loss": 46.0, - "step": 38842 - }, - { - "epoch": 6.255404806956802, - "grad_norm": 0.012306607328355312, - "learning_rate": 0.00019998070206528166, - "loss": 46.0, - "step": 38843 - }, - { - "epoch": 6.255565844035589, - "grad_norm": 0.002566410694271326, - "learning_rate": 0.00019998070107140722, - "loss": 46.0, - "step": 38844 - }, - { - "epoch": 6.255726881114376, - "grad_norm": 0.0021592844277620316, - "learning_rate": 0.00019998070007750722, - "loss": 46.0, - "step": 38845 - }, - { - "epoch": 6.255887918193164, - "grad_norm": 0.0027225178200751543, - "learning_rate": 0.00019998069908358165, - "loss": 46.0, - "step": 38846 - }, - { - "epoch": 6.256048955271951, - "grad_norm": 0.0011230995878577232, - "learning_rate": 0.00019998069808963047, - "loss": 46.0, - "step": 38847 - }, - { - "epoch": 6.256209992350739, - "grad_norm": 0.002933935495093465, - "learning_rate": 0.0001999806970956537, - "loss": 46.0, - "step": 38848 - }, - { - "epoch": 6.256371029429526, - "grad_norm": 0.0029875149484723806, - "learning_rate": 0.00019998069610165136, - "loss": 46.0, - "step": 38849 - }, - { - "epoch": 6.2565320665083135, - "grad_norm": 0.003300453070551157, - "learning_rate": 0.0001999806951076234, - "loss": 46.0, - "step": 38850 - }, - { - "epoch": 6.256693103587101, - "grad_norm": 0.003974775783717632, - "learning_rate": 0.00019998069411356986, - "loss": 46.0, - "step": 38851 - }, - { - "epoch": 6.256854140665888, - "grad_norm": 0.0083570946007967, - "learning_rate": 0.00019998069311949075, - "loss": 46.0, - "step": 38852 - }, - { - "epoch": 6.257015177744676, - "grad_norm": 0.0014552467036992311, - "learning_rate": 0.00019998069212538602, - "loss": 46.0, - "step": 38853 - }, - { - "epoch": 6.257176214823463, - "grad_norm": 0.002063014777377248, - "learning_rate": 0.0001999806911312557, - "loss": 46.0, - "step": 38854 - }, - { - "epoch": 6.257337251902251, - "grad_norm": 0.0020061584655195475, - "learning_rate": 0.0001999806901370998, - "loss": 46.0, - "step": 38855 - }, - { - "epoch": 6.257498288981038, - "grad_norm": 0.0025122964289039373, - "learning_rate": 0.00019998068914291832, - "loss": 46.0, - "step": 38856 - }, - { - "epoch": 6.257659326059826, - "grad_norm": 0.00746725732460618, - "learning_rate": 0.00019998068814871124, - "loss": 46.0, - "step": 38857 - }, - { - "epoch": 6.257820363138613, - "grad_norm": 0.009549075737595558, - "learning_rate": 0.00019998068715447857, - "loss": 46.0, - "step": 38858 - }, - { - "epoch": 6.2579814002174, - "grad_norm": 0.003968052566051483, - "learning_rate": 0.00019998068616022032, - "loss": 46.0, - "step": 38859 - }, - { - "epoch": 6.258142437296187, - "grad_norm": 0.006248748395591974, - "learning_rate": 0.00019998068516593649, - "loss": 46.0, - "step": 38860 - }, - { - "epoch": 6.258303474374975, - "grad_norm": 0.003635324537754059, - "learning_rate": 0.00019998068417162706, - "loss": 46.0, - "step": 38861 - }, - { - "epoch": 6.258464511453762, - "grad_norm": 0.0016183869447559118, - "learning_rate": 0.000199980683177292, - "loss": 46.0, - "step": 38862 - }, - { - "epoch": 6.2586255485325495, - "grad_norm": 0.0033688012044876814, - "learning_rate": 0.0001999806821829314, - "loss": 46.0, - "step": 38863 - }, - { - "epoch": 6.258786585611337, - "grad_norm": 0.003631132422015071, - "learning_rate": 0.00019998068118854518, - "loss": 46.0, - "step": 38864 - }, - { - "epoch": 6.258947622690124, - "grad_norm": 0.003034138586372137, - "learning_rate": 0.00019998068019413338, - "loss": 46.0, - "step": 38865 - }, - { - "epoch": 6.259108659768912, - "grad_norm": 0.002651434624567628, - "learning_rate": 0.00019998067919969602, - "loss": 46.0, - "step": 38866 - }, - { - "epoch": 6.259269696847699, - "grad_norm": 0.014678088948130608, - "learning_rate": 0.00019998067820523304, - "loss": 46.0, - "step": 38867 - }, - { - "epoch": 6.259430733926487, - "grad_norm": 0.008156015537679195, - "learning_rate": 0.00019998067721074448, - "loss": 46.0, - "step": 38868 - }, - { - "epoch": 6.259591771005274, - "grad_norm": 0.005732891149818897, - "learning_rate": 0.0001999806762162303, - "loss": 46.0, - "step": 38869 - }, - { - "epoch": 6.259752808084062, - "grad_norm": 0.0012494615511968732, - "learning_rate": 0.00019998067522169056, - "loss": 46.0, - "step": 38870 - }, - { - "epoch": 6.259913845162849, - "grad_norm": 0.002612861804664135, - "learning_rate": 0.00019998067422712521, - "loss": 46.0, - "step": 38871 - }, - { - "epoch": 6.2600748822416366, - "grad_norm": 0.00813622958958149, - "learning_rate": 0.0001999806732325343, - "loss": 46.0, - "step": 38872 - }, - { - "epoch": 6.260235919320423, - "grad_norm": 0.016974350437521935, - "learning_rate": 0.00019998067223791778, - "loss": 46.0, - "step": 38873 - }, - { - "epoch": 6.260396956399211, - "grad_norm": 0.0010822816984727979, - "learning_rate": 0.0001999806712432757, - "loss": 46.0, - "step": 38874 - }, - { - "epoch": 6.260557993477998, - "grad_norm": 0.003296394133940339, - "learning_rate": 0.000199980670248608, - "loss": 46.0, - "step": 38875 - }, - { - "epoch": 6.2607190305567855, - "grad_norm": 0.008714224211871624, - "learning_rate": 0.0001999806692539147, - "loss": 46.0, - "step": 38876 - }, - { - "epoch": 6.260880067635573, - "grad_norm": 0.006531689781695604, - "learning_rate": 0.0001999806682591958, - "loss": 46.0, - "step": 38877 - }, - { - "epoch": 6.26104110471436, - "grad_norm": 0.008106173016130924, - "learning_rate": 0.00019998066726445134, - "loss": 46.0, - "step": 38878 - }, - { - "epoch": 6.261202141793148, - "grad_norm": 0.0029440741054713726, - "learning_rate": 0.0001999806662696813, - "loss": 46.0, - "step": 38879 - }, - { - "epoch": 6.261363178871935, - "grad_norm": 0.004526916425675154, - "learning_rate": 0.00019998066527488563, - "loss": 46.0, - "step": 38880 - }, - { - "epoch": 6.261524215950723, - "grad_norm": 0.006189214531332254, - "learning_rate": 0.0001999806642800644, - "loss": 46.0, - "step": 38881 - }, - { - "epoch": 6.26168525302951, - "grad_norm": 0.0036093282978981733, - "learning_rate": 0.0001999806632852176, - "loss": 46.0, - "step": 38882 - }, - { - "epoch": 6.261846290108298, - "grad_norm": 0.0014533509965986013, - "learning_rate": 0.00019998066229034517, - "loss": 46.0, - "step": 38883 - }, - { - "epoch": 6.262007327187085, - "grad_norm": 0.003021940356120467, - "learning_rate": 0.00019998066129544716, - "loss": 46.0, - "step": 38884 - }, - { - "epoch": 6.2621683642658725, - "grad_norm": 0.003992132376879454, - "learning_rate": 0.0001999806603005236, - "loss": 46.0, - "step": 38885 - }, - { - "epoch": 6.26232940134466, - "grad_norm": 0.006618468556553125, - "learning_rate": 0.0001999806593055744, - "loss": 46.0, - "step": 38886 - }, - { - "epoch": 6.2624904384234465, - "grad_norm": 0.013644207268953323, - "learning_rate": 0.00019998065831059963, - "loss": 46.0, - "step": 38887 - }, - { - "epoch": 6.262651475502234, - "grad_norm": 0.005208782851696014, - "learning_rate": 0.00019998065731559924, - "loss": 46.0, - "step": 38888 - }, - { - "epoch": 6.2628125125810215, - "grad_norm": 0.018910543993115425, - "learning_rate": 0.0001999806563205733, - "loss": 46.0, - "step": 38889 - }, - { - "epoch": 6.262973549659809, - "grad_norm": 0.008549229241907597, - "learning_rate": 0.00019998065532552176, - "loss": 46.0, - "step": 38890 - }, - { - "epoch": 6.263134586738596, - "grad_norm": 0.01707020401954651, - "learning_rate": 0.0001999806543304446, - "loss": 46.0, - "step": 38891 - }, - { - "epoch": 6.263295623817384, - "grad_norm": 0.005410929676145315, - "learning_rate": 0.00019998065333534187, - "loss": 46.0, - "step": 38892 - }, - { - "epoch": 6.263456660896171, - "grad_norm": 0.002450622385367751, - "learning_rate": 0.0001999806523402136, - "loss": 46.0, - "step": 38893 - }, - { - "epoch": 6.263617697974959, - "grad_norm": 0.01842244155704975, - "learning_rate": 0.00019998065134505967, - "loss": 46.0, - "step": 38894 - }, - { - "epoch": 6.263778735053746, - "grad_norm": 0.01023776363581419, - "learning_rate": 0.00019998065034988017, - "loss": 46.0, - "step": 38895 - }, - { - "epoch": 6.263939772132534, - "grad_norm": 0.006754412315785885, - "learning_rate": 0.0001999806493546751, - "loss": 46.0, - "step": 38896 - }, - { - "epoch": 6.264100809211321, - "grad_norm": 0.00219578854739666, - "learning_rate": 0.0001999806483594444, - "loss": 46.0, - "step": 38897 - }, - { - "epoch": 6.2642618462901085, - "grad_norm": 0.0015779847744852304, - "learning_rate": 0.00019998064736418815, - "loss": 46.0, - "step": 38898 - }, - { - "epoch": 6.264422883368896, - "grad_norm": 0.006254156120121479, - "learning_rate": 0.0001999806463689063, - "loss": 46.0, - "step": 38899 - }, - { - "epoch": 6.264583920447683, - "grad_norm": 0.004142298363149166, - "learning_rate": 0.00019998064537359887, - "loss": 46.0, - "step": 38900 - }, - { - "epoch": 6.264744957526471, - "grad_norm": 0.004489119164645672, - "learning_rate": 0.00019998064437826583, - "loss": 46.0, - "step": 38901 - }, - { - "epoch": 6.264905994605257, - "grad_norm": 0.0037455197889357805, - "learning_rate": 0.00019998064338290722, - "loss": 46.0, - "step": 38902 - }, - { - "epoch": 6.265067031684045, - "grad_norm": 0.0023001465015113354, - "learning_rate": 0.000199980642387523, - "loss": 46.0, - "step": 38903 - }, - { - "epoch": 6.265228068762832, - "grad_norm": 0.006872283760458231, - "learning_rate": 0.0001999806413921132, - "loss": 46.0, - "step": 38904 - }, - { - "epoch": 6.26538910584162, - "grad_norm": 0.0023588535841554403, - "learning_rate": 0.0001999806403966778, - "loss": 46.0, - "step": 38905 - }, - { - "epoch": 6.265550142920407, - "grad_norm": 0.007021674886345863, - "learning_rate": 0.0001999806394012168, - "loss": 46.0, - "step": 38906 - }, - { - "epoch": 6.265711179999195, - "grad_norm": 0.0018633791478350759, - "learning_rate": 0.00019998063840573024, - "loss": 46.0, - "step": 38907 - }, - { - "epoch": 6.265872217077982, - "grad_norm": 0.006334179081022739, - "learning_rate": 0.00019998063741021808, - "loss": 46.0, - "step": 38908 - }, - { - "epoch": 6.26603325415677, - "grad_norm": 0.0021135439164936543, - "learning_rate": 0.00019998063641468033, - "loss": 46.0, - "step": 38909 - }, - { - "epoch": 6.266194291235557, - "grad_norm": 0.016040639951825142, - "learning_rate": 0.00019998063541911697, - "loss": 46.0, - "step": 38910 - }, - { - "epoch": 6.2663553283143445, - "grad_norm": 0.018864290788769722, - "learning_rate": 0.00019998063442352803, - "loss": 46.0, - "step": 38911 - }, - { - "epoch": 6.266516365393132, - "grad_norm": 0.0028109773993492126, - "learning_rate": 0.00019998063342791352, - "loss": 46.0, - "step": 38912 - }, - { - "epoch": 6.266677402471919, - "grad_norm": 0.00144783069845289, - "learning_rate": 0.0001999806324322734, - "loss": 46.0, - "step": 38913 - }, - { - "epoch": 6.266838439550707, - "grad_norm": 0.0012966226786375046, - "learning_rate": 0.00019998063143660772, - "loss": 46.0, - "step": 38914 - }, - { - "epoch": 6.266999476629494, - "grad_norm": 0.006295338273048401, - "learning_rate": 0.00019998063044091642, - "loss": 46.0, - "step": 38915 - }, - { - "epoch": 6.267160513708282, - "grad_norm": 0.002131564309820533, - "learning_rate": 0.00019998062944519954, - "loss": 46.0, - "step": 38916 - }, - { - "epoch": 6.267321550787068, - "grad_norm": 0.0032865824177861214, - "learning_rate": 0.00019998062844945707, - "loss": 46.0, - "step": 38917 - }, - { - "epoch": 6.267482587865856, - "grad_norm": 0.008777833543717861, - "learning_rate": 0.000199980627453689, - "loss": 46.0, - "step": 38918 - }, - { - "epoch": 6.267643624944643, - "grad_norm": 0.006253014784306288, - "learning_rate": 0.00019998062645789534, - "loss": 46.0, - "step": 38919 - }, - { - "epoch": 6.267804662023431, - "grad_norm": 0.0031594850588589907, - "learning_rate": 0.00019998062546207609, - "loss": 46.0, - "step": 38920 - }, - { - "epoch": 6.267965699102218, - "grad_norm": 0.002162312623113394, - "learning_rate": 0.00019998062446623127, - "loss": 46.0, - "step": 38921 - }, - { - "epoch": 6.268126736181006, - "grad_norm": 0.001528203021734953, - "learning_rate": 0.00019998062347036086, - "loss": 46.0, - "step": 38922 - }, - { - "epoch": 6.268287773259793, - "grad_norm": 0.004104269668459892, - "learning_rate": 0.00019998062247446484, - "loss": 46.0, - "step": 38923 - }, - { - "epoch": 6.2684488103385805, - "grad_norm": 0.013286665081977844, - "learning_rate": 0.00019998062147854324, - "loss": 46.0, - "step": 38924 - }, - { - "epoch": 6.268609847417368, - "grad_norm": 0.0030473757069557905, - "learning_rate": 0.00019998062048259604, - "loss": 46.0, - "step": 38925 - }, - { - "epoch": 6.268770884496155, - "grad_norm": 0.0043985131196677685, - "learning_rate": 0.00019998061948662326, - "loss": 46.0, - "step": 38926 - }, - { - "epoch": 6.268931921574943, - "grad_norm": 0.0037587396800518036, - "learning_rate": 0.0001999806184906249, - "loss": 46.0, - "step": 38927 - }, - { - "epoch": 6.26909295865373, - "grad_norm": 0.003596738213673234, - "learning_rate": 0.00019998061749460094, - "loss": 46.0, - "step": 38928 - }, - { - "epoch": 6.269253995732518, - "grad_norm": 0.006315917707979679, - "learning_rate": 0.00019998061649855137, - "loss": 46.0, - "step": 38929 - }, - { - "epoch": 6.269415032811305, - "grad_norm": 0.002405857667326927, - "learning_rate": 0.00019998061550247624, - "loss": 46.0, - "step": 38930 - }, - { - "epoch": 6.269576069890093, - "grad_norm": 0.015180880203843117, - "learning_rate": 0.00019998061450637552, - "loss": 46.0, - "step": 38931 - }, - { - "epoch": 6.269737106968879, - "grad_norm": 0.0017666129861027002, - "learning_rate": 0.0001999806135102492, - "loss": 46.0, - "step": 38932 - }, - { - "epoch": 6.269898144047667, - "grad_norm": 0.0018145734211429954, - "learning_rate": 0.00019998061251409727, - "loss": 46.0, - "step": 38933 - }, - { - "epoch": 6.270059181126454, - "grad_norm": 0.0033974708057940006, - "learning_rate": 0.0001999806115179198, - "loss": 46.0, - "step": 38934 - }, - { - "epoch": 6.270220218205242, - "grad_norm": 0.009783491492271423, - "learning_rate": 0.00019998061052171667, - "loss": 46.0, - "step": 38935 - }, - { - "epoch": 6.270381255284029, - "grad_norm": 0.0019134997855871916, - "learning_rate": 0.00019998060952548802, - "loss": 46.0, - "step": 38936 - }, - { - "epoch": 6.2705422923628165, - "grad_norm": 0.0031892128754407167, - "learning_rate": 0.00019998060852923372, - "loss": 46.0, - "step": 38937 - }, - { - "epoch": 6.270703329441604, - "grad_norm": 0.004307840019464493, - "learning_rate": 0.00019998060753295387, - "loss": 46.0, - "step": 38938 - }, - { - "epoch": 6.270864366520391, - "grad_norm": 0.005807558540254831, - "learning_rate": 0.00019998060653664843, - "loss": 46.0, - "step": 38939 - }, - { - "epoch": 6.271025403599179, - "grad_norm": 0.022980840876698494, - "learning_rate": 0.0001999806055403174, - "loss": 46.0, - "step": 38940 - }, - { - "epoch": 6.271186440677966, - "grad_norm": 0.001879759831354022, - "learning_rate": 0.00019998060454396078, - "loss": 46.0, - "step": 38941 - }, - { - "epoch": 6.271347477756754, - "grad_norm": 0.009212806820869446, - "learning_rate": 0.00019998060354757855, - "loss": 46.0, - "step": 38942 - }, - { - "epoch": 6.271508514835541, - "grad_norm": 0.005267132073640823, - "learning_rate": 0.00019998060255117074, - "loss": 46.0, - "step": 38943 - }, - { - "epoch": 6.271669551914329, - "grad_norm": 0.00745200552046299, - "learning_rate": 0.00019998060155473733, - "loss": 46.0, - "step": 38944 - }, - { - "epoch": 6.271830588993116, - "grad_norm": 0.00822604913264513, - "learning_rate": 0.00019998060055827834, - "loss": 46.0, - "step": 38945 - }, - { - "epoch": 6.2719916260719035, - "grad_norm": 0.003954382613301277, - "learning_rate": 0.00019998059956179376, - "loss": 46.0, - "step": 38946 - }, - { - "epoch": 6.27215266315069, - "grad_norm": 0.0016620233654975891, - "learning_rate": 0.00019998059856528357, - "loss": 46.0, - "step": 38947 - }, - { - "epoch": 6.2723137002294775, - "grad_norm": 0.009317819029092789, - "learning_rate": 0.00019998059756874784, - "loss": 46.0, - "step": 38948 - }, - { - "epoch": 6.272474737308265, - "grad_norm": 0.009431381709873676, - "learning_rate": 0.00019998059657218647, - "loss": 46.0, - "step": 38949 - }, - { - "epoch": 6.272635774387052, - "grad_norm": 0.007788951508700848, - "learning_rate": 0.00019998059557559955, - "loss": 46.0, - "step": 38950 - }, - { - "epoch": 6.27279681146584, - "grad_norm": 0.0017576408572494984, - "learning_rate": 0.000199980594578987, - "loss": 46.0, - "step": 38951 - }, - { - "epoch": 6.272957848544627, - "grad_norm": 0.0009234852041117847, - "learning_rate": 0.00019998059358234887, - "loss": 46.0, - "step": 38952 - }, - { - "epoch": 6.273118885623415, - "grad_norm": 0.0018828288884833455, - "learning_rate": 0.00019998059258568519, - "loss": 46.0, - "step": 38953 - }, - { - "epoch": 6.273279922702202, - "grad_norm": 0.005209984723478556, - "learning_rate": 0.00019998059158899588, - "loss": 46.0, - "step": 38954 - }, - { - "epoch": 6.27344095978099, - "grad_norm": 0.010661369189620018, - "learning_rate": 0.000199980590592281, - "loss": 46.0, - "step": 38955 - }, - { - "epoch": 6.273601996859777, - "grad_norm": 0.009893474169075489, - "learning_rate": 0.00019998058959554051, - "loss": 46.0, - "step": 38956 - }, - { - "epoch": 6.273763033938565, - "grad_norm": 0.002609674585983157, - "learning_rate": 0.00019998058859877445, - "loss": 46.0, - "step": 38957 - }, - { - "epoch": 6.273924071017352, - "grad_norm": 0.008573015220463276, - "learning_rate": 0.0001999805876019828, - "loss": 46.0, - "step": 38958 - }, - { - "epoch": 6.2740851080961395, - "grad_norm": 0.002283828565850854, - "learning_rate": 0.00019998058660516553, - "loss": 46.0, - "step": 38959 - }, - { - "epoch": 6.274246145174926, - "grad_norm": 0.007836409844458103, - "learning_rate": 0.00019998058560832268, - "loss": 46.0, - "step": 38960 - }, - { - "epoch": 6.2744071822537135, - "grad_norm": 0.001190313370898366, - "learning_rate": 0.0001999805846114543, - "loss": 46.0, - "step": 38961 - }, - { - "epoch": 6.274568219332501, - "grad_norm": 0.003508426481857896, - "learning_rate": 0.00019998058361456024, - "loss": 46.0, - "step": 38962 - }, - { - "epoch": 6.274729256411288, - "grad_norm": 0.0013165523996576667, - "learning_rate": 0.00019998058261764062, - "loss": 46.0, - "step": 38963 - }, - { - "epoch": 6.274890293490076, - "grad_norm": 0.003525273874402046, - "learning_rate": 0.00019998058162069545, - "loss": 46.0, - "step": 38964 - }, - { - "epoch": 6.275051330568863, - "grad_norm": 0.012274498119950294, - "learning_rate": 0.00019998058062372466, - "loss": 46.0, - "step": 38965 - }, - { - "epoch": 6.275212367647651, - "grad_norm": 0.001048225094564259, - "learning_rate": 0.00019998057962672828, - "loss": 46.0, - "step": 38966 - }, - { - "epoch": 6.275373404726438, - "grad_norm": 0.0024400344118475914, - "learning_rate": 0.00019998057862970632, - "loss": 46.0, - "step": 38967 - }, - { - "epoch": 6.275534441805226, - "grad_norm": 0.005804820451885462, - "learning_rate": 0.00019998057763265877, - "loss": 46.0, - "step": 38968 - }, - { - "epoch": 6.275695478884013, - "grad_norm": 0.00299848266877234, - "learning_rate": 0.0001999805766355856, - "loss": 46.0, - "step": 38969 - }, - { - "epoch": 6.275856515962801, - "grad_norm": 0.010823872871696949, - "learning_rate": 0.00019998057563848688, - "loss": 46.0, - "step": 38970 - }, - { - "epoch": 6.276017553041588, - "grad_norm": 0.00453021377325058, - "learning_rate": 0.00019998057464136256, - "loss": 46.0, - "step": 38971 - }, - { - "epoch": 6.2761785901203755, - "grad_norm": 0.0029733553528785706, - "learning_rate": 0.00019998057364421264, - "loss": 46.0, - "step": 38972 - }, - { - "epoch": 6.276339627199163, - "grad_norm": 0.0019039901671931148, - "learning_rate": 0.00019998057264703712, - "loss": 46.0, - "step": 38973 - }, - { - "epoch": 6.27650066427795, - "grad_norm": 0.0020593050867319107, - "learning_rate": 0.00019998057164983602, - "loss": 46.0, - "step": 38974 - }, - { - "epoch": 6.276661701356737, - "grad_norm": 0.0016014946158975363, - "learning_rate": 0.00019998057065260936, - "loss": 46.0, - "step": 38975 - }, - { - "epoch": 6.276822738435524, - "grad_norm": 0.010088545270264149, - "learning_rate": 0.00019998056965535709, - "loss": 46.0, - "step": 38976 - }, - { - "epoch": 6.276983775514312, - "grad_norm": 0.004582440946251154, - "learning_rate": 0.00019998056865807922, - "loss": 46.0, - "step": 38977 - }, - { - "epoch": 6.277144812593099, - "grad_norm": 0.0009791270131245255, - "learning_rate": 0.00019998056766077572, - "loss": 46.0, - "step": 38978 - }, - { - "epoch": 6.277305849671887, - "grad_norm": 0.018884582445025444, - "learning_rate": 0.0001999805666634467, - "loss": 46.0, - "step": 38979 - }, - { - "epoch": 6.277466886750674, - "grad_norm": 0.008716803975403309, - "learning_rate": 0.00019998056566609206, - "loss": 46.0, - "step": 38980 - }, - { - "epoch": 6.277627923829462, - "grad_norm": 0.007112538442015648, - "learning_rate": 0.00019998056466871185, - "loss": 46.0, - "step": 38981 - }, - { - "epoch": 6.277788960908249, - "grad_norm": 0.010333537124097347, - "learning_rate": 0.00019998056367130602, - "loss": 46.0, - "step": 38982 - }, - { - "epoch": 6.277949997987037, - "grad_norm": 0.003959720488637686, - "learning_rate": 0.0001999805626738746, - "loss": 46.0, - "step": 38983 - }, - { - "epoch": 6.278111035065824, - "grad_norm": 0.0013928828993812203, - "learning_rate": 0.0001999805616764176, - "loss": 46.0, - "step": 38984 - }, - { - "epoch": 6.2782720721446115, - "grad_norm": 0.001907423953525722, - "learning_rate": 0.00019998056067893502, - "loss": 46.0, - "step": 38985 - }, - { - "epoch": 6.278433109223399, - "grad_norm": 0.0045247795060276985, - "learning_rate": 0.00019998055968142685, - "loss": 46.0, - "step": 38986 - }, - { - "epoch": 6.278594146302186, - "grad_norm": 0.003401419846341014, - "learning_rate": 0.00019998055868389309, - "loss": 46.0, - "step": 38987 - }, - { - "epoch": 6.278755183380974, - "grad_norm": 0.007197286002337933, - "learning_rate": 0.00019998055768633374, - "loss": 46.0, - "step": 38988 - }, - { - "epoch": 6.278916220459761, - "grad_norm": 0.013942079618573189, - "learning_rate": 0.0001999805566887488, - "loss": 46.0, - "step": 38989 - }, - { - "epoch": 6.279077257538548, - "grad_norm": 0.005511273164302111, - "learning_rate": 0.00019998055569113828, - "loss": 46.0, - "step": 38990 - }, - { - "epoch": 6.279238294617335, - "grad_norm": 0.002227319171652198, - "learning_rate": 0.00019998055469350212, - "loss": 46.0, - "step": 38991 - }, - { - "epoch": 6.279399331696123, - "grad_norm": 0.0025035315193235874, - "learning_rate": 0.00019998055369584042, - "loss": 46.0, - "step": 38992 - }, - { - "epoch": 6.27956036877491, - "grad_norm": 0.0009496422135271132, - "learning_rate": 0.00019998055269815313, - "loss": 46.0, - "step": 38993 - }, - { - "epoch": 6.279721405853698, - "grad_norm": 0.004296082071959972, - "learning_rate": 0.0001999805517004402, - "loss": 46.0, - "step": 38994 - }, - { - "epoch": 6.279882442932485, - "grad_norm": 0.0049510616809129715, - "learning_rate": 0.00019998055070270172, - "loss": 46.0, - "step": 38995 - }, - { - "epoch": 6.2800434800112725, - "grad_norm": 0.004195664543658495, - "learning_rate": 0.00019998054970493768, - "loss": 46.0, - "step": 38996 - }, - { - "epoch": 6.28020451709006, - "grad_norm": 0.003316165180876851, - "learning_rate": 0.000199980548707148, - "loss": 46.0, - "step": 38997 - }, - { - "epoch": 6.2803655541688475, - "grad_norm": 0.005499253049492836, - "learning_rate": 0.00019998054770933274, - "loss": 46.0, - "step": 38998 - }, - { - "epoch": 6.280526591247635, - "grad_norm": 0.025381211191415787, - "learning_rate": 0.00019998054671149188, - "loss": 46.0, - "step": 38999 - }, - { - "epoch": 6.280687628326422, - "grad_norm": 0.010825708508491516, - "learning_rate": 0.00019998054571362546, - "loss": 46.0, - "step": 39000 - }, - { - "epoch": 6.28084866540521, - "grad_norm": 0.0010732181835919619, - "learning_rate": 0.00019998054471573345, - "loss": 46.0, - "step": 39001 - }, - { - "epoch": 6.281009702483997, - "grad_norm": 0.005935711786150932, - "learning_rate": 0.00019998054371781583, - "loss": 46.0, - "step": 39002 - }, - { - "epoch": 6.281170739562785, - "grad_norm": 0.0020413235761225224, - "learning_rate": 0.00019998054271987262, - "loss": 46.0, - "step": 39003 - }, - { - "epoch": 6.281331776641572, - "grad_norm": 0.009944637306034565, - "learning_rate": 0.00019998054172190385, - "loss": 46.0, - "step": 39004 - }, - { - "epoch": 6.281492813720359, - "grad_norm": 0.008419917896389961, - "learning_rate": 0.00019998054072390943, - "loss": 46.0, - "step": 39005 - }, - { - "epoch": 6.281653850799146, - "grad_norm": 0.007590579334646463, - "learning_rate": 0.00019998053972588946, - "loss": 46.0, - "step": 39006 - }, - { - "epoch": 6.281814887877934, - "grad_norm": 0.0017160444986075163, - "learning_rate": 0.0001999805387278439, - "loss": 46.0, - "step": 39007 - }, - { - "epoch": 6.281975924956721, - "grad_norm": 0.01751594804227352, - "learning_rate": 0.00019998053772977276, - "loss": 46.0, - "step": 39008 - }, - { - "epoch": 6.2821369620355085, - "grad_norm": 0.017763033509254456, - "learning_rate": 0.000199980536731676, - "loss": 46.0, - "step": 39009 - }, - { - "epoch": 6.282297999114296, - "grad_norm": 0.0032215043902397156, - "learning_rate": 0.00019998053573355368, - "loss": 46.0, - "step": 39010 - }, - { - "epoch": 6.282459036193083, - "grad_norm": 0.006471071392297745, - "learning_rate": 0.00019998053473540574, - "loss": 46.0, - "step": 39011 - }, - { - "epoch": 6.282620073271871, - "grad_norm": 0.008333157747983932, - "learning_rate": 0.00019998053373723225, - "loss": 46.0, - "step": 39012 - }, - { - "epoch": 6.282781110350658, - "grad_norm": 0.0006876584375277162, - "learning_rate": 0.00019998053273903314, - "loss": 46.0, - "step": 39013 - }, - { - "epoch": 6.282942147429446, - "grad_norm": 0.007667304016649723, - "learning_rate": 0.00019998053174080844, - "loss": 46.0, - "step": 39014 - }, - { - "epoch": 6.283103184508233, - "grad_norm": 0.006231206003576517, - "learning_rate": 0.00019998053074255819, - "loss": 46.0, - "step": 39015 - }, - { - "epoch": 6.283264221587021, - "grad_norm": 0.0014082818524912, - "learning_rate": 0.0001999805297442823, - "loss": 46.0, - "step": 39016 - }, - { - "epoch": 6.283425258665808, - "grad_norm": 0.005389024503529072, - "learning_rate": 0.00019998052874598083, - "loss": 46.0, - "step": 39017 - }, - { - "epoch": 6.283586295744596, - "grad_norm": 0.01694563962519169, - "learning_rate": 0.00019998052774765376, - "loss": 46.0, - "step": 39018 - }, - { - "epoch": 6.283747332823383, - "grad_norm": 0.006275527644902468, - "learning_rate": 0.00019998052674930113, - "loss": 46.0, - "step": 39019 - }, - { - "epoch": 6.28390836990217, - "grad_norm": 0.002231972524896264, - "learning_rate": 0.0001999805257509229, - "loss": 46.0, - "step": 39020 - }, - { - "epoch": 6.284069406980957, - "grad_norm": 0.006555341184139252, - "learning_rate": 0.0001999805247525191, - "loss": 46.0, - "step": 39021 - }, - { - "epoch": 6.2842304440597445, - "grad_norm": 0.010288205929100513, - "learning_rate": 0.00019998052375408968, - "loss": 46.0, - "step": 39022 - }, - { - "epoch": 6.284391481138532, - "grad_norm": 0.002767049940302968, - "learning_rate": 0.00019998052275563467, - "loss": 46.0, - "step": 39023 - }, - { - "epoch": 6.284552518217319, - "grad_norm": 0.0039021295960992575, - "learning_rate": 0.00019998052175715408, - "loss": 46.0, - "step": 39024 - }, - { - "epoch": 6.284713555296107, - "grad_norm": 0.0073213232681155205, - "learning_rate": 0.0001999805207586479, - "loss": 46.0, - "step": 39025 - }, - { - "epoch": 6.284874592374894, - "grad_norm": 0.002442700555548072, - "learning_rate": 0.00019998051976011612, - "loss": 46.0, - "step": 39026 - }, - { - "epoch": 6.285035629453682, - "grad_norm": 0.00956632848829031, - "learning_rate": 0.00019998051876155877, - "loss": 46.0, - "step": 39027 - }, - { - "epoch": 6.285196666532469, - "grad_norm": 0.0088145537301898, - "learning_rate": 0.00019998051776297582, - "loss": 46.0, - "step": 39028 - }, - { - "epoch": 6.285357703611257, - "grad_norm": 0.0022800287697464228, - "learning_rate": 0.00019998051676436727, - "loss": 46.0, - "step": 39029 - }, - { - "epoch": 6.285518740690044, - "grad_norm": 0.013208272866904736, - "learning_rate": 0.00019998051576573315, - "loss": 46.0, - "step": 39030 - }, - { - "epoch": 6.285679777768832, - "grad_norm": 0.012782255187630653, - "learning_rate": 0.00019998051476707342, - "loss": 46.0, - "step": 39031 - }, - { - "epoch": 6.285840814847619, - "grad_norm": 0.0017857920611277223, - "learning_rate": 0.00019998051376838812, - "loss": 46.0, - "step": 39032 - }, - { - "epoch": 6.286001851926406, - "grad_norm": 0.006807293277233839, - "learning_rate": 0.00019998051276967722, - "loss": 46.0, - "step": 39033 - }, - { - "epoch": 6.286162889005193, - "grad_norm": 0.003250164445489645, - "learning_rate": 0.00019998051177094072, - "loss": 46.0, - "step": 39034 - }, - { - "epoch": 6.2863239260839805, - "grad_norm": 0.0012515768175944686, - "learning_rate": 0.00019998051077217864, - "loss": 46.0, - "step": 39035 - }, - { - "epoch": 6.286484963162768, - "grad_norm": 0.0008437428041361272, - "learning_rate": 0.00019998050977339097, - "loss": 46.0, - "step": 39036 - }, - { - "epoch": 6.286646000241555, - "grad_norm": 0.002129937754943967, - "learning_rate": 0.00019998050877457772, - "loss": 46.0, - "step": 39037 - }, - { - "epoch": 6.286807037320343, - "grad_norm": 0.005391864571720362, - "learning_rate": 0.00019998050777573888, - "loss": 46.0, - "step": 39038 - }, - { - "epoch": 6.28696807439913, - "grad_norm": 0.0026512891054153442, - "learning_rate": 0.00019998050677687445, - "loss": 46.0, - "step": 39039 - }, - { - "epoch": 6.287129111477918, - "grad_norm": 0.006737624295055866, - "learning_rate": 0.0001999805057779844, - "loss": 46.0, - "step": 39040 - }, - { - "epoch": 6.287290148556705, - "grad_norm": 0.0065542832016944885, - "learning_rate": 0.00019998050477906877, - "loss": 46.0, - "step": 39041 - }, - { - "epoch": 6.287451185635493, - "grad_norm": 0.009030701592564583, - "learning_rate": 0.00019998050378012758, - "loss": 46.0, - "step": 39042 - }, - { - "epoch": 6.28761222271428, - "grad_norm": 0.015139184892177582, - "learning_rate": 0.00019998050278116077, - "loss": 46.0, - "step": 39043 - }, - { - "epoch": 6.287773259793068, - "grad_norm": 0.025006920099258423, - "learning_rate": 0.00019998050178216838, - "loss": 46.0, - "step": 39044 - }, - { - "epoch": 6.287934296871855, - "grad_norm": 0.0011401117080822587, - "learning_rate": 0.0001999805007831504, - "loss": 46.0, - "step": 39045 - }, - { - "epoch": 6.2880953339506425, - "grad_norm": 0.0029454021714627743, - "learning_rate": 0.00019998049978410683, - "loss": 46.0, - "step": 39046 - }, - { - "epoch": 6.28825637102943, - "grad_norm": 0.0033295098692178726, - "learning_rate": 0.00019998049878503768, - "loss": 46.0, - "step": 39047 - }, - { - "epoch": 6.2884174081082165, - "grad_norm": 0.0015547252260148525, - "learning_rate": 0.0001999804977859429, - "loss": 46.0, - "step": 39048 - }, - { - "epoch": 6.288578445187004, - "grad_norm": 0.02438298799097538, - "learning_rate": 0.00019998049678682258, - "loss": 46.0, - "step": 39049 - }, - { - "epoch": 6.288739482265791, - "grad_norm": 0.002501065144315362, - "learning_rate": 0.00019998049578767667, - "loss": 46.0, - "step": 39050 - }, - { - "epoch": 6.288900519344579, - "grad_norm": 0.0012736415956169367, - "learning_rate": 0.00019998049478850514, - "loss": 46.0, - "step": 39051 - }, - { - "epoch": 6.289061556423366, - "grad_norm": 0.009752868674695492, - "learning_rate": 0.00019998049378930802, - "loss": 46.0, - "step": 39052 - }, - { - "epoch": 6.289222593502154, - "grad_norm": 0.000991630950011313, - "learning_rate": 0.00019998049279008532, - "loss": 46.0, - "step": 39053 - }, - { - "epoch": 6.289383630580941, - "grad_norm": 0.005731829907745123, - "learning_rate": 0.00019998049179083705, - "loss": 46.0, - "step": 39054 - }, - { - "epoch": 6.289544667659729, - "grad_norm": 0.009796880185604095, - "learning_rate": 0.00019998049079156317, - "loss": 46.0, - "step": 39055 - }, - { - "epoch": 6.289705704738516, - "grad_norm": 0.0018510626396164298, - "learning_rate": 0.00019998048979226368, - "loss": 46.0, - "step": 39056 - }, - { - "epoch": 6.2898667418173035, - "grad_norm": 0.001044785720296204, - "learning_rate": 0.00019998048879293865, - "loss": 46.0, - "step": 39057 - }, - { - "epoch": 6.290027778896091, - "grad_norm": 0.004461322911083698, - "learning_rate": 0.000199980487793588, - "loss": 46.0, - "step": 39058 - }, - { - "epoch": 6.2901888159748784, - "grad_norm": 0.0017150011844933033, - "learning_rate": 0.00019998048679421176, - "loss": 46.0, - "step": 39059 - }, - { - "epoch": 6.290349853053666, - "grad_norm": 0.005361329764127731, - "learning_rate": 0.00019998048579480994, - "loss": 46.0, - "step": 39060 - }, - { - "epoch": 6.290510890132453, - "grad_norm": 0.013194623403251171, - "learning_rate": 0.00019998048479538251, - "loss": 46.0, - "step": 39061 - }, - { - "epoch": 6.290671927211241, - "grad_norm": 0.003736241487786174, - "learning_rate": 0.0001999804837959295, - "loss": 46.0, - "step": 39062 - }, - { - "epoch": 6.290832964290027, - "grad_norm": 0.006789695471525192, - "learning_rate": 0.00019998048279645092, - "loss": 46.0, - "step": 39063 - }, - { - "epoch": 6.290994001368815, - "grad_norm": 0.005020936485379934, - "learning_rate": 0.00019998048179694673, - "loss": 46.0, - "step": 39064 - }, - { - "epoch": 6.291155038447602, - "grad_norm": 0.0038886622060090303, - "learning_rate": 0.00019998048079741695, - "loss": 46.0, - "step": 39065 - }, - { - "epoch": 6.29131607552639, - "grad_norm": 0.002402149373665452, - "learning_rate": 0.0001999804797978616, - "loss": 46.0, - "step": 39066 - }, - { - "epoch": 6.291477112605177, - "grad_norm": 0.003210574621334672, - "learning_rate": 0.00019998047879828064, - "loss": 46.0, - "step": 39067 - }, - { - "epoch": 6.291638149683965, - "grad_norm": 0.00162588432431221, - "learning_rate": 0.00019998047779867407, - "loss": 46.0, - "step": 39068 - }, - { - "epoch": 6.291799186762752, - "grad_norm": 0.008654010482132435, - "learning_rate": 0.00019998047679904195, - "loss": 46.0, - "step": 39069 - }, - { - "epoch": 6.2919602238415395, - "grad_norm": 0.001888230093754828, - "learning_rate": 0.0001999804757993842, - "loss": 46.0, - "step": 39070 - }, - { - "epoch": 6.292121260920327, - "grad_norm": 0.007668881211429834, - "learning_rate": 0.0001999804747997009, - "loss": 46.0, - "step": 39071 - }, - { - "epoch": 6.292282297999114, - "grad_norm": 0.004537229426205158, - "learning_rate": 0.00019998047379999202, - "loss": 46.0, - "step": 39072 - }, - { - "epoch": 6.292443335077902, - "grad_norm": 0.020649900659918785, - "learning_rate": 0.0001999804728002575, - "loss": 46.0, - "step": 39073 - }, - { - "epoch": 6.292604372156689, - "grad_norm": 0.0029868492856621742, - "learning_rate": 0.00019998047180049743, - "loss": 46.0, - "step": 39074 - }, - { - "epoch": 6.292765409235477, - "grad_norm": 0.014783100225031376, - "learning_rate": 0.00019998047080071175, - "loss": 46.0, - "step": 39075 - }, - { - "epoch": 6.292926446314264, - "grad_norm": 0.0035305072087794542, - "learning_rate": 0.00019998046980090049, - "loss": 46.0, - "step": 39076 - }, - { - "epoch": 6.293087483393052, - "grad_norm": 0.009474482387304306, - "learning_rate": 0.00019998046880106364, - "loss": 46.0, - "step": 39077 - }, - { - "epoch": 6.293248520471838, - "grad_norm": 0.010321266017854214, - "learning_rate": 0.00019998046780120117, - "loss": 46.0, - "step": 39078 - }, - { - "epoch": 6.293409557550626, - "grad_norm": 0.006980300415307283, - "learning_rate": 0.00019998046680131315, - "loss": 46.0, - "step": 39079 - }, - { - "epoch": 6.293570594629413, - "grad_norm": 0.0057964324951171875, - "learning_rate": 0.0001999804658013995, - "loss": 46.0, - "step": 39080 - }, - { - "epoch": 6.293731631708201, - "grad_norm": 0.01205200981348753, - "learning_rate": 0.0001999804648014603, - "loss": 46.0, - "step": 39081 - }, - { - "epoch": 6.293892668786988, - "grad_norm": 0.00474933348596096, - "learning_rate": 0.0001999804638014955, - "loss": 46.0, - "step": 39082 - }, - { - "epoch": 6.2940537058657755, - "grad_norm": 0.004265707451850176, - "learning_rate": 0.00019998046280150512, - "loss": 46.0, - "step": 39083 - }, - { - "epoch": 6.294214742944563, - "grad_norm": 0.007605432998389006, - "learning_rate": 0.00019998046180148913, - "loss": 46.0, - "step": 39084 - }, - { - "epoch": 6.29437578002335, - "grad_norm": 0.007893330417573452, - "learning_rate": 0.00019998046080144756, - "loss": 46.0, - "step": 39085 - }, - { - "epoch": 6.294536817102138, - "grad_norm": 0.001544732484035194, - "learning_rate": 0.0001999804598013804, - "loss": 46.0, - "step": 39086 - }, - { - "epoch": 6.294697854180925, - "grad_norm": 0.017420900985598564, - "learning_rate": 0.00019998045880128764, - "loss": 46.0, - "step": 39087 - }, - { - "epoch": 6.294858891259713, - "grad_norm": 0.013849043287336826, - "learning_rate": 0.0001999804578011693, - "loss": 46.0, - "step": 39088 - }, - { - "epoch": 6.2950199283385, - "grad_norm": 0.013542650267481804, - "learning_rate": 0.00019998045680102538, - "loss": 46.0, - "step": 39089 - }, - { - "epoch": 6.295180965417288, - "grad_norm": 0.003099643625319004, - "learning_rate": 0.00019998045580085582, - "loss": 46.0, - "step": 39090 - }, - { - "epoch": 6.295342002496075, - "grad_norm": 0.003130297176539898, - "learning_rate": 0.00019998045480066072, - "loss": 46.0, - "step": 39091 - }, - { - "epoch": 6.295503039574863, - "grad_norm": 0.004167810548096895, - "learning_rate": 0.00019998045380044, - "loss": 46.0, - "step": 39092 - }, - { - "epoch": 6.295664076653649, - "grad_norm": 0.0021290292497724295, - "learning_rate": 0.00019998045280019374, - "loss": 46.0, - "step": 39093 - }, - { - "epoch": 6.295825113732437, - "grad_norm": 0.0017451278399676085, - "learning_rate": 0.00019998045179992185, - "loss": 46.0, - "step": 39094 - }, - { - "epoch": 6.295986150811224, - "grad_norm": 0.0014398435596376657, - "learning_rate": 0.00019998045079962437, - "loss": 46.0, - "step": 39095 - }, - { - "epoch": 6.2961471878900115, - "grad_norm": 0.011341184377670288, - "learning_rate": 0.00019998044979930131, - "loss": 46.0, - "step": 39096 - }, - { - "epoch": 6.296308224968799, - "grad_norm": 0.002223289804533124, - "learning_rate": 0.00019998044879895267, - "loss": 46.0, - "step": 39097 - }, - { - "epoch": 6.296469262047586, - "grad_norm": 0.0019719330593943596, - "learning_rate": 0.0001999804477985784, - "loss": 46.0, - "step": 39098 - }, - { - "epoch": 6.296630299126374, - "grad_norm": 0.009261532686650753, - "learning_rate": 0.0001999804467981786, - "loss": 46.0, - "step": 39099 - }, - { - "epoch": 6.296791336205161, - "grad_norm": 0.001704698195680976, - "learning_rate": 0.00019998044579775317, - "loss": 46.0, - "step": 39100 - }, - { - "epoch": 6.296952373283949, - "grad_norm": 0.0019331870134919882, - "learning_rate": 0.00019998044479730215, - "loss": 46.0, - "step": 39101 - }, - { - "epoch": 6.297113410362736, - "grad_norm": 0.0017456094501540065, - "learning_rate": 0.00019998044379682553, - "loss": 46.0, - "step": 39102 - }, - { - "epoch": 6.297274447441524, - "grad_norm": 0.004795679822564125, - "learning_rate": 0.00019998044279632336, - "loss": 46.0, - "step": 39103 - }, - { - "epoch": 6.297435484520311, - "grad_norm": 0.02229963429272175, - "learning_rate": 0.00019998044179579558, - "loss": 46.0, - "step": 39104 - }, - { - "epoch": 6.2975965215990986, - "grad_norm": 0.018068289384245872, - "learning_rate": 0.0001999804407952422, - "loss": 46.0, - "step": 39105 - }, - { - "epoch": 6.297757558677885, - "grad_norm": 0.017323555424809456, - "learning_rate": 0.00019998043979466325, - "loss": 46.0, - "step": 39106 - }, - { - "epoch": 6.297918595756673, - "grad_norm": 0.001503427280113101, - "learning_rate": 0.0001999804387940587, - "loss": 46.0, - "step": 39107 - }, - { - "epoch": 6.29807963283546, - "grad_norm": 0.003334401175379753, - "learning_rate": 0.00019998043779342854, - "loss": 46.0, - "step": 39108 - }, - { - "epoch": 6.2982406699142475, - "grad_norm": 0.011444994248449802, - "learning_rate": 0.00019998043679277282, - "loss": 46.0, - "step": 39109 - }, - { - "epoch": 6.298401706993035, - "grad_norm": 0.011853308416903019, - "learning_rate": 0.00019998043579209148, - "loss": 46.0, - "step": 39110 - }, - { - "epoch": 6.298562744071822, - "grad_norm": 0.0029568809550255537, - "learning_rate": 0.00019998043479138458, - "loss": 46.0, - "step": 39111 - }, - { - "epoch": 6.29872378115061, - "grad_norm": 0.0021663238294422626, - "learning_rate": 0.00019998043379065207, - "loss": 46.0, - "step": 39112 - }, - { - "epoch": 6.298884818229397, - "grad_norm": 0.004826546646654606, - "learning_rate": 0.000199980432789894, - "loss": 46.0, - "step": 39113 - }, - { - "epoch": 6.299045855308185, - "grad_norm": 0.001618703012354672, - "learning_rate": 0.00019998043178911032, - "loss": 46.0, - "step": 39114 - }, - { - "epoch": 6.299206892386972, - "grad_norm": 0.006082882639020681, - "learning_rate": 0.00019998043078830105, - "loss": 46.0, - "step": 39115 - }, - { - "epoch": 6.29936792946576, - "grad_norm": 0.0039020765107125044, - "learning_rate": 0.00019998042978746622, - "loss": 46.0, - "step": 39116 - }, - { - "epoch": 6.299528966544547, - "grad_norm": 0.006699398625642061, - "learning_rate": 0.00019998042878660574, - "loss": 46.0, - "step": 39117 - }, - { - "epoch": 6.2996900036233345, - "grad_norm": 0.004362224135547876, - "learning_rate": 0.0001999804277857197, - "loss": 46.0, - "step": 39118 - }, - { - "epoch": 6.299851040702122, - "grad_norm": 0.004655822180211544, - "learning_rate": 0.0001999804267848081, - "loss": 46.0, - "step": 39119 - }, - { - "epoch": 6.300012077780909, - "grad_norm": 0.011351593770086765, - "learning_rate": 0.00019998042578387085, - "loss": 46.0, - "step": 39120 - }, - { - "epoch": 6.300173114859696, - "grad_norm": 0.0015688723651692271, - "learning_rate": 0.00019998042478290806, - "loss": 46.0, - "step": 39121 - }, - { - "epoch": 6.3003341519384835, - "grad_norm": 0.01027495414018631, - "learning_rate": 0.00019998042378191965, - "loss": 46.0, - "step": 39122 - }, - { - "epoch": 6.300495189017271, - "grad_norm": 0.008149495348334312, - "learning_rate": 0.00019998042278090568, - "loss": 46.0, - "step": 39123 - }, - { - "epoch": 6.300656226096058, - "grad_norm": 0.005291405133903027, - "learning_rate": 0.00019998042177986607, - "loss": 46.0, - "step": 39124 - }, - { - "epoch": 6.300817263174846, - "grad_norm": 0.013073574751615524, - "learning_rate": 0.00019998042077880093, - "loss": 46.0, - "step": 39125 - }, - { - "epoch": 6.300978300253633, - "grad_norm": 0.0036625864449888468, - "learning_rate": 0.00019998041977771017, - "loss": 46.0, - "step": 39126 - }, - { - "epoch": 6.301139337332421, - "grad_norm": 0.003950038924813271, - "learning_rate": 0.00019998041877659383, - "loss": 46.0, - "step": 39127 - }, - { - "epoch": 6.301300374411208, - "grad_norm": 0.008592743426561356, - "learning_rate": 0.00019998041777545187, - "loss": 46.0, - "step": 39128 - }, - { - "epoch": 6.301461411489996, - "grad_norm": 0.0017043410334736109, - "learning_rate": 0.00019998041677428435, - "loss": 46.0, - "step": 39129 - }, - { - "epoch": 6.301622448568783, - "grad_norm": 0.007966981269419193, - "learning_rate": 0.00019998041577309121, - "loss": 46.0, - "step": 39130 - }, - { - "epoch": 6.3017834856475705, - "grad_norm": 0.01137902494519949, - "learning_rate": 0.0001999804147718725, - "loss": 46.0, - "step": 39131 - }, - { - "epoch": 6.301944522726358, - "grad_norm": 0.0053497301414608955, - "learning_rate": 0.0001999804137706282, - "loss": 46.0, - "step": 39132 - }, - { - "epoch": 6.302105559805145, - "grad_norm": 0.009508968330919743, - "learning_rate": 0.00019998041276935835, - "loss": 46.0, - "step": 39133 - }, - { - "epoch": 6.302266596883933, - "grad_norm": 0.013140874914824963, - "learning_rate": 0.00019998041176806286, - "loss": 46.0, - "step": 39134 - }, - { - "epoch": 6.30242763396272, - "grad_norm": 0.0019670783076435328, - "learning_rate": 0.0001999804107667418, - "loss": 46.0, - "step": 39135 - }, - { - "epoch": 6.302588671041507, - "grad_norm": 0.005829277448356152, - "learning_rate": 0.00019998040976539514, - "loss": 46.0, - "step": 39136 - }, - { - "epoch": 6.302749708120294, - "grad_norm": 0.0085930610075593, - "learning_rate": 0.0001999804087640229, - "loss": 46.0, - "step": 39137 - }, - { - "epoch": 6.302910745199082, - "grad_norm": 0.0019664703868329525, - "learning_rate": 0.00019998040776262506, - "loss": 46.0, - "step": 39138 - }, - { - "epoch": 6.303071782277869, - "grad_norm": 0.008131389506161213, - "learning_rate": 0.00019998040676120162, - "loss": 46.0, - "step": 39139 - }, - { - "epoch": 6.303232819356657, - "grad_norm": 0.0008929395698942244, - "learning_rate": 0.0001999804057597526, - "loss": 46.0, - "step": 39140 - }, - { - "epoch": 6.303393856435444, - "grad_norm": 0.0014199292054399848, - "learning_rate": 0.00019998040475827802, - "loss": 46.0, - "step": 39141 - }, - { - "epoch": 6.303554893514232, - "grad_norm": 0.007445950526744127, - "learning_rate": 0.00019998040375677781, - "loss": 46.0, - "step": 39142 - }, - { - "epoch": 6.303715930593019, - "grad_norm": 0.004941760562360287, - "learning_rate": 0.000199980402755252, - "loss": 46.0, - "step": 39143 - }, - { - "epoch": 6.3038769676718065, - "grad_norm": 0.0027029714547097683, - "learning_rate": 0.00019998040175370064, - "loss": 46.0, - "step": 39144 - }, - { - "epoch": 6.304038004750594, - "grad_norm": 0.003458069171756506, - "learning_rate": 0.00019998040075212367, - "loss": 46.0, - "step": 39145 - }, - { - "epoch": 6.304199041829381, - "grad_norm": 0.005030560307204723, - "learning_rate": 0.00019998039975052114, - "loss": 46.0, - "step": 39146 - }, - { - "epoch": 6.304360078908169, - "grad_norm": 0.014450975693762302, - "learning_rate": 0.00019998039874889297, - "loss": 46.0, - "step": 39147 - }, - { - "epoch": 6.304521115986956, - "grad_norm": 0.00146191893145442, - "learning_rate": 0.00019998039774723924, - "loss": 46.0, - "step": 39148 - }, - { - "epoch": 6.304682153065744, - "grad_norm": 0.005476540885865688, - "learning_rate": 0.00019998039674555993, - "loss": 46.0, - "step": 39149 - }, - { - "epoch": 6.304843190144531, - "grad_norm": 0.004657631739974022, - "learning_rate": 0.000199980395743855, - "loss": 46.0, - "step": 39150 - }, - { - "epoch": 6.305004227223318, - "grad_norm": 0.005235560238361359, - "learning_rate": 0.0001999803947421245, - "loss": 46.0, - "step": 39151 - }, - { - "epoch": 6.305165264302105, - "grad_norm": 0.001904643140733242, - "learning_rate": 0.0001999803937403684, - "loss": 46.0, - "step": 39152 - }, - { - "epoch": 6.305326301380893, - "grad_norm": 0.00708166416734457, - "learning_rate": 0.00019998039273858673, - "loss": 46.0, - "step": 39153 - }, - { - "epoch": 6.30548733845968, - "grad_norm": 0.0015886577311903238, - "learning_rate": 0.00019998039173677946, - "loss": 46.0, - "step": 39154 - }, - { - "epoch": 6.305648375538468, - "grad_norm": 0.012389445677399635, - "learning_rate": 0.0001999803907349466, - "loss": 46.0, - "step": 39155 - }, - { - "epoch": 6.305809412617255, - "grad_norm": 0.00206724158488214, - "learning_rate": 0.00019998038973308813, - "loss": 46.0, - "step": 39156 - }, - { - "epoch": 6.3059704496960425, - "grad_norm": 0.0034249427262693644, - "learning_rate": 0.0001999803887312041, - "loss": 46.0, - "step": 39157 - }, - { - "epoch": 6.30613148677483, - "grad_norm": 0.003982074558734894, - "learning_rate": 0.00019998038772929446, - "loss": 46.0, - "step": 39158 - }, - { - "epoch": 6.306292523853617, - "grad_norm": 0.008811620064079762, - "learning_rate": 0.00019998038672735925, - "loss": 46.0, - "step": 39159 - }, - { - "epoch": 6.306453560932405, - "grad_norm": 0.00203324225731194, - "learning_rate": 0.00019998038572539845, - "loss": 46.0, - "step": 39160 - }, - { - "epoch": 6.306614598011192, - "grad_norm": 0.0016288454644382, - "learning_rate": 0.00019998038472341203, - "loss": 46.0, - "step": 39161 - }, - { - "epoch": 6.30677563508998, - "grad_norm": 0.005029772873967886, - "learning_rate": 0.00019998038372140003, - "loss": 46.0, - "step": 39162 - }, - { - "epoch": 6.306936672168767, - "grad_norm": 0.0026036540511995554, - "learning_rate": 0.00019998038271936243, - "loss": 46.0, - "step": 39163 - }, - { - "epoch": 6.307097709247555, - "grad_norm": 0.008446864783763885, - "learning_rate": 0.00019998038171729928, - "loss": 46.0, - "step": 39164 - }, - { - "epoch": 6.307258746326342, - "grad_norm": 0.004770705010741949, - "learning_rate": 0.0001999803807152105, - "loss": 46.0, - "step": 39165 - }, - { - "epoch": 6.307419783405129, - "grad_norm": 0.006333968602120876, - "learning_rate": 0.00019998037971309616, - "loss": 46.0, - "step": 39166 - }, - { - "epoch": 6.307580820483916, - "grad_norm": 0.0029384917579591274, - "learning_rate": 0.0001999803787109562, - "loss": 46.0, - "step": 39167 - }, - { - "epoch": 6.307741857562704, - "grad_norm": 0.001456237630918622, - "learning_rate": 0.00019998037770879067, - "loss": 46.0, - "step": 39168 - }, - { - "epoch": 6.307902894641491, - "grad_norm": 0.0021201788913458586, - "learning_rate": 0.00019998037670659958, - "loss": 46.0, - "step": 39169 - }, - { - "epoch": 6.3080639317202785, - "grad_norm": 0.027082212269306183, - "learning_rate": 0.00019998037570438285, - "loss": 46.0, - "step": 39170 - }, - { - "epoch": 6.308224968799066, - "grad_norm": 0.003020569682121277, - "learning_rate": 0.00019998037470214054, - "loss": 46.0, - "step": 39171 - }, - { - "epoch": 6.308386005877853, - "grad_norm": 0.005190604366362095, - "learning_rate": 0.00019998037369987266, - "loss": 46.0, - "step": 39172 - }, - { - "epoch": 6.308547042956641, - "grad_norm": 0.007044903002679348, - "learning_rate": 0.00019998037269757917, - "loss": 46.0, - "step": 39173 - }, - { - "epoch": 6.308708080035428, - "grad_norm": 0.0012601400958374143, - "learning_rate": 0.0001999803716952601, - "loss": 46.0, - "step": 39174 - }, - { - "epoch": 6.308869117114216, - "grad_norm": 0.0019907911773771048, - "learning_rate": 0.00019998037069291543, - "loss": 46.0, - "step": 39175 - }, - { - "epoch": 6.309030154193003, - "grad_norm": 0.009525091387331486, - "learning_rate": 0.00019998036969054518, - "loss": 46.0, - "step": 39176 - }, - { - "epoch": 6.309191191271791, - "grad_norm": 0.00317505095154047, - "learning_rate": 0.00019998036868814937, - "loss": 46.0, - "step": 39177 - }, - { - "epoch": 6.309352228350578, - "grad_norm": 0.015424644574522972, - "learning_rate": 0.0001999803676857279, - "loss": 46.0, - "step": 39178 - }, - { - "epoch": 6.3095132654293655, - "grad_norm": 0.003346010809764266, - "learning_rate": 0.0001999803666832809, - "loss": 46.0, - "step": 39179 - }, - { - "epoch": 6.309674302508152, - "grad_norm": 0.0050209881737828255, - "learning_rate": 0.0001999803656808083, - "loss": 46.0, - "step": 39180 - }, - { - "epoch": 6.3098353395869395, - "grad_norm": 0.002247745404019952, - "learning_rate": 0.00019998036467831008, - "loss": 46.0, - "step": 39181 - }, - { - "epoch": 6.309996376665727, - "grad_norm": 0.0019059209153056145, - "learning_rate": 0.0001999803636757863, - "loss": 46.0, - "step": 39182 - }, - { - "epoch": 6.310157413744514, - "grad_norm": 0.007723457179963589, - "learning_rate": 0.00019998036267323692, - "loss": 46.0, - "step": 39183 - }, - { - "epoch": 6.310318450823302, - "grad_norm": 0.0029650733340531588, - "learning_rate": 0.00019998036167066197, - "loss": 46.0, - "step": 39184 - }, - { - "epoch": 6.310479487902089, - "grad_norm": 0.005114555358886719, - "learning_rate": 0.00019998036066806138, - "loss": 46.0, - "step": 39185 - }, - { - "epoch": 6.310640524980877, - "grad_norm": 0.001683143200352788, - "learning_rate": 0.00019998035966543526, - "loss": 46.0, - "step": 39186 - }, - { - "epoch": 6.310801562059664, - "grad_norm": 0.0021252150181680918, - "learning_rate": 0.0001999803586627835, - "loss": 46.0, - "step": 39187 - }, - { - "epoch": 6.310962599138452, - "grad_norm": 0.005112302955240011, - "learning_rate": 0.00019998035766010617, - "loss": 46.0, - "step": 39188 - }, - { - "epoch": 6.311123636217239, - "grad_norm": 0.0043867845088243484, - "learning_rate": 0.00019998035665740326, - "loss": 46.0, - "step": 39189 - }, - { - "epoch": 6.311284673296027, - "grad_norm": 0.00221996963955462, - "learning_rate": 0.00019998035565467476, - "loss": 46.0, - "step": 39190 - }, - { - "epoch": 6.311445710374814, - "grad_norm": 0.0011288755340501666, - "learning_rate": 0.00019998035465192064, - "loss": 46.0, - "step": 39191 - }, - { - "epoch": 6.3116067474536015, - "grad_norm": 0.0027596564032137394, - "learning_rate": 0.00019998035364914097, - "loss": 46.0, - "step": 39192 - }, - { - "epoch": 6.311767784532389, - "grad_norm": 0.007949800230562687, - "learning_rate": 0.00019998035264633568, - "loss": 46.0, - "step": 39193 - }, - { - "epoch": 6.3119288216111755, - "grad_norm": 0.001189812202937901, - "learning_rate": 0.0001999803516435048, - "loss": 46.0, - "step": 39194 - }, - { - "epoch": 6.312089858689963, - "grad_norm": 0.002585930284112692, - "learning_rate": 0.00019998035064064835, - "loss": 46.0, - "step": 39195 - }, - { - "epoch": 6.31225089576875, - "grad_norm": 0.009255031123757362, - "learning_rate": 0.0001999803496377663, - "loss": 46.0, - "step": 39196 - }, - { - "epoch": 6.312411932847538, - "grad_norm": 0.006625410169363022, - "learning_rate": 0.0001999803486348587, - "loss": 46.0, - "step": 39197 - }, - { - "epoch": 6.312572969926325, - "grad_norm": 0.005916438531130552, - "learning_rate": 0.00019998034763192544, - "loss": 46.0, - "step": 39198 - }, - { - "epoch": 6.312734007005113, - "grad_norm": 0.001434377976693213, - "learning_rate": 0.00019998034662896663, - "loss": 46.0, - "step": 39199 - }, - { - "epoch": 6.3128950440839, - "grad_norm": 0.0037752208299934864, - "learning_rate": 0.00019998034562598223, - "loss": 46.0, - "step": 39200 - }, - { - "epoch": 6.313056081162688, - "grad_norm": 0.003529967973008752, - "learning_rate": 0.00019998034462297221, - "loss": 46.0, - "step": 39201 - }, - { - "epoch": 6.313217118241475, - "grad_norm": 0.011132239364087582, - "learning_rate": 0.00019998034361993664, - "loss": 46.0, - "step": 39202 - }, - { - "epoch": 6.313378155320263, - "grad_norm": 0.016028204932808876, - "learning_rate": 0.00019998034261687546, - "loss": 46.0, - "step": 39203 - }, - { - "epoch": 6.31353919239905, - "grad_norm": 0.006546396296471357, - "learning_rate": 0.00019998034161378868, - "loss": 46.0, - "step": 39204 - }, - { - "epoch": 6.3137002294778375, - "grad_norm": 0.009632307104766369, - "learning_rate": 0.00019998034061067635, - "loss": 46.0, - "step": 39205 - }, - { - "epoch": 6.313861266556625, - "grad_norm": 0.0013545771362259984, - "learning_rate": 0.0001999803396075384, - "loss": 46.0, - "step": 39206 - }, - { - "epoch": 6.314022303635412, - "grad_norm": 0.014357620850205421, - "learning_rate": 0.00019998033860437486, - "loss": 46.0, - "step": 39207 - }, - { - "epoch": 6.3141833407142, - "grad_norm": 0.007033585105091333, - "learning_rate": 0.00019998033760118574, - "loss": 46.0, - "step": 39208 - }, - { - "epoch": 6.314344377792986, - "grad_norm": 0.0013322340091690421, - "learning_rate": 0.000199980336597971, - "loss": 46.0, - "step": 39209 - }, - { - "epoch": 6.314505414871774, - "grad_norm": 0.014954268001019955, - "learning_rate": 0.00019998033559473074, - "loss": 46.0, - "step": 39210 - }, - { - "epoch": 6.314666451950561, - "grad_norm": 0.005739069543778896, - "learning_rate": 0.00019998033459146482, - "loss": 46.0, - "step": 39211 - }, - { - "epoch": 6.314827489029349, - "grad_norm": 0.0020559667609632015, - "learning_rate": 0.00019998033358817333, - "loss": 46.0, - "step": 39212 - }, - { - "epoch": 6.314988526108136, - "grad_norm": 0.0015947798965498805, - "learning_rate": 0.00019998033258485624, - "loss": 46.0, - "step": 39213 - }, - { - "epoch": 6.315149563186924, - "grad_norm": 0.015269514173269272, - "learning_rate": 0.0001999803315815136, - "loss": 46.0, - "step": 39214 - }, - { - "epoch": 6.315310600265711, - "grad_norm": 0.006795963272452354, - "learning_rate": 0.00019998033057814534, - "loss": 46.0, - "step": 39215 - }, - { - "epoch": 6.315471637344499, - "grad_norm": 0.0033634265419095755, - "learning_rate": 0.0001999803295747515, - "loss": 46.0, - "step": 39216 - }, - { - "epoch": 6.315632674423286, - "grad_norm": 0.001218912424519658, - "learning_rate": 0.00019998032857133205, - "loss": 46.0, - "step": 39217 - }, - { - "epoch": 6.3157937115020735, - "grad_norm": 0.007960383780300617, - "learning_rate": 0.00019998032756788706, - "loss": 46.0, - "step": 39218 - }, - { - "epoch": 6.315954748580861, - "grad_norm": 0.0032012341544032097, - "learning_rate": 0.0001999803265644164, - "loss": 46.0, - "step": 39219 - }, - { - "epoch": 6.316115785659648, - "grad_norm": 0.002882528118789196, - "learning_rate": 0.00019998032556092023, - "loss": 46.0, - "step": 39220 - }, - { - "epoch": 6.316276822738436, - "grad_norm": 0.013656115159392357, - "learning_rate": 0.00019998032455739845, - "loss": 46.0, - "step": 39221 - }, - { - "epoch": 6.316437859817223, - "grad_norm": 0.0015731171006336808, - "learning_rate": 0.00019998032355385105, - "loss": 46.0, - "step": 39222 - }, - { - "epoch": 6.316598896896011, - "grad_norm": 0.0029132100753486156, - "learning_rate": 0.00019998032255027806, - "loss": 46.0, - "step": 39223 - }, - { - "epoch": 6.316759933974797, - "grad_norm": 0.007392663508653641, - "learning_rate": 0.00019998032154667952, - "loss": 46.0, - "step": 39224 - }, - { - "epoch": 6.316920971053585, - "grad_norm": 0.01143241673707962, - "learning_rate": 0.00019998032054305536, - "loss": 46.0, - "step": 39225 - }, - { - "epoch": 6.317082008132372, - "grad_norm": 0.0049977474845945835, - "learning_rate": 0.00019998031953940562, - "loss": 46.0, - "step": 39226 - }, - { - "epoch": 6.31724304521116, - "grad_norm": 0.014875965192914009, - "learning_rate": 0.00019998031853573028, - "loss": 46.0, - "step": 39227 - }, - { - "epoch": 6.317404082289947, - "grad_norm": 0.010098783299326897, - "learning_rate": 0.00019998031753202934, - "loss": 46.0, - "step": 39228 - }, - { - "epoch": 6.3175651193687345, - "grad_norm": 0.0035059915389865637, - "learning_rate": 0.00019998031652830286, - "loss": 46.0, - "step": 39229 - }, - { - "epoch": 6.317726156447522, - "grad_norm": 0.00809943862259388, - "learning_rate": 0.00019998031552455073, - "loss": 46.0, - "step": 39230 - }, - { - "epoch": 6.3178871935263095, - "grad_norm": 0.01116069033741951, - "learning_rate": 0.00019998031452077303, - "loss": 46.0, - "step": 39231 - }, - { - "epoch": 6.318048230605097, - "grad_norm": 0.0016322049777954817, - "learning_rate": 0.00019998031351696978, - "loss": 46.0, - "step": 39232 - }, - { - "epoch": 6.318209267683884, - "grad_norm": 0.005741297733038664, - "learning_rate": 0.0001999803125131409, - "loss": 46.0, - "step": 39233 - }, - { - "epoch": 6.318370304762672, - "grad_norm": 0.0017672909889370203, - "learning_rate": 0.00019998031150928643, - "loss": 46.0, - "step": 39234 - }, - { - "epoch": 6.318531341841459, - "grad_norm": 0.015608440153300762, - "learning_rate": 0.00019998031050540637, - "loss": 46.0, - "step": 39235 - }, - { - "epoch": 6.318692378920247, - "grad_norm": 0.0031857292633503675, - "learning_rate": 0.00019998030950150076, - "loss": 46.0, - "step": 39236 - }, - { - "epoch": 6.318853415999034, - "grad_norm": 0.0018248548731207848, - "learning_rate": 0.0001999803084975695, - "loss": 46.0, - "step": 39237 - }, - { - "epoch": 6.319014453077822, - "grad_norm": 0.0016317018307745457, - "learning_rate": 0.0001999803074936127, - "loss": 46.0, - "step": 39238 - }, - { - "epoch": 6.319175490156608, - "grad_norm": 0.0008817727211862803, - "learning_rate": 0.0001999803064896303, - "loss": 46.0, - "step": 39239 - }, - { - "epoch": 6.319336527235396, - "grad_norm": 0.01828228496015072, - "learning_rate": 0.00019998030548562228, - "loss": 46.0, - "step": 39240 - }, - { - "epoch": 6.319497564314183, - "grad_norm": 0.007376500405371189, - "learning_rate": 0.0001999803044815887, - "loss": 46.0, - "step": 39241 - }, - { - "epoch": 6.3196586013929705, - "grad_norm": 0.0017238880973309278, - "learning_rate": 0.00019998030347752953, - "loss": 46.0, - "step": 39242 - }, - { - "epoch": 6.319819638471758, - "grad_norm": 0.0020580957643687725, - "learning_rate": 0.00019998030247344475, - "loss": 46.0, - "step": 39243 - }, - { - "epoch": 6.319980675550545, - "grad_norm": 0.014273291453719139, - "learning_rate": 0.0001999803014693344, - "loss": 46.0, - "step": 39244 - }, - { - "epoch": 6.320141712629333, - "grad_norm": 0.0007183046545833349, - "learning_rate": 0.00019998030046519842, - "loss": 46.0, - "step": 39245 - }, - { - "epoch": 6.32030274970812, - "grad_norm": 0.00794605165719986, - "learning_rate": 0.00019998029946103688, - "loss": 46.0, - "step": 39246 - }, - { - "epoch": 6.320463786786908, - "grad_norm": 0.011247053742408752, - "learning_rate": 0.00019998029845684978, - "loss": 46.0, - "step": 39247 - }, - { - "epoch": 6.320624823865695, - "grad_norm": 0.001349453697912395, - "learning_rate": 0.00019998029745263703, - "loss": 46.0, - "step": 39248 - }, - { - "epoch": 6.320785860944483, - "grad_norm": 0.0016149523435160518, - "learning_rate": 0.00019998029644839873, - "loss": 46.0, - "step": 39249 - }, - { - "epoch": 6.32094689802327, - "grad_norm": 0.005839682184159756, - "learning_rate": 0.00019998029544413483, - "loss": 46.0, - "step": 39250 - }, - { - "epoch": 6.321107935102058, - "grad_norm": 0.007284276653081179, - "learning_rate": 0.00019998029443984536, - "loss": 46.0, - "step": 39251 - }, - { - "epoch": 6.321268972180845, - "grad_norm": 0.024734221398830414, - "learning_rate": 0.0001999802934355303, - "loss": 46.0, - "step": 39252 - }, - { - "epoch": 6.3214300092596325, - "grad_norm": 0.0008910219185054302, - "learning_rate": 0.0001999802924311896, - "loss": 46.0, - "step": 39253 - }, - { - "epoch": 6.321591046338419, - "grad_norm": 0.004034031648188829, - "learning_rate": 0.00019998029142682334, - "loss": 46.0, - "step": 39254 - }, - { - "epoch": 6.3217520834172065, - "grad_norm": 0.018770208582282066, - "learning_rate": 0.00019998029042243148, - "loss": 46.0, - "step": 39255 - }, - { - "epoch": 6.321913120495994, - "grad_norm": 0.0029557356610894203, - "learning_rate": 0.00019998028941801404, - "loss": 46.0, - "step": 39256 - }, - { - "epoch": 6.322074157574781, - "grad_norm": 0.008802195079624653, - "learning_rate": 0.00019998028841357104, - "loss": 46.0, - "step": 39257 - }, - { - "epoch": 6.322235194653569, - "grad_norm": 0.004535462241619825, - "learning_rate": 0.00019998028740910242, - "loss": 46.0, - "step": 39258 - }, - { - "epoch": 6.322396231732356, - "grad_norm": 0.001260081771761179, - "learning_rate": 0.00019998028640460822, - "loss": 46.0, - "step": 39259 - }, - { - "epoch": 6.322557268811144, - "grad_norm": 0.0010555196786299348, - "learning_rate": 0.00019998028540008843, - "loss": 46.0, - "step": 39260 - }, - { - "epoch": 6.322718305889931, - "grad_norm": 0.005778071470558643, - "learning_rate": 0.00019998028439554305, - "loss": 46.0, - "step": 39261 - }, - { - "epoch": 6.322879342968719, - "grad_norm": 0.0042562601156532764, - "learning_rate": 0.00019998028339097206, - "loss": 46.0, - "step": 39262 - }, - { - "epoch": 6.323040380047506, - "grad_norm": 0.02027692273259163, - "learning_rate": 0.0001999802823863755, - "loss": 46.0, - "step": 39263 - }, - { - "epoch": 6.323201417126294, - "grad_norm": 0.004296464845538139, - "learning_rate": 0.00019998028138175334, - "loss": 46.0, - "step": 39264 - }, - { - "epoch": 6.323362454205081, - "grad_norm": 0.0060869259759783745, - "learning_rate": 0.00019998028037710559, - "loss": 46.0, - "step": 39265 - }, - { - "epoch": 6.3235234912838685, - "grad_norm": 0.020527543500065804, - "learning_rate": 0.00019998027937243224, - "loss": 46.0, - "step": 39266 - }, - { - "epoch": 6.323684528362655, - "grad_norm": 0.0066842553205788136, - "learning_rate": 0.00019998027836773332, - "loss": 46.0, - "step": 39267 - }, - { - "epoch": 6.3238455654414425, - "grad_norm": 0.01725761592388153, - "learning_rate": 0.0001999802773630088, - "loss": 46.0, - "step": 39268 - }, - { - "epoch": 6.32400660252023, - "grad_norm": 0.009204615838825703, - "learning_rate": 0.0001999802763582587, - "loss": 46.0, - "step": 39269 - }, - { - "epoch": 6.324167639599017, - "grad_norm": 0.006052333861589432, - "learning_rate": 0.000199980275353483, - "loss": 46.0, - "step": 39270 - }, - { - "epoch": 6.324328676677805, - "grad_norm": 0.006382647901773453, - "learning_rate": 0.00019998027434868173, - "loss": 46.0, - "step": 39271 - }, - { - "epoch": 6.324489713756592, - "grad_norm": 0.00808104407042265, - "learning_rate": 0.00019998027334385484, - "loss": 46.0, - "step": 39272 - }, - { - "epoch": 6.32465075083538, - "grad_norm": 0.003572257002815604, - "learning_rate": 0.0001999802723390024, - "loss": 46.0, - "step": 39273 - }, - { - "epoch": 6.324811787914167, - "grad_norm": 0.002168485429137945, - "learning_rate": 0.00019998027133412432, - "loss": 46.0, - "step": 39274 - }, - { - "epoch": 6.324972824992955, - "grad_norm": 0.0169981736689806, - "learning_rate": 0.0001999802703292207, - "loss": 46.0, - "step": 39275 - }, - { - "epoch": 6.325133862071742, - "grad_norm": 0.002466046018525958, - "learning_rate": 0.00019998026932429146, - "loss": 46.0, - "step": 39276 - }, - { - "epoch": 6.32529489915053, - "grad_norm": 0.004602525848895311, - "learning_rate": 0.00019998026831933663, - "loss": 46.0, - "step": 39277 - }, - { - "epoch": 6.325455936229317, - "grad_norm": 0.004470365587621927, - "learning_rate": 0.00019998026731435622, - "loss": 46.0, - "step": 39278 - }, - { - "epoch": 6.3256169733081045, - "grad_norm": 0.0031690194737166166, - "learning_rate": 0.00019998026630935022, - "loss": 46.0, - "step": 39279 - }, - { - "epoch": 6.325778010386892, - "grad_norm": 0.0015667417319491506, - "learning_rate": 0.00019998026530431863, - "loss": 46.0, - "step": 39280 - }, - { - "epoch": 6.325939047465679, - "grad_norm": 0.008357924409210682, - "learning_rate": 0.00019998026429926142, - "loss": 46.0, - "step": 39281 - }, - { - "epoch": 6.326100084544466, - "grad_norm": 0.009117667563259602, - "learning_rate": 0.00019998026329417866, - "loss": 46.0, - "step": 39282 - }, - { - "epoch": 6.326261121623253, - "grad_norm": 0.01177041232585907, - "learning_rate": 0.00019998026228907028, - "loss": 46.0, - "step": 39283 - }, - { - "epoch": 6.326422158702041, - "grad_norm": 0.005420915316790342, - "learning_rate": 0.00019998026128393637, - "loss": 46.0, - "step": 39284 - }, - { - "epoch": 6.326583195780828, - "grad_norm": 0.00805660430341959, - "learning_rate": 0.00019998026027877682, - "loss": 46.0, - "step": 39285 - }, - { - "epoch": 6.326744232859616, - "grad_norm": 0.005308833438903093, - "learning_rate": 0.00019998025927359168, - "loss": 46.0, - "step": 39286 - }, - { - "epoch": 6.326905269938403, - "grad_norm": 0.009006757289171219, - "learning_rate": 0.00019998025826838096, - "loss": 46.0, - "step": 39287 - }, - { - "epoch": 6.327066307017191, - "grad_norm": 0.001884071039967239, - "learning_rate": 0.00019998025726314465, - "loss": 46.0, - "step": 39288 - }, - { - "epoch": 6.327227344095978, - "grad_norm": 0.0026832083240151405, - "learning_rate": 0.00019998025625788275, - "loss": 46.0, - "step": 39289 - }, - { - "epoch": 6.3273883811747655, - "grad_norm": 0.003901432501152158, - "learning_rate": 0.00019998025525259526, - "loss": 46.0, - "step": 39290 - }, - { - "epoch": 6.327549418253553, - "grad_norm": 0.004649568349123001, - "learning_rate": 0.00019998025424728218, - "loss": 46.0, - "step": 39291 - }, - { - "epoch": 6.3277104553323404, - "grad_norm": 0.01862914301455021, - "learning_rate": 0.0001999802532419435, - "loss": 46.0, - "step": 39292 - }, - { - "epoch": 6.327871492411128, - "grad_norm": 0.004360104911029339, - "learning_rate": 0.00019998025223657925, - "loss": 46.0, - "step": 39293 - }, - { - "epoch": 6.328032529489915, - "grad_norm": 0.00408626114949584, - "learning_rate": 0.00019998025123118938, - "loss": 46.0, - "step": 39294 - }, - { - "epoch": 6.328193566568703, - "grad_norm": 0.0039005533326417208, - "learning_rate": 0.00019998025022577393, - "loss": 46.0, - "step": 39295 - }, - { - "epoch": 6.32835460364749, - "grad_norm": 0.011384246870875359, - "learning_rate": 0.00019998024922033292, - "loss": 46.0, - "step": 39296 - }, - { - "epoch": 6.328515640726277, - "grad_norm": 0.007530677132308483, - "learning_rate": 0.0001999802482148663, - "loss": 46.0, - "step": 39297 - }, - { - "epoch": 6.328676677805064, - "grad_norm": 0.004398074001073837, - "learning_rate": 0.00019998024720937408, - "loss": 46.0, - "step": 39298 - }, - { - "epoch": 6.328837714883852, - "grad_norm": 0.0016032342100515962, - "learning_rate": 0.00019998024620385629, - "loss": 46.0, - "step": 39299 - }, - { - "epoch": 6.328998751962639, - "grad_norm": 0.012873155064880848, - "learning_rate": 0.0001999802451983129, - "loss": 46.0, - "step": 39300 - }, - { - "epoch": 6.329159789041427, - "grad_norm": 0.032686855643987656, - "learning_rate": 0.0001999802441927439, - "loss": 46.0, - "step": 39301 - }, - { - "epoch": 6.329320826120214, - "grad_norm": 0.005882898345589638, - "learning_rate": 0.00019998024318714937, - "loss": 46.0, - "step": 39302 - }, - { - "epoch": 6.3294818631990015, - "grad_norm": 0.007807684130966663, - "learning_rate": 0.0001999802421815292, - "loss": 46.0, - "step": 39303 - }, - { - "epoch": 6.329642900277789, - "grad_norm": 0.008720343932509422, - "learning_rate": 0.00019998024117588346, - "loss": 46.0, - "step": 39304 - }, - { - "epoch": 6.329803937356576, - "grad_norm": 0.0032543563283979893, - "learning_rate": 0.00019998024017021208, - "loss": 46.0, - "step": 39305 - }, - { - "epoch": 6.329964974435364, - "grad_norm": 0.0028530405834317207, - "learning_rate": 0.00019998023916451517, - "loss": 46.0, - "step": 39306 - }, - { - "epoch": 6.330126011514151, - "grad_norm": 0.004464778117835522, - "learning_rate": 0.00019998023815879265, - "loss": 46.0, - "step": 39307 - }, - { - "epoch": 6.330287048592939, - "grad_norm": 0.0033770210575312376, - "learning_rate": 0.00019998023715304454, - "loss": 46.0, - "step": 39308 - }, - { - "epoch": 6.330448085671726, - "grad_norm": 0.019810356199741364, - "learning_rate": 0.00019998023614727087, - "loss": 46.0, - "step": 39309 - }, - { - "epoch": 6.330609122750514, - "grad_norm": 0.0027148162480443716, - "learning_rate": 0.00019998023514147155, - "loss": 46.0, - "step": 39310 - }, - { - "epoch": 6.330770159829301, - "grad_norm": 0.011955147609114647, - "learning_rate": 0.00019998023413564668, - "loss": 46.0, - "step": 39311 - }, - { - "epoch": 6.330931196908088, - "grad_norm": 0.005633179098367691, - "learning_rate": 0.00019998023312979622, - "loss": 46.0, - "step": 39312 - }, - { - "epoch": 6.331092233986875, - "grad_norm": 0.007518400903791189, - "learning_rate": 0.00019998023212392018, - "loss": 46.0, - "step": 39313 - }, - { - "epoch": 6.331253271065663, - "grad_norm": 0.0015899842837825418, - "learning_rate": 0.00019998023111801851, - "loss": 46.0, - "step": 39314 - }, - { - "epoch": 6.33141430814445, - "grad_norm": 0.0027054939419031143, - "learning_rate": 0.0001999802301120913, - "loss": 46.0, - "step": 39315 - }, - { - "epoch": 6.3315753452232375, - "grad_norm": 0.006814792286604643, - "learning_rate": 0.00019998022910613846, - "loss": 46.0, - "step": 39316 - }, - { - "epoch": 6.331736382302025, - "grad_norm": 0.0038679130375385284, - "learning_rate": 0.00019998022810016004, - "loss": 46.0, - "step": 39317 - }, - { - "epoch": 6.331897419380812, - "grad_norm": 0.0044594211503863335, - "learning_rate": 0.00019998022709415603, - "loss": 46.0, - "step": 39318 - }, - { - "epoch": 6.3320584564596, - "grad_norm": 0.002873303834348917, - "learning_rate": 0.00019998022608812643, - "loss": 46.0, - "step": 39319 - }, - { - "epoch": 6.332219493538387, - "grad_norm": 0.00246586580760777, - "learning_rate": 0.00019998022508207125, - "loss": 46.0, - "step": 39320 - }, - { - "epoch": 6.332380530617175, - "grad_norm": 0.014758276753127575, - "learning_rate": 0.00019998022407599048, - "loss": 46.0, - "step": 39321 - }, - { - "epoch": 6.332541567695962, - "grad_norm": 0.004454613663256168, - "learning_rate": 0.00019998022306988412, - "loss": 46.0, - "step": 39322 - }, - { - "epoch": 6.33270260477475, - "grad_norm": 0.0018376229563727975, - "learning_rate": 0.00019998022206375215, - "loss": 46.0, - "step": 39323 - }, - { - "epoch": 6.332863641853537, - "grad_norm": 0.01380782388150692, - "learning_rate": 0.00019998022105759459, - "loss": 46.0, - "step": 39324 - }, - { - "epoch": 6.333024678932325, - "grad_norm": 0.002713521709665656, - "learning_rate": 0.00019998022005141147, - "loss": 46.0, - "step": 39325 - }, - { - "epoch": 6.333185716011112, - "grad_norm": 0.028691653162240982, - "learning_rate": 0.00019998021904520273, - "loss": 46.0, - "step": 39326 - }, - { - "epoch": 6.333346753089899, - "grad_norm": 0.003941669128835201, - "learning_rate": 0.00019998021803896844, - "loss": 46.0, - "step": 39327 - }, - { - "epoch": 6.333507790168686, - "grad_norm": 0.00348378112539649, - "learning_rate": 0.00019998021703270853, - "loss": 46.0, - "step": 39328 - }, - { - "epoch": 6.3336688272474735, - "grad_norm": 0.007795641198754311, - "learning_rate": 0.00019998021602642303, - "loss": 46.0, - "step": 39329 - }, - { - "epoch": 6.333829864326261, - "grad_norm": 0.0017936760559678078, - "learning_rate": 0.00019998021502011195, - "loss": 46.0, - "step": 39330 - }, - { - "epoch": 6.333990901405048, - "grad_norm": 0.007852043956518173, - "learning_rate": 0.00019998021401377528, - "loss": 46.0, - "step": 39331 - }, - { - "epoch": 6.334151938483836, - "grad_norm": 0.002989057218655944, - "learning_rate": 0.000199980213007413, - "loss": 46.0, - "step": 39332 - }, - { - "epoch": 6.334312975562623, - "grad_norm": 0.005599173717200756, - "learning_rate": 0.00019998021200102515, - "loss": 46.0, - "step": 39333 - }, - { - "epoch": 6.334474012641411, - "grad_norm": 0.0017831216100603342, - "learning_rate": 0.0001999802109946117, - "loss": 46.0, - "step": 39334 - }, - { - "epoch": 6.334635049720198, - "grad_norm": 0.0014196459669619799, - "learning_rate": 0.00019998020998817268, - "loss": 46.0, - "step": 39335 - }, - { - "epoch": 6.334796086798986, - "grad_norm": 0.011581974104046822, - "learning_rate": 0.00019998020898170807, - "loss": 46.0, - "step": 39336 - }, - { - "epoch": 6.334957123877773, - "grad_norm": 0.003051828360185027, - "learning_rate": 0.00019998020797521785, - "loss": 46.0, - "step": 39337 - }, - { - "epoch": 6.3351181609565606, - "grad_norm": 0.0050719608552753925, - "learning_rate": 0.00019998020696870204, - "loss": 46.0, - "step": 39338 - }, - { - "epoch": 6.335279198035348, - "grad_norm": 0.0036676533054560423, - "learning_rate": 0.00019998020596216068, - "loss": 46.0, - "step": 39339 - }, - { - "epoch": 6.335440235114135, - "grad_norm": 0.00894926581531763, - "learning_rate": 0.00019998020495559367, - "loss": 46.0, - "step": 39340 - }, - { - "epoch": 6.335601272192922, - "grad_norm": 0.005566160194575787, - "learning_rate": 0.0001999802039490011, - "loss": 46.0, - "step": 39341 - }, - { - "epoch": 6.3357623092717095, - "grad_norm": 0.01981467381119728, - "learning_rate": 0.00019998020294238294, - "loss": 46.0, - "step": 39342 - }, - { - "epoch": 6.335923346350497, - "grad_norm": 0.005660607013851404, - "learning_rate": 0.00019998020193573917, - "loss": 46.0, - "step": 39343 - }, - { - "epoch": 6.336084383429284, - "grad_norm": 0.0038558386731892824, - "learning_rate": 0.00019998020092906984, - "loss": 46.0, - "step": 39344 - }, - { - "epoch": 6.336245420508072, - "grad_norm": 0.004187397658824921, - "learning_rate": 0.00019998019992237493, - "loss": 46.0, - "step": 39345 - }, - { - "epoch": 6.336406457586859, - "grad_norm": 0.0016036182641983032, - "learning_rate": 0.0001999801989156544, - "loss": 46.0, - "step": 39346 - }, - { - "epoch": 6.336567494665647, - "grad_norm": 0.0014665999915450811, - "learning_rate": 0.0001999801979089083, - "loss": 46.0, - "step": 39347 - }, - { - "epoch": 6.336728531744434, - "grad_norm": 0.0037546316161751747, - "learning_rate": 0.0001999801969021366, - "loss": 46.0, - "step": 39348 - }, - { - "epoch": 6.336889568823222, - "grad_norm": 0.004217116162180901, - "learning_rate": 0.0001999801958953393, - "loss": 46.0, - "step": 39349 - }, - { - "epoch": 6.337050605902009, - "grad_norm": 0.005435938481241465, - "learning_rate": 0.0001999801948885164, - "loss": 46.0, - "step": 39350 - }, - { - "epoch": 6.3372116429807965, - "grad_norm": 0.005066473037004471, - "learning_rate": 0.00019998019388166796, - "loss": 46.0, - "step": 39351 - }, - { - "epoch": 6.337372680059584, - "grad_norm": 0.017243310809135437, - "learning_rate": 0.00019998019287479387, - "loss": 46.0, - "step": 39352 - }, - { - "epoch": 6.337533717138371, - "grad_norm": 0.01188422366976738, - "learning_rate": 0.00019998019186789426, - "loss": 46.0, - "step": 39353 - }, - { - "epoch": 6.337694754217159, - "grad_norm": 0.010011446662247181, - "learning_rate": 0.000199980190860969, - "loss": 46.0, - "step": 39354 - }, - { - "epoch": 6.3378557912959455, - "grad_norm": 0.0011682201875373721, - "learning_rate": 0.00019998018985401816, - "loss": 46.0, - "step": 39355 - }, - { - "epoch": 6.338016828374733, - "grad_norm": 0.006333307828754187, - "learning_rate": 0.00019998018884704176, - "loss": 46.0, - "step": 39356 - }, - { - "epoch": 6.33817786545352, - "grad_norm": 0.007472933270037174, - "learning_rate": 0.00019998018784003977, - "loss": 46.0, - "step": 39357 - }, - { - "epoch": 6.338338902532308, - "grad_norm": 0.009217468090355396, - "learning_rate": 0.00019998018683301213, - "loss": 46.0, - "step": 39358 - }, - { - "epoch": 6.338499939611095, - "grad_norm": 0.005128510762006044, - "learning_rate": 0.00019998018582595897, - "loss": 46.0, - "step": 39359 - }, - { - "epoch": 6.338660976689883, - "grad_norm": 0.003001793520525098, - "learning_rate": 0.00019998018481888016, - "loss": 46.0, - "step": 39360 - }, - { - "epoch": 6.33882201376867, - "grad_norm": 0.001012038323096931, - "learning_rate": 0.00019998018381177582, - "loss": 46.0, - "step": 39361 - }, - { - "epoch": 6.338983050847458, - "grad_norm": 0.006456390488892794, - "learning_rate": 0.00019998018280464584, - "loss": 46.0, - "step": 39362 - }, - { - "epoch": 6.339144087926245, - "grad_norm": 0.018388397991657257, - "learning_rate": 0.00019998018179749033, - "loss": 46.0, - "step": 39363 - }, - { - "epoch": 6.3393051250050325, - "grad_norm": 0.004710391163825989, - "learning_rate": 0.00019998018079030918, - "loss": 46.0, - "step": 39364 - }, - { - "epoch": 6.33946616208382, - "grad_norm": 0.0038208451587706804, - "learning_rate": 0.00019998017978310246, - "loss": 46.0, - "step": 39365 - }, - { - "epoch": 6.339627199162607, - "grad_norm": 0.004799526650458574, - "learning_rate": 0.00019998017877587013, - "loss": 46.0, - "step": 39366 - }, - { - "epoch": 6.339788236241395, - "grad_norm": 0.0019337047124281526, - "learning_rate": 0.00019998017776861221, - "loss": 46.0, - "step": 39367 - }, - { - "epoch": 6.339949273320182, - "grad_norm": 0.0025381832383573055, - "learning_rate": 0.00019998017676132874, - "loss": 46.0, - "step": 39368 - }, - { - "epoch": 6.34011031039897, - "grad_norm": 0.0052637062035501, - "learning_rate": 0.00019998017575401965, - "loss": 46.0, - "step": 39369 - }, - { - "epoch": 6.340271347477756, - "grad_norm": 0.006857589352875948, - "learning_rate": 0.00019998017474668494, - "loss": 46.0, - "step": 39370 - }, - { - "epoch": 6.340432384556544, - "grad_norm": 0.007295440416783094, - "learning_rate": 0.0001999801737393247, - "loss": 46.0, - "step": 39371 - }, - { - "epoch": 6.340593421635331, - "grad_norm": 0.00410598935559392, - "learning_rate": 0.00019998017273193885, - "loss": 46.0, - "step": 39372 - }, - { - "epoch": 6.340754458714119, - "grad_norm": 0.016875099390745163, - "learning_rate": 0.0001999801717245274, - "loss": 46.0, - "step": 39373 - }, - { - "epoch": 6.340915495792906, - "grad_norm": 0.002704192651435733, - "learning_rate": 0.00019998017071709036, - "loss": 46.0, - "step": 39374 - }, - { - "epoch": 6.341076532871694, - "grad_norm": 0.00840501394122839, - "learning_rate": 0.00019998016970962775, - "loss": 46.0, - "step": 39375 - }, - { - "epoch": 6.341237569950481, - "grad_norm": 0.0050213211216032505, - "learning_rate": 0.00019998016870213952, - "loss": 46.0, - "step": 39376 - }, - { - "epoch": 6.3413986070292685, - "grad_norm": 0.0030093619134277105, - "learning_rate": 0.0001999801676946257, - "loss": 46.0, - "step": 39377 - }, - { - "epoch": 6.341559644108056, - "grad_norm": 0.0020738153252750635, - "learning_rate": 0.00019998016668708633, - "loss": 46.0, - "step": 39378 - }, - { - "epoch": 6.341720681186843, - "grad_norm": 0.00840769987553358, - "learning_rate": 0.00019998016567952136, - "loss": 46.0, - "step": 39379 - }, - { - "epoch": 6.341881718265631, - "grad_norm": 0.0038362813647836447, - "learning_rate": 0.00019998016467193076, - "loss": 46.0, - "step": 39380 - }, - { - "epoch": 6.342042755344418, - "grad_norm": 0.009474311023950577, - "learning_rate": 0.0001999801636643146, - "loss": 46.0, - "step": 39381 - }, - { - "epoch": 6.342203792423206, - "grad_norm": 0.007288126274943352, - "learning_rate": 0.00019998016265667285, - "loss": 46.0, - "step": 39382 - }, - { - "epoch": 6.342364829501993, - "grad_norm": 0.008051365613937378, - "learning_rate": 0.00019998016164900553, - "loss": 46.0, - "step": 39383 - }, - { - "epoch": 6.342525866580781, - "grad_norm": 0.005412889178842306, - "learning_rate": 0.00019998016064131258, - "loss": 46.0, - "step": 39384 - }, - { - "epoch": 6.342686903659567, - "grad_norm": 0.005608964711427689, - "learning_rate": 0.00019998015963359404, - "loss": 46.0, - "step": 39385 - }, - { - "epoch": 6.342847940738355, - "grad_norm": 0.005815706681460142, - "learning_rate": 0.00019998015862584994, - "loss": 46.0, - "step": 39386 - }, - { - "epoch": 6.343008977817142, - "grad_norm": 0.0014363507507368922, - "learning_rate": 0.00019998015761808026, - "loss": 46.0, - "step": 39387 - }, - { - "epoch": 6.34317001489593, - "grad_norm": 0.00316497590392828, - "learning_rate": 0.00019998015661028495, - "loss": 46.0, - "step": 39388 - }, - { - "epoch": 6.343331051974717, - "grad_norm": 0.005221744999289513, - "learning_rate": 0.0001999801556024641, - "loss": 46.0, - "step": 39389 - }, - { - "epoch": 6.3434920890535045, - "grad_norm": 0.003934514243155718, - "learning_rate": 0.00019998015459461762, - "loss": 46.0, - "step": 39390 - }, - { - "epoch": 6.343653126132292, - "grad_norm": 0.0018451712094247341, - "learning_rate": 0.00019998015358674553, - "loss": 46.0, - "step": 39391 - }, - { - "epoch": 6.343814163211079, - "grad_norm": 0.030934782698750496, - "learning_rate": 0.00019998015257884788, - "loss": 46.0, - "step": 39392 - }, - { - "epoch": 6.343975200289867, - "grad_norm": 0.007448768708854914, - "learning_rate": 0.00019998015157092464, - "loss": 46.0, - "step": 39393 - }, - { - "epoch": 6.344136237368654, - "grad_norm": 0.001728096860460937, - "learning_rate": 0.00019998015056297582, - "loss": 46.0, - "step": 39394 - }, - { - "epoch": 6.344297274447442, - "grad_norm": 0.0027998778969049454, - "learning_rate": 0.0001999801495550014, - "loss": 46.0, - "step": 39395 - }, - { - "epoch": 6.344458311526229, - "grad_norm": 0.002442327793687582, - "learning_rate": 0.00019998014854700138, - "loss": 46.0, - "step": 39396 - }, - { - "epoch": 6.344619348605017, - "grad_norm": 0.005001716315746307, - "learning_rate": 0.0001999801475389758, - "loss": 46.0, - "step": 39397 - }, - { - "epoch": 6.344780385683804, - "grad_norm": 0.009232645854353905, - "learning_rate": 0.0001999801465309246, - "loss": 46.0, - "step": 39398 - }, - { - "epoch": 6.3449414227625915, - "grad_norm": 0.007224122527986765, - "learning_rate": 0.0001999801455228478, - "loss": 46.0, - "step": 39399 - }, - { - "epoch": 6.345102459841378, - "grad_norm": 0.004758620634675026, - "learning_rate": 0.00019998014451474543, - "loss": 46.0, - "step": 39400 - }, - { - "epoch": 6.345263496920166, - "grad_norm": 0.007991883903741837, - "learning_rate": 0.0001999801435066175, - "loss": 46.0, - "step": 39401 - }, - { - "epoch": 6.345424533998953, - "grad_norm": 0.0038770984392613173, - "learning_rate": 0.00019998014249846392, - "loss": 46.0, - "step": 39402 - }, - { - "epoch": 6.3455855710777405, - "grad_norm": 0.0015964569756761193, - "learning_rate": 0.00019998014149028479, - "loss": 46.0, - "step": 39403 - }, - { - "epoch": 6.345746608156528, - "grad_norm": 0.00449296273291111, - "learning_rate": 0.00019998014048208006, - "loss": 46.0, - "step": 39404 - }, - { - "epoch": 6.345907645235315, - "grad_norm": 0.0014050485333427787, - "learning_rate": 0.00019998013947384975, - "loss": 46.0, - "step": 39405 - }, - { - "epoch": 6.346068682314103, - "grad_norm": 0.008442426100373268, - "learning_rate": 0.00019998013846559386, - "loss": 46.0, - "step": 39406 - }, - { - "epoch": 6.34622971939289, - "grad_norm": 0.003294851863756776, - "learning_rate": 0.00019998013745731234, - "loss": 46.0, - "step": 39407 - }, - { - "epoch": 6.346390756471678, - "grad_norm": 0.0032214017119258642, - "learning_rate": 0.00019998013644900527, - "loss": 46.0, - "step": 39408 - }, - { - "epoch": 6.346551793550465, - "grad_norm": 0.0018650633282959461, - "learning_rate": 0.00019998013544067259, - "loss": 46.0, - "step": 39409 - }, - { - "epoch": 6.346712830629253, - "grad_norm": 0.004513530060648918, - "learning_rate": 0.0001999801344323143, - "loss": 46.0, - "step": 39410 - }, - { - "epoch": 6.34687386770804, - "grad_norm": 0.0024002757854759693, - "learning_rate": 0.00019998013342393045, - "loss": 46.0, - "step": 39411 - }, - { - "epoch": 6.3470349047868275, - "grad_norm": 0.006897768937051296, - "learning_rate": 0.000199980132415521, - "loss": 46.0, - "step": 39412 - }, - { - "epoch": 6.347195941865614, - "grad_norm": 0.021896084770560265, - "learning_rate": 0.00019998013140708597, - "loss": 46.0, - "step": 39413 - }, - { - "epoch": 6.3473569789444015, - "grad_norm": 0.012006690725684166, - "learning_rate": 0.00019998013039862532, - "loss": 46.0, - "step": 39414 - }, - { - "epoch": 6.347518016023189, - "grad_norm": 0.0020825478713959455, - "learning_rate": 0.00019998012939013914, - "loss": 46.0, - "step": 39415 - }, - { - "epoch": 6.347679053101976, - "grad_norm": 0.0020682106260210276, - "learning_rate": 0.00019998012838162732, - "loss": 46.0, - "step": 39416 - }, - { - "epoch": 6.347840090180764, - "grad_norm": 0.012185588479042053, - "learning_rate": 0.0001999801273730899, - "loss": 46.0, - "step": 39417 - }, - { - "epoch": 6.348001127259551, - "grad_norm": 0.001970587996765971, - "learning_rate": 0.00019998012636452694, - "loss": 46.0, - "step": 39418 - }, - { - "epoch": 6.348162164338339, - "grad_norm": 0.005669783800840378, - "learning_rate": 0.00019998012535593835, - "loss": 46.0, - "step": 39419 - }, - { - "epoch": 6.348323201417126, - "grad_norm": 0.005162478890269995, - "learning_rate": 0.00019998012434732418, - "loss": 46.0, - "step": 39420 - }, - { - "epoch": 6.348484238495914, - "grad_norm": 0.0027079936116933823, - "learning_rate": 0.00019998012333868445, - "loss": 46.0, - "step": 39421 - }, - { - "epoch": 6.348645275574701, - "grad_norm": 0.007538971025496721, - "learning_rate": 0.0001999801223300191, - "loss": 46.0, - "step": 39422 - }, - { - "epoch": 6.348806312653489, - "grad_norm": 0.002288009040057659, - "learning_rate": 0.00019998012132132814, - "loss": 46.0, - "step": 39423 - }, - { - "epoch": 6.348967349732276, - "grad_norm": 0.001883052522316575, - "learning_rate": 0.00019998012031261165, - "loss": 46.0, - "step": 39424 - }, - { - "epoch": 6.3491283868110635, - "grad_norm": 0.003149421652778983, - "learning_rate": 0.00019998011930386954, - "loss": 46.0, - "step": 39425 - }, - { - "epoch": 6.349289423889851, - "grad_norm": 0.0018784533021971583, - "learning_rate": 0.00019998011829510182, - "loss": 46.0, - "step": 39426 - }, - { - "epoch": 6.349450460968638, - "grad_norm": 0.002980217570438981, - "learning_rate": 0.00019998011728630853, - "loss": 46.0, - "step": 39427 - }, - { - "epoch": 6.349611498047425, - "grad_norm": 0.005143294110894203, - "learning_rate": 0.00019998011627748964, - "loss": 46.0, - "step": 39428 - }, - { - "epoch": 6.349772535126212, - "grad_norm": 0.0020320769399404526, - "learning_rate": 0.00019998011526864518, - "loss": 46.0, - "step": 39429 - }, - { - "epoch": 6.349933572205, - "grad_norm": 0.002204232383519411, - "learning_rate": 0.0001999801142597751, - "loss": 46.0, - "step": 39430 - }, - { - "epoch": 6.350094609283787, - "grad_norm": 0.016811968758702278, - "learning_rate": 0.00019998011325087945, - "loss": 46.0, - "step": 39431 - }, - { - "epoch": 6.350255646362575, - "grad_norm": 0.0065052201971411705, - "learning_rate": 0.00019998011224195823, - "loss": 46.0, - "step": 39432 - }, - { - "epoch": 6.350416683441362, - "grad_norm": 0.017053255811333656, - "learning_rate": 0.0001999801112330114, - "loss": 46.0, - "step": 39433 - }, - { - "epoch": 6.35057772052015, - "grad_norm": 0.002551349811255932, - "learning_rate": 0.00019998011022403898, - "loss": 46.0, - "step": 39434 - }, - { - "epoch": 6.350738757598937, - "grad_norm": 0.0014461820246651769, - "learning_rate": 0.00019998010921504092, - "loss": 46.0, - "step": 39435 - }, - { - "epoch": 6.350899794677725, - "grad_norm": 0.019337797537446022, - "learning_rate": 0.00019998010820601735, - "loss": 46.0, - "step": 39436 - }, - { - "epoch": 6.351060831756512, - "grad_norm": 0.003692854195833206, - "learning_rate": 0.00019998010719696815, - "loss": 46.0, - "step": 39437 - }, - { - "epoch": 6.3512218688352995, - "grad_norm": 0.008299127221107483, - "learning_rate": 0.00019998010618789338, - "loss": 46.0, - "step": 39438 - }, - { - "epoch": 6.351382905914087, - "grad_norm": 0.001452952390536666, - "learning_rate": 0.000199980105178793, - "loss": 46.0, - "step": 39439 - }, - { - "epoch": 6.351543942992874, - "grad_norm": 0.007712093181908131, - "learning_rate": 0.00019998010416966703, - "loss": 46.0, - "step": 39440 - }, - { - "epoch": 6.351704980071662, - "grad_norm": 0.010283350944519043, - "learning_rate": 0.00019998010316051547, - "loss": 46.0, - "step": 39441 - }, - { - "epoch": 6.351866017150449, - "grad_norm": 0.0124505078420043, - "learning_rate": 0.00019998010215133835, - "loss": 46.0, - "step": 39442 - }, - { - "epoch": 6.352027054229236, - "grad_norm": 0.016991138458251953, - "learning_rate": 0.00019998010114213562, - "loss": 46.0, - "step": 39443 - }, - { - "epoch": 6.352188091308023, - "grad_norm": 0.0026531871408224106, - "learning_rate": 0.0001999801001329073, - "loss": 46.0, - "step": 39444 - }, - { - "epoch": 6.352349128386811, - "grad_norm": 0.006383951287716627, - "learning_rate": 0.0001999800991236534, - "loss": 46.0, - "step": 39445 - }, - { - "epoch": 6.352510165465598, - "grad_norm": 0.007048714440315962, - "learning_rate": 0.00019998009811437388, - "loss": 46.0, - "step": 39446 - }, - { - "epoch": 6.352671202544386, - "grad_norm": 0.005653482396155596, - "learning_rate": 0.0001999800971050688, - "loss": 46.0, - "step": 39447 - }, - { - "epoch": 6.352832239623173, - "grad_norm": 0.0026763854548335075, - "learning_rate": 0.00019998009609573813, - "loss": 46.0, - "step": 39448 - }, - { - "epoch": 6.352993276701961, - "grad_norm": 0.003750694915652275, - "learning_rate": 0.00019998009508638185, - "loss": 46.0, - "step": 39449 - }, - { - "epoch": 6.353154313780748, - "grad_norm": 0.0018128384836018085, - "learning_rate": 0.000199980094077, - "loss": 46.0, - "step": 39450 - }, - { - "epoch": 6.3533153508595355, - "grad_norm": 0.0016510699642822146, - "learning_rate": 0.00019998009306759253, - "loss": 46.0, - "step": 39451 - }, - { - "epoch": 6.353476387938323, - "grad_norm": 0.014423185028135777, - "learning_rate": 0.0001999800920581595, - "loss": 46.0, - "step": 39452 - }, - { - "epoch": 6.35363742501711, - "grad_norm": 0.0018934386316686869, - "learning_rate": 0.00019998009104870088, - "loss": 46.0, - "step": 39453 - }, - { - "epoch": 6.353798462095898, - "grad_norm": 0.002123798942193389, - "learning_rate": 0.00019998009003921666, - "loss": 46.0, - "step": 39454 - }, - { - "epoch": 6.353959499174685, - "grad_norm": 0.004711429122835398, - "learning_rate": 0.00019998008902970686, - "loss": 46.0, - "step": 39455 - }, - { - "epoch": 6.354120536253473, - "grad_norm": 0.0009010710055008531, - "learning_rate": 0.00019998008802017147, - "loss": 46.0, - "step": 39456 - }, - { - "epoch": 6.35428157333226, - "grad_norm": 0.0024187364615499973, - "learning_rate": 0.00019998008701061046, - "loss": 46.0, - "step": 39457 - }, - { - "epoch": 6.354442610411047, - "grad_norm": 0.011012710630893707, - "learning_rate": 0.00019998008600102392, - "loss": 46.0, - "step": 39458 - }, - { - "epoch": 6.354603647489834, - "grad_norm": 0.003747508628293872, - "learning_rate": 0.00019998008499141174, - "loss": 46.0, - "step": 39459 - }, - { - "epoch": 6.354764684568622, - "grad_norm": 0.009693090803921223, - "learning_rate": 0.000199980083981774, - "loss": 46.0, - "step": 39460 - }, - { - "epoch": 6.354925721647409, - "grad_norm": 0.0014709543902426958, - "learning_rate": 0.00019998008297211062, - "loss": 46.0, - "step": 39461 - }, - { - "epoch": 6.3550867587261965, - "grad_norm": 0.003294868627563119, - "learning_rate": 0.0001999800819624217, - "loss": 46.0, - "step": 39462 - }, - { - "epoch": 6.355247795804984, - "grad_norm": 0.005450052209198475, - "learning_rate": 0.00019998008095270718, - "loss": 46.0, - "step": 39463 - }, - { - "epoch": 6.3554088328837715, - "grad_norm": 0.002521320478990674, - "learning_rate": 0.00019998007994296706, - "loss": 46.0, - "step": 39464 - }, - { - "epoch": 6.355569869962559, - "grad_norm": 0.0077145956456661224, - "learning_rate": 0.00019998007893320133, - "loss": 46.0, - "step": 39465 - }, - { - "epoch": 6.355730907041346, - "grad_norm": 0.002833607839420438, - "learning_rate": 0.00019998007792341004, - "loss": 46.0, - "step": 39466 - }, - { - "epoch": 6.355891944120134, - "grad_norm": 0.016394183039665222, - "learning_rate": 0.00019998007691359316, - "loss": 46.0, - "step": 39467 - }, - { - "epoch": 6.356052981198921, - "grad_norm": 0.012824208475649357, - "learning_rate": 0.0001999800759037507, - "loss": 46.0, - "step": 39468 - }, - { - "epoch": 6.356214018277709, - "grad_norm": 0.0016593087930232286, - "learning_rate": 0.00019998007489388262, - "loss": 46.0, - "step": 39469 - }, - { - "epoch": 6.356375055356496, - "grad_norm": 0.006278193090111017, - "learning_rate": 0.00019998007388398898, - "loss": 46.0, - "step": 39470 - }, - { - "epoch": 6.356536092435284, - "grad_norm": 0.006127243861556053, - "learning_rate": 0.00019998007287406973, - "loss": 46.0, - "step": 39471 - }, - { - "epoch": 6.356697129514071, - "grad_norm": 0.0033390542957931757, - "learning_rate": 0.0001999800718641249, - "loss": 46.0, - "step": 39472 - }, - { - "epoch": 6.356858166592858, - "grad_norm": 0.007892763242125511, - "learning_rate": 0.00019998007085415446, - "loss": 46.0, - "step": 39473 - }, - { - "epoch": 6.357019203671645, - "grad_norm": 0.008341191336512566, - "learning_rate": 0.00019998006984415847, - "loss": 46.0, - "step": 39474 - }, - { - "epoch": 6.3571802407504325, - "grad_norm": 0.0009239570936188102, - "learning_rate": 0.00019998006883413684, - "loss": 46.0, - "step": 39475 - }, - { - "epoch": 6.35734127782922, - "grad_norm": 0.002829850185662508, - "learning_rate": 0.00019998006782408968, - "loss": 46.0, - "step": 39476 - }, - { - "epoch": 6.357502314908007, - "grad_norm": 0.005340429488569498, - "learning_rate": 0.00019998006681401688, - "loss": 46.0, - "step": 39477 - }, - { - "epoch": 6.357663351986795, - "grad_norm": 0.0010472108842805028, - "learning_rate": 0.00019998006580391851, - "loss": 46.0, - "step": 39478 - }, - { - "epoch": 6.357824389065582, - "grad_norm": 0.002105482155457139, - "learning_rate": 0.00019998006479379456, - "loss": 46.0, - "step": 39479 - }, - { - "epoch": 6.35798542614437, - "grad_norm": 0.002203951356932521, - "learning_rate": 0.000199980063783645, - "loss": 46.0, - "step": 39480 - }, - { - "epoch": 6.358146463223157, - "grad_norm": 0.0024880466517060995, - "learning_rate": 0.00019998006277346985, - "loss": 46.0, - "step": 39481 - }, - { - "epoch": 6.358307500301945, - "grad_norm": 0.0020039756782352924, - "learning_rate": 0.00019998006176326913, - "loss": 46.0, - "step": 39482 - }, - { - "epoch": 6.358468537380732, - "grad_norm": 0.022428035736083984, - "learning_rate": 0.0001999800607530428, - "loss": 46.0, - "step": 39483 - }, - { - "epoch": 6.35862957445952, - "grad_norm": 0.0015898382989689708, - "learning_rate": 0.0001999800597427909, - "loss": 46.0, - "step": 39484 - }, - { - "epoch": 6.358790611538307, - "grad_norm": 0.007732732221484184, - "learning_rate": 0.0001999800587325134, - "loss": 46.0, - "step": 39485 - }, - { - "epoch": 6.3589516486170945, - "grad_norm": 0.002958668163046241, - "learning_rate": 0.0001999800577222103, - "loss": 46.0, - "step": 39486 - }, - { - "epoch": 6.359112685695881, - "grad_norm": 0.009270669892430305, - "learning_rate": 0.00019998005671188163, - "loss": 46.0, - "step": 39487 - }, - { - "epoch": 6.3592737227746685, - "grad_norm": 0.003521893871948123, - "learning_rate": 0.00019998005570152737, - "loss": 46.0, - "step": 39488 - }, - { - "epoch": 6.359434759853456, - "grad_norm": 0.007394652813673019, - "learning_rate": 0.0001999800546911475, - "loss": 46.0, - "step": 39489 - }, - { - "epoch": 6.359595796932243, - "grad_norm": 0.0035296056885272264, - "learning_rate": 0.00019998005368074205, - "loss": 46.0, - "step": 39490 - }, - { - "epoch": 6.359756834011031, - "grad_norm": 0.0020039433147758245, - "learning_rate": 0.000199980052670311, - "loss": 46.0, - "step": 39491 - }, - { - "epoch": 6.359917871089818, - "grad_norm": 0.013090742751955986, - "learning_rate": 0.0001999800516598544, - "loss": 46.0, - "step": 39492 - }, - { - "epoch": 6.360078908168606, - "grad_norm": 0.00434614485129714, - "learning_rate": 0.00019998005064937217, - "loss": 46.0, - "step": 39493 - }, - { - "epoch": 6.360239945247393, - "grad_norm": 0.005496986676007509, - "learning_rate": 0.00019998004963886436, - "loss": 46.0, - "step": 39494 - }, - { - "epoch": 6.360400982326181, - "grad_norm": 0.002192819956690073, - "learning_rate": 0.00019998004862833096, - "loss": 46.0, - "step": 39495 - }, - { - "epoch": 6.360562019404968, - "grad_norm": 0.02494894340634346, - "learning_rate": 0.00019998004761777197, - "loss": 46.0, - "step": 39496 - }, - { - "epoch": 6.360723056483756, - "grad_norm": 0.006052169483155012, - "learning_rate": 0.0001999800466071874, - "loss": 46.0, - "step": 39497 - }, - { - "epoch": 6.360884093562543, - "grad_norm": 0.016569942235946655, - "learning_rate": 0.00019998004559657723, - "loss": 46.0, - "step": 39498 - }, - { - "epoch": 6.3610451306413305, - "grad_norm": 0.005270159803330898, - "learning_rate": 0.00019998004458594146, - "loss": 46.0, - "step": 39499 - }, - { - "epoch": 6.361206167720118, - "grad_norm": 0.004607352893799543, - "learning_rate": 0.0001999800435752801, - "loss": 46.0, - "step": 39500 - }, - { - "epoch": 6.3613672047989045, - "grad_norm": 0.0014795513125136495, - "learning_rate": 0.0001999800425645932, - "loss": 46.0, - "step": 39501 - }, - { - "epoch": 6.361528241877692, - "grad_norm": 0.00941571407020092, - "learning_rate": 0.00019998004155388066, - "loss": 46.0, - "step": 39502 - }, - { - "epoch": 6.361689278956479, - "grad_norm": 0.0019849573727697134, - "learning_rate": 0.00019998004054314254, - "loss": 46.0, - "step": 39503 - }, - { - "epoch": 6.361850316035267, - "grad_norm": 0.004644973669201136, - "learning_rate": 0.00019998003953237886, - "loss": 46.0, - "step": 39504 - }, - { - "epoch": 6.362011353114054, - "grad_norm": 0.0060126567259430885, - "learning_rate": 0.00019998003852158953, - "loss": 46.0, - "step": 39505 - }, - { - "epoch": 6.362172390192842, - "grad_norm": 0.006208830047398806, - "learning_rate": 0.00019998003751077464, - "loss": 46.0, - "step": 39506 - }, - { - "epoch": 6.362333427271629, - "grad_norm": 0.004187299869954586, - "learning_rate": 0.00019998003649993417, - "loss": 46.0, - "step": 39507 - }, - { - "epoch": 6.362494464350417, - "grad_norm": 0.003422250971198082, - "learning_rate": 0.00019998003548906808, - "loss": 46.0, - "step": 39508 - }, - { - "epoch": 6.362655501429204, - "grad_norm": 0.003779014805331826, - "learning_rate": 0.00019998003447817646, - "loss": 46.0, - "step": 39509 - }, - { - "epoch": 6.362816538507992, - "grad_norm": 0.0063432808965444565, - "learning_rate": 0.0001999800334672592, - "loss": 46.0, - "step": 39510 - }, - { - "epoch": 6.362977575586779, - "grad_norm": 0.0014192431699484587, - "learning_rate": 0.00019998003245631638, - "loss": 46.0, - "step": 39511 - }, - { - "epoch": 6.3631386126655665, - "grad_norm": 0.001706940121948719, - "learning_rate": 0.00019998003144534797, - "loss": 46.0, - "step": 39512 - }, - { - "epoch": 6.363299649744354, - "grad_norm": 0.01604827307164669, - "learning_rate": 0.00019998003043435395, - "loss": 46.0, - "step": 39513 - }, - { - "epoch": 6.363460686823141, - "grad_norm": 0.013224104419350624, - "learning_rate": 0.00019998002942333434, - "loss": 46.0, - "step": 39514 - }, - { - "epoch": 6.363621723901929, - "grad_norm": 0.004361344501376152, - "learning_rate": 0.00019998002841228914, - "loss": 46.0, - "step": 39515 - }, - { - "epoch": 6.363782760980715, - "grad_norm": 0.0021434673108160496, - "learning_rate": 0.00019998002740121839, - "loss": 46.0, - "step": 39516 - }, - { - "epoch": 6.363943798059503, - "grad_norm": 0.002538310829550028, - "learning_rate": 0.000199980026390122, - "loss": 46.0, - "step": 39517 - }, - { - "epoch": 6.36410483513829, - "grad_norm": 0.0021049438510090113, - "learning_rate": 0.00019998002537900003, - "loss": 46.0, - "step": 39518 - }, - { - "epoch": 6.364265872217078, - "grad_norm": 0.002298015169799328, - "learning_rate": 0.00019998002436785246, - "loss": 46.0, - "step": 39519 - }, - { - "epoch": 6.364426909295865, - "grad_norm": 0.0041793943382799625, - "learning_rate": 0.00019998002335667935, - "loss": 46.0, - "step": 39520 - }, - { - "epoch": 6.364587946374653, - "grad_norm": 0.0015891504008322954, - "learning_rate": 0.0001999800223454806, - "loss": 46.0, - "step": 39521 - }, - { - "epoch": 6.36474898345344, - "grad_norm": 0.010709723457694054, - "learning_rate": 0.0001999800213342563, - "loss": 46.0, - "step": 39522 - }, - { - "epoch": 6.3649100205322275, - "grad_norm": 0.0045154886320233345, - "learning_rate": 0.00019998002032300635, - "loss": 46.0, - "step": 39523 - }, - { - "epoch": 6.365071057611015, - "grad_norm": 0.005094493739306927, - "learning_rate": 0.00019998001931173086, - "loss": 46.0, - "step": 39524 - }, - { - "epoch": 6.3652320946898024, - "grad_norm": 0.013623693957924843, - "learning_rate": 0.00019998001830042977, - "loss": 46.0, - "step": 39525 - }, - { - "epoch": 6.36539313176859, - "grad_norm": 0.010770655237138271, - "learning_rate": 0.00019998001728910308, - "loss": 46.0, - "step": 39526 - }, - { - "epoch": 6.365554168847377, - "grad_norm": 0.004940268117934465, - "learning_rate": 0.00019998001627775081, - "loss": 46.0, - "step": 39527 - }, - { - "epoch": 6.365715205926165, - "grad_norm": 0.0054796659387648106, - "learning_rate": 0.00019998001526637298, - "loss": 46.0, - "step": 39528 - }, - { - "epoch": 6.365876243004952, - "grad_norm": 0.008328541181981564, - "learning_rate": 0.00019998001425496954, - "loss": 46.0, - "step": 39529 - }, - { - "epoch": 6.36603728008374, - "grad_norm": 0.01888740248978138, - "learning_rate": 0.00019998001324354048, - "loss": 46.0, - "step": 39530 - }, - { - "epoch": 6.366198317162526, - "grad_norm": 0.00396219827234745, - "learning_rate": 0.00019998001223208583, - "loss": 46.0, - "step": 39531 - }, - { - "epoch": 6.366359354241314, - "grad_norm": 0.012622925452888012, - "learning_rate": 0.0001999800112206056, - "loss": 46.0, - "step": 39532 - }, - { - "epoch": 6.366520391320101, - "grad_norm": 0.015233221463859081, - "learning_rate": 0.00019998001020909983, - "loss": 46.0, - "step": 39533 - }, - { - "epoch": 6.366681428398889, - "grad_norm": 0.00618281913921237, - "learning_rate": 0.0001999800091975684, - "loss": 46.0, - "step": 39534 - }, - { - "epoch": 6.366842465477676, - "grad_norm": 0.004907556343823671, - "learning_rate": 0.00019998000818601143, - "loss": 46.0, - "step": 39535 - }, - { - "epoch": 6.3670035025564635, - "grad_norm": 0.0021349135786294937, - "learning_rate": 0.00019998000717442885, - "loss": 46.0, - "step": 39536 - }, - { - "epoch": 6.367164539635251, - "grad_norm": 0.0018690564902499318, - "learning_rate": 0.00019998000616282068, - "loss": 46.0, - "step": 39537 - }, - { - "epoch": 6.367325576714038, - "grad_norm": 0.003193999407812953, - "learning_rate": 0.00019998000515118692, - "loss": 46.0, - "step": 39538 - }, - { - "epoch": 6.367486613792826, - "grad_norm": 0.002504838164895773, - "learning_rate": 0.00019998000413952758, - "loss": 46.0, - "step": 39539 - }, - { - "epoch": 6.367647650871613, - "grad_norm": 0.0014109468320384622, - "learning_rate": 0.00019998000312784265, - "loss": 46.0, - "step": 39540 - }, - { - "epoch": 6.367808687950401, - "grad_norm": 0.000765151169616729, - "learning_rate": 0.00019998000211613213, - "loss": 46.0, - "step": 39541 - }, - { - "epoch": 6.367969725029188, - "grad_norm": 0.0011773803271353245, - "learning_rate": 0.000199980001104396, - "loss": 46.0, - "step": 39542 - }, - { - "epoch": 6.368130762107976, - "grad_norm": 0.0008933040080592036, - "learning_rate": 0.00019998000009263428, - "loss": 46.0, - "step": 39543 - }, - { - "epoch": 6.368291799186763, - "grad_norm": 0.0034444809425622225, - "learning_rate": 0.000199979999080847, - "loss": 46.0, - "step": 39544 - }, - { - "epoch": 6.368452836265551, - "grad_norm": 0.0028496696613729, - "learning_rate": 0.0001999799980690341, - "loss": 46.0, - "step": 39545 - }, - { - "epoch": 6.368613873344337, - "grad_norm": 0.01163305714726448, - "learning_rate": 0.00019997999705719563, - "loss": 46.0, - "step": 39546 - }, - { - "epoch": 6.368774910423125, - "grad_norm": 0.0006827443139627576, - "learning_rate": 0.00019997999604533159, - "loss": 46.0, - "step": 39547 - }, - { - "epoch": 6.368935947501912, - "grad_norm": 0.003109041368588805, - "learning_rate": 0.0001999799950334419, - "loss": 46.0, - "step": 39548 - }, - { - "epoch": 6.3690969845806995, - "grad_norm": 0.006627863738685846, - "learning_rate": 0.00019997999402152666, - "loss": 46.0, - "step": 39549 - }, - { - "epoch": 6.369258021659487, - "grad_norm": 0.011682174168527126, - "learning_rate": 0.00019997999300958583, - "loss": 46.0, - "step": 39550 - }, - { - "epoch": 6.369419058738274, - "grad_norm": 0.0022269426845014095, - "learning_rate": 0.00019997999199761941, - "loss": 46.0, - "step": 39551 - }, - { - "epoch": 6.369580095817062, - "grad_norm": 0.020668867975473404, - "learning_rate": 0.00019997999098562738, - "loss": 46.0, - "step": 39552 - }, - { - "epoch": 6.369741132895849, - "grad_norm": 0.0013435936998575926, - "learning_rate": 0.0001999799899736098, - "loss": 46.0, - "step": 39553 - }, - { - "epoch": 6.369902169974637, - "grad_norm": 0.005933010950684547, - "learning_rate": 0.0001999799889615666, - "loss": 46.0, - "step": 39554 - }, - { - "epoch": 6.370063207053424, - "grad_norm": 0.006973384879529476, - "learning_rate": 0.00019997998794949782, - "loss": 46.0, - "step": 39555 - }, - { - "epoch": 6.370224244132212, - "grad_norm": 0.005510346964001656, - "learning_rate": 0.00019997998693740344, - "loss": 46.0, - "step": 39556 - }, - { - "epoch": 6.370385281210999, - "grad_norm": 0.004914383869618177, - "learning_rate": 0.00019997998592528348, - "loss": 46.0, - "step": 39557 - }, - { - "epoch": 6.370546318289787, - "grad_norm": 0.009859933517873287, - "learning_rate": 0.00019997998491313795, - "loss": 46.0, - "step": 39558 - }, - { - "epoch": 6.370707355368574, - "grad_norm": 0.012067588977515697, - "learning_rate": 0.00019997998390096678, - "loss": 46.0, - "step": 39559 - }, - { - "epoch": 6.3708683924473615, - "grad_norm": 0.005373584106564522, - "learning_rate": 0.00019997998288877008, - "loss": 46.0, - "step": 39560 - }, - { - "epoch": 6.371029429526148, - "grad_norm": 0.009342162869870663, - "learning_rate": 0.0001999799818765477, - "loss": 46.0, - "step": 39561 - }, - { - "epoch": 6.3711904666049355, - "grad_norm": 0.00786909181624651, - "learning_rate": 0.00019997998086429983, - "loss": 46.0, - "step": 39562 - }, - { - "epoch": 6.371351503683723, - "grad_norm": 0.00831774715334177, - "learning_rate": 0.00019997997985202632, - "loss": 46.0, - "step": 39563 - }, - { - "epoch": 6.37151254076251, - "grad_norm": 0.006877412088215351, - "learning_rate": 0.00019997997883972724, - "loss": 46.0, - "step": 39564 - }, - { - "epoch": 6.371673577841298, - "grad_norm": 0.02035493776202202, - "learning_rate": 0.00019997997782740255, - "loss": 46.0, - "step": 39565 - }, - { - "epoch": 6.371834614920085, - "grad_norm": 0.0028133559972047806, - "learning_rate": 0.00019997997681505227, - "loss": 46.0, - "step": 39566 - }, - { - "epoch": 6.371995651998873, - "grad_norm": 0.014005298726260662, - "learning_rate": 0.0001999799758026764, - "loss": 46.0, - "step": 39567 - }, - { - "epoch": 6.37215668907766, - "grad_norm": 0.0031042341142892838, - "learning_rate": 0.00019997997479027495, - "loss": 46.0, - "step": 39568 - }, - { - "epoch": 6.372317726156448, - "grad_norm": 0.0029636898543685675, - "learning_rate": 0.0001999799737778479, - "loss": 46.0, - "step": 39569 - }, - { - "epoch": 6.372478763235235, - "grad_norm": 0.0021732032764703035, - "learning_rate": 0.0001999799727653953, - "loss": 46.0, - "step": 39570 - }, - { - "epoch": 6.3726398003140226, - "grad_norm": 0.010055347345769405, - "learning_rate": 0.00019997997175291707, - "loss": 46.0, - "step": 39571 - }, - { - "epoch": 6.37280083739281, - "grad_norm": 0.001629504607990384, - "learning_rate": 0.00019997997074041327, - "loss": 46.0, - "step": 39572 - }, - { - "epoch": 6.3729618744715975, - "grad_norm": 0.0034732099156826735, - "learning_rate": 0.00019997996972788385, - "loss": 46.0, - "step": 39573 - }, - { - "epoch": 6.373122911550384, - "grad_norm": 0.0012738791992887855, - "learning_rate": 0.0001999799687153289, - "loss": 46.0, - "step": 39574 - }, - { - "epoch": 6.3732839486291715, - "grad_norm": 0.008240816183388233, - "learning_rate": 0.0001999799677027483, - "loss": 46.0, - "step": 39575 - }, - { - "epoch": 6.373444985707959, - "grad_norm": 0.002950426656752825, - "learning_rate": 0.0001999799666901421, - "loss": 46.0, - "step": 39576 - }, - { - "epoch": 6.373606022786746, - "grad_norm": 0.01027581375092268, - "learning_rate": 0.00019997996567751037, - "loss": 46.0, - "step": 39577 - }, - { - "epoch": 6.373767059865534, - "grad_norm": 0.014655972830951214, - "learning_rate": 0.00019997996466485302, - "loss": 46.0, - "step": 39578 - }, - { - "epoch": 6.373928096944321, - "grad_norm": 0.00414480222389102, - "learning_rate": 0.00019997996365217008, - "loss": 46.0, - "step": 39579 - }, - { - "epoch": 6.374089134023109, - "grad_norm": 0.0041672587394714355, - "learning_rate": 0.00019997996263946158, - "loss": 46.0, - "step": 39580 - }, - { - "epoch": 6.374250171101896, - "grad_norm": 0.01014675386250019, - "learning_rate": 0.00019997996162672744, - "loss": 46.0, - "step": 39581 - }, - { - "epoch": 6.374411208180684, - "grad_norm": 0.008969432674348354, - "learning_rate": 0.00019997996061396774, - "loss": 46.0, - "step": 39582 - }, - { - "epoch": 6.374572245259471, - "grad_norm": 0.002961366903036833, - "learning_rate": 0.00019997995960118243, - "loss": 46.0, - "step": 39583 - }, - { - "epoch": 6.3747332823382585, - "grad_norm": 0.003803764469921589, - "learning_rate": 0.00019997995858837158, - "loss": 46.0, - "step": 39584 - }, - { - "epoch": 6.374894319417046, - "grad_norm": 0.0049678306095302105, - "learning_rate": 0.0001999799575755351, - "loss": 46.0, - "step": 39585 - }, - { - "epoch": 6.375055356495833, - "grad_norm": 0.008770765736699104, - "learning_rate": 0.00019997995656267304, - "loss": 46.0, - "step": 39586 - }, - { - "epoch": 6.375216393574621, - "grad_norm": 0.0038183191791176796, - "learning_rate": 0.00019997995554978535, - "loss": 46.0, - "step": 39587 - }, - { - "epoch": 6.375377430653408, - "grad_norm": 0.008817486464977264, - "learning_rate": 0.00019997995453687213, - "loss": 46.0, - "step": 39588 - }, - { - "epoch": 6.375538467732195, - "grad_norm": 0.004258319735527039, - "learning_rate": 0.0001999799535239333, - "loss": 46.0, - "step": 39589 - }, - { - "epoch": 6.375699504810982, - "grad_norm": 0.0024216873571276665, - "learning_rate": 0.00019997995251096887, - "loss": 46.0, - "step": 39590 - }, - { - "epoch": 6.37586054188977, - "grad_norm": 0.0012228693813085556, - "learning_rate": 0.00019997995149797885, - "loss": 46.0, - "step": 39591 - }, - { - "epoch": 6.376021578968557, - "grad_norm": 0.0012588078388944268, - "learning_rate": 0.00019997995048496323, - "loss": 46.0, - "step": 39592 - }, - { - "epoch": 6.376182616047345, - "grad_norm": 0.013182600028812885, - "learning_rate": 0.00019997994947192204, - "loss": 46.0, - "step": 39593 - }, - { - "epoch": 6.376343653126132, - "grad_norm": 0.0024391391780227423, - "learning_rate": 0.00019997994845885527, - "loss": 46.0, - "step": 39594 - }, - { - "epoch": 6.37650469020492, - "grad_norm": 0.0009883253369480371, - "learning_rate": 0.0001999799474457629, - "loss": 46.0, - "step": 39595 - }, - { - "epoch": 6.376665727283707, - "grad_norm": 0.0027416495140641928, - "learning_rate": 0.00019997994643264493, - "loss": 46.0, - "step": 39596 - }, - { - "epoch": 6.3768267643624945, - "grad_norm": 0.01627153903245926, - "learning_rate": 0.00019997994541950137, - "loss": 46.0, - "step": 39597 - }, - { - "epoch": 6.376987801441282, - "grad_norm": 0.008926328271627426, - "learning_rate": 0.00019997994440633222, - "loss": 46.0, - "step": 39598 - }, - { - "epoch": 6.377148838520069, - "grad_norm": 0.005098787136375904, - "learning_rate": 0.0001999799433931375, - "loss": 46.0, - "step": 39599 - }, - { - "epoch": 6.377309875598857, - "grad_norm": 0.00829018373042345, - "learning_rate": 0.00019997994237991716, - "loss": 46.0, - "step": 39600 - }, - { - "epoch": 6.377470912677644, - "grad_norm": 0.003187085734680295, - "learning_rate": 0.00019997994136667125, - "loss": 46.0, - "step": 39601 - }, - { - "epoch": 6.377631949756432, - "grad_norm": 0.02408532053232193, - "learning_rate": 0.00019997994035339978, - "loss": 46.0, - "step": 39602 - }, - { - "epoch": 6.377792986835219, - "grad_norm": 0.0139978202059865, - "learning_rate": 0.0001999799393401027, - "loss": 46.0, - "step": 39603 - }, - { - "epoch": 6.377954023914006, - "grad_norm": 0.0023415570612996817, - "learning_rate": 0.00019997993832678, - "loss": 46.0, - "step": 39604 - }, - { - "epoch": 6.378115060992793, - "grad_norm": 0.007179120555520058, - "learning_rate": 0.0001999799373134317, - "loss": 46.0, - "step": 39605 - }, - { - "epoch": 6.378276098071581, - "grad_norm": 0.008959531784057617, - "learning_rate": 0.00019997993630005786, - "loss": 46.0, - "step": 39606 - }, - { - "epoch": 6.378437135150368, - "grad_norm": 0.001087911194190383, - "learning_rate": 0.00019997993528665843, - "loss": 46.0, - "step": 39607 - }, - { - "epoch": 6.378598172229156, - "grad_norm": 0.008189628832042217, - "learning_rate": 0.00019997993427323338, - "loss": 46.0, - "step": 39608 - }, - { - "epoch": 6.378759209307943, - "grad_norm": 0.0056917001493275166, - "learning_rate": 0.00019997993325978274, - "loss": 46.0, - "step": 39609 - }, - { - "epoch": 6.3789202463867305, - "grad_norm": 0.005968928802758455, - "learning_rate": 0.00019997993224630652, - "loss": 46.0, - "step": 39610 - }, - { - "epoch": 6.379081283465518, - "grad_norm": 0.00679363077506423, - "learning_rate": 0.0001999799312328047, - "loss": 46.0, - "step": 39611 - }, - { - "epoch": 6.379242320544305, - "grad_norm": 0.0041407993994653225, - "learning_rate": 0.00019997993021927732, - "loss": 46.0, - "step": 39612 - }, - { - "epoch": 6.379403357623093, - "grad_norm": 0.00477386312559247, - "learning_rate": 0.00019997992920572433, - "loss": 46.0, - "step": 39613 - }, - { - "epoch": 6.37956439470188, - "grad_norm": 0.02691279724240303, - "learning_rate": 0.00019997992819214576, - "loss": 46.0, - "step": 39614 - }, - { - "epoch": 6.379725431780668, - "grad_norm": 0.004952750168740749, - "learning_rate": 0.0001999799271785416, - "loss": 46.0, - "step": 39615 - }, - { - "epoch": 6.379886468859455, - "grad_norm": 0.014814638532698154, - "learning_rate": 0.00019997992616491183, - "loss": 46.0, - "step": 39616 - }, - { - "epoch": 6.380047505938243, - "grad_norm": 0.0016894518630579114, - "learning_rate": 0.0001999799251512565, - "loss": 46.0, - "step": 39617 - }, - { - "epoch": 6.38020854301703, - "grad_norm": 0.0017963474383577704, - "learning_rate": 0.00019997992413757552, - "loss": 46.0, - "step": 39618 - }, - { - "epoch": 6.380369580095817, - "grad_norm": 0.0059068575501441956, - "learning_rate": 0.00019997992312386902, - "loss": 46.0, - "step": 39619 - }, - { - "epoch": 6.380530617174604, - "grad_norm": 0.008470118045806885, - "learning_rate": 0.00019997992211013692, - "loss": 46.0, - "step": 39620 - }, - { - "epoch": 6.380691654253392, - "grad_norm": 0.0018899007700383663, - "learning_rate": 0.0001999799210963792, - "loss": 46.0, - "step": 39621 - }, - { - "epoch": 6.380852691332179, - "grad_norm": 0.001159351202659309, - "learning_rate": 0.0001999799200825959, - "loss": 46.0, - "step": 39622 - }, - { - "epoch": 6.3810137284109665, - "grad_norm": 0.007207023445516825, - "learning_rate": 0.000199979919068787, - "loss": 46.0, - "step": 39623 - }, - { - "epoch": 6.381174765489754, - "grad_norm": 0.004233041312545538, - "learning_rate": 0.00019997991805495257, - "loss": 46.0, - "step": 39624 - }, - { - "epoch": 6.381335802568541, - "grad_norm": 0.00571990804746747, - "learning_rate": 0.00019997991704109248, - "loss": 46.0, - "step": 39625 - }, - { - "epoch": 6.381496839647329, - "grad_norm": 0.002765171229839325, - "learning_rate": 0.0001999799160272068, - "loss": 46.0, - "step": 39626 - }, - { - "epoch": 6.381657876726116, - "grad_norm": 0.007528841495513916, - "learning_rate": 0.00019997991501329558, - "loss": 46.0, - "step": 39627 - }, - { - "epoch": 6.381818913804904, - "grad_norm": 0.005357755813747644, - "learning_rate": 0.00019997991399935873, - "loss": 46.0, - "step": 39628 - }, - { - "epoch": 6.381979950883691, - "grad_norm": 0.004060770384967327, - "learning_rate": 0.0001999799129853963, - "loss": 46.0, - "step": 39629 - }, - { - "epoch": 6.382140987962479, - "grad_norm": 0.008046452887356281, - "learning_rate": 0.0001999799119714083, - "loss": 46.0, - "step": 39630 - }, - { - "epoch": 6.382302025041266, - "grad_norm": 0.006763724144548178, - "learning_rate": 0.0001999799109573947, - "loss": 46.0, - "step": 39631 - }, - { - "epoch": 6.3824630621200535, - "grad_norm": 0.006087682209908962, - "learning_rate": 0.0001999799099433555, - "loss": 46.0, - "step": 39632 - }, - { - "epoch": 6.382624099198841, - "grad_norm": 0.003705762792378664, - "learning_rate": 0.00019997990892929072, - "loss": 46.0, - "step": 39633 - }, - { - "epoch": 6.382785136277628, - "grad_norm": 0.006714996881783009, - "learning_rate": 0.00019997990791520035, - "loss": 46.0, - "step": 39634 - }, - { - "epoch": 6.382946173356415, - "grad_norm": 0.005154167767614126, - "learning_rate": 0.0001999799069010844, - "loss": 46.0, - "step": 39635 - }, - { - "epoch": 6.3831072104352025, - "grad_norm": 0.00879506766796112, - "learning_rate": 0.00019997990588694283, - "loss": 46.0, - "step": 39636 - }, - { - "epoch": 6.38326824751399, - "grad_norm": 0.022557592019438744, - "learning_rate": 0.0001999799048727757, - "loss": 46.0, - "step": 39637 - }, - { - "epoch": 6.383429284592777, - "grad_norm": 0.0012223473750054836, - "learning_rate": 0.00019997990385858295, - "loss": 46.0, - "step": 39638 - }, - { - "epoch": 6.383590321671565, - "grad_norm": 0.0034364170860499144, - "learning_rate": 0.00019997990284436465, - "loss": 46.0, - "step": 39639 - }, - { - "epoch": 6.383751358750352, - "grad_norm": 0.014220552518963814, - "learning_rate": 0.00019997990183012073, - "loss": 46.0, - "step": 39640 - }, - { - "epoch": 6.38391239582914, - "grad_norm": 0.0046723405830562115, - "learning_rate": 0.00019997990081585122, - "loss": 46.0, - "step": 39641 - }, - { - "epoch": 6.384073432907927, - "grad_norm": 0.007212279364466667, - "learning_rate": 0.00019997989980155613, - "loss": 46.0, - "step": 39642 - }, - { - "epoch": 6.384234469986715, - "grad_norm": 0.002519486704841256, - "learning_rate": 0.00019997989878723548, - "loss": 46.0, - "step": 39643 - }, - { - "epoch": 6.384395507065502, - "grad_norm": 0.0023943139240145683, - "learning_rate": 0.00019997989777288918, - "loss": 46.0, - "step": 39644 - }, - { - "epoch": 6.3845565441442895, - "grad_norm": 0.0026246444322168827, - "learning_rate": 0.00019997989675851733, - "loss": 46.0, - "step": 39645 - }, - { - "epoch": 6.384717581223077, - "grad_norm": 0.01296649593859911, - "learning_rate": 0.0001999798957441199, - "loss": 46.0, - "step": 39646 - }, - { - "epoch": 6.3848786183018635, - "grad_norm": 0.004096482414752245, - "learning_rate": 0.00019997989472969686, - "loss": 46.0, - "step": 39647 - }, - { - "epoch": 6.385039655380651, - "grad_norm": 0.00932546891272068, - "learning_rate": 0.00019997989371524821, - "loss": 46.0, - "step": 39648 - }, - { - "epoch": 6.385200692459438, - "grad_norm": 0.005613727029412985, - "learning_rate": 0.00019997989270077398, - "loss": 46.0, - "step": 39649 - }, - { - "epoch": 6.385361729538226, - "grad_norm": 0.004359958693385124, - "learning_rate": 0.0001999798916862742, - "loss": 46.0, - "step": 39650 - }, - { - "epoch": 6.385522766617013, - "grad_norm": 0.0027462400030344725, - "learning_rate": 0.00019997989067174882, - "loss": 46.0, - "step": 39651 - }, - { - "epoch": 6.385683803695801, - "grad_norm": 0.0024416567757725716, - "learning_rate": 0.0001999798896571978, - "loss": 46.0, - "step": 39652 - }, - { - "epoch": 6.385844840774588, - "grad_norm": 0.007831353694200516, - "learning_rate": 0.00019997988864262122, - "loss": 46.0, - "step": 39653 - }, - { - "epoch": 6.386005877853376, - "grad_norm": 0.01293005608022213, - "learning_rate": 0.00019997988762801905, - "loss": 46.0, - "step": 39654 - }, - { - "epoch": 6.386166914932163, - "grad_norm": 0.013343794271349907, - "learning_rate": 0.0001999798866133913, - "loss": 46.0, - "step": 39655 - }, - { - "epoch": 6.386327952010951, - "grad_norm": 0.002792490879073739, - "learning_rate": 0.00019997988559873796, - "loss": 46.0, - "step": 39656 - }, - { - "epoch": 6.386488989089738, - "grad_norm": 0.005933757405728102, - "learning_rate": 0.00019997988458405903, - "loss": 46.0, - "step": 39657 - }, - { - "epoch": 6.3866500261685255, - "grad_norm": 0.0017501752590760589, - "learning_rate": 0.00019997988356935449, - "loss": 46.0, - "step": 39658 - }, - { - "epoch": 6.386811063247313, - "grad_norm": 0.002971403067931533, - "learning_rate": 0.00019997988255462436, - "loss": 46.0, - "step": 39659 - }, - { - "epoch": 6.3869721003261, - "grad_norm": 0.006849338300526142, - "learning_rate": 0.00019997988153986867, - "loss": 46.0, - "step": 39660 - }, - { - "epoch": 6.387133137404888, - "grad_norm": 0.008996205404400826, - "learning_rate": 0.0001999798805250874, - "loss": 46.0, - "step": 39661 - }, - { - "epoch": 6.387294174483674, - "grad_norm": 0.005194155965000391, - "learning_rate": 0.00019997987951028047, - "loss": 46.0, - "step": 39662 - }, - { - "epoch": 6.387455211562462, - "grad_norm": 0.0013714199885725975, - "learning_rate": 0.00019997987849544802, - "loss": 46.0, - "step": 39663 - }, - { - "epoch": 6.387616248641249, - "grad_norm": 0.009962350130081177, - "learning_rate": 0.00019997987748058996, - "loss": 46.0, - "step": 39664 - }, - { - "epoch": 6.387777285720037, - "grad_norm": 0.004367387853562832, - "learning_rate": 0.00019997987646570628, - "loss": 46.0, - "step": 39665 - }, - { - "epoch": 6.387938322798824, - "grad_norm": 0.008136908523738384, - "learning_rate": 0.00019997987545079706, - "loss": 46.0, - "step": 39666 - }, - { - "epoch": 6.388099359877612, - "grad_norm": 0.0021178254391998053, - "learning_rate": 0.0001999798744358622, - "loss": 46.0, - "step": 39667 - }, - { - "epoch": 6.388260396956399, - "grad_norm": 0.020958187058568, - "learning_rate": 0.0001999798734209018, - "loss": 46.0, - "step": 39668 - }, - { - "epoch": 6.388421434035187, - "grad_norm": 0.0017117172246798873, - "learning_rate": 0.00019997987240591577, - "loss": 46.0, - "step": 39669 - }, - { - "epoch": 6.388582471113974, - "grad_norm": 0.004571861587464809, - "learning_rate": 0.0001999798713909042, - "loss": 46.0, - "step": 39670 - }, - { - "epoch": 6.3887435081927615, - "grad_norm": 0.006385928951203823, - "learning_rate": 0.00019997987037586698, - "loss": 46.0, - "step": 39671 - }, - { - "epoch": 6.388904545271549, - "grad_norm": 0.026783538982272148, - "learning_rate": 0.0001999798693608042, - "loss": 46.0, - "step": 39672 - }, - { - "epoch": 6.389065582350336, - "grad_norm": 0.0008000983507372439, - "learning_rate": 0.00019997986834571584, - "loss": 46.0, - "step": 39673 - }, - { - "epoch": 6.389226619429124, - "grad_norm": 0.0019062068313360214, - "learning_rate": 0.00019997986733060187, - "loss": 46.0, - "step": 39674 - }, - { - "epoch": 6.389387656507911, - "grad_norm": 0.01616768166422844, - "learning_rate": 0.00019997986631546232, - "loss": 46.0, - "step": 39675 - }, - { - "epoch": 6.389548693586699, - "grad_norm": 0.010500124655663967, - "learning_rate": 0.00019997986530029719, - "loss": 46.0, - "step": 39676 - }, - { - "epoch": 6.389709730665485, - "grad_norm": 0.004461308009922504, - "learning_rate": 0.00019997986428510646, - "loss": 46.0, - "step": 39677 - }, - { - "epoch": 6.389870767744273, - "grad_norm": 0.001676552346907556, - "learning_rate": 0.00019997986326989012, - "loss": 46.0, - "step": 39678 - }, - { - "epoch": 6.39003180482306, - "grad_norm": 0.00176837551407516, - "learning_rate": 0.00019997986225464822, - "loss": 46.0, - "step": 39679 - }, - { - "epoch": 6.390192841901848, - "grad_norm": 0.004148111212998629, - "learning_rate": 0.00019997986123938073, - "loss": 46.0, - "step": 39680 - }, - { - "epoch": 6.390353878980635, - "grad_norm": 0.0026615133974701166, - "learning_rate": 0.00019997986022408763, - "loss": 46.0, - "step": 39681 - }, - { - "epoch": 6.390514916059423, - "grad_norm": 0.0034285816363990307, - "learning_rate": 0.00019997985920876897, - "loss": 46.0, - "step": 39682 - }, - { - "epoch": 6.39067595313821, - "grad_norm": 0.007380757946521044, - "learning_rate": 0.0001999798581934247, - "loss": 46.0, - "step": 39683 - }, - { - "epoch": 6.3908369902169975, - "grad_norm": 0.0022231696639209986, - "learning_rate": 0.00019997985717805483, - "loss": 46.0, - "step": 39684 - }, - { - "epoch": 6.390998027295785, - "grad_norm": 0.003195648081600666, - "learning_rate": 0.0001999798561626594, - "loss": 46.0, - "step": 39685 - }, - { - "epoch": 6.391159064374572, - "grad_norm": 0.0016613835468888283, - "learning_rate": 0.00019997985514723834, - "loss": 46.0, - "step": 39686 - }, - { - "epoch": 6.39132010145336, - "grad_norm": 0.002278456464409828, - "learning_rate": 0.0001999798541317917, - "loss": 46.0, - "step": 39687 - }, - { - "epoch": 6.391481138532147, - "grad_norm": 0.001075371983461082, - "learning_rate": 0.0001999798531163195, - "loss": 46.0, - "step": 39688 - }, - { - "epoch": 6.391642175610935, - "grad_norm": 0.014691442251205444, - "learning_rate": 0.00019997985210082168, - "loss": 46.0, - "step": 39689 - }, - { - "epoch": 6.391803212689722, - "grad_norm": 0.009903491474688053, - "learning_rate": 0.0001999798510852983, - "loss": 46.0, - "step": 39690 - }, - { - "epoch": 6.39196424976851, - "grad_norm": 0.0012265342520549893, - "learning_rate": 0.0001999798500697493, - "loss": 46.0, - "step": 39691 - }, - { - "epoch": 6.392125286847296, - "grad_norm": 0.00846842210739851, - "learning_rate": 0.00019997984905417474, - "loss": 46.0, - "step": 39692 - }, - { - "epoch": 6.392286323926084, - "grad_norm": 0.004979741759598255, - "learning_rate": 0.00019997984803857456, - "loss": 46.0, - "step": 39693 - }, - { - "epoch": 6.392447361004871, - "grad_norm": 0.00590496463701129, - "learning_rate": 0.0001999798470229488, - "loss": 46.0, - "step": 39694 - }, - { - "epoch": 6.3926083980836585, - "grad_norm": 0.009984750300645828, - "learning_rate": 0.00019997984600729748, - "loss": 46.0, - "step": 39695 - }, - { - "epoch": 6.392769435162446, - "grad_norm": 0.01124607678502798, - "learning_rate": 0.00019997984499162052, - "loss": 46.0, - "step": 39696 - }, - { - "epoch": 6.3929304722412335, - "grad_norm": 0.002240577945485711, - "learning_rate": 0.000199979843975918, - "loss": 46.0, - "step": 39697 - }, - { - "epoch": 6.393091509320021, - "grad_norm": 0.013211602345108986, - "learning_rate": 0.00019997984296018988, - "loss": 46.0, - "step": 39698 - }, - { - "epoch": 6.393252546398808, - "grad_norm": 0.00550662400200963, - "learning_rate": 0.00019997984194443618, - "loss": 46.0, - "step": 39699 - }, - { - "epoch": 6.393413583477596, - "grad_norm": 0.00861713569611311, - "learning_rate": 0.0001999798409286569, - "loss": 46.0, - "step": 39700 - }, - { - "epoch": 6.393574620556383, - "grad_norm": 0.004874045494943857, - "learning_rate": 0.000199979839912852, - "loss": 46.0, - "step": 39701 - }, - { - "epoch": 6.393735657635171, - "grad_norm": 0.007115269545465708, - "learning_rate": 0.00019997983889702154, - "loss": 46.0, - "step": 39702 - }, - { - "epoch": 6.393896694713958, - "grad_norm": 0.011768237687647343, - "learning_rate": 0.00019997983788116547, - "loss": 46.0, - "step": 39703 - }, - { - "epoch": 6.394057731792746, - "grad_norm": 0.0038908589631319046, - "learning_rate": 0.00019997983686528384, - "loss": 46.0, - "step": 39704 - }, - { - "epoch": 6.394218768871533, - "grad_norm": 0.0010088979033753276, - "learning_rate": 0.00019997983584937656, - "loss": 46.0, - "step": 39705 - }, - { - "epoch": 6.3943798059503205, - "grad_norm": 0.0034306878224015236, - "learning_rate": 0.00019997983483344375, - "loss": 46.0, - "step": 39706 - }, - { - "epoch": 6.394540843029107, - "grad_norm": 0.0018780187238007784, - "learning_rate": 0.00019997983381748533, - "loss": 46.0, - "step": 39707 - }, - { - "epoch": 6.3947018801078945, - "grad_norm": 0.003269060980528593, - "learning_rate": 0.00019997983280150132, - "loss": 46.0, - "step": 39708 - }, - { - "epoch": 6.394862917186682, - "grad_norm": 0.0015705307014286518, - "learning_rate": 0.0001999798317854917, - "loss": 46.0, - "step": 39709 - }, - { - "epoch": 6.395023954265469, - "grad_norm": 0.005281734745949507, - "learning_rate": 0.00019997983076945651, - "loss": 46.0, - "step": 39710 - }, - { - "epoch": 6.395184991344257, - "grad_norm": 0.0012669767020270228, - "learning_rate": 0.00019997982975339574, - "loss": 46.0, - "step": 39711 - }, - { - "epoch": 6.395346028423044, - "grad_norm": 0.006262979935854673, - "learning_rate": 0.00019997982873730936, - "loss": 46.0, - "step": 39712 - }, - { - "epoch": 6.395507065501832, - "grad_norm": 0.002279779640957713, - "learning_rate": 0.00019997982772119741, - "loss": 46.0, - "step": 39713 - }, - { - "epoch": 6.395668102580619, - "grad_norm": 0.0008431622409261763, - "learning_rate": 0.00019997982670505985, - "loss": 46.0, - "step": 39714 - }, - { - "epoch": 6.395829139659407, - "grad_norm": 0.004218118730932474, - "learning_rate": 0.00019997982568889674, - "loss": 46.0, - "step": 39715 - }, - { - "epoch": 6.395990176738194, - "grad_norm": 0.00430978974327445, - "learning_rate": 0.000199979824672708, - "loss": 46.0, - "step": 39716 - }, - { - "epoch": 6.396151213816982, - "grad_norm": 0.002378294011577964, - "learning_rate": 0.00019997982365649368, - "loss": 46.0, - "step": 39717 - }, - { - "epoch": 6.396312250895769, - "grad_norm": 0.003344185883179307, - "learning_rate": 0.00019997982264025375, - "loss": 46.0, - "step": 39718 - }, - { - "epoch": 6.3964732879745565, - "grad_norm": 0.0019271261990070343, - "learning_rate": 0.00019997982162398828, - "loss": 46.0, - "step": 39719 - }, - { - "epoch": 6.396634325053343, - "grad_norm": 0.008766323328018188, - "learning_rate": 0.00019997982060769717, - "loss": 46.0, - "step": 39720 - }, - { - "epoch": 6.3967953621321305, - "grad_norm": 0.0019945718813687563, - "learning_rate": 0.0001999798195913805, - "loss": 46.0, - "step": 39721 - }, - { - "epoch": 6.396956399210918, - "grad_norm": 0.014293872751295567, - "learning_rate": 0.00019997981857503821, - "loss": 46.0, - "step": 39722 - }, - { - "epoch": 6.397117436289705, - "grad_norm": 0.008086971007287502, - "learning_rate": 0.00019997981755867037, - "loss": 46.0, - "step": 39723 - }, - { - "epoch": 6.397278473368493, - "grad_norm": 0.012430197559297085, - "learning_rate": 0.0001999798165422769, - "loss": 46.0, - "step": 39724 - }, - { - "epoch": 6.39743951044728, - "grad_norm": 0.0034204518888145685, - "learning_rate": 0.00019997981552585787, - "loss": 46.0, - "step": 39725 - }, - { - "epoch": 6.397600547526068, - "grad_norm": 0.01530166994780302, - "learning_rate": 0.00019997981450941326, - "loss": 46.0, - "step": 39726 - }, - { - "epoch": 6.397761584604855, - "grad_norm": 0.005163129884749651, - "learning_rate": 0.00019997981349294307, - "loss": 46.0, - "step": 39727 - }, - { - "epoch": 6.397922621683643, - "grad_norm": 0.0057404255494475365, - "learning_rate": 0.00019997981247644724, - "loss": 46.0, - "step": 39728 - }, - { - "epoch": 6.39808365876243, - "grad_norm": 0.004381768871098757, - "learning_rate": 0.00019997981145992584, - "loss": 46.0, - "step": 39729 - }, - { - "epoch": 6.398244695841218, - "grad_norm": 0.009724602103233337, - "learning_rate": 0.00019997981044337886, - "loss": 46.0, - "step": 39730 - }, - { - "epoch": 6.398405732920005, - "grad_norm": 0.015879547223448753, - "learning_rate": 0.0001999798094268063, - "loss": 46.0, - "step": 39731 - }, - { - "epoch": 6.3985667699987925, - "grad_norm": 0.006917417980730534, - "learning_rate": 0.0001999798084102081, - "loss": 46.0, - "step": 39732 - }, - { - "epoch": 6.39872780707758, - "grad_norm": 0.00795740820467472, - "learning_rate": 0.00019997980739358434, - "loss": 46.0, - "step": 39733 - }, - { - "epoch": 6.398888844156367, - "grad_norm": 0.0063113742507994175, - "learning_rate": 0.000199979806376935, - "loss": 46.0, - "step": 39734 - }, - { - "epoch": 6.399049881235154, - "grad_norm": 0.0043883859179914, - "learning_rate": 0.0001999798053602601, - "loss": 46.0, - "step": 39735 - }, - { - "epoch": 6.399210918313941, - "grad_norm": 0.0016365100163966417, - "learning_rate": 0.00019997980434355956, - "loss": 46.0, - "step": 39736 - }, - { - "epoch": 6.399371955392729, - "grad_norm": 0.005349626764655113, - "learning_rate": 0.00019997980332683347, - "loss": 46.0, - "step": 39737 - }, - { - "epoch": 6.399532992471516, - "grad_norm": 0.024173546582460403, - "learning_rate": 0.00019997980231008177, - "loss": 46.0, - "step": 39738 - }, - { - "epoch": 6.399694029550304, - "grad_norm": 0.0015852623619139194, - "learning_rate": 0.00019997980129330447, - "loss": 46.0, - "step": 39739 - }, - { - "epoch": 6.399855066629091, - "grad_norm": 0.0010524904355406761, - "learning_rate": 0.00019997980027650157, - "loss": 46.0, - "step": 39740 - }, - { - "epoch": 6.400016103707879, - "grad_norm": 0.0010647393064573407, - "learning_rate": 0.00019997979925967313, - "loss": 46.0, - "step": 39741 - }, - { - "epoch": 6.400177140786666, - "grad_norm": 0.02489626407623291, - "learning_rate": 0.00019997979824281904, - "loss": 46.0, - "step": 39742 - }, - { - "epoch": 6.400338177865454, - "grad_norm": 0.010901028290390968, - "learning_rate": 0.0001999797972259394, - "loss": 46.0, - "step": 39743 - }, - { - "epoch": 6.400499214944241, - "grad_norm": 0.0010789092630147934, - "learning_rate": 0.00019997979620903415, - "loss": 46.0, - "step": 39744 - }, - { - "epoch": 6.4006602520230285, - "grad_norm": 0.004068130627274513, - "learning_rate": 0.00019997979519210333, - "loss": 46.0, - "step": 39745 - }, - { - "epoch": 6.400821289101816, - "grad_norm": 0.001501579419709742, - "learning_rate": 0.0001999797941751469, - "loss": 46.0, - "step": 39746 - }, - { - "epoch": 6.400982326180603, - "grad_norm": 0.0024193590506911278, - "learning_rate": 0.00019997979315816488, - "loss": 46.0, - "step": 39747 - }, - { - "epoch": 6.401143363259391, - "grad_norm": 0.005171597935259342, - "learning_rate": 0.0001999797921411573, - "loss": 46.0, - "step": 39748 - }, - { - "epoch": 6.401304400338178, - "grad_norm": 0.003315950045362115, - "learning_rate": 0.0001999797911241241, - "loss": 46.0, - "step": 39749 - }, - { - "epoch": 6.401465437416965, - "grad_norm": 0.0035181797575205564, - "learning_rate": 0.00019997979010706534, - "loss": 46.0, - "step": 39750 - }, - { - "epoch": 6.401626474495752, - "grad_norm": 0.004503350239247084, - "learning_rate": 0.00019997978908998097, - "loss": 46.0, - "step": 39751 - }, - { - "epoch": 6.40178751157454, - "grad_norm": 0.004382152575999498, - "learning_rate": 0.000199979788072871, - "loss": 46.0, - "step": 39752 - }, - { - "epoch": 6.401948548653327, - "grad_norm": 0.000647125591058284, - "learning_rate": 0.00019997978705573542, - "loss": 46.0, - "step": 39753 - }, - { - "epoch": 6.402109585732115, - "grad_norm": 0.007898072712123394, - "learning_rate": 0.0001999797860385743, - "loss": 46.0, - "step": 39754 - }, - { - "epoch": 6.402270622810902, - "grad_norm": 0.0011270168470218778, - "learning_rate": 0.00019997978502138758, - "loss": 46.0, - "step": 39755 - }, - { - "epoch": 6.4024316598896895, - "grad_norm": 0.01105918362736702, - "learning_rate": 0.00019997978400417528, - "loss": 46.0, - "step": 39756 - }, - { - "epoch": 6.402592696968477, - "grad_norm": 0.004094884265214205, - "learning_rate": 0.00019997978298693736, - "loss": 46.0, - "step": 39757 - }, - { - "epoch": 6.4027537340472644, - "grad_norm": 0.018097953870892525, - "learning_rate": 0.00019997978196967386, - "loss": 46.0, - "step": 39758 - }, - { - "epoch": 6.402914771126052, - "grad_norm": 0.0019281319109722972, - "learning_rate": 0.00019997978095238477, - "loss": 46.0, - "step": 39759 - }, - { - "epoch": 6.403075808204839, - "grad_norm": 0.0011446558637544513, - "learning_rate": 0.0001999797799350701, - "loss": 46.0, - "step": 39760 - }, - { - "epoch": 6.403236845283627, - "grad_norm": 0.004522460047155619, - "learning_rate": 0.00019997977891772983, - "loss": 46.0, - "step": 39761 - }, - { - "epoch": 6.403397882362414, - "grad_norm": 0.002561889123171568, - "learning_rate": 0.00019997977790036397, - "loss": 46.0, - "step": 39762 - }, - { - "epoch": 6.403558919441202, - "grad_norm": 0.0010753024835139513, - "learning_rate": 0.00019997977688297254, - "loss": 46.0, - "step": 39763 - }, - { - "epoch": 6.403719956519989, - "grad_norm": 0.01704479567706585, - "learning_rate": 0.0001999797758655555, - "loss": 46.0, - "step": 39764 - }, - { - "epoch": 6.403880993598776, - "grad_norm": 0.00651383213698864, - "learning_rate": 0.00019997977484811287, - "loss": 46.0, - "step": 39765 - }, - { - "epoch": 6.404042030677563, - "grad_norm": 0.005670061334967613, - "learning_rate": 0.00019997977383064467, - "loss": 46.0, - "step": 39766 - }, - { - "epoch": 6.404203067756351, - "grad_norm": 0.007568455766886473, - "learning_rate": 0.00019997977281315085, - "loss": 46.0, - "step": 39767 - }, - { - "epoch": 6.404364104835138, - "grad_norm": 0.003849287983030081, - "learning_rate": 0.00019997977179563145, - "loss": 46.0, - "step": 39768 - }, - { - "epoch": 6.4045251419139255, - "grad_norm": 0.0026378552429378033, - "learning_rate": 0.00019997977077808646, - "loss": 46.0, - "step": 39769 - }, - { - "epoch": 6.404686178992713, - "grad_norm": 0.00456943828612566, - "learning_rate": 0.0001999797697605159, - "loss": 46.0, - "step": 39770 - }, - { - "epoch": 6.4048472160715, - "grad_norm": 0.001680535264313221, - "learning_rate": 0.00019997976874291972, - "loss": 46.0, - "step": 39771 - }, - { - "epoch": 6.405008253150288, - "grad_norm": 0.014071165584027767, - "learning_rate": 0.00019997976772529797, - "loss": 46.0, - "step": 39772 - }, - { - "epoch": 6.405169290229075, - "grad_norm": 0.0013729599304497242, - "learning_rate": 0.00019997976670765063, - "loss": 46.0, - "step": 39773 - }, - { - "epoch": 6.405330327307863, - "grad_norm": 0.014479031786322594, - "learning_rate": 0.0001999797656899777, - "loss": 46.0, - "step": 39774 - }, - { - "epoch": 6.40549136438665, - "grad_norm": 0.0012981047620996833, - "learning_rate": 0.0001999797646722792, - "loss": 46.0, - "step": 39775 - }, - { - "epoch": 6.405652401465438, - "grad_norm": 0.003990803845226765, - "learning_rate": 0.0001999797636545551, - "loss": 46.0, - "step": 39776 - }, - { - "epoch": 6.405813438544225, - "grad_norm": 0.006770376581698656, - "learning_rate": 0.00019997976263680538, - "loss": 46.0, - "step": 39777 - }, - { - "epoch": 6.405974475623013, - "grad_norm": 0.014279181137681007, - "learning_rate": 0.00019997976161903008, - "loss": 46.0, - "step": 39778 - }, - { - "epoch": 6.4061355127018, - "grad_norm": 0.014250132255256176, - "learning_rate": 0.0001999797606012292, - "loss": 46.0, - "step": 39779 - }, - { - "epoch": 6.406296549780587, - "grad_norm": 0.002729963744059205, - "learning_rate": 0.00019997975958340275, - "loss": 46.0, - "step": 39780 - }, - { - "epoch": 6.406457586859374, - "grad_norm": 0.003831196343526244, - "learning_rate": 0.00019997975856555068, - "loss": 46.0, - "step": 39781 - }, - { - "epoch": 6.4066186239381615, - "grad_norm": 0.0032163283322006464, - "learning_rate": 0.00019997975754767303, - "loss": 46.0, - "step": 39782 - }, - { - "epoch": 6.406779661016949, - "grad_norm": 0.005565740168094635, - "learning_rate": 0.0001999797565297698, - "loss": 46.0, - "step": 39783 - }, - { - "epoch": 6.406940698095736, - "grad_norm": 0.0036747378762811422, - "learning_rate": 0.00019997975551184097, - "loss": 46.0, - "step": 39784 - }, - { - "epoch": 6.407101735174524, - "grad_norm": 0.006656508427113295, - "learning_rate": 0.00019997975449388653, - "loss": 46.0, - "step": 39785 - }, - { - "epoch": 6.407262772253311, - "grad_norm": 0.00356505555100739, - "learning_rate": 0.00019997975347590654, - "loss": 46.0, - "step": 39786 - }, - { - "epoch": 6.407423809332099, - "grad_norm": 0.001133436686359346, - "learning_rate": 0.00019997975245790092, - "loss": 46.0, - "step": 39787 - }, - { - "epoch": 6.407584846410886, - "grad_norm": 0.0012598708271980286, - "learning_rate": 0.00019997975143986975, - "loss": 46.0, - "step": 39788 - }, - { - "epoch": 6.407745883489674, - "grad_norm": 0.008294720202684402, - "learning_rate": 0.00019997975042181296, - "loss": 46.0, - "step": 39789 - }, - { - "epoch": 6.407906920568461, - "grad_norm": 0.010146544314920902, - "learning_rate": 0.00019997974940373062, - "loss": 46.0, - "step": 39790 - }, - { - "epoch": 6.408067957647249, - "grad_norm": 0.001885155332274735, - "learning_rate": 0.00019997974838562266, - "loss": 46.0, - "step": 39791 - }, - { - "epoch": 6.408228994726036, - "grad_norm": 0.00269310618750751, - "learning_rate": 0.0001999797473674891, - "loss": 46.0, - "step": 39792 - }, - { - "epoch": 6.4083900318048235, - "grad_norm": 0.008755331858992577, - "learning_rate": 0.00019997974634932997, - "loss": 46.0, - "step": 39793 - }, - { - "epoch": 6.40855106888361, - "grad_norm": 0.002146894345059991, - "learning_rate": 0.00019997974533114525, - "loss": 46.0, - "step": 39794 - }, - { - "epoch": 6.4087121059623975, - "grad_norm": 0.008886881172657013, - "learning_rate": 0.00019997974431293494, - "loss": 46.0, - "step": 39795 - }, - { - "epoch": 6.408873143041185, - "grad_norm": 0.001966224517673254, - "learning_rate": 0.00019997974329469904, - "loss": 46.0, - "step": 39796 - }, - { - "epoch": 6.409034180119972, - "grad_norm": 0.008079875260591507, - "learning_rate": 0.00019997974227643753, - "loss": 46.0, - "step": 39797 - }, - { - "epoch": 6.40919521719876, - "grad_norm": 0.0045585534535348415, - "learning_rate": 0.00019997974125815043, - "loss": 46.0, - "step": 39798 - }, - { - "epoch": 6.409356254277547, - "grad_norm": 0.00700960960239172, - "learning_rate": 0.00019997974023983777, - "loss": 46.0, - "step": 39799 - }, - { - "epoch": 6.409517291356335, - "grad_norm": 0.01103118248283863, - "learning_rate": 0.0001999797392214995, - "loss": 46.0, - "step": 39800 - }, - { - "epoch": 6.409678328435122, - "grad_norm": 0.0008251548861153424, - "learning_rate": 0.00019997973820313564, - "loss": 46.0, - "step": 39801 - }, - { - "epoch": 6.40983936551391, - "grad_norm": 0.00289416266605258, - "learning_rate": 0.00019997973718474622, - "loss": 46.0, - "step": 39802 - }, - { - "epoch": 6.410000402592697, - "grad_norm": 0.0012698157224804163, - "learning_rate": 0.00019997973616633119, - "loss": 46.0, - "step": 39803 - }, - { - "epoch": 6.4101614396714846, - "grad_norm": 0.0016321815783157945, - "learning_rate": 0.00019997973514789056, - "loss": 46.0, - "step": 39804 - }, - { - "epoch": 6.410322476750272, - "grad_norm": 0.012155700474977493, - "learning_rate": 0.00019997973412942436, - "loss": 46.0, - "step": 39805 - }, - { - "epoch": 6.4104835138290595, - "grad_norm": 0.0021846024319529533, - "learning_rate": 0.00019997973311093253, - "loss": 46.0, - "step": 39806 - }, - { - "epoch": 6.410644550907847, - "grad_norm": 0.002425346989184618, - "learning_rate": 0.00019997973209241515, - "loss": 46.0, - "step": 39807 - }, - { - "epoch": 6.4108055879866335, - "grad_norm": 0.003928133752197027, - "learning_rate": 0.00019997973107387215, - "loss": 46.0, - "step": 39808 - }, - { - "epoch": 6.410966625065421, - "grad_norm": 0.0027051239740103483, - "learning_rate": 0.0001999797300553036, - "loss": 46.0, - "step": 39809 - }, - { - "epoch": 6.411127662144208, - "grad_norm": 0.0018706123810261488, - "learning_rate": 0.00019997972903670945, - "loss": 46.0, - "step": 39810 - }, - { - "epoch": 6.411288699222996, - "grad_norm": 0.002114361384883523, - "learning_rate": 0.0001999797280180897, - "loss": 46.0, - "step": 39811 - }, - { - "epoch": 6.411449736301783, - "grad_norm": 0.00814177468419075, - "learning_rate": 0.00019997972699944435, - "loss": 46.0, - "step": 39812 - }, - { - "epoch": 6.411610773380571, - "grad_norm": 0.0031662913970649242, - "learning_rate": 0.0001999797259807734, - "loss": 46.0, - "step": 39813 - }, - { - "epoch": 6.411771810459358, - "grad_norm": 0.0016604793490841985, - "learning_rate": 0.00019997972496207692, - "loss": 46.0, - "step": 39814 - }, - { - "epoch": 6.411932847538146, - "grad_norm": 0.003207709640264511, - "learning_rate": 0.00019997972394335478, - "loss": 46.0, - "step": 39815 - }, - { - "epoch": 6.412093884616933, - "grad_norm": 0.01981075294315815, - "learning_rate": 0.0001999797229246071, - "loss": 46.0, - "step": 39816 - }, - { - "epoch": 6.4122549216957205, - "grad_norm": 0.00438704015687108, - "learning_rate": 0.0001999797219058338, - "loss": 46.0, - "step": 39817 - }, - { - "epoch": 6.412415958774508, - "grad_norm": 0.0021715660113841295, - "learning_rate": 0.0001999797208870349, - "loss": 46.0, - "step": 39818 - }, - { - "epoch": 6.412576995853295, - "grad_norm": 0.009480034932494164, - "learning_rate": 0.00019997971986821046, - "loss": 46.0, - "step": 39819 - }, - { - "epoch": 6.412738032932083, - "grad_norm": 0.004029309842735529, - "learning_rate": 0.00019997971884936039, - "loss": 46.0, - "step": 39820 - }, - { - "epoch": 6.41289907001087, - "grad_norm": 0.003662655595690012, - "learning_rate": 0.00019997971783048475, - "loss": 46.0, - "step": 39821 - }, - { - "epoch": 6.413060107089658, - "grad_norm": 0.0010933639714494348, - "learning_rate": 0.0001999797168115835, - "loss": 46.0, - "step": 39822 - }, - { - "epoch": 6.413221144168444, - "grad_norm": 0.0026792031712830067, - "learning_rate": 0.0001999797157926567, - "loss": 46.0, - "step": 39823 - }, - { - "epoch": 6.413382181247232, - "grad_norm": 0.0029935420025140047, - "learning_rate": 0.00019997971477370429, - "loss": 46.0, - "step": 39824 - }, - { - "epoch": 6.413543218326019, - "grad_norm": 0.01949482224881649, - "learning_rate": 0.00019997971375472625, - "loss": 46.0, - "step": 39825 - }, - { - "epoch": 6.413704255404807, - "grad_norm": 0.01633744314312935, - "learning_rate": 0.00019997971273572266, - "loss": 46.0, - "step": 39826 - }, - { - "epoch": 6.413865292483594, - "grad_norm": 0.002060030587017536, - "learning_rate": 0.0001999797117166935, - "loss": 46.0, - "step": 39827 - }, - { - "epoch": 6.414026329562382, - "grad_norm": 0.005133353173732758, - "learning_rate": 0.0001999797106976387, - "loss": 46.0, - "step": 39828 - }, - { - "epoch": 6.414187366641169, - "grad_norm": 0.0031170567963272333, - "learning_rate": 0.00019997970967855838, - "loss": 46.0, - "step": 39829 - }, - { - "epoch": 6.4143484037199565, - "grad_norm": 0.0056282877922058105, - "learning_rate": 0.0001999797086594524, - "loss": 46.0, - "step": 39830 - }, - { - "epoch": 6.414509440798744, - "grad_norm": 0.00829550065100193, - "learning_rate": 0.00019997970764032086, - "loss": 46.0, - "step": 39831 - }, - { - "epoch": 6.414670477877531, - "grad_norm": 0.0027705435641109943, - "learning_rate": 0.0001999797066211637, - "loss": 46.0, - "step": 39832 - }, - { - "epoch": 6.414831514956319, - "grad_norm": 0.003135720267891884, - "learning_rate": 0.00019997970560198098, - "loss": 46.0, - "step": 39833 - }, - { - "epoch": 6.414992552035106, - "grad_norm": 0.009653245098888874, - "learning_rate": 0.0001999797045827727, - "loss": 46.0, - "step": 39834 - }, - { - "epoch": 6.415153589113894, - "grad_norm": 0.0020820614881813526, - "learning_rate": 0.00019997970356353876, - "loss": 46.0, - "step": 39835 - }, - { - "epoch": 6.415314626192681, - "grad_norm": 0.0022661357652395964, - "learning_rate": 0.0001999797025442793, - "loss": 46.0, - "step": 39836 - }, - { - "epoch": 6.415475663271469, - "grad_norm": 0.003800329752266407, - "learning_rate": 0.00019997970152499421, - "loss": 46.0, - "step": 39837 - }, - { - "epoch": 6.415636700350255, - "grad_norm": 0.0021906327456235886, - "learning_rate": 0.00019997970050568355, - "loss": 46.0, - "step": 39838 - }, - { - "epoch": 6.415797737429043, - "grad_norm": 0.0018767487490549684, - "learning_rate": 0.0001999796994863473, - "loss": 46.0, - "step": 39839 - }, - { - "epoch": 6.41595877450783, - "grad_norm": 0.00743872718885541, - "learning_rate": 0.0001999796984669854, - "loss": 46.0, - "step": 39840 - }, - { - "epoch": 6.416119811586618, - "grad_norm": 0.008345180191099644, - "learning_rate": 0.000199979697447598, - "loss": 46.0, - "step": 39841 - }, - { - "epoch": 6.416280848665405, - "grad_norm": 0.002836718689650297, - "learning_rate": 0.00019997969642818495, - "loss": 46.0, - "step": 39842 - }, - { - "epoch": 6.4164418857441925, - "grad_norm": 0.0029085404239594936, - "learning_rate": 0.00019997969540874633, - "loss": 46.0, - "step": 39843 - }, - { - "epoch": 6.41660292282298, - "grad_norm": 0.0067632547579705715, - "learning_rate": 0.00019997969438928214, - "loss": 46.0, - "step": 39844 - }, - { - "epoch": 6.416763959901767, - "grad_norm": 0.0033712266013026237, - "learning_rate": 0.00019997969336979233, - "loss": 46.0, - "step": 39845 - }, - { - "epoch": 6.416924996980555, - "grad_norm": 0.005762216169387102, - "learning_rate": 0.00019997969235027694, - "loss": 46.0, - "step": 39846 - }, - { - "epoch": 6.417086034059342, - "grad_norm": 0.005931038875132799, - "learning_rate": 0.00019997969133073596, - "loss": 46.0, - "step": 39847 - }, - { - "epoch": 6.41724707113813, - "grad_norm": 0.013277573511004448, - "learning_rate": 0.0001999796903111694, - "loss": 46.0, - "step": 39848 - }, - { - "epoch": 6.417408108216917, - "grad_norm": 0.008250647224485874, - "learning_rate": 0.00019997968929157724, - "loss": 46.0, - "step": 39849 - }, - { - "epoch": 6.417569145295705, - "grad_norm": 0.008172634989023209, - "learning_rate": 0.00019997968827195948, - "loss": 46.0, - "step": 39850 - }, - { - "epoch": 6.417730182374492, - "grad_norm": 0.004523251671344042, - "learning_rate": 0.00019997968725231615, - "loss": 46.0, - "step": 39851 - }, - { - "epoch": 6.41789121945328, - "grad_norm": 0.023154499009251595, - "learning_rate": 0.00019997968623264724, - "loss": 46.0, - "step": 39852 - }, - { - "epoch": 6.418052256532066, - "grad_norm": 0.002058671787381172, - "learning_rate": 0.0001999796852129527, - "loss": 46.0, - "step": 39853 - }, - { - "epoch": 6.418213293610854, - "grad_norm": 0.005009278655052185, - "learning_rate": 0.00019997968419323262, - "loss": 46.0, - "step": 39854 - }, - { - "epoch": 6.418374330689641, - "grad_norm": 0.004390972200781107, - "learning_rate": 0.0001999796831734869, - "loss": 46.0, - "step": 39855 - }, - { - "epoch": 6.4185353677684285, - "grad_norm": 0.0029138128738850355, - "learning_rate": 0.0001999796821537156, - "loss": 46.0, - "step": 39856 - }, - { - "epoch": 6.418696404847216, - "grad_norm": 0.006636871490627527, - "learning_rate": 0.00019997968113391875, - "loss": 46.0, - "step": 39857 - }, - { - "epoch": 6.418857441926003, - "grad_norm": 0.003436867380514741, - "learning_rate": 0.00019997968011409628, - "loss": 46.0, - "step": 39858 - }, - { - "epoch": 6.419018479004791, - "grad_norm": 0.009712686762213707, - "learning_rate": 0.0001999796790942482, - "loss": 46.0, - "step": 39859 - }, - { - "epoch": 6.419179516083578, - "grad_norm": 0.004144848790019751, - "learning_rate": 0.00019997967807437457, - "loss": 46.0, - "step": 39860 - }, - { - "epoch": 6.419340553162366, - "grad_norm": 0.01024557463824749, - "learning_rate": 0.00019997967705447534, - "loss": 46.0, - "step": 39861 - }, - { - "epoch": 6.419501590241153, - "grad_norm": 0.008240933530032635, - "learning_rate": 0.00019997967603455053, - "loss": 46.0, - "step": 39862 - }, - { - "epoch": 6.419662627319941, - "grad_norm": 0.009815323166549206, - "learning_rate": 0.0001999796750146001, - "loss": 46.0, - "step": 39863 - }, - { - "epoch": 6.419823664398728, - "grad_norm": 0.0018769773887470365, - "learning_rate": 0.0001999796739946241, - "loss": 46.0, - "step": 39864 - }, - { - "epoch": 6.4199847014775155, - "grad_norm": 0.02064286358654499, - "learning_rate": 0.0001999796729746225, - "loss": 46.0, - "step": 39865 - }, - { - "epoch": 6.420145738556303, - "grad_norm": 0.0028559728525578976, - "learning_rate": 0.00019997967195459535, - "loss": 46.0, - "step": 39866 - }, - { - "epoch": 6.4203067756350904, - "grad_norm": 0.004231594502925873, - "learning_rate": 0.00019997967093454255, - "loss": 46.0, - "step": 39867 - }, - { - "epoch": 6.420467812713877, - "grad_norm": 0.0037256081122905016, - "learning_rate": 0.00019997966991446418, - "loss": 46.0, - "step": 39868 - }, - { - "epoch": 6.4206288497926645, - "grad_norm": 0.004277756903320551, - "learning_rate": 0.00019997966889436026, - "loss": 46.0, - "step": 39869 - }, - { - "epoch": 6.420789886871452, - "grad_norm": 0.003637191839516163, - "learning_rate": 0.0001999796678742307, - "loss": 46.0, - "step": 39870 - }, - { - "epoch": 6.420950923950239, - "grad_norm": 0.005565167870372534, - "learning_rate": 0.00019997966685407557, - "loss": 46.0, - "step": 39871 - }, - { - "epoch": 6.421111961029027, - "grad_norm": 0.00239057675935328, - "learning_rate": 0.00019997966583389486, - "loss": 46.0, - "step": 39872 - }, - { - "epoch": 6.421272998107814, - "grad_norm": 0.0013104741228744388, - "learning_rate": 0.00019997966481368856, - "loss": 46.0, - "step": 39873 - }, - { - "epoch": 6.421434035186602, - "grad_norm": 0.014932210557162762, - "learning_rate": 0.00019997966379345665, - "loss": 46.0, - "step": 39874 - }, - { - "epoch": 6.421595072265389, - "grad_norm": 0.0022315820679068565, - "learning_rate": 0.00019997966277319917, - "loss": 46.0, - "step": 39875 - }, - { - "epoch": 6.421756109344177, - "grad_norm": 0.001118012354709208, - "learning_rate": 0.00019997966175291606, - "loss": 46.0, - "step": 39876 - }, - { - "epoch": 6.421917146422964, - "grad_norm": 0.0058967643417418, - "learning_rate": 0.0001999796607326074, - "loss": 46.0, - "step": 39877 - }, - { - "epoch": 6.4220781835017515, - "grad_norm": 0.0029340407345443964, - "learning_rate": 0.00019997965971227315, - "loss": 46.0, - "step": 39878 - }, - { - "epoch": 6.422239220580539, - "grad_norm": 0.005983872339129448, - "learning_rate": 0.0001999796586919133, - "loss": 46.0, - "step": 39879 - }, - { - "epoch": 6.422400257659326, - "grad_norm": 0.007284213323146105, - "learning_rate": 0.00019997965767152786, - "loss": 46.0, - "step": 39880 - }, - { - "epoch": 6.422561294738113, - "grad_norm": 0.005100995302200317, - "learning_rate": 0.00019997965665111684, - "loss": 46.0, - "step": 39881 - }, - { - "epoch": 6.4227223318169, - "grad_norm": 0.0020110623445361853, - "learning_rate": 0.00019997965563068023, - "loss": 46.0, - "step": 39882 - }, - { - "epoch": 6.422883368895688, - "grad_norm": 0.0035027451813220978, - "learning_rate": 0.000199979654610218, - "loss": 46.0, - "step": 39883 - }, - { - "epoch": 6.423044405974475, - "grad_norm": 0.0021621331106871367, - "learning_rate": 0.00019997965358973022, - "loss": 46.0, - "step": 39884 - }, - { - "epoch": 6.423205443053263, - "grad_norm": 0.012578175403177738, - "learning_rate": 0.00019997965256921685, - "loss": 46.0, - "step": 39885 - }, - { - "epoch": 6.42336648013205, - "grad_norm": 0.005133448168635368, - "learning_rate": 0.00019997965154867786, - "loss": 46.0, - "step": 39886 - }, - { - "epoch": 6.423527517210838, - "grad_norm": 0.0025536410976201296, - "learning_rate": 0.0001999796505281133, - "loss": 46.0, - "step": 39887 - }, - { - "epoch": 6.423688554289625, - "grad_norm": 0.00344303366728127, - "learning_rate": 0.00019997964950752315, - "loss": 46.0, - "step": 39888 - }, - { - "epoch": 6.423849591368413, - "grad_norm": 0.004066459368914366, - "learning_rate": 0.0001999796484869074, - "loss": 46.0, - "step": 39889 - }, - { - "epoch": 6.4240106284472, - "grad_norm": 0.015413583256304264, - "learning_rate": 0.00019997964746626607, - "loss": 46.0, - "step": 39890 - }, - { - "epoch": 6.4241716655259875, - "grad_norm": 0.02832922898232937, - "learning_rate": 0.00019997964644559915, - "loss": 46.0, - "step": 39891 - }, - { - "epoch": 6.424332702604775, - "grad_norm": 0.0025692095514386892, - "learning_rate": 0.00019997964542490664, - "loss": 46.0, - "step": 39892 - }, - { - "epoch": 6.424493739683562, - "grad_norm": 0.002435144269838929, - "learning_rate": 0.00019997964440418852, - "loss": 46.0, - "step": 39893 - }, - { - "epoch": 6.42465477676235, - "grad_norm": 0.007873868569731712, - "learning_rate": 0.00019997964338344483, - "loss": 46.0, - "step": 39894 - }, - { - "epoch": 6.424815813841137, - "grad_norm": 0.003546998370438814, - "learning_rate": 0.00019997964236267553, - "loss": 46.0, - "step": 39895 - }, - { - "epoch": 6.424976850919924, - "grad_norm": 0.002073277486488223, - "learning_rate": 0.00019997964134188068, - "loss": 46.0, - "step": 39896 - }, - { - "epoch": 6.425137887998711, - "grad_norm": 0.014415815472602844, - "learning_rate": 0.00019997964032106023, - "loss": 46.0, - "step": 39897 - }, - { - "epoch": 6.425298925077499, - "grad_norm": 0.0028904308564960957, - "learning_rate": 0.00019997963930021414, - "loss": 46.0, - "step": 39898 - }, - { - "epoch": 6.425459962156286, - "grad_norm": 0.008536191657185555, - "learning_rate": 0.00019997963827934252, - "loss": 46.0, - "step": 39899 - }, - { - "epoch": 6.425620999235074, - "grad_norm": 0.0025488147512078285, - "learning_rate": 0.00019997963725844532, - "loss": 46.0, - "step": 39900 - }, - { - "epoch": 6.425782036313861, - "grad_norm": 0.0025260413531214, - "learning_rate": 0.00019997963623752247, - "loss": 46.0, - "step": 39901 - }, - { - "epoch": 6.425943073392649, - "grad_norm": 0.005069076549261808, - "learning_rate": 0.00019997963521657406, - "loss": 46.0, - "step": 39902 - }, - { - "epoch": 6.426104110471436, - "grad_norm": 0.0027852696366608143, - "learning_rate": 0.00019997963419560007, - "loss": 46.0, - "step": 39903 - }, - { - "epoch": 6.4262651475502235, - "grad_norm": 0.007520048413425684, - "learning_rate": 0.00019997963317460046, - "loss": 46.0, - "step": 39904 - }, - { - "epoch": 6.426426184629011, - "grad_norm": 0.008179187774658203, - "learning_rate": 0.00019997963215357529, - "loss": 46.0, - "step": 39905 - }, - { - "epoch": 6.426587221707798, - "grad_norm": 0.008504759520292282, - "learning_rate": 0.0001999796311325245, - "loss": 46.0, - "step": 39906 - }, - { - "epoch": 6.426748258786586, - "grad_norm": 0.0013578677317127585, - "learning_rate": 0.00019997963011144818, - "loss": 46.0, - "step": 39907 - }, - { - "epoch": 6.426909295865373, - "grad_norm": 0.0014628483913838863, - "learning_rate": 0.00019997962909034623, - "loss": 46.0, - "step": 39908 - }, - { - "epoch": 6.427070332944161, - "grad_norm": 0.006598500534892082, - "learning_rate": 0.00019997962806921868, - "loss": 46.0, - "step": 39909 - }, - { - "epoch": 6.427231370022948, - "grad_norm": 0.00779604772105813, - "learning_rate": 0.00019997962704806558, - "loss": 46.0, - "step": 39910 - }, - { - "epoch": 6.427392407101735, - "grad_norm": 0.009120805189013481, - "learning_rate": 0.00019997962602688683, - "loss": 46.0, - "step": 39911 - }, - { - "epoch": 6.427553444180522, - "grad_norm": 0.022817475721240044, - "learning_rate": 0.00019997962500568255, - "loss": 46.0, - "step": 39912 - }, - { - "epoch": 6.42771448125931, - "grad_norm": 0.020378459244966507, - "learning_rate": 0.00019997962398445265, - "loss": 46.0, - "step": 39913 - }, - { - "epoch": 6.427875518338097, - "grad_norm": 0.007933182641863823, - "learning_rate": 0.00019997962296319715, - "loss": 46.0, - "step": 39914 - }, - { - "epoch": 6.428036555416885, - "grad_norm": 0.0112680085003376, - "learning_rate": 0.00019997962194191605, - "loss": 46.0, - "step": 39915 - }, - { - "epoch": 6.428197592495672, - "grad_norm": 0.001529692322947085, - "learning_rate": 0.00019997962092060942, - "loss": 46.0, - "step": 39916 - }, - { - "epoch": 6.4283586295744595, - "grad_norm": 0.008812527172267437, - "learning_rate": 0.00019997961989927715, - "loss": 46.0, - "step": 39917 - }, - { - "epoch": 6.428519666653247, - "grad_norm": 0.004057199228554964, - "learning_rate": 0.00019997961887791932, - "loss": 46.0, - "step": 39918 - }, - { - "epoch": 6.428680703732034, - "grad_norm": 0.0029255924746394157, - "learning_rate": 0.00019997961785653588, - "loss": 46.0, - "step": 39919 - }, - { - "epoch": 6.428841740810822, - "grad_norm": 0.012478676624596119, - "learning_rate": 0.00019997961683512684, - "loss": 46.0, - "step": 39920 - }, - { - "epoch": 6.429002777889609, - "grad_norm": 0.0013470868580043316, - "learning_rate": 0.00019997961581369223, - "loss": 46.0, - "step": 39921 - }, - { - "epoch": 6.429163814968397, - "grad_norm": 0.0019315013196319342, - "learning_rate": 0.00019997961479223205, - "loss": 46.0, - "step": 39922 - }, - { - "epoch": 6.429324852047184, - "grad_norm": 0.0019911068957298994, - "learning_rate": 0.00019997961377074623, - "loss": 46.0, - "step": 39923 - }, - { - "epoch": 6.429485889125972, - "grad_norm": 0.007775294128805399, - "learning_rate": 0.00019997961274923487, - "loss": 46.0, - "step": 39924 - }, - { - "epoch": 6.429646926204759, - "grad_norm": 0.0005506537854671478, - "learning_rate": 0.00019997961172769788, - "loss": 46.0, - "step": 39925 - }, - { - "epoch": 6.429807963283546, - "grad_norm": 0.005360723938792944, - "learning_rate": 0.00019997961070613532, - "loss": 46.0, - "step": 39926 - }, - { - "epoch": 6.429969000362333, - "grad_norm": 0.0028473397251218557, - "learning_rate": 0.00019997960968454716, - "loss": 46.0, - "step": 39927 - }, - { - "epoch": 6.4301300374411205, - "grad_norm": 0.00832462403923273, - "learning_rate": 0.00019997960866293343, - "loss": 46.0, - "step": 39928 - }, - { - "epoch": 6.430291074519908, - "grad_norm": 0.003507880959659815, - "learning_rate": 0.00019997960764129408, - "loss": 46.0, - "step": 39929 - }, - { - "epoch": 6.4304521115986955, - "grad_norm": 0.0037783945444971323, - "learning_rate": 0.00019997960661962918, - "loss": 46.0, - "step": 39930 - }, - { - "epoch": 6.430613148677483, - "grad_norm": 0.0014754581497982144, - "learning_rate": 0.00019997960559793866, - "loss": 46.0, - "step": 39931 - }, - { - "epoch": 6.43077418575627, - "grad_norm": 0.02546934224665165, - "learning_rate": 0.00019997960457622259, - "loss": 46.0, - "step": 39932 - }, - { - "epoch": 6.430935222835058, - "grad_norm": 0.003229832975193858, - "learning_rate": 0.0001999796035544809, - "loss": 46.0, - "step": 39933 - }, - { - "epoch": 6.431096259913845, - "grad_norm": 0.004819541238248348, - "learning_rate": 0.0001999796025327136, - "loss": 46.0, - "step": 39934 - }, - { - "epoch": 6.431257296992633, - "grad_norm": 0.01111472025513649, - "learning_rate": 0.00019997960151092072, - "loss": 46.0, - "step": 39935 - }, - { - "epoch": 6.43141833407142, - "grad_norm": 0.0025102586951106787, - "learning_rate": 0.00019997960048910227, - "loss": 46.0, - "step": 39936 - }, - { - "epoch": 6.431579371150208, - "grad_norm": 0.004387294873595238, - "learning_rate": 0.00019997959946725823, - "loss": 46.0, - "step": 39937 - }, - { - "epoch": 6.431740408228995, - "grad_norm": 0.0065118856728076935, - "learning_rate": 0.00019997959844538857, - "loss": 46.0, - "step": 39938 - }, - { - "epoch": 6.4319014453077825, - "grad_norm": 0.0028932224959135056, - "learning_rate": 0.00019997959742349336, - "loss": 46.0, - "step": 39939 - }, - { - "epoch": 6.43206248238657, - "grad_norm": 0.0025285137817263603, - "learning_rate": 0.00019997959640157255, - "loss": 46.0, - "step": 39940 - }, - { - "epoch": 6.4322235194653565, - "grad_norm": 0.0064513226971030235, - "learning_rate": 0.00019997959537962614, - "loss": 46.0, - "step": 39941 - }, - { - "epoch": 6.432384556544144, - "grad_norm": 0.0023720781318843365, - "learning_rate": 0.00019997959435765413, - "loss": 46.0, - "step": 39942 - }, - { - "epoch": 6.432545593622931, - "grad_norm": 0.0040083895437419415, - "learning_rate": 0.00019997959333565657, - "loss": 46.0, - "step": 39943 - }, - { - "epoch": 6.432706630701719, - "grad_norm": 0.005140585359185934, - "learning_rate": 0.0001999795923136334, - "loss": 46.0, - "step": 39944 - }, - { - "epoch": 6.432867667780506, - "grad_norm": 0.0031324916053563356, - "learning_rate": 0.00019997959129158463, - "loss": 46.0, - "step": 39945 - }, - { - "epoch": 6.433028704859294, - "grad_norm": 0.008028978481888771, - "learning_rate": 0.00019997959026951025, - "loss": 46.0, - "step": 39946 - }, - { - "epoch": 6.433189741938081, - "grad_norm": 0.006418431177735329, - "learning_rate": 0.00019997958924741034, - "loss": 46.0, - "step": 39947 - }, - { - "epoch": 6.433350779016869, - "grad_norm": 0.012938342057168484, - "learning_rate": 0.0001999795882252848, - "loss": 46.0, - "step": 39948 - }, - { - "epoch": 6.433511816095656, - "grad_norm": 0.008282676339149475, - "learning_rate": 0.00019997958720313367, - "loss": 46.0, - "step": 39949 - }, - { - "epoch": 6.433672853174444, - "grad_norm": 0.0019169311271980405, - "learning_rate": 0.00019997958618095697, - "loss": 46.0, - "step": 39950 - }, - { - "epoch": 6.433833890253231, - "grad_norm": 0.003588273422792554, - "learning_rate": 0.00019997958515875465, - "loss": 46.0, - "step": 39951 - }, - { - "epoch": 6.4339949273320185, - "grad_norm": 0.002353373449295759, - "learning_rate": 0.00019997958413652675, - "loss": 46.0, - "step": 39952 - }, - { - "epoch": 6.434155964410806, - "grad_norm": 0.00157739722635597, - "learning_rate": 0.00019997958311427326, - "loss": 46.0, - "step": 39953 - }, - { - "epoch": 6.4343170014895925, - "grad_norm": 0.0011100948322564363, - "learning_rate": 0.0001999795820919942, - "loss": 46.0, - "step": 39954 - }, - { - "epoch": 6.43447803856838, - "grad_norm": 0.016226520761847496, - "learning_rate": 0.00019997958106968952, - "loss": 46.0, - "step": 39955 - }, - { - "epoch": 6.434639075647167, - "grad_norm": 0.018520256504416466, - "learning_rate": 0.0001999795800473593, - "loss": 46.0, - "step": 39956 - }, - { - "epoch": 6.434800112725955, - "grad_norm": 0.0020396767649799585, - "learning_rate": 0.00019997957902500346, - "loss": 46.0, - "step": 39957 - }, - { - "epoch": 6.434961149804742, - "grad_norm": 0.01737462542951107, - "learning_rate": 0.000199979578002622, - "loss": 46.0, - "step": 39958 - }, - { - "epoch": 6.43512218688353, - "grad_norm": 0.02390354871749878, - "learning_rate": 0.000199979576980215, - "loss": 46.0, - "step": 39959 - }, - { - "epoch": 6.435283223962317, - "grad_norm": 0.005807912442833185, - "learning_rate": 0.0001999795759577824, - "loss": 46.0, - "step": 39960 - }, - { - "epoch": 6.435444261041105, - "grad_norm": 0.009375504218041897, - "learning_rate": 0.0001999795749353242, - "loss": 46.0, - "step": 39961 - }, - { - "epoch": 6.435605298119892, - "grad_norm": 0.009242935106158257, - "learning_rate": 0.0001999795739128404, - "loss": 46.0, - "step": 39962 - }, - { - "epoch": 6.43576633519868, - "grad_norm": 0.0029400717467069626, - "learning_rate": 0.00019997957289033102, - "loss": 46.0, - "step": 39963 - }, - { - "epoch": 6.435927372277467, - "grad_norm": 0.014346753247082233, - "learning_rate": 0.00019997957186779607, - "loss": 46.0, - "step": 39964 - }, - { - "epoch": 6.4360884093562545, - "grad_norm": 0.010555626824498177, - "learning_rate": 0.00019997957084523548, - "loss": 46.0, - "step": 39965 - }, - { - "epoch": 6.436249446435042, - "grad_norm": 0.005844416096806526, - "learning_rate": 0.00019997956982264933, - "loss": 46.0, - "step": 39966 - }, - { - "epoch": 6.436410483513829, - "grad_norm": 0.014791611582040787, - "learning_rate": 0.00019997956880003762, - "loss": 46.0, - "step": 39967 - }, - { - "epoch": 6.436571520592617, - "grad_norm": 0.0034442765172570944, - "learning_rate": 0.0001999795677774003, - "loss": 46.0, - "step": 39968 - }, - { - "epoch": 6.436732557671403, - "grad_norm": 0.0019294825615361333, - "learning_rate": 0.00019997956675473736, - "loss": 46.0, - "step": 39969 - }, - { - "epoch": 6.436893594750191, - "grad_norm": 0.0016740956343710423, - "learning_rate": 0.00019997956573204886, - "loss": 46.0, - "step": 39970 - }, - { - "epoch": 6.437054631828978, - "grad_norm": 0.015755239874124527, - "learning_rate": 0.00019997956470933477, - "loss": 46.0, - "step": 39971 - }, - { - "epoch": 6.437215668907766, - "grad_norm": 0.0018250519642606378, - "learning_rate": 0.0001999795636865951, - "loss": 46.0, - "step": 39972 - }, - { - "epoch": 6.437376705986553, - "grad_norm": 0.0018212461145594716, - "learning_rate": 0.0001999795626638298, - "loss": 46.0, - "step": 39973 - }, - { - "epoch": 6.437537743065341, - "grad_norm": 0.0012948332587257028, - "learning_rate": 0.00019997956164103894, - "loss": 46.0, - "step": 39974 - }, - { - "epoch": 6.437698780144128, - "grad_norm": 0.017226548865437508, - "learning_rate": 0.0001999795606182225, - "loss": 46.0, - "step": 39975 - }, - { - "epoch": 6.437859817222916, - "grad_norm": 0.006045111455023289, - "learning_rate": 0.00019997955959538043, - "loss": 46.0, - "step": 39976 - }, - { - "epoch": 6.438020854301703, - "grad_norm": 0.004506081808358431, - "learning_rate": 0.00019997955857251282, - "loss": 46.0, - "step": 39977 - }, - { - "epoch": 6.4381818913804905, - "grad_norm": 0.002988075837492943, - "learning_rate": 0.0001999795575496196, - "loss": 46.0, - "step": 39978 - }, - { - "epoch": 6.438342928459278, - "grad_norm": 0.00566342007368803, - "learning_rate": 0.00019997955652670079, - "loss": 46.0, - "step": 39979 - }, - { - "epoch": 6.438503965538065, - "grad_norm": 0.010630440898239613, - "learning_rate": 0.0001999795555037564, - "loss": 46.0, - "step": 39980 - }, - { - "epoch": 6.438665002616853, - "grad_norm": 0.026092644780874252, - "learning_rate": 0.00019997955448078638, - "loss": 46.0, - "step": 39981 - }, - { - "epoch": 6.43882603969564, - "grad_norm": 0.0012930453522130847, - "learning_rate": 0.0001999795534577908, - "loss": 46.0, - "step": 39982 - }, - { - "epoch": 6.438987076774428, - "grad_norm": 0.009602982550859451, - "learning_rate": 0.00019997955243476962, - "loss": 46.0, - "step": 39983 - }, - { - "epoch": 6.439148113853214, - "grad_norm": 0.005496717058122158, - "learning_rate": 0.00019997955141172287, - "loss": 46.0, - "step": 39984 - }, - { - "epoch": 6.439309150932002, - "grad_norm": 0.0011985086603090167, - "learning_rate": 0.00019997955038865054, - "loss": 46.0, - "step": 39985 - }, - { - "epoch": 6.439470188010789, - "grad_norm": 0.00453329598531127, - "learning_rate": 0.00019997954936555256, - "loss": 46.0, - "step": 39986 - }, - { - "epoch": 6.439631225089577, - "grad_norm": 0.0018771899631246924, - "learning_rate": 0.00019997954834242906, - "loss": 46.0, - "step": 39987 - }, - { - "epoch": 6.439792262168364, - "grad_norm": 0.0022936861496418715, - "learning_rate": 0.00019997954731927994, - "loss": 46.0, - "step": 39988 - }, - { - "epoch": 6.4399532992471515, - "grad_norm": 0.00746731273829937, - "learning_rate": 0.0001999795462961052, - "loss": 46.0, - "step": 39989 - }, - { - "epoch": 6.440114336325939, - "grad_norm": 0.003333126427605748, - "learning_rate": 0.00019997954527290493, - "loss": 46.0, - "step": 39990 - }, - { - "epoch": 6.4402753734047264, - "grad_norm": 0.005318079609423876, - "learning_rate": 0.00019997954424967902, - "loss": 46.0, - "step": 39991 - }, - { - "epoch": 6.440436410483514, - "grad_norm": 0.00217154691927135, - "learning_rate": 0.00019997954322642755, - "loss": 46.0, - "step": 39992 - }, - { - "epoch": 6.440597447562301, - "grad_norm": 0.007333929650485516, - "learning_rate": 0.00019997954220315046, - "loss": 46.0, - "step": 39993 - }, - { - "epoch": 6.440758484641089, - "grad_norm": 0.003449036506935954, - "learning_rate": 0.00019997954117984782, - "loss": 46.0, - "step": 39994 - }, - { - "epoch": 6.440919521719876, - "grad_norm": 0.011705799959599972, - "learning_rate": 0.0001999795401565196, - "loss": 46.0, - "step": 39995 - }, - { - "epoch": 6.441080558798664, - "grad_norm": 0.010781088843941689, - "learning_rate": 0.00019997953913316577, - "loss": 46.0, - "step": 39996 - }, - { - "epoch": 6.441241595877451, - "grad_norm": 0.015286247245967388, - "learning_rate": 0.00019997953810978633, - "loss": 46.0, - "step": 39997 - }, - { - "epoch": 6.441402632956239, - "grad_norm": 0.007600422948598862, - "learning_rate": 0.00019997953708638131, - "loss": 46.0, - "step": 39998 - }, - { - "epoch": 6.441563670035025, - "grad_norm": 0.013189624063670635, - "learning_rate": 0.0001999795360629507, - "loss": 46.0, - "step": 39999 - }, - { - "epoch": 6.441724707113813, - "grad_norm": 0.0012945410562679172, - "learning_rate": 0.00019997953503949448, - "loss": 46.0, - "step": 40000 - }, - { - "epoch": 6.4418857441926, - "grad_norm": 0.022465655580163002, - "learning_rate": 0.0001999795340160127, - "loss": 46.0, - "step": 40001 - }, - { - "epoch": 6.4420467812713875, - "grad_norm": 0.004609566181898117, - "learning_rate": 0.00019997953299250533, - "loss": 46.0, - "step": 40002 - }, - { - "epoch": 6.442207818350175, - "grad_norm": 0.0034489461686462164, - "learning_rate": 0.00019997953196897238, - "loss": 46.0, - "step": 40003 - }, - { - "epoch": 6.442368855428962, - "grad_norm": 0.001429785043001175, - "learning_rate": 0.0001999795309454138, - "loss": 46.0, - "step": 40004 - }, - { - "epoch": 6.44252989250775, - "grad_norm": 0.0014718134189024568, - "learning_rate": 0.00019997952992182967, - "loss": 46.0, - "step": 40005 - }, - { - "epoch": 6.442690929586537, - "grad_norm": 0.013667715713381767, - "learning_rate": 0.00019997952889821993, - "loss": 46.0, - "step": 40006 - }, - { - "epoch": 6.442851966665325, - "grad_norm": 0.00549883721396327, - "learning_rate": 0.00019997952787458462, - "loss": 46.0, - "step": 40007 - }, - { - "epoch": 6.443013003744112, - "grad_norm": 0.0064135026186704636, - "learning_rate": 0.0001999795268509237, - "loss": 46.0, - "step": 40008 - }, - { - "epoch": 6.4431740408229, - "grad_norm": 0.004383997060358524, - "learning_rate": 0.00019997952582723723, - "loss": 46.0, - "step": 40009 - }, - { - "epoch": 6.443335077901687, - "grad_norm": 0.003669844940304756, - "learning_rate": 0.00019997952480352513, - "loss": 46.0, - "step": 40010 - }, - { - "epoch": 6.443496114980475, - "grad_norm": 0.0008407138520851731, - "learning_rate": 0.00019997952377978742, - "loss": 46.0, - "step": 40011 - }, - { - "epoch": 6.443657152059262, - "grad_norm": 0.004744230303913355, - "learning_rate": 0.00019997952275602416, - "loss": 46.0, - "step": 40012 - }, - { - "epoch": 6.4438181891380495, - "grad_norm": 0.005047994665801525, - "learning_rate": 0.0001999795217322353, - "loss": 46.0, - "step": 40013 - }, - { - "epoch": 6.443979226216836, - "grad_norm": 0.00792969111353159, - "learning_rate": 0.00019997952070842088, - "loss": 46.0, - "step": 40014 - }, - { - "epoch": 6.4441402632956235, - "grad_norm": 0.0027087705675512552, - "learning_rate": 0.00019997951968458083, - "loss": 46.0, - "step": 40015 - }, - { - "epoch": 6.444301300374411, - "grad_norm": 0.006329684052616358, - "learning_rate": 0.00019997951866071518, - "loss": 46.0, - "step": 40016 - }, - { - "epoch": 6.444462337453198, - "grad_norm": 0.0010234903311356902, - "learning_rate": 0.00019997951763682398, - "loss": 46.0, - "step": 40017 - }, - { - "epoch": 6.444623374531986, - "grad_norm": 0.008537469431757927, - "learning_rate": 0.00019997951661290716, - "loss": 46.0, - "step": 40018 - }, - { - "epoch": 6.444784411610773, - "grad_norm": 0.006302362307906151, - "learning_rate": 0.00019997951558896478, - "loss": 46.0, - "step": 40019 - }, - { - "epoch": 6.444945448689561, - "grad_norm": 0.0013192294863983989, - "learning_rate": 0.0001999795145649968, - "loss": 46.0, - "step": 40020 - }, - { - "epoch": 6.445106485768348, - "grad_norm": 0.0026684666518121958, - "learning_rate": 0.00019997951354100318, - "loss": 46.0, - "step": 40021 - }, - { - "epoch": 6.445267522847136, - "grad_norm": 0.007107526995241642, - "learning_rate": 0.00019997951251698404, - "loss": 46.0, - "step": 40022 - }, - { - "epoch": 6.445428559925923, - "grad_norm": 0.005974226165562868, - "learning_rate": 0.00019997951149293926, - "loss": 46.0, - "step": 40023 - }, - { - "epoch": 6.445589597004711, - "grad_norm": 0.001770784961991012, - "learning_rate": 0.00019997951046886895, - "loss": 46.0, - "step": 40024 - }, - { - "epoch": 6.445750634083498, - "grad_norm": 0.0035143611021339893, - "learning_rate": 0.000199979509444773, - "loss": 46.0, - "step": 40025 - }, - { - "epoch": 6.4459116711622855, - "grad_norm": 0.010423265397548676, - "learning_rate": 0.00019997950842065148, - "loss": 46.0, - "step": 40026 - }, - { - "epoch": 6.446072708241072, - "grad_norm": 0.0038291343953460455, - "learning_rate": 0.00019997950739650435, - "loss": 46.0, - "step": 40027 - }, - { - "epoch": 6.4462337453198595, - "grad_norm": 0.0021179742179811, - "learning_rate": 0.00019997950637233166, - "loss": 46.0, - "step": 40028 - }, - { - "epoch": 6.446394782398647, - "grad_norm": 0.0076445299200713634, - "learning_rate": 0.00019997950534813335, - "loss": 46.0, - "step": 40029 - }, - { - "epoch": 6.446555819477434, - "grad_norm": 0.0016426180955022573, - "learning_rate": 0.0001999795043239095, - "loss": 46.0, - "step": 40030 - }, - { - "epoch": 6.446716856556222, - "grad_norm": 0.0028022995684295893, - "learning_rate": 0.00019997950329966, - "loss": 46.0, - "step": 40031 - }, - { - "epoch": 6.446877893635009, - "grad_norm": 0.003427430521696806, - "learning_rate": 0.00019997950227538494, - "loss": 46.0, - "step": 40032 - }, - { - "epoch": 6.447038930713797, - "grad_norm": 0.006693064235150814, - "learning_rate": 0.0001999795012510843, - "loss": 46.0, - "step": 40033 - }, - { - "epoch": 6.447199967792584, - "grad_norm": 0.0017944596474990249, - "learning_rate": 0.00019997950022675805, - "loss": 46.0, - "step": 40034 - }, - { - "epoch": 6.447361004871372, - "grad_norm": 0.015099226497113705, - "learning_rate": 0.00019997949920240622, - "loss": 46.0, - "step": 40035 - }, - { - "epoch": 6.447522041950159, - "grad_norm": 0.017275149002671242, - "learning_rate": 0.00019997949817802878, - "loss": 46.0, - "step": 40036 - }, - { - "epoch": 6.4476830790289466, - "grad_norm": 0.0040924581699073315, - "learning_rate": 0.00019997949715362578, - "loss": 46.0, - "step": 40037 - }, - { - "epoch": 6.447844116107734, - "grad_norm": 0.003378939349204302, - "learning_rate": 0.00019997949612919716, - "loss": 46.0, - "step": 40038 - }, - { - "epoch": 6.4480051531865215, - "grad_norm": 0.002016445156186819, - "learning_rate": 0.000199979495104743, - "loss": 46.0, - "step": 40039 - }, - { - "epoch": 6.448166190265309, - "grad_norm": 0.0035596839152276516, - "learning_rate": 0.0001999794940802632, - "loss": 46.0, - "step": 40040 - }, - { - "epoch": 6.448327227344096, - "grad_norm": 0.0029883934184908867, - "learning_rate": 0.00019997949305575782, - "loss": 46.0, - "step": 40041 - }, - { - "epoch": 6.448488264422883, - "grad_norm": 0.002941165817901492, - "learning_rate": 0.00019997949203122688, - "loss": 46.0, - "step": 40042 - }, - { - "epoch": 6.44864930150167, - "grad_norm": 0.004230792168527842, - "learning_rate": 0.00019997949100667033, - "loss": 46.0, - "step": 40043 - }, - { - "epoch": 6.448810338580458, - "grad_norm": 0.0031707226298749447, - "learning_rate": 0.0001999794899820882, - "loss": 46.0, - "step": 40044 - }, - { - "epoch": 6.448971375659245, - "grad_norm": 0.0026096939109265804, - "learning_rate": 0.00019997948895748047, - "loss": 46.0, - "step": 40045 - }, - { - "epoch": 6.449132412738033, - "grad_norm": 0.0022774513345211744, - "learning_rate": 0.00019997948793284715, - "loss": 46.0, - "step": 40046 - }, - { - "epoch": 6.44929344981682, - "grad_norm": 0.005252016242593527, - "learning_rate": 0.00019997948690818825, - "loss": 46.0, - "step": 40047 - }, - { - "epoch": 6.449454486895608, - "grad_norm": 0.004806511104106903, - "learning_rate": 0.00019997948588350374, - "loss": 46.0, - "step": 40048 - }, - { - "epoch": 6.449615523974395, - "grad_norm": 0.0052448175847530365, - "learning_rate": 0.00019997948485879366, - "loss": 46.0, - "step": 40049 - }, - { - "epoch": 6.4497765610531825, - "grad_norm": 0.003529990091919899, - "learning_rate": 0.00019997948383405797, - "loss": 46.0, - "step": 40050 - }, - { - "epoch": 6.44993759813197, - "grad_norm": 0.004390238784253597, - "learning_rate": 0.00019997948280929672, - "loss": 46.0, - "step": 40051 - }, - { - "epoch": 6.450098635210757, - "grad_norm": 0.005980095360428095, - "learning_rate": 0.00019997948178450986, - "loss": 46.0, - "step": 40052 - }, - { - "epoch": 6.450259672289545, - "grad_norm": 0.005523684434592724, - "learning_rate": 0.0001999794807596974, - "loss": 46.0, - "step": 40053 - }, - { - "epoch": 6.450420709368332, - "grad_norm": 0.0012795368675142527, - "learning_rate": 0.00019997947973485937, - "loss": 46.0, - "step": 40054 - }, - { - "epoch": 6.45058174644712, - "grad_norm": 0.0007410626276396215, - "learning_rate": 0.00019997947870999575, - "loss": 46.0, - "step": 40055 - }, - { - "epoch": 6.450742783525907, - "grad_norm": 0.0017558034742251039, - "learning_rate": 0.00019997947768510654, - "loss": 46.0, - "step": 40056 - }, - { - "epoch": 6.450903820604694, - "grad_norm": 0.0038226377218961716, - "learning_rate": 0.00019997947666019174, - "loss": 46.0, - "step": 40057 - }, - { - "epoch": 6.451064857683481, - "grad_norm": 0.01705530285835266, - "learning_rate": 0.00019997947563525132, - "loss": 46.0, - "step": 40058 - }, - { - "epoch": 6.451225894762269, - "grad_norm": 0.009139101020991802, - "learning_rate": 0.00019997947461028535, - "loss": 46.0, - "step": 40059 - }, - { - "epoch": 6.451386931841056, - "grad_norm": 0.0032983715645968914, - "learning_rate": 0.0001999794735852938, - "loss": 46.0, - "step": 40060 - }, - { - "epoch": 6.451547968919844, - "grad_norm": 0.0030108338687568903, - "learning_rate": 0.0001999794725602766, - "loss": 46.0, - "step": 40061 - }, - { - "epoch": 6.451709005998631, - "grad_norm": 0.013655808754265308, - "learning_rate": 0.00019997947153523388, - "loss": 46.0, - "step": 40062 - }, - { - "epoch": 6.4518700430774185, - "grad_norm": 0.0029608544427901506, - "learning_rate": 0.00019997947051016553, - "loss": 46.0, - "step": 40063 - }, - { - "epoch": 6.452031080156206, - "grad_norm": 0.004351452458649874, - "learning_rate": 0.0001999794694850716, - "loss": 46.0, - "step": 40064 - }, - { - "epoch": 6.452192117234993, - "grad_norm": 0.005431823898106813, - "learning_rate": 0.00019997946845995207, - "loss": 46.0, - "step": 40065 - }, - { - "epoch": 6.452353154313781, - "grad_norm": 0.011322613805532455, - "learning_rate": 0.00019997946743480696, - "loss": 46.0, - "step": 40066 - }, - { - "epoch": 6.452514191392568, - "grad_norm": 0.002415593946352601, - "learning_rate": 0.00019997946640963626, - "loss": 46.0, - "step": 40067 - }, - { - "epoch": 6.452675228471356, - "grad_norm": 0.0038729137741029263, - "learning_rate": 0.00019997946538443997, - "loss": 46.0, - "step": 40068 - }, - { - "epoch": 6.452836265550143, - "grad_norm": 0.009057389572262764, - "learning_rate": 0.0001999794643592181, - "loss": 46.0, - "step": 40069 - }, - { - "epoch": 6.452997302628931, - "grad_norm": 0.022179776802659035, - "learning_rate": 0.00019997946333397061, - "loss": 46.0, - "step": 40070 - }, - { - "epoch": 6.453158339707718, - "grad_norm": 0.00583422789350152, - "learning_rate": 0.00019997946230869757, - "loss": 46.0, - "step": 40071 - }, - { - "epoch": 6.453319376786505, - "grad_norm": 0.005262425635010004, - "learning_rate": 0.00019997946128339893, - "loss": 46.0, - "step": 40072 - }, - { - "epoch": 6.453480413865292, - "grad_norm": 0.0029457639902830124, - "learning_rate": 0.00019997946025807469, - "loss": 46.0, - "step": 40073 - }, - { - "epoch": 6.45364145094408, - "grad_norm": 0.004011599812656641, - "learning_rate": 0.00019997945923272488, - "loss": 46.0, - "step": 40074 - }, - { - "epoch": 6.453802488022867, - "grad_norm": 0.002502941060811281, - "learning_rate": 0.00019997945820734945, - "loss": 46.0, - "step": 40075 - }, - { - "epoch": 6.4539635251016545, - "grad_norm": 0.0024388267192989588, - "learning_rate": 0.00019997945718194844, - "loss": 46.0, - "step": 40076 - }, - { - "epoch": 6.454124562180442, - "grad_norm": 0.00433287862688303, - "learning_rate": 0.00019997945615652185, - "loss": 46.0, - "step": 40077 - }, - { - "epoch": 6.454285599259229, - "grad_norm": 0.0031550717540085316, - "learning_rate": 0.00019997945513106966, - "loss": 46.0, - "step": 40078 - }, - { - "epoch": 6.454446636338017, - "grad_norm": 0.006372411735355854, - "learning_rate": 0.0001999794541055919, - "loss": 46.0, - "step": 40079 - }, - { - "epoch": 6.454607673416804, - "grad_norm": 0.0012201223289594054, - "learning_rate": 0.0001999794530800885, - "loss": 46.0, - "step": 40080 - }, - { - "epoch": 6.454768710495592, - "grad_norm": 0.001294377027079463, - "learning_rate": 0.00019997945205455956, - "loss": 46.0, - "step": 40081 - }, - { - "epoch": 6.454929747574379, - "grad_norm": 0.004865513648837805, - "learning_rate": 0.00019997945102900503, - "loss": 46.0, - "step": 40082 - }, - { - "epoch": 6.455090784653167, - "grad_norm": 0.0015484819887205958, - "learning_rate": 0.00019997945000342488, - "loss": 46.0, - "step": 40083 - }, - { - "epoch": 6.455251821731954, - "grad_norm": 0.0014008554862812161, - "learning_rate": 0.00019997944897781915, - "loss": 46.0, - "step": 40084 - }, - { - "epoch": 6.455412858810742, - "grad_norm": 0.004493103828281164, - "learning_rate": 0.00019997944795218783, - "loss": 46.0, - "step": 40085 - }, - { - "epoch": 6.455573895889529, - "grad_norm": 0.00622917665168643, - "learning_rate": 0.00019997944692653094, - "loss": 46.0, - "step": 40086 - }, - { - "epoch": 6.455734932968316, - "grad_norm": 0.0022607920691370964, - "learning_rate": 0.00019997944590084845, - "loss": 46.0, - "step": 40087 - }, - { - "epoch": 6.455895970047103, - "grad_norm": 0.002559753367677331, - "learning_rate": 0.00019997944487514037, - "loss": 46.0, - "step": 40088 - }, - { - "epoch": 6.4560570071258905, - "grad_norm": 0.0028425741475075483, - "learning_rate": 0.0001999794438494067, - "loss": 46.0, - "step": 40089 - }, - { - "epoch": 6.456218044204678, - "grad_norm": 0.0009906552731990814, - "learning_rate": 0.00019997944282364744, - "loss": 46.0, - "step": 40090 - }, - { - "epoch": 6.456379081283465, - "grad_norm": 0.019082501530647278, - "learning_rate": 0.0001999794417978626, - "loss": 46.0, - "step": 40091 - }, - { - "epoch": 6.456540118362253, - "grad_norm": 0.008088387548923492, - "learning_rate": 0.00019997944077205216, - "loss": 46.0, - "step": 40092 - }, - { - "epoch": 6.45670115544104, - "grad_norm": 0.0030749980360269547, - "learning_rate": 0.00019997943974621612, - "loss": 46.0, - "step": 40093 - }, - { - "epoch": 6.456862192519828, - "grad_norm": 0.0019023667555302382, - "learning_rate": 0.00019997943872035448, - "loss": 46.0, - "step": 40094 - }, - { - "epoch": 6.457023229598615, - "grad_norm": 0.00077101617353037, - "learning_rate": 0.0001999794376944673, - "loss": 46.0, - "step": 40095 - }, - { - "epoch": 6.457184266677403, - "grad_norm": 0.01156501192599535, - "learning_rate": 0.0001999794366685545, - "loss": 46.0, - "step": 40096 - }, - { - "epoch": 6.45734530375619, - "grad_norm": 0.003724921029061079, - "learning_rate": 0.00019997943564261612, - "loss": 46.0, - "step": 40097 - }, - { - "epoch": 6.4575063408349775, - "grad_norm": 0.013687696307897568, - "learning_rate": 0.00019997943461665213, - "loss": 46.0, - "step": 40098 - }, - { - "epoch": 6.457667377913765, - "grad_norm": 0.007870011031627655, - "learning_rate": 0.00019997943359066256, - "loss": 46.0, - "step": 40099 - }, - { - "epoch": 6.4578284149925524, - "grad_norm": 0.0033407360315322876, - "learning_rate": 0.0001999794325646474, - "loss": 46.0, - "step": 40100 - }, - { - "epoch": 6.45798945207134, - "grad_norm": 0.015944834798574448, - "learning_rate": 0.00019997943153860667, - "loss": 46.0, - "step": 40101 - }, - { - "epoch": 6.4581504891501265, - "grad_norm": 0.0017989519983530045, - "learning_rate": 0.00019997943051254033, - "loss": 46.0, - "step": 40102 - }, - { - "epoch": 6.458311526228914, - "grad_norm": 0.010646210052073002, - "learning_rate": 0.00019997942948644842, - "loss": 46.0, - "step": 40103 - }, - { - "epoch": 6.458472563307701, - "grad_norm": 0.004232795909047127, - "learning_rate": 0.00019997942846033086, - "loss": 46.0, - "step": 40104 - }, - { - "epoch": 6.458633600386489, - "grad_norm": 0.0007176382350735366, - "learning_rate": 0.0001999794274341878, - "loss": 46.0, - "step": 40105 - }, - { - "epoch": 6.458794637465276, - "grad_norm": 0.0025290539488196373, - "learning_rate": 0.0001999794264080191, - "loss": 46.0, - "step": 40106 - }, - { - "epoch": 6.458955674544064, - "grad_norm": 0.013825730420649052, - "learning_rate": 0.0001999794253818248, - "loss": 46.0, - "step": 40107 - }, - { - "epoch": 6.459116711622851, - "grad_norm": 0.002387827495113015, - "learning_rate": 0.00019997942435560494, - "loss": 46.0, - "step": 40108 - }, - { - "epoch": 6.459277748701639, - "grad_norm": 0.010204867459833622, - "learning_rate": 0.00019997942332935947, - "loss": 46.0, - "step": 40109 - }, - { - "epoch": 6.459438785780426, - "grad_norm": 0.003758529433980584, - "learning_rate": 0.00019997942230308842, - "loss": 46.0, - "step": 40110 - }, - { - "epoch": 6.4595998228592135, - "grad_norm": 0.004909013397991657, - "learning_rate": 0.00019997942127679178, - "loss": 46.0, - "step": 40111 - }, - { - "epoch": 6.459760859938001, - "grad_norm": 0.001407152391038835, - "learning_rate": 0.00019997942025046955, - "loss": 46.0, - "step": 40112 - }, - { - "epoch": 6.459921897016788, - "grad_norm": 0.005716997664421797, - "learning_rate": 0.0001999794192241217, - "loss": 46.0, - "step": 40113 - }, - { - "epoch": 6.460082934095576, - "grad_norm": 0.0028513907454907894, - "learning_rate": 0.0001999794181977483, - "loss": 46.0, - "step": 40114 - }, - { - "epoch": 6.460243971174362, - "grad_norm": 0.0013694504741579294, - "learning_rate": 0.00019997941717134928, - "loss": 46.0, - "step": 40115 - }, - { - "epoch": 6.46040500825315, - "grad_norm": 0.002183281583711505, - "learning_rate": 0.0001999794161449247, - "loss": 46.0, - "step": 40116 - }, - { - "epoch": 6.460566045331937, - "grad_norm": 0.014641164802014828, - "learning_rate": 0.00019997941511847454, - "loss": 46.0, - "step": 40117 - }, - { - "epoch": 6.460727082410725, - "grad_norm": 0.002976014045998454, - "learning_rate": 0.00019997941409199876, - "loss": 46.0, - "step": 40118 - }, - { - "epoch": 6.460888119489512, - "grad_norm": 0.006201675161719322, - "learning_rate": 0.00019997941306549742, - "loss": 46.0, - "step": 40119 - }, - { - "epoch": 6.4610491565683, - "grad_norm": 0.002512093400582671, - "learning_rate": 0.00019997941203897047, - "loss": 46.0, - "step": 40120 - }, - { - "epoch": 6.461210193647087, - "grad_norm": 0.0016711759380996227, - "learning_rate": 0.0001999794110124179, - "loss": 46.0, - "step": 40121 - }, - { - "epoch": 6.461371230725875, - "grad_norm": 0.0021573281846940517, - "learning_rate": 0.00019997940998583977, - "loss": 46.0, - "step": 40122 - }, - { - "epoch": 6.461532267804662, - "grad_norm": 0.003349128644913435, - "learning_rate": 0.00019997940895923608, - "loss": 46.0, - "step": 40123 - }, - { - "epoch": 6.4616933048834495, - "grad_norm": 0.006997830234467983, - "learning_rate": 0.00019997940793260678, - "loss": 46.0, - "step": 40124 - }, - { - "epoch": 6.461854341962237, - "grad_norm": 0.0030931050423532724, - "learning_rate": 0.00019997940690595186, - "loss": 46.0, - "step": 40125 - }, - { - "epoch": 6.462015379041024, - "grad_norm": 0.008127564564347267, - "learning_rate": 0.00019997940587927139, - "loss": 46.0, - "step": 40126 - }, - { - "epoch": 6.462176416119812, - "grad_norm": 0.005348661914467812, - "learning_rate": 0.0001999794048525653, - "loss": 46.0, - "step": 40127 - }, - { - "epoch": 6.462337453198599, - "grad_norm": 0.0026755311992019415, - "learning_rate": 0.00019997940382583362, - "loss": 46.0, - "step": 40128 - }, - { - "epoch": 6.462498490277387, - "grad_norm": 0.0032196207903325558, - "learning_rate": 0.00019997940279907638, - "loss": 46.0, - "step": 40129 - }, - { - "epoch": 6.462659527356173, - "grad_norm": 0.003318954259157181, - "learning_rate": 0.00019997940177229353, - "loss": 46.0, - "step": 40130 - }, - { - "epoch": 6.462820564434961, - "grad_norm": 0.002483679447323084, - "learning_rate": 0.0001999794007454851, - "loss": 46.0, - "step": 40131 - }, - { - "epoch": 6.462981601513748, - "grad_norm": 0.004994211718440056, - "learning_rate": 0.00019997939971865106, - "loss": 46.0, - "step": 40132 - }, - { - "epoch": 6.463142638592536, - "grad_norm": 0.014873869717121124, - "learning_rate": 0.00019997939869179145, - "loss": 46.0, - "step": 40133 - }, - { - "epoch": 6.463303675671323, - "grad_norm": 0.0029470722656697035, - "learning_rate": 0.00019997939766490624, - "loss": 46.0, - "step": 40134 - }, - { - "epoch": 6.463464712750111, - "grad_norm": 0.0030978533904999495, - "learning_rate": 0.00019997939663799548, - "loss": 46.0, - "step": 40135 - }, - { - "epoch": 6.463625749828898, - "grad_norm": 0.002906282665207982, - "learning_rate": 0.00019997939561105908, - "loss": 46.0, - "step": 40136 - }, - { - "epoch": 6.4637867869076855, - "grad_norm": 0.0015427491161972284, - "learning_rate": 0.00019997939458409712, - "loss": 46.0, - "step": 40137 - }, - { - "epoch": 6.463947823986473, - "grad_norm": 0.0049125440418720245, - "learning_rate": 0.00019997939355710954, - "loss": 46.0, - "step": 40138 - }, - { - "epoch": 6.46410886106526, - "grad_norm": 0.012352939695119858, - "learning_rate": 0.00019997939253009638, - "loss": 46.0, - "step": 40139 - }, - { - "epoch": 6.464269898144048, - "grad_norm": 0.005172920413315296, - "learning_rate": 0.00019997939150305768, - "loss": 46.0, - "step": 40140 - }, - { - "epoch": 6.464430935222835, - "grad_norm": 0.001561078941449523, - "learning_rate": 0.0001999793904759933, - "loss": 46.0, - "step": 40141 - }, - { - "epoch": 6.464591972301623, - "grad_norm": 0.0011646532220765948, - "learning_rate": 0.0001999793894489034, - "loss": 46.0, - "step": 40142 - }, - { - "epoch": 6.46475300938041, - "grad_norm": 0.0011256361613050103, - "learning_rate": 0.0001999793884217879, - "loss": 46.0, - "step": 40143 - }, - { - "epoch": 6.464914046459198, - "grad_norm": 0.0028008469380438328, - "learning_rate": 0.0001999793873946468, - "loss": 46.0, - "step": 40144 - }, - { - "epoch": 6.465075083537984, - "grad_norm": 0.006941705942153931, - "learning_rate": 0.00019997938636748011, - "loss": 46.0, - "step": 40145 - }, - { - "epoch": 6.465236120616772, - "grad_norm": 0.00401967391371727, - "learning_rate": 0.0001999793853402878, - "loss": 46.0, - "step": 40146 - }, - { - "epoch": 6.465397157695559, - "grad_norm": 0.017262045294046402, - "learning_rate": 0.00019997938431306995, - "loss": 46.0, - "step": 40147 - }, - { - "epoch": 6.465558194774347, - "grad_norm": 0.0008813973981887102, - "learning_rate": 0.0001999793832858265, - "loss": 46.0, - "step": 40148 - }, - { - "epoch": 6.465719231853134, - "grad_norm": 0.008579926565289497, - "learning_rate": 0.00019997938225855747, - "loss": 46.0, - "step": 40149 - }, - { - "epoch": 6.4658802689319215, - "grad_norm": 0.004942048341035843, - "learning_rate": 0.00019997938123126282, - "loss": 46.0, - "step": 40150 - }, - { - "epoch": 6.466041306010709, - "grad_norm": 0.0009261569357477129, - "learning_rate": 0.0001999793802039426, - "loss": 46.0, - "step": 40151 - }, - { - "epoch": 6.466202343089496, - "grad_norm": 0.008591445162892342, - "learning_rate": 0.00019997937917659678, - "loss": 46.0, - "step": 40152 - }, - { - "epoch": 6.466363380168284, - "grad_norm": 0.0028812282253056765, - "learning_rate": 0.00019997937814922537, - "loss": 46.0, - "step": 40153 - }, - { - "epoch": 6.466524417247071, - "grad_norm": 0.0044649397023022175, - "learning_rate": 0.00019997937712182837, - "loss": 46.0, - "step": 40154 - }, - { - "epoch": 6.466685454325859, - "grad_norm": 0.0053070043213665485, - "learning_rate": 0.0001999793760944058, - "loss": 46.0, - "step": 40155 - }, - { - "epoch": 6.466846491404646, - "grad_norm": 0.0022988743148744106, - "learning_rate": 0.00019997937506695764, - "loss": 46.0, - "step": 40156 - }, - { - "epoch": 6.467007528483434, - "grad_norm": 0.0038261639419943094, - "learning_rate": 0.00019997937403948385, - "loss": 46.0, - "step": 40157 - }, - { - "epoch": 6.467168565562221, - "grad_norm": 0.004226604010909796, - "learning_rate": 0.0001999793730119845, - "loss": 46.0, - "step": 40158 - }, - { - "epoch": 6.4673296026410085, - "grad_norm": 0.002454396802932024, - "learning_rate": 0.00019997937198445954, - "loss": 46.0, - "step": 40159 - }, - { - "epoch": 6.467490639719795, - "grad_norm": 0.002462859731167555, - "learning_rate": 0.00019997937095690902, - "loss": 46.0, - "step": 40160 - }, - { - "epoch": 6.4676516767985825, - "grad_norm": 0.0024687927216291428, - "learning_rate": 0.0001999793699293329, - "loss": 46.0, - "step": 40161 - }, - { - "epoch": 6.46781271387737, - "grad_norm": 0.004976825788617134, - "learning_rate": 0.0001999793689017312, - "loss": 46.0, - "step": 40162 - }, - { - "epoch": 6.4679737509561575, - "grad_norm": 0.0007814789423719049, - "learning_rate": 0.0001999793678741039, - "loss": 46.0, - "step": 40163 - }, - { - "epoch": 6.468134788034945, - "grad_norm": 0.001508883317001164, - "learning_rate": 0.000199979366846451, - "loss": 46.0, - "step": 40164 - }, - { - "epoch": 6.468295825113732, - "grad_norm": 0.0031199741642922163, - "learning_rate": 0.00019997936581877252, - "loss": 46.0, - "step": 40165 - }, - { - "epoch": 6.46845686219252, - "grad_norm": 0.0024616036098450422, - "learning_rate": 0.00019997936479106845, - "loss": 46.0, - "step": 40166 - }, - { - "epoch": 6.468617899271307, - "grad_norm": 0.013303125277161598, - "learning_rate": 0.0001999793637633388, - "loss": 46.0, - "step": 40167 - }, - { - "epoch": 6.468778936350095, - "grad_norm": 0.002328054280951619, - "learning_rate": 0.00019997936273558352, - "loss": 46.0, - "step": 40168 - }, - { - "epoch": 6.468939973428882, - "grad_norm": 0.001860763761214912, - "learning_rate": 0.00019997936170780268, - "loss": 46.0, - "step": 40169 - }, - { - "epoch": 6.46910101050767, - "grad_norm": 0.0043235779739916325, - "learning_rate": 0.00019997936067999626, - "loss": 46.0, - "step": 40170 - }, - { - "epoch": 6.469262047586457, - "grad_norm": 0.0044525377452373505, - "learning_rate": 0.00019997935965216426, - "loss": 46.0, - "step": 40171 - }, - { - "epoch": 6.4694230846652445, - "grad_norm": 0.0010528952116146684, - "learning_rate": 0.00019997935862430666, - "loss": 46.0, - "step": 40172 - }, - { - "epoch": 6.469584121744032, - "grad_norm": 0.0032428554259240627, - "learning_rate": 0.00019997935759642345, - "loss": 46.0, - "step": 40173 - }, - { - "epoch": 6.469745158822819, - "grad_norm": 0.0042559741996228695, - "learning_rate": 0.00019997935656851468, - "loss": 46.0, - "step": 40174 - }, - { - "epoch": 6.469906195901606, - "grad_norm": 0.0012639843625947833, - "learning_rate": 0.00019997935554058027, - "loss": 46.0, - "step": 40175 - }, - { - "epoch": 6.470067232980393, - "grad_norm": 0.001522873411886394, - "learning_rate": 0.00019997935451262033, - "loss": 46.0, - "step": 40176 - }, - { - "epoch": 6.470228270059181, - "grad_norm": 0.009718555957078934, - "learning_rate": 0.00019997935348463477, - "loss": 46.0, - "step": 40177 - }, - { - "epoch": 6.470389307137968, - "grad_norm": 0.005866462830454111, - "learning_rate": 0.00019997935245662362, - "loss": 46.0, - "step": 40178 - }, - { - "epoch": 6.470550344216756, - "grad_norm": 0.0020871348679065704, - "learning_rate": 0.0001999793514285869, - "loss": 46.0, - "step": 40179 - }, - { - "epoch": 6.470711381295543, - "grad_norm": 0.00416685501113534, - "learning_rate": 0.00019997935040052455, - "loss": 46.0, - "step": 40180 - }, - { - "epoch": 6.470872418374331, - "grad_norm": 0.006891305558383465, - "learning_rate": 0.00019997934937243664, - "loss": 46.0, - "step": 40181 - }, - { - "epoch": 6.471033455453118, - "grad_norm": 0.005611291155219078, - "learning_rate": 0.00019997934834432312, - "loss": 46.0, - "step": 40182 - }, - { - "epoch": 6.471194492531906, - "grad_norm": 0.0030021113343536854, - "learning_rate": 0.00019997934731618404, - "loss": 46.0, - "step": 40183 - }, - { - "epoch": 6.471355529610693, - "grad_norm": 0.005294486880302429, - "learning_rate": 0.00019997934628801937, - "loss": 46.0, - "step": 40184 - }, - { - "epoch": 6.4715165666894805, - "grad_norm": 0.002153391018509865, - "learning_rate": 0.00019997934525982909, - "loss": 46.0, - "step": 40185 - }, - { - "epoch": 6.471677603768268, - "grad_norm": 0.004230473656207323, - "learning_rate": 0.00019997934423161322, - "loss": 46.0, - "step": 40186 - }, - { - "epoch": 6.471838640847055, - "grad_norm": 0.0013075951719656587, - "learning_rate": 0.00019997934320337176, - "loss": 46.0, - "step": 40187 - }, - { - "epoch": 6.471999677925842, - "grad_norm": 0.015328380279242992, - "learning_rate": 0.00019997934217510474, - "loss": 46.0, - "step": 40188 - }, - { - "epoch": 6.472160715004629, - "grad_norm": 0.0030476991087198257, - "learning_rate": 0.00019997934114681208, - "loss": 46.0, - "step": 40189 - }, - { - "epoch": 6.472321752083417, - "grad_norm": 0.00534987635910511, - "learning_rate": 0.0001999793401184939, - "loss": 46.0, - "step": 40190 - }, - { - "epoch": 6.472482789162204, - "grad_norm": 0.00708777317777276, - "learning_rate": 0.0001999793390901501, - "loss": 46.0, - "step": 40191 - }, - { - "epoch": 6.472643826240992, - "grad_norm": 0.0036839821841567755, - "learning_rate": 0.00019997933806178067, - "loss": 46.0, - "step": 40192 - }, - { - "epoch": 6.472804863319779, - "grad_norm": 0.0024663747753947973, - "learning_rate": 0.0001999793370333857, - "loss": 46.0, - "step": 40193 - }, - { - "epoch": 6.472965900398567, - "grad_norm": 0.006557401269674301, - "learning_rate": 0.0001999793360049651, - "loss": 46.0, - "step": 40194 - }, - { - "epoch": 6.473126937477354, - "grad_norm": 0.006940734572708607, - "learning_rate": 0.00019997933497651894, - "loss": 46.0, - "step": 40195 - }, - { - "epoch": 6.473287974556142, - "grad_norm": 0.004655441269278526, - "learning_rate": 0.00019997933394804717, - "loss": 46.0, - "step": 40196 - }, - { - "epoch": 6.473449011634929, - "grad_norm": 0.002498186891898513, - "learning_rate": 0.00019997933291954984, - "loss": 46.0, - "step": 40197 - }, - { - "epoch": 6.4736100487137165, - "grad_norm": 0.006033514626324177, - "learning_rate": 0.0001999793318910269, - "loss": 46.0, - "step": 40198 - }, - { - "epoch": 6.473771085792504, - "grad_norm": 0.002992918249219656, - "learning_rate": 0.00019997933086247837, - "loss": 46.0, - "step": 40199 - }, - { - "epoch": 6.473932122871291, - "grad_norm": 0.0022429730743169785, - "learning_rate": 0.00019997932983390425, - "loss": 46.0, - "step": 40200 - }, - { - "epoch": 6.474093159950079, - "grad_norm": 0.012457989156246185, - "learning_rate": 0.00019997932880530455, - "loss": 46.0, - "step": 40201 - }, - { - "epoch": 6.474254197028866, - "grad_norm": 0.0008719643228687346, - "learning_rate": 0.00019997932777667928, - "loss": 46.0, - "step": 40202 - }, - { - "epoch": 6.474415234107653, - "grad_norm": 0.0030901897698640823, - "learning_rate": 0.00019997932674802838, - "loss": 46.0, - "step": 40203 - }, - { - "epoch": 6.47457627118644, - "grad_norm": 0.016922269016504288, - "learning_rate": 0.00019997932571935189, - "loss": 46.0, - "step": 40204 - }, - { - "epoch": 6.474737308265228, - "grad_norm": 0.0035538964439183474, - "learning_rate": 0.00019997932469064983, - "loss": 46.0, - "step": 40205 - }, - { - "epoch": 6.474898345344015, - "grad_norm": 0.008031846955418587, - "learning_rate": 0.0001999793236619222, - "loss": 46.0, - "step": 40206 - }, - { - "epoch": 6.475059382422803, - "grad_norm": 0.004215062130242586, - "learning_rate": 0.00019997932263316894, - "loss": 46.0, - "step": 40207 - }, - { - "epoch": 6.47522041950159, - "grad_norm": 0.006000776309520006, - "learning_rate": 0.0001999793216043901, - "loss": 46.0, - "step": 40208 - }, - { - "epoch": 6.475381456580378, - "grad_norm": 0.005052017979323864, - "learning_rate": 0.0001999793205755857, - "loss": 46.0, - "step": 40209 - }, - { - "epoch": 6.475542493659165, - "grad_norm": 0.009588254615664482, - "learning_rate": 0.00019997931954675568, - "loss": 46.0, - "step": 40210 - }, - { - "epoch": 6.4757035307379525, - "grad_norm": 0.0022454005666077137, - "learning_rate": 0.00019997931851790005, - "loss": 46.0, - "step": 40211 - }, - { - "epoch": 6.47586456781674, - "grad_norm": 0.0036979801952838898, - "learning_rate": 0.00019997931748901888, - "loss": 46.0, - "step": 40212 - }, - { - "epoch": 6.476025604895527, - "grad_norm": 0.006309863179922104, - "learning_rate": 0.0001999793164601121, - "loss": 46.0, - "step": 40213 - }, - { - "epoch": 6.476186641974315, - "grad_norm": 0.008912652730941772, - "learning_rate": 0.00019997931543117972, - "loss": 46.0, - "step": 40214 - }, - { - "epoch": 6.476347679053102, - "grad_norm": 0.002049471717327833, - "learning_rate": 0.00019997931440222176, - "loss": 46.0, - "step": 40215 - }, - { - "epoch": 6.47650871613189, - "grad_norm": 0.003810411086305976, - "learning_rate": 0.0001999793133732382, - "loss": 46.0, - "step": 40216 - }, - { - "epoch": 6.476669753210677, - "grad_norm": 0.009678413160145283, - "learning_rate": 0.00019997931234422907, - "loss": 46.0, - "step": 40217 - }, - { - "epoch": 6.476830790289464, - "grad_norm": 0.007415224332362413, - "learning_rate": 0.00019997931131519433, - "loss": 46.0, - "step": 40218 - }, - { - "epoch": 6.476991827368251, - "grad_norm": 0.0014222897589206696, - "learning_rate": 0.00019997931028613403, - "loss": 46.0, - "step": 40219 - }, - { - "epoch": 6.477152864447039, - "grad_norm": 0.003049605991691351, - "learning_rate": 0.00019997930925704812, - "loss": 46.0, - "step": 40220 - }, - { - "epoch": 6.477313901525826, - "grad_norm": 0.0019180566305294633, - "learning_rate": 0.0001999793082279366, - "loss": 46.0, - "step": 40221 - }, - { - "epoch": 6.4774749386046135, - "grad_norm": 0.0035815176088362932, - "learning_rate": 0.00019997930719879952, - "loss": 46.0, - "step": 40222 - }, - { - "epoch": 6.477635975683401, - "grad_norm": 0.004851811565458775, - "learning_rate": 0.00019997930616963685, - "loss": 46.0, - "step": 40223 - }, - { - "epoch": 6.4777970127621884, - "grad_norm": 0.007060496136546135, - "learning_rate": 0.0001999793051404486, - "loss": 46.0, - "step": 40224 - }, - { - "epoch": 6.477958049840976, - "grad_norm": 0.007451748475432396, - "learning_rate": 0.00019997930411123473, - "loss": 46.0, - "step": 40225 - }, - { - "epoch": 6.478119086919763, - "grad_norm": 0.002310977317392826, - "learning_rate": 0.0001999793030819953, - "loss": 46.0, - "step": 40226 - }, - { - "epoch": 6.478280123998551, - "grad_norm": 0.0038187282625585794, - "learning_rate": 0.00019997930205273024, - "loss": 46.0, - "step": 40227 - }, - { - "epoch": 6.478441161077338, - "grad_norm": 0.005842072889208794, - "learning_rate": 0.00019997930102343966, - "loss": 46.0, - "step": 40228 - }, - { - "epoch": 6.478602198156126, - "grad_norm": 0.006124195642769337, - "learning_rate": 0.0001999792999941234, - "loss": 46.0, - "step": 40229 - }, - { - "epoch": 6.478763235234913, - "grad_norm": 0.0029630945064127445, - "learning_rate": 0.00019997929896478162, - "loss": 46.0, - "step": 40230 - }, - { - "epoch": 6.478924272313701, - "grad_norm": 0.0009804241126403213, - "learning_rate": 0.00019997929793541425, - "loss": 46.0, - "step": 40231 - }, - { - "epoch": 6.479085309392488, - "grad_norm": 0.0014537015231326222, - "learning_rate": 0.00019997929690602123, - "loss": 46.0, - "step": 40232 - }, - { - "epoch": 6.479246346471275, - "grad_norm": 0.0010318144923076034, - "learning_rate": 0.00019997929587660268, - "loss": 46.0, - "step": 40233 - }, - { - "epoch": 6.479407383550062, - "grad_norm": 0.0032097308430820704, - "learning_rate": 0.00019997929484715852, - "loss": 46.0, - "step": 40234 - }, - { - "epoch": 6.4795684206288495, - "grad_norm": 0.0027899506967514753, - "learning_rate": 0.00019997929381768877, - "loss": 46.0, - "step": 40235 - }, - { - "epoch": 6.479729457707637, - "grad_norm": 0.007092115934938192, - "learning_rate": 0.00019997929278819343, - "loss": 46.0, - "step": 40236 - }, - { - "epoch": 6.479890494786424, - "grad_norm": 0.010679597035050392, - "learning_rate": 0.00019997929175867248, - "loss": 46.0, - "step": 40237 - }, - { - "epoch": 6.480051531865212, - "grad_norm": 0.0014577755937352777, - "learning_rate": 0.00019997929072912597, - "loss": 46.0, - "step": 40238 - }, - { - "epoch": 6.480212568943999, - "grad_norm": 0.004734776448458433, - "learning_rate": 0.00019997928969955387, - "loss": 46.0, - "step": 40239 - }, - { - "epoch": 6.480373606022787, - "grad_norm": 0.011812287382781506, - "learning_rate": 0.00019997928866995616, - "loss": 46.0, - "step": 40240 - }, - { - "epoch": 6.480534643101574, - "grad_norm": 0.006357528269290924, - "learning_rate": 0.00019997928764033286, - "loss": 46.0, - "step": 40241 - }, - { - "epoch": 6.480695680180362, - "grad_norm": 0.01355251669883728, - "learning_rate": 0.000199979286610684, - "loss": 46.0, - "step": 40242 - }, - { - "epoch": 6.480856717259149, - "grad_norm": 0.001603866694495082, - "learning_rate": 0.0001999792855810095, - "loss": 46.0, - "step": 40243 - }, - { - "epoch": 6.481017754337937, - "grad_norm": 0.02170816995203495, - "learning_rate": 0.00019997928455130947, - "loss": 46.0, - "step": 40244 - }, - { - "epoch": 6.481178791416724, - "grad_norm": 0.00755128962919116, - "learning_rate": 0.00019997928352158382, - "loss": 46.0, - "step": 40245 - }, - { - "epoch": 6.4813398284955115, - "grad_norm": 0.005113938823342323, - "learning_rate": 0.0001999792824918326, - "loss": 46.0, - "step": 40246 - }, - { - "epoch": 6.481500865574299, - "grad_norm": 0.0018067056080326438, - "learning_rate": 0.00019997928146205576, - "loss": 46.0, - "step": 40247 - }, - { - "epoch": 6.4816619026530855, - "grad_norm": 0.005931834224611521, - "learning_rate": 0.00019997928043225332, - "loss": 46.0, - "step": 40248 - }, - { - "epoch": 6.481822939731873, - "grad_norm": 0.0011589303612709045, - "learning_rate": 0.00019997927940242533, - "loss": 46.0, - "step": 40249 - }, - { - "epoch": 6.48198397681066, - "grad_norm": 0.005806329660117626, - "learning_rate": 0.00019997927837257174, - "loss": 46.0, - "step": 40250 - }, - { - "epoch": 6.482145013889448, - "grad_norm": 0.003034749301150441, - "learning_rate": 0.00019997927734269254, - "loss": 46.0, - "step": 40251 - }, - { - "epoch": 6.482306050968235, - "grad_norm": 0.009186151437461376, - "learning_rate": 0.00019997927631278779, - "loss": 46.0, - "step": 40252 - }, - { - "epoch": 6.482467088047023, - "grad_norm": 0.008191969245672226, - "learning_rate": 0.0001999792752828574, - "loss": 46.0, - "step": 40253 - }, - { - "epoch": 6.48262812512581, - "grad_norm": 0.0043816883116960526, - "learning_rate": 0.00019997927425290145, - "loss": 46.0, - "step": 40254 - }, - { - "epoch": 6.482789162204598, - "grad_norm": 0.016037199646234512, - "learning_rate": 0.00019997927322291993, - "loss": 46.0, - "step": 40255 - }, - { - "epoch": 6.482950199283385, - "grad_norm": 0.012546764686703682, - "learning_rate": 0.0001999792721929128, - "loss": 46.0, - "step": 40256 - }, - { - "epoch": 6.483111236362173, - "grad_norm": 0.0015752743929624557, - "learning_rate": 0.00019997927116288008, - "loss": 46.0, - "step": 40257 - }, - { - "epoch": 6.48327227344096, - "grad_norm": 0.0016135724727064371, - "learning_rate": 0.00019997927013282177, - "loss": 46.0, - "step": 40258 - }, - { - "epoch": 6.4834333105197475, - "grad_norm": 0.011241168715059757, - "learning_rate": 0.00019997926910273787, - "loss": 46.0, - "step": 40259 - }, - { - "epoch": 6.483594347598535, - "grad_norm": 0.0033009974285960197, - "learning_rate": 0.00019997926807262836, - "loss": 46.0, - "step": 40260 - }, - { - "epoch": 6.4837553846773215, - "grad_norm": 0.0037363627925515175, - "learning_rate": 0.00019997926704249327, - "loss": 46.0, - "step": 40261 - }, - { - "epoch": 6.483916421756109, - "grad_norm": 0.01179394032806158, - "learning_rate": 0.0001999792660123326, - "loss": 46.0, - "step": 40262 - }, - { - "epoch": 6.484077458834896, - "grad_norm": 0.0009958798764273524, - "learning_rate": 0.00019997926498214636, - "loss": 46.0, - "step": 40263 - }, - { - "epoch": 6.484238495913684, - "grad_norm": 0.004711686633527279, - "learning_rate": 0.0001999792639519345, - "loss": 46.0, - "step": 40264 - }, - { - "epoch": 6.484399532992471, - "grad_norm": 0.002110701985657215, - "learning_rate": 0.00019997926292169709, - "loss": 46.0, - "step": 40265 - }, - { - "epoch": 6.484560570071259, - "grad_norm": 0.014985272660851479, - "learning_rate": 0.00019997926189143405, - "loss": 46.0, - "step": 40266 - }, - { - "epoch": 6.484721607150046, - "grad_norm": 0.006289361044764519, - "learning_rate": 0.00019997926086114543, - "loss": 46.0, - "step": 40267 - }, - { - "epoch": 6.484882644228834, - "grad_norm": 0.0019607581198215485, - "learning_rate": 0.0001999792598308312, - "loss": 46.0, - "step": 40268 - }, - { - "epoch": 6.485043681307621, - "grad_norm": 0.005681016948074102, - "learning_rate": 0.00019997925880049143, - "loss": 46.0, - "step": 40269 - }, - { - "epoch": 6.4852047183864086, - "grad_norm": 0.0031340315472334623, - "learning_rate": 0.00019997925777012605, - "loss": 46.0, - "step": 40270 - }, - { - "epoch": 6.485365755465196, - "grad_norm": 0.003052378771826625, - "learning_rate": 0.00019997925673973508, - "loss": 46.0, - "step": 40271 - }, - { - "epoch": 6.4855267925439835, - "grad_norm": 0.00330049148760736, - "learning_rate": 0.0001999792557093185, - "loss": 46.0, - "step": 40272 - }, - { - "epoch": 6.485687829622771, - "grad_norm": 0.009420718997716904, - "learning_rate": 0.00019997925467887635, - "loss": 46.0, - "step": 40273 - }, - { - "epoch": 6.485848866701558, - "grad_norm": 0.0036219116300344467, - "learning_rate": 0.0001999792536484086, - "loss": 46.0, - "step": 40274 - }, - { - "epoch": 6.486009903780346, - "grad_norm": 0.0015857815742492676, - "learning_rate": 0.00019997925261791525, - "loss": 46.0, - "step": 40275 - }, - { - "epoch": 6.486170940859132, - "grad_norm": 0.003069506725296378, - "learning_rate": 0.00019997925158739635, - "loss": 46.0, - "step": 40276 - }, - { - "epoch": 6.48633197793792, - "grad_norm": 0.003559347242116928, - "learning_rate": 0.00019997925055685185, - "loss": 46.0, - "step": 40277 - }, - { - "epoch": 6.486493015016707, - "grad_norm": 0.012691457755863667, - "learning_rate": 0.00019997924952628172, - "loss": 46.0, - "step": 40278 - }, - { - "epoch": 6.486654052095495, - "grad_norm": 0.011235514655709267, - "learning_rate": 0.00019997924849568603, - "loss": 46.0, - "step": 40279 - }, - { - "epoch": 6.486815089174282, - "grad_norm": 0.0017168110935017467, - "learning_rate": 0.00019997924746506475, - "loss": 46.0, - "step": 40280 - }, - { - "epoch": 6.48697612625307, - "grad_norm": 0.0033350728917866945, - "learning_rate": 0.0001999792464344179, - "loss": 46.0, - "step": 40281 - }, - { - "epoch": 6.487137163331857, - "grad_norm": 0.0035972122568637133, - "learning_rate": 0.00019997924540374542, - "loss": 46.0, - "step": 40282 - }, - { - "epoch": 6.4872982004106445, - "grad_norm": 0.0012851665960624814, - "learning_rate": 0.00019997924437304738, - "loss": 46.0, - "step": 40283 - }, - { - "epoch": 6.487459237489432, - "grad_norm": 0.010535992681980133, - "learning_rate": 0.00019997924334232375, - "loss": 46.0, - "step": 40284 - }, - { - "epoch": 6.487620274568219, - "grad_norm": 0.0014540920965373516, - "learning_rate": 0.0001999792423115745, - "loss": 46.0, - "step": 40285 - }, - { - "epoch": 6.487781311647007, - "grad_norm": 0.005983424838632345, - "learning_rate": 0.0001999792412807997, - "loss": 46.0, - "step": 40286 - }, - { - "epoch": 6.487942348725794, - "grad_norm": 0.014725212939083576, - "learning_rate": 0.0001999792402499993, - "loss": 46.0, - "step": 40287 - }, - { - "epoch": 6.488103385804582, - "grad_norm": 0.0010673404904082417, - "learning_rate": 0.00019997923921917328, - "loss": 46.0, - "step": 40288 - }, - { - "epoch": 6.488264422883369, - "grad_norm": 0.001559184631332755, - "learning_rate": 0.0001999792381883217, - "loss": 46.0, - "step": 40289 - }, - { - "epoch": 6.488425459962157, - "grad_norm": 0.0036646516527980566, - "learning_rate": 0.00019997923715744454, - "loss": 46.0, - "step": 40290 - }, - { - "epoch": 6.488586497040943, - "grad_norm": 0.0033899792470037937, - "learning_rate": 0.00019997923612654174, - "loss": 46.0, - "step": 40291 - }, - { - "epoch": 6.488747534119731, - "grad_norm": 0.00300061353482306, - "learning_rate": 0.0001999792350956134, - "loss": 46.0, - "step": 40292 - }, - { - "epoch": 6.488908571198518, - "grad_norm": 0.0012679887004196644, - "learning_rate": 0.00019997923406465945, - "loss": 46.0, - "step": 40293 - }, - { - "epoch": 6.489069608277306, - "grad_norm": 0.0013386975042521954, - "learning_rate": 0.00019997923303367992, - "loss": 46.0, - "step": 40294 - }, - { - "epoch": 6.489230645356093, - "grad_norm": 0.0008013815386220813, - "learning_rate": 0.00019997923200267478, - "loss": 46.0, - "step": 40295 - }, - { - "epoch": 6.4893916824348805, - "grad_norm": 0.004258880857378244, - "learning_rate": 0.00019997923097164408, - "loss": 46.0, - "step": 40296 - }, - { - "epoch": 6.489552719513668, - "grad_norm": 0.0037058277521282434, - "learning_rate": 0.0001999792299405878, - "loss": 46.0, - "step": 40297 - }, - { - "epoch": 6.489713756592455, - "grad_norm": 0.0032727655488997698, - "learning_rate": 0.00019997922890950588, - "loss": 46.0, - "step": 40298 - }, - { - "epoch": 6.489874793671243, - "grad_norm": 0.009909887798130512, - "learning_rate": 0.0001999792278783984, - "loss": 46.0, - "step": 40299 - }, - { - "epoch": 6.49003583075003, - "grad_norm": 0.004497320391237736, - "learning_rate": 0.00019997922684726534, - "loss": 46.0, - "step": 40300 - }, - { - "epoch": 6.490196867828818, - "grad_norm": 0.0013308433117344975, - "learning_rate": 0.00019997922581610668, - "loss": 46.0, - "step": 40301 - }, - { - "epoch": 6.490357904907605, - "grad_norm": 0.0034019825980067253, - "learning_rate": 0.00019997922478492242, - "loss": 46.0, - "step": 40302 - }, - { - "epoch": 6.490518941986393, - "grad_norm": 0.001946385600604117, - "learning_rate": 0.00019997922375371256, - "loss": 46.0, - "step": 40303 - }, - { - "epoch": 6.49067997906518, - "grad_norm": 0.005960382521152496, - "learning_rate": 0.00019997922272247713, - "loss": 46.0, - "step": 40304 - }, - { - "epoch": 6.490841016143968, - "grad_norm": 0.0016100500943139195, - "learning_rate": 0.0001999792216912161, - "loss": 46.0, - "step": 40305 - }, - { - "epoch": 6.491002053222754, - "grad_norm": 0.0014602484880015254, - "learning_rate": 0.00019997922065992951, - "loss": 46.0, - "step": 40306 - }, - { - "epoch": 6.491163090301542, - "grad_norm": 0.004667380359023809, - "learning_rate": 0.0001999792196286173, - "loss": 46.0, - "step": 40307 - }, - { - "epoch": 6.491324127380329, - "grad_norm": 0.001152698416262865, - "learning_rate": 0.00019997921859727952, - "loss": 46.0, - "step": 40308 - }, - { - "epoch": 6.4914851644591165, - "grad_norm": 0.0025428500957787037, - "learning_rate": 0.00019997921756591613, - "loss": 46.0, - "step": 40309 - }, - { - "epoch": 6.491646201537904, - "grad_norm": 0.01012470480054617, - "learning_rate": 0.00019997921653452716, - "loss": 46.0, - "step": 40310 - }, - { - "epoch": 6.491807238616691, - "grad_norm": 0.010339552536606789, - "learning_rate": 0.0001999792155031126, - "loss": 46.0, - "step": 40311 - }, - { - "epoch": 6.491968275695479, - "grad_norm": 0.002963364589959383, - "learning_rate": 0.00019997921447167247, - "loss": 46.0, - "step": 40312 - }, - { - "epoch": 6.492129312774266, - "grad_norm": 0.0012313439510762691, - "learning_rate": 0.0001999792134402067, - "loss": 46.0, - "step": 40313 - }, - { - "epoch": 6.492290349853054, - "grad_norm": 0.003133671125397086, - "learning_rate": 0.0001999792124087154, - "loss": 46.0, - "step": 40314 - }, - { - "epoch": 6.492451386931841, - "grad_norm": 0.003556988202035427, - "learning_rate": 0.0001999792113771985, - "loss": 46.0, - "step": 40315 - }, - { - "epoch": 6.492612424010629, - "grad_norm": 0.0022010449320077896, - "learning_rate": 0.000199979210345656, - "loss": 46.0, - "step": 40316 - }, - { - "epoch": 6.492773461089416, - "grad_norm": 0.023579290136694908, - "learning_rate": 0.00019997920931408788, - "loss": 46.0, - "step": 40317 - }, - { - "epoch": 6.492934498168204, - "grad_norm": 0.004673734772950411, - "learning_rate": 0.0001999792082824942, - "loss": 46.0, - "step": 40318 - }, - { - "epoch": 6.493095535246991, - "grad_norm": 0.019197355955839157, - "learning_rate": 0.00019997920725087494, - "loss": 46.0, - "step": 40319 - }, - { - "epoch": 6.4932565723257785, - "grad_norm": 0.00511762173846364, - "learning_rate": 0.00019997920621923006, - "loss": 46.0, - "step": 40320 - }, - { - "epoch": 6.493417609404565, - "grad_norm": 0.006581800524145365, - "learning_rate": 0.00019997920518755963, - "loss": 46.0, - "step": 40321 - }, - { - "epoch": 6.4935786464833525, - "grad_norm": 0.003833992872387171, - "learning_rate": 0.00019997920415586355, - "loss": 46.0, - "step": 40322 - }, - { - "epoch": 6.49373968356214, - "grad_norm": 0.008301000110805035, - "learning_rate": 0.00019997920312414194, - "loss": 46.0, - "step": 40323 - }, - { - "epoch": 6.493900720640927, - "grad_norm": 0.003949692007154226, - "learning_rate": 0.00019997920209239472, - "loss": 46.0, - "step": 40324 - }, - { - "epoch": 6.494061757719715, - "grad_norm": 0.0062357839196920395, - "learning_rate": 0.00019997920106062188, - "loss": 46.0, - "step": 40325 - }, - { - "epoch": 6.494222794798502, - "grad_norm": 0.017794236540794373, - "learning_rate": 0.0001999792000288235, - "loss": 46.0, - "step": 40326 - }, - { - "epoch": 6.49438383187729, - "grad_norm": 0.009414306841790676, - "learning_rate": 0.0001999791989969995, - "loss": 46.0, - "step": 40327 - }, - { - "epoch": 6.494544868956077, - "grad_norm": 0.006454518996179104, - "learning_rate": 0.00019997919796514992, - "loss": 46.0, - "step": 40328 - }, - { - "epoch": 6.494705906034865, - "grad_norm": 0.00879088044166565, - "learning_rate": 0.00019997919693327476, - "loss": 46.0, - "step": 40329 - }, - { - "epoch": 6.494866943113652, - "grad_norm": 0.0038317013531923294, - "learning_rate": 0.000199979195901374, - "loss": 46.0, - "step": 40330 - }, - { - "epoch": 6.4950279801924395, - "grad_norm": 0.005642174277454615, - "learning_rate": 0.00019997919486944765, - "loss": 46.0, - "step": 40331 - }, - { - "epoch": 6.495189017271227, - "grad_norm": 0.008032823912799358, - "learning_rate": 0.0001999791938374957, - "loss": 46.0, - "step": 40332 - }, - { - "epoch": 6.4953500543500144, - "grad_norm": 0.012297971174120903, - "learning_rate": 0.0001999791928055182, - "loss": 46.0, - "step": 40333 - }, - { - "epoch": 6.495511091428802, - "grad_norm": 0.0021798356901854277, - "learning_rate": 0.00019997919177351507, - "loss": 46.0, - "step": 40334 - }, - { - "epoch": 6.4956721285075885, - "grad_norm": 0.0048887706361711025, - "learning_rate": 0.00019997919074148636, - "loss": 46.0, - "step": 40335 - }, - { - "epoch": 6.495833165586376, - "grad_norm": 0.0012530314270406961, - "learning_rate": 0.0001999791897094321, - "loss": 46.0, - "step": 40336 - }, - { - "epoch": 6.495994202665163, - "grad_norm": 0.009131227619946003, - "learning_rate": 0.0001999791886773522, - "loss": 46.0, - "step": 40337 - }, - { - "epoch": 6.496155239743951, - "grad_norm": 0.0027114255353808403, - "learning_rate": 0.00019997918764524673, - "loss": 46.0, - "step": 40338 - }, - { - "epoch": 6.496316276822738, - "grad_norm": 0.004454668611288071, - "learning_rate": 0.00019997918661311564, - "loss": 46.0, - "step": 40339 - }, - { - "epoch": 6.496477313901526, - "grad_norm": 0.0008246162324212492, - "learning_rate": 0.00019997918558095897, - "loss": 46.0, - "step": 40340 - }, - { - "epoch": 6.496638350980313, - "grad_norm": 0.001442980719730258, - "learning_rate": 0.00019997918454877676, - "loss": 46.0, - "step": 40341 - }, - { - "epoch": 6.496799388059101, - "grad_norm": 0.0023324722424149513, - "learning_rate": 0.00019997918351656892, - "loss": 46.0, - "step": 40342 - }, - { - "epoch": 6.496960425137888, - "grad_norm": 0.008202840574085712, - "learning_rate": 0.00019997918248433548, - "loss": 46.0, - "step": 40343 - }, - { - "epoch": 6.4971214622166755, - "grad_norm": 0.003734815865755081, - "learning_rate": 0.00019997918145207649, - "loss": 46.0, - "step": 40344 - }, - { - "epoch": 6.497282499295463, - "grad_norm": 0.003976847976446152, - "learning_rate": 0.00019997918041979188, - "loss": 46.0, - "step": 40345 - }, - { - "epoch": 6.49744353637425, - "grad_norm": 0.01052770484238863, - "learning_rate": 0.00019997917938748168, - "loss": 46.0, - "step": 40346 - }, - { - "epoch": 6.497604573453038, - "grad_norm": 0.003329983912408352, - "learning_rate": 0.0001999791783551459, - "loss": 46.0, - "step": 40347 - }, - { - "epoch": 6.497765610531825, - "grad_norm": 0.01705816015601158, - "learning_rate": 0.00019997917732278453, - "loss": 46.0, - "step": 40348 - }, - { - "epoch": 6.497926647610612, - "grad_norm": 0.003354690968990326, - "learning_rate": 0.00019997917629039757, - "loss": 46.0, - "step": 40349 - }, - { - "epoch": 6.498087684689399, - "grad_norm": 0.010628162883222103, - "learning_rate": 0.00019997917525798502, - "loss": 46.0, - "step": 40350 - }, - { - "epoch": 6.498248721768187, - "grad_norm": 0.0010940140346065164, - "learning_rate": 0.0001999791742255469, - "loss": 46.0, - "step": 40351 - }, - { - "epoch": 6.498409758846974, - "grad_norm": 0.0038282989989966154, - "learning_rate": 0.00019997917319308314, - "loss": 46.0, - "step": 40352 - }, - { - "epoch": 6.498570795925762, - "grad_norm": 0.006773098837584257, - "learning_rate": 0.00019997917216059384, - "loss": 46.0, - "step": 40353 - }, - { - "epoch": 6.498731833004549, - "grad_norm": 0.002765289042145014, - "learning_rate": 0.00019997917112807892, - "loss": 46.0, - "step": 40354 - }, - { - "epoch": 6.498892870083337, - "grad_norm": 0.0015978373121470213, - "learning_rate": 0.0001999791700955384, - "loss": 46.0, - "step": 40355 - }, - { - "epoch": 6.499053907162124, - "grad_norm": 0.005281936377286911, - "learning_rate": 0.00019997916906297234, - "loss": 46.0, - "step": 40356 - }, - { - "epoch": 6.4992149442409115, - "grad_norm": 0.00275397184304893, - "learning_rate": 0.00019997916803038066, - "loss": 46.0, - "step": 40357 - }, - { - "epoch": 6.499375981319699, - "grad_norm": 0.01011024322360754, - "learning_rate": 0.00019997916699776341, - "loss": 46.0, - "step": 40358 - }, - { - "epoch": 6.499537018398486, - "grad_norm": 0.014196864329278469, - "learning_rate": 0.00019997916596512053, - "loss": 46.0, - "step": 40359 - }, - { - "epoch": 6.499698055477274, - "grad_norm": 0.004001722205430269, - "learning_rate": 0.0001999791649324521, - "loss": 46.0, - "step": 40360 - }, - { - "epoch": 6.499859092556061, - "grad_norm": 0.0022927215322852135, - "learning_rate": 0.00019997916389975805, - "loss": 46.0, - "step": 40361 - }, - { - "epoch": 6.500020129634849, - "grad_norm": 0.002157264156267047, - "learning_rate": 0.00019997916286703844, - "loss": 46.0, - "step": 40362 - }, - { - "epoch": 6.500181166713636, - "grad_norm": 0.007294053211808205, - "learning_rate": 0.0001999791618342932, - "loss": 46.0, - "step": 40363 - }, - { - "epoch": 6.500342203792423, - "grad_norm": 0.0013782167807221413, - "learning_rate": 0.0001999791608015224, - "loss": 46.0, - "step": 40364 - }, - { - "epoch": 6.50050324087121, - "grad_norm": 0.005053127650171518, - "learning_rate": 0.00019997915976872603, - "loss": 46.0, - "step": 40365 - }, - { - "epoch": 6.500664277949998, - "grad_norm": 0.00581505848094821, - "learning_rate": 0.00019997915873590404, - "loss": 46.0, - "step": 40366 - }, - { - "epoch": 6.500825315028785, - "grad_norm": 0.0025002227630466223, - "learning_rate": 0.00019997915770305648, - "loss": 46.0, - "step": 40367 - }, - { - "epoch": 6.500986352107573, - "grad_norm": 0.005692105740308762, - "learning_rate": 0.00019997915667018331, - "loss": 46.0, - "step": 40368 - }, - { - "epoch": 6.50114738918636, - "grad_norm": 0.009045046754181385, - "learning_rate": 0.00019997915563728456, - "loss": 46.0, - "step": 40369 - }, - { - "epoch": 6.5013084262651475, - "grad_norm": 0.0019253044156357646, - "learning_rate": 0.00019997915460436022, - "loss": 46.0, - "step": 40370 - }, - { - "epoch": 6.501469463343935, - "grad_norm": 0.0029128703754395247, - "learning_rate": 0.0001999791535714103, - "loss": 46.0, - "step": 40371 - }, - { - "epoch": 6.501630500422722, - "grad_norm": 0.010929145850241184, - "learning_rate": 0.00019997915253843477, - "loss": 46.0, - "step": 40372 - }, - { - "epoch": 6.50179153750151, - "grad_norm": 0.005774811841547489, - "learning_rate": 0.00019997915150543366, - "loss": 46.0, - "step": 40373 - }, - { - "epoch": 6.501952574580297, - "grad_norm": 0.006323961541056633, - "learning_rate": 0.00019997915047240697, - "loss": 46.0, - "step": 40374 - }, - { - "epoch": 6.502113611659085, - "grad_norm": 0.006913771387189627, - "learning_rate": 0.00019997914943935467, - "loss": 46.0, - "step": 40375 - }, - { - "epoch": 6.502274648737872, - "grad_norm": 0.00869273766875267, - "learning_rate": 0.00019997914840627677, - "loss": 46.0, - "step": 40376 - }, - { - "epoch": 6.50243568581666, - "grad_norm": 0.0034256496001034975, - "learning_rate": 0.00019997914737317332, - "loss": 46.0, - "step": 40377 - }, - { - "epoch": 6.502596722895447, - "grad_norm": 0.011104143224656582, - "learning_rate": 0.00019997914634004425, - "loss": 46.0, - "step": 40378 - }, - { - "epoch": 6.502757759974234, - "grad_norm": 0.005923696327954531, - "learning_rate": 0.00019997914530688963, - "loss": 46.0, - "step": 40379 - }, - { - "epoch": 6.502918797053021, - "grad_norm": 0.003140954999253154, - "learning_rate": 0.00019997914427370938, - "loss": 46.0, - "step": 40380 - }, - { - "epoch": 6.503079834131809, - "grad_norm": 0.0022571368608623743, - "learning_rate": 0.00019997914324050356, - "loss": 46.0, - "step": 40381 - }, - { - "epoch": 6.503240871210596, - "grad_norm": 0.012748190201818943, - "learning_rate": 0.00019997914220727214, - "loss": 46.0, - "step": 40382 - }, - { - "epoch": 6.5034019082893835, - "grad_norm": 0.004408635199069977, - "learning_rate": 0.00019997914117401516, - "loss": 46.0, - "step": 40383 - }, - { - "epoch": 6.503562945368171, - "grad_norm": 0.0014002801617607474, - "learning_rate": 0.00019997914014073255, - "loss": 46.0, - "step": 40384 - }, - { - "epoch": 6.503723982446958, - "grad_norm": 0.004015210550278425, - "learning_rate": 0.00019997913910742437, - "loss": 46.0, - "step": 40385 - }, - { - "epoch": 6.503885019525746, - "grad_norm": 0.013162247836589813, - "learning_rate": 0.00019997913807409058, - "loss": 46.0, - "step": 40386 - }, - { - "epoch": 6.504046056604533, - "grad_norm": 0.0035329218953847885, - "learning_rate": 0.00019997913704073122, - "loss": 46.0, - "step": 40387 - }, - { - "epoch": 6.504207093683321, - "grad_norm": 0.011753002181649208, - "learning_rate": 0.00019997913600734629, - "loss": 46.0, - "step": 40388 - }, - { - "epoch": 6.504368130762108, - "grad_norm": 0.005099029745906591, - "learning_rate": 0.00019997913497393573, - "loss": 46.0, - "step": 40389 - }, - { - "epoch": 6.504529167840896, - "grad_norm": 0.01428051944822073, - "learning_rate": 0.00019997913394049962, - "loss": 46.0, - "step": 40390 - }, - { - "epoch": 6.504690204919683, - "grad_norm": 0.0017496683867648244, - "learning_rate": 0.00019997913290703786, - "loss": 46.0, - "step": 40391 - }, - { - "epoch": 6.5048512419984705, - "grad_norm": 0.00535649573430419, - "learning_rate": 0.00019997913187355058, - "loss": 46.0, - "step": 40392 - }, - { - "epoch": 6.505012279077258, - "grad_norm": 0.001099920365959406, - "learning_rate": 0.00019997913084003767, - "loss": 46.0, - "step": 40393 - }, - { - "epoch": 6.5051733161560445, - "grad_norm": 0.006725079379975796, - "learning_rate": 0.0001999791298064992, - "loss": 46.0, - "step": 40394 - }, - { - "epoch": 6.505334353234832, - "grad_norm": 0.0021010739728808403, - "learning_rate": 0.0001999791287729351, - "loss": 46.0, - "step": 40395 - }, - { - "epoch": 6.5054953903136195, - "grad_norm": 0.006272085011005402, - "learning_rate": 0.00019997912773934545, - "loss": 46.0, - "step": 40396 - }, - { - "epoch": 6.505656427392407, - "grad_norm": 0.004059297032654285, - "learning_rate": 0.00019997912670573017, - "loss": 46.0, - "step": 40397 - }, - { - "epoch": 6.505817464471194, - "grad_norm": 0.002523328410461545, - "learning_rate": 0.00019997912567208933, - "loss": 46.0, - "step": 40398 - }, - { - "epoch": 6.505978501549982, - "grad_norm": 0.0030419593676924706, - "learning_rate": 0.0001999791246384229, - "loss": 46.0, - "step": 40399 - }, - { - "epoch": 6.506139538628769, - "grad_norm": 0.005724390037357807, - "learning_rate": 0.00019997912360473087, - "loss": 46.0, - "step": 40400 - }, - { - "epoch": 6.506300575707557, - "grad_norm": 0.004784403834491968, - "learning_rate": 0.00019997912257101327, - "loss": 46.0, - "step": 40401 - }, - { - "epoch": 6.506461612786344, - "grad_norm": 0.001489435089752078, - "learning_rate": 0.00019997912153727003, - "loss": 46.0, - "step": 40402 - }, - { - "epoch": 6.506622649865132, - "grad_norm": 0.0024986187927424908, - "learning_rate": 0.00019997912050350125, - "loss": 46.0, - "step": 40403 - }, - { - "epoch": 6.506783686943919, - "grad_norm": 0.005360976792871952, - "learning_rate": 0.00019997911946970686, - "loss": 46.0, - "step": 40404 - }, - { - "epoch": 6.5069447240227065, - "grad_norm": 0.0036494999658316374, - "learning_rate": 0.0001999791184358869, - "loss": 46.0, - "step": 40405 - }, - { - "epoch": 6.507105761101494, - "grad_norm": 0.014332235790789127, - "learning_rate": 0.00019997911740204133, - "loss": 46.0, - "step": 40406 - }, - { - "epoch": 6.5072667981802805, - "grad_norm": 0.01938583329319954, - "learning_rate": 0.00019997911636817018, - "loss": 46.0, - "step": 40407 - }, - { - "epoch": 6.507427835259069, - "grad_norm": 0.001076909713447094, - "learning_rate": 0.0001999791153342734, - "loss": 46.0, - "step": 40408 - }, - { - "epoch": 6.507588872337855, - "grad_norm": 0.0043213204480707645, - "learning_rate": 0.0001999791143003511, - "loss": 46.0, - "step": 40409 - }, - { - "epoch": 6.507749909416643, - "grad_norm": 0.02043306455016136, - "learning_rate": 0.0001999791132664032, - "loss": 46.0, - "step": 40410 - }, - { - "epoch": 6.50791094649543, - "grad_norm": 0.001814353629015386, - "learning_rate": 0.00019997911223242965, - "loss": 46.0, - "step": 40411 - }, - { - "epoch": 6.508071983574218, - "grad_norm": 0.014954142272472382, - "learning_rate": 0.00019997911119843057, - "loss": 46.0, - "step": 40412 - }, - { - "epoch": 6.508233020653005, - "grad_norm": 0.008600250817835331, - "learning_rate": 0.00019997911016440587, - "loss": 46.0, - "step": 40413 - }, - { - "epoch": 6.508394057731793, - "grad_norm": 0.0025109057314693928, - "learning_rate": 0.0001999791091303556, - "loss": 46.0, - "step": 40414 - }, - { - "epoch": 6.50855509481058, - "grad_norm": 0.0009861724684014916, - "learning_rate": 0.00019997910809627973, - "loss": 46.0, - "step": 40415 - }, - { - "epoch": 6.508716131889368, - "grad_norm": 0.005474052857607603, - "learning_rate": 0.00019997910706217827, - "loss": 46.0, - "step": 40416 - }, - { - "epoch": 6.508877168968155, - "grad_norm": 0.002236713655292988, - "learning_rate": 0.00019997910602805122, - "loss": 46.0, - "step": 40417 - }, - { - "epoch": 6.5090382060469425, - "grad_norm": 0.0008153865928761661, - "learning_rate": 0.0001999791049938986, - "loss": 46.0, - "step": 40418 - }, - { - "epoch": 6.50919924312573, - "grad_norm": 0.0026476015336811543, - "learning_rate": 0.00019997910395972034, - "loss": 46.0, - "step": 40419 - }, - { - "epoch": 6.509360280204517, - "grad_norm": 0.0034569473937153816, - "learning_rate": 0.00019997910292551653, - "loss": 46.0, - "step": 40420 - }, - { - "epoch": 6.509521317283305, - "grad_norm": 0.002157674403861165, - "learning_rate": 0.00019997910189128713, - "loss": 46.0, - "step": 40421 - }, - { - "epoch": 6.509682354362091, - "grad_norm": 0.004206405952572823, - "learning_rate": 0.00019997910085703212, - "loss": 46.0, - "step": 40422 - }, - { - "epoch": 6.50984339144088, - "grad_norm": 0.0036905494052916765, - "learning_rate": 0.00019997909982275155, - "loss": 46.0, - "step": 40423 - }, - { - "epoch": 6.510004428519666, - "grad_norm": 0.0018899135757237673, - "learning_rate": 0.0001999790987884454, - "loss": 46.0, - "step": 40424 - }, - { - "epoch": 6.510165465598454, - "grad_norm": 0.002122113946825266, - "learning_rate": 0.00019997909775411362, - "loss": 46.0, - "step": 40425 - }, - { - "epoch": 6.510326502677241, - "grad_norm": 0.0012016886612400413, - "learning_rate": 0.00019997909671975626, - "loss": 46.0, - "step": 40426 - }, - { - "epoch": 6.510487539756029, - "grad_norm": 0.004437318537384272, - "learning_rate": 0.0001999790956853733, - "loss": 46.0, - "step": 40427 - }, - { - "epoch": 6.510648576834816, - "grad_norm": 0.002166911493986845, - "learning_rate": 0.00019997909465096478, - "loss": 46.0, - "step": 40428 - }, - { - "epoch": 6.510809613913604, - "grad_norm": 0.007263000123202801, - "learning_rate": 0.00019997909361653066, - "loss": 46.0, - "step": 40429 - }, - { - "epoch": 6.510970650992391, - "grad_norm": 0.0013375879498198628, - "learning_rate": 0.00019997909258207095, - "loss": 46.0, - "step": 40430 - }, - { - "epoch": 6.5111316880711785, - "grad_norm": 0.002949443878605962, - "learning_rate": 0.00019997909154758565, - "loss": 46.0, - "step": 40431 - }, - { - "epoch": 6.511292725149966, - "grad_norm": 0.01013131532818079, - "learning_rate": 0.00019997909051307474, - "loss": 46.0, - "step": 40432 - }, - { - "epoch": 6.511453762228753, - "grad_norm": 0.005717073101550341, - "learning_rate": 0.00019997908947853827, - "loss": 46.0, - "step": 40433 - }, - { - "epoch": 6.511614799307541, - "grad_norm": 0.0020720921456813812, - "learning_rate": 0.0001999790884439762, - "loss": 46.0, - "step": 40434 - }, - { - "epoch": 6.511775836386328, - "grad_norm": 0.004989050328731537, - "learning_rate": 0.00019997908740938854, - "loss": 46.0, - "step": 40435 - }, - { - "epoch": 6.511936873465116, - "grad_norm": 0.017240235581994057, - "learning_rate": 0.00019997908637477528, - "loss": 46.0, - "step": 40436 - }, - { - "epoch": 6.512097910543902, - "grad_norm": 0.0009644894162192941, - "learning_rate": 0.00019997908534013646, - "loss": 46.0, - "step": 40437 - }, - { - "epoch": 6.51225894762269, - "grad_norm": 0.003865724429488182, - "learning_rate": 0.00019997908430547203, - "loss": 46.0, - "step": 40438 - }, - { - "epoch": 6.512419984701477, - "grad_norm": 0.0022453987039625645, - "learning_rate": 0.000199979083270782, - "loss": 46.0, - "step": 40439 - }, - { - "epoch": 6.512581021780265, - "grad_norm": 0.007251511327922344, - "learning_rate": 0.0001999790822360664, - "loss": 46.0, - "step": 40440 - }, - { - "epoch": 6.512742058859052, - "grad_norm": 0.0015736159402877092, - "learning_rate": 0.0001999790812013252, - "loss": 46.0, - "step": 40441 - }, - { - "epoch": 6.51290309593784, - "grad_norm": 0.0032892003655433655, - "learning_rate": 0.00019997908016655843, - "loss": 46.0, - "step": 40442 - }, - { - "epoch": 6.513064133016627, - "grad_norm": 0.008809706196188927, - "learning_rate": 0.00019997907913176603, - "loss": 46.0, - "step": 40443 - }, - { - "epoch": 6.5132251700954145, - "grad_norm": 0.0073125348426401615, - "learning_rate": 0.00019997907809694808, - "loss": 46.0, - "step": 40444 - }, - { - "epoch": 6.513386207174202, - "grad_norm": 0.0027208244428038597, - "learning_rate": 0.00019997907706210453, - "loss": 46.0, - "step": 40445 - }, - { - "epoch": 6.513547244252989, - "grad_norm": 0.004564281087368727, - "learning_rate": 0.00019997907602723537, - "loss": 46.0, - "step": 40446 - }, - { - "epoch": 6.513708281331777, - "grad_norm": 0.0024781201500445604, - "learning_rate": 0.00019997907499234066, - "loss": 46.0, - "step": 40447 - }, - { - "epoch": 6.513869318410564, - "grad_norm": 0.00413032341748476, - "learning_rate": 0.00019997907395742032, - "loss": 46.0, - "step": 40448 - }, - { - "epoch": 6.514030355489352, - "grad_norm": 0.007044166326522827, - "learning_rate": 0.00019997907292247443, - "loss": 46.0, - "step": 40449 - }, - { - "epoch": 6.514191392568139, - "grad_norm": 0.004675073549151421, - "learning_rate": 0.0001999790718875029, - "loss": 46.0, - "step": 40450 - }, - { - "epoch": 6.514352429646927, - "grad_norm": 0.0032668865751475096, - "learning_rate": 0.00019997907085250583, - "loss": 46.0, - "step": 40451 - }, - { - "epoch": 6.514513466725713, - "grad_norm": 0.001038560178130865, - "learning_rate": 0.00019997906981748312, - "loss": 46.0, - "step": 40452 - }, - { - "epoch": 6.514674503804501, - "grad_norm": 0.002219228306785226, - "learning_rate": 0.00019997906878243486, - "loss": 46.0, - "step": 40453 - }, - { - "epoch": 6.514835540883288, - "grad_norm": 0.007459376938641071, - "learning_rate": 0.000199979067747361, - "loss": 46.0, - "step": 40454 - }, - { - "epoch": 6.5149965779620755, - "grad_norm": 0.006110853981226683, - "learning_rate": 0.00019997906671226153, - "loss": 46.0, - "step": 40455 - }, - { - "epoch": 6.515157615040863, - "grad_norm": 0.003304006764665246, - "learning_rate": 0.0001999790656771365, - "loss": 46.0, - "step": 40456 - }, - { - "epoch": 6.5153186521196504, - "grad_norm": 0.0014726794324815273, - "learning_rate": 0.0001999790646419859, - "loss": 46.0, - "step": 40457 - }, - { - "epoch": 6.515479689198438, - "grad_norm": 0.008875620551407337, - "learning_rate": 0.00019997906360680966, - "loss": 46.0, - "step": 40458 - }, - { - "epoch": 6.515640726277225, - "grad_norm": 0.023572100326418877, - "learning_rate": 0.00019997906257160787, - "loss": 46.0, - "step": 40459 - }, - { - "epoch": 6.515801763356013, - "grad_norm": 0.004309433978050947, - "learning_rate": 0.00019997906153638043, - "loss": 46.0, - "step": 40460 - }, - { - "epoch": 6.5159628004348, - "grad_norm": 0.001962625188753009, - "learning_rate": 0.00019997906050112747, - "loss": 46.0, - "step": 40461 - }, - { - "epoch": 6.516123837513588, - "grad_norm": 0.0007694255327805877, - "learning_rate": 0.0001999790594658489, - "loss": 46.0, - "step": 40462 - }, - { - "epoch": 6.516284874592375, - "grad_norm": 0.005986039061099291, - "learning_rate": 0.00019997905843054472, - "loss": 46.0, - "step": 40463 - }, - { - "epoch": 6.516445911671163, - "grad_norm": 0.0012874397216364741, - "learning_rate": 0.00019997905739521497, - "loss": 46.0, - "step": 40464 - }, - { - "epoch": 6.51660694874995, - "grad_norm": 0.005189476069062948, - "learning_rate": 0.00019997905635985963, - "loss": 46.0, - "step": 40465 - }, - { - "epoch": 6.5167679858287375, - "grad_norm": 0.006326170638203621, - "learning_rate": 0.0001999790553244787, - "loss": 46.0, - "step": 40466 - }, - { - "epoch": 6.516929022907524, - "grad_norm": 0.0013067203108221292, - "learning_rate": 0.00019997905428907218, - "loss": 46.0, - "step": 40467 - }, - { - "epoch": 6.5170900599863115, - "grad_norm": 0.0038871399592608213, - "learning_rate": 0.00019997905325364003, - "loss": 46.0, - "step": 40468 - }, - { - "epoch": 6.517251097065099, - "grad_norm": 0.0070176864974200726, - "learning_rate": 0.00019997905221818236, - "loss": 46.0, - "step": 40469 - }, - { - "epoch": 6.517412134143886, - "grad_norm": 0.005611415486782789, - "learning_rate": 0.00019997905118269906, - "loss": 46.0, - "step": 40470 - }, - { - "epoch": 6.517573171222674, - "grad_norm": 0.006022224202752113, - "learning_rate": 0.0001999790501471902, - "loss": 46.0, - "step": 40471 - }, - { - "epoch": 6.517734208301461, - "grad_norm": 0.001275284099392593, - "learning_rate": 0.0001999790491116557, - "loss": 46.0, - "step": 40472 - }, - { - "epoch": 6.517895245380249, - "grad_norm": 0.012299781665205956, - "learning_rate": 0.00019997904807609565, - "loss": 46.0, - "step": 40473 - }, - { - "epoch": 6.518056282459036, - "grad_norm": 0.002479087794199586, - "learning_rate": 0.00019997904704050997, - "loss": 46.0, - "step": 40474 - }, - { - "epoch": 6.518217319537824, - "grad_norm": 0.01013909187167883, - "learning_rate": 0.00019997904600489873, - "loss": 46.0, - "step": 40475 - }, - { - "epoch": 6.518378356616611, - "grad_norm": 0.0018155794823542237, - "learning_rate": 0.0001999790449692619, - "loss": 46.0, - "step": 40476 - }, - { - "epoch": 6.518539393695399, - "grad_norm": 0.0007866375381127, - "learning_rate": 0.0001999790439335995, - "loss": 46.0, - "step": 40477 - }, - { - "epoch": 6.518700430774186, - "grad_norm": 0.015021715313196182, - "learning_rate": 0.0001999790428979115, - "loss": 46.0, - "step": 40478 - }, - { - "epoch": 6.5188614678529735, - "grad_norm": 0.0028334539383649826, - "learning_rate": 0.0001999790418621979, - "loss": 46.0, - "step": 40479 - }, - { - "epoch": 6.51902250493176, - "grad_norm": 0.0025506368838250637, - "learning_rate": 0.0001999790408264587, - "loss": 46.0, - "step": 40480 - }, - { - "epoch": 6.519183542010548, - "grad_norm": 0.006063203793019056, - "learning_rate": 0.00019997903979069394, - "loss": 46.0, - "step": 40481 - }, - { - "epoch": 6.519344579089335, - "grad_norm": 0.013854878954589367, - "learning_rate": 0.00019997903875490356, - "loss": 46.0, - "step": 40482 - }, - { - "epoch": 6.519505616168122, - "grad_norm": 0.004554451443254948, - "learning_rate": 0.0001999790377190876, - "loss": 46.0, - "step": 40483 - }, - { - "epoch": 6.51966665324691, - "grad_norm": 0.002264037961140275, - "learning_rate": 0.00019997903668324604, - "loss": 46.0, - "step": 40484 - }, - { - "epoch": 6.519827690325697, - "grad_norm": 0.0024320161901414394, - "learning_rate": 0.00019997903564737893, - "loss": 46.0, - "step": 40485 - }, - { - "epoch": 6.519988727404485, - "grad_norm": 0.0007251136703416705, - "learning_rate": 0.0001999790346114862, - "loss": 46.0, - "step": 40486 - }, - { - "epoch": 6.520149764483272, - "grad_norm": 0.0049898019060492516, - "learning_rate": 0.0001999790335755679, - "loss": 46.0, - "step": 40487 - }, - { - "epoch": 6.52031080156206, - "grad_norm": 0.012271677143871784, - "learning_rate": 0.00019997903253962397, - "loss": 46.0, - "step": 40488 - }, - { - "epoch": 6.520471838640847, - "grad_norm": 0.009346774779260159, - "learning_rate": 0.00019997903150365448, - "loss": 46.0, - "step": 40489 - }, - { - "epoch": 6.520632875719635, - "grad_norm": 0.014756559394299984, - "learning_rate": 0.0001999790304676594, - "loss": 46.0, - "step": 40490 - }, - { - "epoch": 6.520793912798422, - "grad_norm": 0.0011654084082692862, - "learning_rate": 0.00019997902943163874, - "loss": 46.0, - "step": 40491 - }, - { - "epoch": 6.5209549498772095, - "grad_norm": 0.003624498378485441, - "learning_rate": 0.00019997902839559247, - "loss": 46.0, - "step": 40492 - }, - { - "epoch": 6.521115986955997, - "grad_norm": 0.004516322165727615, - "learning_rate": 0.0001999790273595206, - "loss": 46.0, - "step": 40493 - }, - { - "epoch": 6.521277024034784, - "grad_norm": 0.012934727594256401, - "learning_rate": 0.00019997902632342318, - "loss": 46.0, - "step": 40494 - }, - { - "epoch": 6.521438061113571, - "grad_norm": 0.001190581708215177, - "learning_rate": 0.00019997902528730014, - "loss": 46.0, - "step": 40495 - }, - { - "epoch": 6.521599098192359, - "grad_norm": 0.009363081306219101, - "learning_rate": 0.00019997902425115155, - "loss": 46.0, - "step": 40496 - }, - { - "epoch": 6.521760135271146, - "grad_norm": 0.007522832602262497, - "learning_rate": 0.00019997902321497734, - "loss": 46.0, - "step": 40497 - }, - { - "epoch": 6.521921172349933, - "grad_norm": 0.003615233115851879, - "learning_rate": 0.00019997902217877754, - "loss": 46.0, - "step": 40498 - }, - { - "epoch": 6.522082209428721, - "grad_norm": 0.0009744943236000836, - "learning_rate": 0.00019997902114255212, - "loss": 46.0, - "step": 40499 - }, - { - "epoch": 6.522243246507508, - "grad_norm": 0.007887487299740314, - "learning_rate": 0.00019997902010630115, - "loss": 46.0, - "step": 40500 - }, - { - "epoch": 6.522404283586296, - "grad_norm": 0.0020922317635267973, - "learning_rate": 0.0001999790190700246, - "loss": 46.0, - "step": 40501 - }, - { - "epoch": 6.522565320665083, - "grad_norm": 0.00752679817378521, - "learning_rate": 0.00019997901803372244, - "loss": 46.0, - "step": 40502 - }, - { - "epoch": 6.5227263577438706, - "grad_norm": 0.013033091090619564, - "learning_rate": 0.0001999790169973947, - "loss": 46.0, - "step": 40503 - }, - { - "epoch": 6.522887394822658, - "grad_norm": 0.007432173006236553, - "learning_rate": 0.00019997901596104133, - "loss": 46.0, - "step": 40504 - }, - { - "epoch": 6.5230484319014455, - "grad_norm": 0.0029655941762030125, - "learning_rate": 0.00019997901492466242, - "loss": 46.0, - "step": 40505 - }, - { - "epoch": 6.523209468980233, - "grad_norm": 0.0011716659646481276, - "learning_rate": 0.0001999790138882579, - "loss": 46.0, - "step": 40506 - }, - { - "epoch": 6.52337050605902, - "grad_norm": 0.0024206924717873335, - "learning_rate": 0.00019997901285182781, - "loss": 46.0, - "step": 40507 - }, - { - "epoch": 6.523531543137808, - "grad_norm": 0.005756554193794727, - "learning_rate": 0.00019997901181537212, - "loss": 46.0, - "step": 40508 - }, - { - "epoch": 6.523692580216595, - "grad_norm": 0.0036053687799721956, - "learning_rate": 0.00019997901077889083, - "loss": 46.0, - "step": 40509 - }, - { - "epoch": 6.523853617295382, - "grad_norm": 0.003992556128650904, - "learning_rate": 0.00019997900974238396, - "loss": 46.0, - "step": 40510 - }, - { - "epoch": 6.524014654374169, - "grad_norm": 0.003620494157075882, - "learning_rate": 0.0001999790087058515, - "loss": 46.0, - "step": 40511 - }, - { - "epoch": 6.524175691452957, - "grad_norm": 0.016325142234563828, - "learning_rate": 0.00019997900766929343, - "loss": 46.0, - "step": 40512 - }, - { - "epoch": 6.524336728531744, - "grad_norm": 0.010594401508569717, - "learning_rate": 0.00019997900663270982, - "loss": 46.0, - "step": 40513 - }, - { - "epoch": 6.524497765610532, - "grad_norm": 0.01333087868988514, - "learning_rate": 0.00019997900559610057, - "loss": 46.0, - "step": 40514 - }, - { - "epoch": 6.524658802689319, - "grad_norm": 0.003551728092133999, - "learning_rate": 0.00019997900455946574, - "loss": 46.0, - "step": 40515 - }, - { - "epoch": 6.5248198397681065, - "grad_norm": 0.010311528109014034, - "learning_rate": 0.00019997900352280534, - "loss": 46.0, - "step": 40516 - }, - { - "epoch": 6.524980876846894, - "grad_norm": 0.00210933992639184, - "learning_rate": 0.00019997900248611933, - "loss": 46.0, - "step": 40517 - }, - { - "epoch": 6.525141913925681, - "grad_norm": 0.0017669072840362787, - "learning_rate": 0.00019997900144940776, - "loss": 46.0, - "step": 40518 - }, - { - "epoch": 6.525302951004469, - "grad_norm": 0.003850339911878109, - "learning_rate": 0.00019997900041267058, - "loss": 46.0, - "step": 40519 - }, - { - "epoch": 6.525463988083256, - "grad_norm": 0.009675009176135063, - "learning_rate": 0.0001999789993759078, - "loss": 46.0, - "step": 40520 - }, - { - "epoch": 6.525625025162044, - "grad_norm": 0.005492130294442177, - "learning_rate": 0.00019997899833911945, - "loss": 46.0, - "step": 40521 - }, - { - "epoch": 6.525786062240831, - "grad_norm": 0.0045510828495025635, - "learning_rate": 0.0001999789973023055, - "loss": 46.0, - "step": 40522 - }, - { - "epoch": 6.525947099319619, - "grad_norm": 0.005213772412389517, - "learning_rate": 0.00019997899626546597, - "loss": 46.0, - "step": 40523 - }, - { - "epoch": 6.526108136398406, - "grad_norm": 0.00515779247507453, - "learning_rate": 0.00019997899522860085, - "loss": 46.0, - "step": 40524 - }, - { - "epoch": 6.526269173477193, - "grad_norm": 0.005757125560194254, - "learning_rate": 0.00019997899419171012, - "loss": 46.0, - "step": 40525 - }, - { - "epoch": 6.52643021055598, - "grad_norm": 0.005522201303392649, - "learning_rate": 0.00019997899315479383, - "loss": 46.0, - "step": 40526 - }, - { - "epoch": 6.526591247634768, - "grad_norm": 0.01856265403330326, - "learning_rate": 0.00019997899211785192, - "loss": 46.0, - "step": 40527 - }, - { - "epoch": 6.526752284713555, - "grad_norm": 0.004763638600707054, - "learning_rate": 0.00019997899108088442, - "loss": 46.0, - "step": 40528 - }, - { - "epoch": 6.5269133217923425, - "grad_norm": 0.018956495448946953, - "learning_rate": 0.00019997899004389137, - "loss": 46.0, - "step": 40529 - }, - { - "epoch": 6.52707435887113, - "grad_norm": 0.0008553876541554928, - "learning_rate": 0.0001999789890068727, - "loss": 46.0, - "step": 40530 - }, - { - "epoch": 6.527235395949917, - "grad_norm": 0.0019558309577405453, - "learning_rate": 0.00019997898796982847, - "loss": 46.0, - "step": 40531 - }, - { - "epoch": 6.527396433028705, - "grad_norm": 0.002452574437484145, - "learning_rate": 0.00019997898693275862, - "loss": 46.0, - "step": 40532 - }, - { - "epoch": 6.527557470107492, - "grad_norm": 0.0070543475449085236, - "learning_rate": 0.0001999789858956632, - "loss": 46.0, - "step": 40533 - }, - { - "epoch": 6.52771850718628, - "grad_norm": 0.0019224740099161863, - "learning_rate": 0.00019997898485854217, - "loss": 46.0, - "step": 40534 - }, - { - "epoch": 6.527879544265067, - "grad_norm": 0.0032826277893036604, - "learning_rate": 0.00019997898382139554, - "loss": 46.0, - "step": 40535 - }, - { - "epoch": 6.528040581343855, - "grad_norm": 0.0015773138729855418, - "learning_rate": 0.00019997898278422335, - "loss": 46.0, - "step": 40536 - }, - { - "epoch": 6.528201618422642, - "grad_norm": 0.002164094476029277, - "learning_rate": 0.00019997898174702557, - "loss": 46.0, - "step": 40537 - }, - { - "epoch": 6.52836265550143, - "grad_norm": 0.011215342208743095, - "learning_rate": 0.0001999789807098022, - "loss": 46.0, - "step": 40538 - }, - { - "epoch": 6.528523692580217, - "grad_norm": 0.008242777548730373, - "learning_rate": 0.00019997897967255322, - "loss": 46.0, - "step": 40539 - }, - { - "epoch": 6.528684729659004, - "grad_norm": 0.009067090228199959, - "learning_rate": 0.00019997897863527865, - "loss": 46.0, - "step": 40540 - }, - { - "epoch": 6.528845766737791, - "grad_norm": 0.004935994744300842, - "learning_rate": 0.00019997897759797852, - "loss": 46.0, - "step": 40541 - }, - { - "epoch": 6.5290068038165785, - "grad_norm": 0.0013682785211130977, - "learning_rate": 0.00019997897656065278, - "loss": 46.0, - "step": 40542 - }, - { - "epoch": 6.529167840895366, - "grad_norm": 0.0028611968737095594, - "learning_rate": 0.00019997897552330145, - "loss": 46.0, - "step": 40543 - }, - { - "epoch": 6.529328877974153, - "grad_norm": 0.004627167247235775, - "learning_rate": 0.00019997897448592453, - "loss": 46.0, - "step": 40544 - }, - { - "epoch": 6.529489915052941, - "grad_norm": 0.007628864608705044, - "learning_rate": 0.000199978973448522, - "loss": 46.0, - "step": 40545 - }, - { - "epoch": 6.529650952131728, - "grad_norm": 0.011871740221977234, - "learning_rate": 0.00019997897241109394, - "loss": 46.0, - "step": 40546 - }, - { - "epoch": 6.529811989210516, - "grad_norm": 0.003553625661879778, - "learning_rate": 0.00019997897137364023, - "loss": 46.0, - "step": 40547 - }, - { - "epoch": 6.529973026289303, - "grad_norm": 0.0022385010961443186, - "learning_rate": 0.00019997897033616096, - "loss": 46.0, - "step": 40548 - }, - { - "epoch": 6.530134063368091, - "grad_norm": 0.004715350456535816, - "learning_rate": 0.0001999789692986561, - "loss": 46.0, - "step": 40549 - }, - { - "epoch": 6.530295100446878, - "grad_norm": 0.0049905963242053986, - "learning_rate": 0.00019997896826112567, - "loss": 46.0, - "step": 40550 - }, - { - "epoch": 6.530456137525666, - "grad_norm": 0.0024281549267470837, - "learning_rate": 0.00019997896722356962, - "loss": 46.0, - "step": 40551 - }, - { - "epoch": 6.530617174604453, - "grad_norm": 0.004370152018964291, - "learning_rate": 0.000199978966185988, - "loss": 46.0, - "step": 40552 - }, - { - "epoch": 6.53077821168324, - "grad_norm": 0.008099764585494995, - "learning_rate": 0.00019997896514838077, - "loss": 46.0, - "step": 40553 - }, - { - "epoch": 6.530939248762028, - "grad_norm": 0.0015924018807709217, - "learning_rate": 0.00019997896411074796, - "loss": 46.0, - "step": 40554 - }, - { - "epoch": 6.5311002858408145, - "grad_norm": 0.0010014927247539163, - "learning_rate": 0.00019997896307308955, - "loss": 46.0, - "step": 40555 - }, - { - "epoch": 6.531261322919602, - "grad_norm": 0.003553119720891118, - "learning_rate": 0.00019997896203540556, - "loss": 46.0, - "step": 40556 - }, - { - "epoch": 6.531422359998389, - "grad_norm": 0.0023748748935759068, - "learning_rate": 0.000199978960997696, - "loss": 46.0, - "step": 40557 - }, - { - "epoch": 6.531583397077177, - "grad_norm": 0.005541473627090454, - "learning_rate": 0.00019997895995996082, - "loss": 46.0, - "step": 40558 - }, - { - "epoch": 6.531744434155964, - "grad_norm": 0.015427596867084503, - "learning_rate": 0.00019997895892220007, - "loss": 46.0, - "step": 40559 - }, - { - "epoch": 6.531905471234752, - "grad_norm": 0.006583834998309612, - "learning_rate": 0.00019997895788441373, - "loss": 46.0, - "step": 40560 - }, - { - "epoch": 6.532066508313539, - "grad_norm": 0.005457608494907618, - "learning_rate": 0.00019997895684660178, - "loss": 46.0, - "step": 40561 - }, - { - "epoch": 6.532227545392327, - "grad_norm": 0.00598899507895112, - "learning_rate": 0.00019997895580876426, - "loss": 46.0, - "step": 40562 - }, - { - "epoch": 6.532388582471114, - "grad_norm": 0.0022378363646566868, - "learning_rate": 0.00019997895477090114, - "loss": 46.0, - "step": 40563 - }, - { - "epoch": 6.5325496195499015, - "grad_norm": 0.0029661315493285656, - "learning_rate": 0.00019997895373301242, - "loss": 46.0, - "step": 40564 - }, - { - "epoch": 6.532710656628689, - "grad_norm": 0.0018811047775670886, - "learning_rate": 0.00019997895269509815, - "loss": 46.0, - "step": 40565 - }, - { - "epoch": 6.5328716937074764, - "grad_norm": 0.010170129127800465, - "learning_rate": 0.00019997895165715826, - "loss": 46.0, - "step": 40566 - }, - { - "epoch": 6.533032730786264, - "grad_norm": 0.0016801892779767513, - "learning_rate": 0.00019997895061919278, - "loss": 46.0, - "step": 40567 - }, - { - "epoch": 6.5331937678650505, - "grad_norm": 0.0027235024608671665, - "learning_rate": 0.00019997894958120172, - "loss": 46.0, - "step": 40568 - }, - { - "epoch": 6.533354804943839, - "grad_norm": 0.015057950280606747, - "learning_rate": 0.00019997894854318507, - "loss": 46.0, - "step": 40569 - }, - { - "epoch": 6.533515842022625, - "grad_norm": 0.01454103458672762, - "learning_rate": 0.00019997894750514283, - "loss": 46.0, - "step": 40570 - }, - { - "epoch": 6.533676879101413, - "grad_norm": 0.002031525131314993, - "learning_rate": 0.00019997894646707498, - "loss": 46.0, - "step": 40571 - }, - { - "epoch": 6.5338379161802, - "grad_norm": 0.005130839999765158, - "learning_rate": 0.00019997894542898156, - "loss": 46.0, - "step": 40572 - }, - { - "epoch": 6.533998953258988, - "grad_norm": 0.0019787601195275784, - "learning_rate": 0.00019997894439086257, - "loss": 46.0, - "step": 40573 - }, - { - "epoch": 6.534159990337775, - "grad_norm": 0.003488605609163642, - "learning_rate": 0.00019997894335271795, - "loss": 46.0, - "step": 40574 - }, - { - "epoch": 6.534321027416563, - "grad_norm": 0.01663118042051792, - "learning_rate": 0.00019997894231454778, - "loss": 46.0, - "step": 40575 - }, - { - "epoch": 6.53448206449535, - "grad_norm": 0.0049020093865692616, - "learning_rate": 0.000199978941276352, - "loss": 46.0, - "step": 40576 - }, - { - "epoch": 6.5346431015741375, - "grad_norm": 0.0044265445321798325, - "learning_rate": 0.00019997894023813061, - "loss": 46.0, - "step": 40577 - }, - { - "epoch": 6.534804138652925, - "grad_norm": 0.008141937665641308, - "learning_rate": 0.00019997893919988365, - "loss": 46.0, - "step": 40578 - }, - { - "epoch": 6.534965175731712, - "grad_norm": 0.009648614563047886, - "learning_rate": 0.0001999789381616111, - "loss": 46.0, - "step": 40579 - }, - { - "epoch": 6.5351262128105, - "grad_norm": 0.015393831767141819, - "learning_rate": 0.000199978937123313, - "loss": 46.0, - "step": 40580 - }, - { - "epoch": 6.535287249889287, - "grad_norm": 0.006348058115690947, - "learning_rate": 0.00019997893608498924, - "loss": 46.0, - "step": 40581 - }, - { - "epoch": 6.535448286968075, - "grad_norm": 0.006249545142054558, - "learning_rate": 0.00019997893504663993, - "loss": 46.0, - "step": 40582 - }, - { - "epoch": 6.535609324046861, - "grad_norm": 0.004322050604969263, - "learning_rate": 0.00019997893400826503, - "loss": 46.0, - "step": 40583 - }, - { - "epoch": 6.535770361125649, - "grad_norm": 0.011012271046638489, - "learning_rate": 0.00019997893296986455, - "loss": 46.0, - "step": 40584 - }, - { - "epoch": 6.535931398204436, - "grad_norm": 0.0016725219320505857, - "learning_rate": 0.00019997893193143847, - "loss": 46.0, - "step": 40585 - }, - { - "epoch": 6.536092435283224, - "grad_norm": 0.0028530291747301817, - "learning_rate": 0.0001999789308929868, - "loss": 46.0, - "step": 40586 - }, - { - "epoch": 6.536253472362011, - "grad_norm": 0.00382414273917675, - "learning_rate": 0.00019997892985450951, - "loss": 46.0, - "step": 40587 - }, - { - "epoch": 6.536414509440799, - "grad_norm": 0.009705050848424435, - "learning_rate": 0.00019997892881600665, - "loss": 46.0, - "step": 40588 - }, - { - "epoch": 6.536575546519586, - "grad_norm": 0.011702614836394787, - "learning_rate": 0.00019997892777747823, - "loss": 46.0, - "step": 40589 - }, - { - "epoch": 6.5367365835983735, - "grad_norm": 0.012955775484442711, - "learning_rate": 0.00019997892673892417, - "loss": 46.0, - "step": 40590 - }, - { - "epoch": 6.536897620677161, - "grad_norm": 0.0059370798990130424, - "learning_rate": 0.00019997892570034455, - "loss": 46.0, - "step": 40591 - }, - { - "epoch": 6.537058657755948, - "grad_norm": 0.002202889183536172, - "learning_rate": 0.00019997892466173936, - "loss": 46.0, - "step": 40592 - }, - { - "epoch": 6.537219694834736, - "grad_norm": 0.003232336137443781, - "learning_rate": 0.00019997892362310857, - "loss": 46.0, - "step": 40593 - }, - { - "epoch": 6.537380731913523, - "grad_norm": 0.003538022981956601, - "learning_rate": 0.00019997892258445216, - "loss": 46.0, - "step": 40594 - }, - { - "epoch": 6.537541768992311, - "grad_norm": 0.006802130024880171, - "learning_rate": 0.00019997892154577018, - "loss": 46.0, - "step": 40595 - }, - { - "epoch": 6.537702806071098, - "grad_norm": 0.0020270701497793198, - "learning_rate": 0.00019997892050706263, - "loss": 46.0, - "step": 40596 - }, - { - "epoch": 6.537863843149886, - "grad_norm": 0.011947513557970524, - "learning_rate": 0.00019997891946832948, - "loss": 46.0, - "step": 40597 - }, - { - "epoch": 6.538024880228672, - "grad_norm": 0.004161762539297342, - "learning_rate": 0.0001999789184295707, - "loss": 46.0, - "step": 40598 - }, - { - "epoch": 6.53818591730746, - "grad_norm": 0.001710326294414699, - "learning_rate": 0.00019997891739078637, - "loss": 46.0, - "step": 40599 - }, - { - "epoch": 6.538346954386247, - "grad_norm": 0.008246785029768944, - "learning_rate": 0.00019997891635197646, - "loss": 46.0, - "step": 40600 - }, - { - "epoch": 6.538507991465035, - "grad_norm": 0.001023536897264421, - "learning_rate": 0.00019997891531314092, - "loss": 46.0, - "step": 40601 - }, - { - "epoch": 6.538669028543822, - "grad_norm": 0.005823458544909954, - "learning_rate": 0.00019997891427427983, - "loss": 46.0, - "step": 40602 - }, - { - "epoch": 6.5388300656226095, - "grad_norm": 0.0016505605308339, - "learning_rate": 0.00019997891323539314, - "loss": 46.0, - "step": 40603 - }, - { - "epoch": 6.538991102701397, - "grad_norm": 0.004465322941541672, - "learning_rate": 0.00019997891219648083, - "loss": 46.0, - "step": 40604 - }, - { - "epoch": 6.539152139780184, - "grad_norm": 0.002746766898781061, - "learning_rate": 0.00019997891115754296, - "loss": 46.0, - "step": 40605 - }, - { - "epoch": 6.539313176858972, - "grad_norm": 0.010930893942713737, - "learning_rate": 0.00019997891011857953, - "loss": 46.0, - "step": 40606 - }, - { - "epoch": 6.539474213937759, - "grad_norm": 0.005336800590157509, - "learning_rate": 0.00019997890907959046, - "loss": 46.0, - "step": 40607 - }, - { - "epoch": 6.539635251016547, - "grad_norm": 0.001621581264771521, - "learning_rate": 0.00019997890804057582, - "loss": 46.0, - "step": 40608 - }, - { - "epoch": 6.539796288095334, - "grad_norm": 0.005950243212282658, - "learning_rate": 0.00019997890700153558, - "loss": 46.0, - "step": 40609 - }, - { - "epoch": 6.539957325174122, - "grad_norm": 0.008815617300570011, - "learning_rate": 0.00019997890596246975, - "loss": 46.0, - "step": 40610 - }, - { - "epoch": 6.540118362252909, - "grad_norm": 0.002849416807293892, - "learning_rate": 0.00019997890492337835, - "loss": 46.0, - "step": 40611 - }, - { - "epoch": 6.5402793993316966, - "grad_norm": 0.0018635190790519118, - "learning_rate": 0.00019997890388426135, - "loss": 46.0, - "step": 40612 - }, - { - "epoch": 6.540440436410483, - "grad_norm": 0.008092408068478107, - "learning_rate": 0.00019997890284511875, - "loss": 46.0, - "step": 40613 - }, - { - "epoch": 6.540601473489271, - "grad_norm": 0.0025586779229342937, - "learning_rate": 0.00019997890180595057, - "loss": 46.0, - "step": 40614 - }, - { - "epoch": 6.540762510568058, - "grad_norm": 0.0016096540493890643, - "learning_rate": 0.00019997890076675683, - "loss": 46.0, - "step": 40615 - }, - { - "epoch": 6.5409235476468455, - "grad_norm": 0.01131502166390419, - "learning_rate": 0.00019997889972753744, - "loss": 46.0, - "step": 40616 - }, - { - "epoch": 6.541084584725633, - "grad_norm": 0.00605749199166894, - "learning_rate": 0.00019997889868829253, - "loss": 46.0, - "step": 40617 - }, - { - "epoch": 6.54124562180442, - "grad_norm": 0.007547752000391483, - "learning_rate": 0.00019997889764902197, - "loss": 46.0, - "step": 40618 - }, - { - "epoch": 6.541406658883208, - "grad_norm": 0.00610667048022151, - "learning_rate": 0.00019997889660972585, - "loss": 46.0, - "step": 40619 - }, - { - "epoch": 6.541567695961995, - "grad_norm": 0.013484025374054909, - "learning_rate": 0.00019997889557040415, - "loss": 46.0, - "step": 40620 - }, - { - "epoch": 6.541728733040783, - "grad_norm": 0.00537380576133728, - "learning_rate": 0.00019997889453105683, - "loss": 46.0, - "step": 40621 - }, - { - "epoch": 6.54188977011957, - "grad_norm": 0.002446510596200824, - "learning_rate": 0.00019997889349168392, - "loss": 46.0, - "step": 40622 - }, - { - "epoch": 6.542050807198358, - "grad_norm": 0.003181547624990344, - "learning_rate": 0.00019997889245228546, - "loss": 46.0, - "step": 40623 - }, - { - "epoch": 6.542211844277145, - "grad_norm": 0.017466126009821892, - "learning_rate": 0.0001999788914128614, - "loss": 46.0, - "step": 40624 - }, - { - "epoch": 6.5423728813559325, - "grad_norm": 0.0033442063722759485, - "learning_rate": 0.0001999788903734117, - "loss": 46.0, - "step": 40625 - }, - { - "epoch": 6.542533918434719, - "grad_norm": 0.0013541518710553646, - "learning_rate": 0.00019997888933393645, - "loss": 46.0, - "step": 40626 - }, - { - "epoch": 6.542694955513507, - "grad_norm": 0.0019404829945415258, - "learning_rate": 0.0001999788882944356, - "loss": 46.0, - "step": 40627 - }, - { - "epoch": 6.542855992592294, - "grad_norm": 0.003663154784590006, - "learning_rate": 0.00019997888725490918, - "loss": 46.0, - "step": 40628 - }, - { - "epoch": 6.5430170296710815, - "grad_norm": 0.0036188082303851843, - "learning_rate": 0.00019997888621535716, - "loss": 46.0, - "step": 40629 - }, - { - "epoch": 6.543178066749869, - "grad_norm": 0.01687401905655861, - "learning_rate": 0.00019997888517577956, - "loss": 46.0, - "step": 40630 - }, - { - "epoch": 6.543339103828656, - "grad_norm": 0.0010863416828215122, - "learning_rate": 0.00019997888413617637, - "loss": 46.0, - "step": 40631 - }, - { - "epoch": 6.543500140907444, - "grad_norm": 0.011693691834807396, - "learning_rate": 0.00019997888309654756, - "loss": 46.0, - "step": 40632 - }, - { - "epoch": 6.543661177986231, - "grad_norm": 0.00698266364634037, - "learning_rate": 0.0001999788820568932, - "loss": 46.0, - "step": 40633 - }, - { - "epoch": 6.543822215065019, - "grad_norm": 0.0019774027168750763, - "learning_rate": 0.00019997888101721322, - "loss": 46.0, - "step": 40634 - }, - { - "epoch": 6.543983252143806, - "grad_norm": 0.006216689944267273, - "learning_rate": 0.00019997887997750765, - "loss": 46.0, - "step": 40635 - }, - { - "epoch": 6.544144289222594, - "grad_norm": 0.015553610399365425, - "learning_rate": 0.00019997887893777653, - "loss": 46.0, - "step": 40636 - }, - { - "epoch": 6.544305326301381, - "grad_norm": 0.004962827544659376, - "learning_rate": 0.00019997887789801978, - "loss": 46.0, - "step": 40637 - }, - { - "epoch": 6.5444663633801685, - "grad_norm": 0.003268804866820574, - "learning_rate": 0.00019997887685823743, - "loss": 46.0, - "step": 40638 - }, - { - "epoch": 6.544627400458956, - "grad_norm": 0.019117962568998337, - "learning_rate": 0.00019997887581842954, - "loss": 46.0, - "step": 40639 - }, - { - "epoch": 6.544788437537743, - "grad_norm": 0.0073148044757544994, - "learning_rate": 0.00019997887477859604, - "loss": 46.0, - "step": 40640 - }, - { - "epoch": 6.54494947461653, - "grad_norm": 0.0012962461914867163, - "learning_rate": 0.00019997887373873695, - "loss": 46.0, - "step": 40641 - }, - { - "epoch": 6.545110511695318, - "grad_norm": 0.005489963572472334, - "learning_rate": 0.00019997887269885225, - "loss": 46.0, - "step": 40642 - }, - { - "epoch": 6.545271548774105, - "grad_norm": 0.005328271072357893, - "learning_rate": 0.00019997887165894198, - "loss": 46.0, - "step": 40643 - }, - { - "epoch": 6.545432585852892, - "grad_norm": 0.010754041373729706, - "learning_rate": 0.00019997887061900613, - "loss": 46.0, - "step": 40644 - }, - { - "epoch": 6.54559362293168, - "grad_norm": 0.0083598168566823, - "learning_rate": 0.00019997886957904466, - "loss": 46.0, - "step": 40645 - }, - { - "epoch": 6.545754660010467, - "grad_norm": 0.001120619010180235, - "learning_rate": 0.0001999788685390576, - "loss": 46.0, - "step": 40646 - }, - { - "epoch": 6.545915697089255, - "grad_norm": 0.0011444613337516785, - "learning_rate": 0.000199978867499045, - "loss": 46.0, - "step": 40647 - }, - { - "epoch": 6.546076734168042, - "grad_norm": 0.0014339227927848697, - "learning_rate": 0.00019997886645900677, - "loss": 46.0, - "step": 40648 - }, - { - "epoch": 6.54623777124683, - "grad_norm": 0.0035265947226434946, - "learning_rate": 0.00019997886541894296, - "loss": 46.0, - "step": 40649 - }, - { - "epoch": 6.546398808325617, - "grad_norm": 0.004440826829522848, - "learning_rate": 0.00019997886437885356, - "loss": 46.0, - "step": 40650 - }, - { - "epoch": 6.5465598454044045, - "grad_norm": 0.004230854567140341, - "learning_rate": 0.00019997886333873857, - "loss": 46.0, - "step": 40651 - }, - { - "epoch": 6.546720882483192, - "grad_norm": 0.0028550291899591684, - "learning_rate": 0.000199978862298598, - "loss": 46.0, - "step": 40652 - }, - { - "epoch": 6.546881919561979, - "grad_norm": 0.0029974381905049086, - "learning_rate": 0.0001999788612584318, - "loss": 46.0, - "step": 40653 - }, - { - "epoch": 6.547042956640767, - "grad_norm": 0.006866373587399721, - "learning_rate": 0.00019997886021824005, - "loss": 46.0, - "step": 40654 - }, - { - "epoch": 6.547203993719554, - "grad_norm": 0.0030630505643785, - "learning_rate": 0.00019997885917802272, - "loss": 46.0, - "step": 40655 - }, - { - "epoch": 6.547365030798341, - "grad_norm": 0.002616151934489608, - "learning_rate": 0.00019997885813777976, - "loss": 46.0, - "step": 40656 - }, - { - "epoch": 6.547526067877129, - "grad_norm": 0.006250202655792236, - "learning_rate": 0.00019997885709751125, - "loss": 46.0, - "step": 40657 - }, - { - "epoch": 6.547687104955916, - "grad_norm": 0.0033784371335059404, - "learning_rate": 0.00019997885605721715, - "loss": 46.0, - "step": 40658 - }, - { - "epoch": 6.547848142034703, - "grad_norm": 0.005065184086561203, - "learning_rate": 0.00019997885501689744, - "loss": 46.0, - "step": 40659 - }, - { - "epoch": 6.548009179113491, - "grad_norm": 0.0019097983604297042, - "learning_rate": 0.00019997885397655214, - "loss": 46.0, - "step": 40660 - }, - { - "epoch": 6.548170216192278, - "grad_norm": 0.02277885004878044, - "learning_rate": 0.00019997885293618125, - "loss": 46.0, - "step": 40661 - }, - { - "epoch": 6.548331253271066, - "grad_norm": 0.0019294701050966978, - "learning_rate": 0.00019997885189578478, - "loss": 46.0, - "step": 40662 - }, - { - "epoch": 6.548492290349853, - "grad_norm": 0.007393193896859884, - "learning_rate": 0.00019997885085536272, - "loss": 46.0, - "step": 40663 - }, - { - "epoch": 6.5486533274286405, - "grad_norm": 0.005524695850908756, - "learning_rate": 0.00019997884981491507, - "loss": 46.0, - "step": 40664 - }, - { - "epoch": 6.548814364507428, - "grad_norm": 0.022860087454319, - "learning_rate": 0.00019997884877444183, - "loss": 46.0, - "step": 40665 - }, - { - "epoch": 6.548975401586215, - "grad_norm": 0.008511630818247795, - "learning_rate": 0.000199978847733943, - "loss": 46.0, - "step": 40666 - }, - { - "epoch": 6.549136438665003, - "grad_norm": 0.0018915238324552774, - "learning_rate": 0.00019997884669341857, - "loss": 46.0, - "step": 40667 - }, - { - "epoch": 6.54929747574379, - "grad_norm": 0.00618374440819025, - "learning_rate": 0.00019997884565286857, - "loss": 46.0, - "step": 40668 - }, - { - "epoch": 6.549458512822578, - "grad_norm": 0.00811982061713934, - "learning_rate": 0.00019997884461229296, - "loss": 46.0, - "step": 40669 - }, - { - "epoch": 6.549619549901365, - "grad_norm": 0.015774866566061974, - "learning_rate": 0.0001999788435716918, - "loss": 46.0, - "step": 40670 - }, - { - "epoch": 6.549780586980152, - "grad_norm": 0.005287447478622198, - "learning_rate": 0.000199978842531065, - "loss": 46.0, - "step": 40671 - }, - { - "epoch": 6.549941624058939, - "grad_norm": 0.0025843321345746517, - "learning_rate": 0.00019997884149041263, - "loss": 46.0, - "step": 40672 - }, - { - "epoch": 6.550102661137727, - "grad_norm": 0.011869542300701141, - "learning_rate": 0.00019997884044973467, - "loss": 46.0, - "step": 40673 - }, - { - "epoch": 6.550263698216514, - "grad_norm": 0.004435858689248562, - "learning_rate": 0.00019997883940903112, - "loss": 46.0, - "step": 40674 - }, - { - "epoch": 6.550424735295302, - "grad_norm": 0.0013995266053825617, - "learning_rate": 0.000199978838368302, - "loss": 46.0, - "step": 40675 - }, - { - "epoch": 6.550585772374089, - "grad_norm": 0.003281451528891921, - "learning_rate": 0.00019997883732754724, - "loss": 46.0, - "step": 40676 - }, - { - "epoch": 6.5507468094528765, - "grad_norm": 0.009259171783924103, - "learning_rate": 0.00019997883628676696, - "loss": 46.0, - "step": 40677 - }, - { - "epoch": 6.550907846531664, - "grad_norm": 0.0045952280052006245, - "learning_rate": 0.00019997883524596106, - "loss": 46.0, - "step": 40678 - }, - { - "epoch": 6.551068883610451, - "grad_norm": 0.012239092960953712, - "learning_rate": 0.00019997883420512955, - "loss": 46.0, - "step": 40679 - }, - { - "epoch": 6.551229920689239, - "grad_norm": 0.0015869359485805035, - "learning_rate": 0.00019997883316427245, - "loss": 46.0, - "step": 40680 - }, - { - "epoch": 6.551390957768026, - "grad_norm": 0.003501494647935033, - "learning_rate": 0.0001999788321233898, - "loss": 46.0, - "step": 40681 - }, - { - "epoch": 6.551551994846814, - "grad_norm": 0.0037833626847714186, - "learning_rate": 0.00019997883108248155, - "loss": 46.0, - "step": 40682 - }, - { - "epoch": 6.551713031925601, - "grad_norm": 0.0044454666785895824, - "learning_rate": 0.00019997883004154766, - "loss": 46.0, - "step": 40683 - }, - { - "epoch": 6.551874069004389, - "grad_norm": 0.004663343541324139, - "learning_rate": 0.00019997882900058824, - "loss": 46.0, - "step": 40684 - }, - { - "epoch": 6.552035106083176, - "grad_norm": 0.0018624671502038836, - "learning_rate": 0.0001999788279596032, - "loss": 46.0, - "step": 40685 - }, - { - "epoch": 6.552196143161963, - "grad_norm": 0.01254362240433693, - "learning_rate": 0.0001999788269185926, - "loss": 46.0, - "step": 40686 - }, - { - "epoch": 6.55235718024075, - "grad_norm": 0.004956195130944252, - "learning_rate": 0.00019997882587755638, - "loss": 46.0, - "step": 40687 - }, - { - "epoch": 6.5525182173195375, - "grad_norm": 0.002800451824441552, - "learning_rate": 0.00019997882483649456, - "loss": 46.0, - "step": 40688 - }, - { - "epoch": 6.552679254398325, - "grad_norm": 0.0033860914409160614, - "learning_rate": 0.0001999788237954072, - "loss": 46.0, - "step": 40689 - }, - { - "epoch": 6.5528402914771124, - "grad_norm": 0.007571641821414232, - "learning_rate": 0.0001999788227542942, - "loss": 46.0, - "step": 40690 - }, - { - "epoch": 6.5530013285559, - "grad_norm": 0.004056733567267656, - "learning_rate": 0.00019997882171315565, - "loss": 46.0, - "step": 40691 - }, - { - "epoch": 6.553162365634687, - "grad_norm": 0.007771741598844528, - "learning_rate": 0.0001999788206719915, - "loss": 46.0, - "step": 40692 - }, - { - "epoch": 6.553323402713475, - "grad_norm": 0.006224946118891239, - "learning_rate": 0.00019997881963080175, - "loss": 46.0, - "step": 40693 - }, - { - "epoch": 6.553484439792262, - "grad_norm": 0.013901463709771633, - "learning_rate": 0.00019997881858958643, - "loss": 46.0, - "step": 40694 - }, - { - "epoch": 6.55364547687105, - "grad_norm": 0.0015252531738951802, - "learning_rate": 0.0001999788175483455, - "loss": 46.0, - "step": 40695 - }, - { - "epoch": 6.553806513949837, - "grad_norm": 0.011995387263596058, - "learning_rate": 0.00019997881650707898, - "loss": 46.0, - "step": 40696 - }, - { - "epoch": 6.553967551028625, - "grad_norm": 0.0017018553335219622, - "learning_rate": 0.00019997881546578684, - "loss": 46.0, - "step": 40697 - }, - { - "epoch": 6.554128588107412, - "grad_norm": 0.006945356726646423, - "learning_rate": 0.00019997881442446918, - "loss": 46.0, - "step": 40698 - }, - { - "epoch": 6.5542896251861995, - "grad_norm": 0.0076630376279354095, - "learning_rate": 0.0001999788133831259, - "loss": 46.0, - "step": 40699 - }, - { - "epoch": 6.554450662264987, - "grad_norm": 0.00933765433728695, - "learning_rate": 0.00019997881234175703, - "loss": 46.0, - "step": 40700 - }, - { - "epoch": 6.5546116993437735, - "grad_norm": 0.005538893863558769, - "learning_rate": 0.00019997881130036255, - "loss": 46.0, - "step": 40701 - }, - { - "epoch": 6.554772736422561, - "grad_norm": 0.0009473536629229784, - "learning_rate": 0.00019997881025894253, - "loss": 46.0, - "step": 40702 - }, - { - "epoch": 6.554933773501348, - "grad_norm": 0.0023595639504492283, - "learning_rate": 0.00019997880921749687, - "loss": 46.0, - "step": 40703 - }, - { - "epoch": 6.555094810580136, - "grad_norm": 0.005580158904194832, - "learning_rate": 0.00019997880817602563, - "loss": 46.0, - "step": 40704 - }, - { - "epoch": 6.555255847658923, - "grad_norm": 0.011074107140302658, - "learning_rate": 0.00019997880713452882, - "loss": 46.0, - "step": 40705 - }, - { - "epoch": 6.555416884737711, - "grad_norm": 0.009842784143984318, - "learning_rate": 0.0001999788060930064, - "loss": 46.0, - "step": 40706 - }, - { - "epoch": 6.555577921816498, - "grad_norm": 0.004351463634520769, - "learning_rate": 0.0001999788050514584, - "loss": 46.0, - "step": 40707 - }, - { - "epoch": 6.555738958895286, - "grad_norm": 0.002302167471498251, - "learning_rate": 0.00019997880400988484, - "loss": 46.0, - "step": 40708 - }, - { - "epoch": 6.555899995974073, - "grad_norm": 0.013627825304865837, - "learning_rate": 0.00019997880296828563, - "loss": 46.0, - "step": 40709 - }, - { - "epoch": 6.556061033052861, - "grad_norm": 0.007735341787338257, - "learning_rate": 0.0001999788019266609, - "loss": 46.0, - "step": 40710 - }, - { - "epoch": 6.556222070131648, - "grad_norm": 0.002870397176593542, - "learning_rate": 0.00019997880088501053, - "loss": 46.0, - "step": 40711 - }, - { - "epoch": 6.5563831072104355, - "grad_norm": 0.011340837925672531, - "learning_rate": 0.0001999787998433346, - "loss": 46.0, - "step": 40712 - }, - { - "epoch": 6.556544144289223, - "grad_norm": 0.005011400673538446, - "learning_rate": 0.00019997879880163304, - "loss": 46.0, - "step": 40713 - }, - { - "epoch": 6.5567051813680095, - "grad_norm": 0.0035711501259356737, - "learning_rate": 0.00019997879775990592, - "loss": 46.0, - "step": 40714 - }, - { - "epoch": 6.556866218446798, - "grad_norm": 0.0013839555904269218, - "learning_rate": 0.00019997879671815322, - "loss": 46.0, - "step": 40715 - }, - { - "epoch": 6.557027255525584, - "grad_norm": 0.006828649435192347, - "learning_rate": 0.0001999787956763749, - "loss": 46.0, - "step": 40716 - }, - { - "epoch": 6.557188292604372, - "grad_norm": 0.013376781716942787, - "learning_rate": 0.000199978794634571, - "loss": 46.0, - "step": 40717 - }, - { - "epoch": 6.557349329683159, - "grad_norm": 0.003747333539649844, - "learning_rate": 0.00019997879359274153, - "loss": 46.0, - "step": 40718 - }, - { - "epoch": 6.557510366761947, - "grad_norm": 0.0020217036362737417, - "learning_rate": 0.00019997879255088645, - "loss": 46.0, - "step": 40719 - }, - { - "epoch": 6.557671403840734, - "grad_norm": 0.0012212367728352547, - "learning_rate": 0.00019997879150900581, - "loss": 46.0, - "step": 40720 - }, - { - "epoch": 6.557832440919522, - "grad_norm": 0.003769966773688793, - "learning_rate": 0.00019997879046709956, - "loss": 46.0, - "step": 40721 - }, - { - "epoch": 6.557993477998309, - "grad_norm": 0.005456156097352505, - "learning_rate": 0.0001999787894251677, - "loss": 46.0, - "step": 40722 - }, - { - "epoch": 6.558154515077097, - "grad_norm": 0.005896203685551882, - "learning_rate": 0.00019997878838321027, - "loss": 46.0, - "step": 40723 - }, - { - "epoch": 6.558315552155884, - "grad_norm": 0.014687170274555683, - "learning_rate": 0.00019997878734122722, - "loss": 46.0, - "step": 40724 - }, - { - "epoch": 6.5584765892346715, - "grad_norm": 0.001518176170065999, - "learning_rate": 0.00019997878629921865, - "loss": 46.0, - "step": 40725 - }, - { - "epoch": 6.558637626313459, - "grad_norm": 0.013105345889925957, - "learning_rate": 0.00019997878525718443, - "loss": 46.0, - "step": 40726 - }, - { - "epoch": 6.558798663392246, - "grad_norm": 0.007250647991895676, - "learning_rate": 0.00019997878421512466, - "loss": 46.0, - "step": 40727 - }, - { - "epoch": 6.558959700471034, - "grad_norm": 0.00250617996789515, - "learning_rate": 0.00019997878317303927, - "loss": 46.0, - "step": 40728 - }, - { - "epoch": 6.55912073754982, - "grad_norm": 0.0013489233097061515, - "learning_rate": 0.00019997878213092832, - "loss": 46.0, - "step": 40729 - }, - { - "epoch": 6.559281774628609, - "grad_norm": 0.009942890144884586, - "learning_rate": 0.00019997878108879175, - "loss": 46.0, - "step": 40730 - }, - { - "epoch": 6.559442811707395, - "grad_norm": 0.006347054615616798, - "learning_rate": 0.0001999787800466296, - "loss": 46.0, - "step": 40731 - }, - { - "epoch": 6.559603848786183, - "grad_norm": 0.0033637864980846643, - "learning_rate": 0.0001999787790044419, - "loss": 46.0, - "step": 40732 - }, - { - "epoch": 6.55976488586497, - "grad_norm": 0.006357844918966293, - "learning_rate": 0.00019997877796222853, - "loss": 46.0, - "step": 40733 - }, - { - "epoch": 6.559925922943758, - "grad_norm": 0.005246862303465605, - "learning_rate": 0.00019997877691998962, - "loss": 46.0, - "step": 40734 - }, - { - "epoch": 6.560086960022545, - "grad_norm": 0.0019441910553723574, - "learning_rate": 0.00019997877587772512, - "loss": 46.0, - "step": 40735 - }, - { - "epoch": 6.5602479971013326, - "grad_norm": 0.002969289431348443, - "learning_rate": 0.00019997877483543506, - "loss": 46.0, - "step": 40736 - }, - { - "epoch": 6.56040903418012, - "grad_norm": 0.010823934338986874, - "learning_rate": 0.00019997877379311936, - "loss": 46.0, - "step": 40737 - }, - { - "epoch": 6.5605700712589075, - "grad_norm": 0.0025764349848031998, - "learning_rate": 0.00019997877275077807, - "loss": 46.0, - "step": 40738 - }, - { - "epoch": 6.560731108337695, - "grad_norm": 0.0011528560426086187, - "learning_rate": 0.00019997877170841122, - "loss": 46.0, - "step": 40739 - }, - { - "epoch": 6.560892145416482, - "grad_norm": 0.004093565978109837, - "learning_rate": 0.00019997877066601878, - "loss": 46.0, - "step": 40740 - }, - { - "epoch": 6.56105318249527, - "grad_norm": 0.0027801082469522953, - "learning_rate": 0.00019997876962360073, - "loss": 46.0, - "step": 40741 - }, - { - "epoch": 6.561214219574057, - "grad_norm": 0.0014597009867429733, - "learning_rate": 0.00019997876858115712, - "loss": 46.0, - "step": 40742 - }, - { - "epoch": 6.561375256652845, - "grad_norm": 0.006947056390345097, - "learning_rate": 0.0001999787675386879, - "loss": 46.0, - "step": 40743 - }, - { - "epoch": 6.561536293731631, - "grad_norm": 0.0034314082004129887, - "learning_rate": 0.00019997876649619308, - "loss": 46.0, - "step": 40744 - }, - { - "epoch": 6.561697330810419, - "grad_norm": 0.0020651633385568857, - "learning_rate": 0.00019997876545367268, - "loss": 46.0, - "step": 40745 - }, - { - "epoch": 6.561858367889206, - "grad_norm": 0.018291432410478592, - "learning_rate": 0.0001999787644111267, - "loss": 46.0, - "step": 40746 - }, - { - "epoch": 6.562019404967994, - "grad_norm": 0.009931794367730618, - "learning_rate": 0.00019997876336855512, - "loss": 46.0, - "step": 40747 - }, - { - "epoch": 6.562180442046781, - "grad_norm": 0.006406045518815517, - "learning_rate": 0.00019997876232595796, - "loss": 46.0, - "step": 40748 - }, - { - "epoch": 6.5623414791255685, - "grad_norm": 0.0034832200035452843, - "learning_rate": 0.00019997876128333521, - "loss": 46.0, - "step": 40749 - }, - { - "epoch": 6.562502516204356, - "grad_norm": 0.004517795518040657, - "learning_rate": 0.00019997876024068685, - "loss": 46.0, - "step": 40750 - }, - { - "epoch": 6.562663553283143, - "grad_norm": 0.006442656274884939, - "learning_rate": 0.00019997875919801296, - "loss": 46.0, - "step": 40751 - }, - { - "epoch": 6.562824590361931, - "grad_norm": 0.004893993493169546, - "learning_rate": 0.00019997875815531342, - "loss": 46.0, - "step": 40752 - }, - { - "epoch": 6.562985627440718, - "grad_norm": 0.002450728788971901, - "learning_rate": 0.0001999787571125883, - "loss": 46.0, - "step": 40753 - }, - { - "epoch": 6.563146664519506, - "grad_norm": 0.004116914700716734, - "learning_rate": 0.00019997875606983759, - "loss": 46.0, - "step": 40754 - }, - { - "epoch": 6.563307701598293, - "grad_norm": 0.003956758417189121, - "learning_rate": 0.00019997875502706131, - "loss": 46.0, - "step": 40755 - }, - { - "epoch": 6.563468738677081, - "grad_norm": 0.017810363322496414, - "learning_rate": 0.00019997875398425943, - "loss": 46.0, - "step": 40756 - }, - { - "epoch": 6.563629775755868, - "grad_norm": 0.0020648043137043715, - "learning_rate": 0.00019997875294143196, - "loss": 46.0, - "step": 40757 - }, - { - "epoch": 6.563790812834656, - "grad_norm": 0.032018031924963, - "learning_rate": 0.0001999787518985789, - "loss": 46.0, - "step": 40758 - }, - { - "epoch": 6.563951849913442, - "grad_norm": 0.006671628449112177, - "learning_rate": 0.00019997875085570025, - "loss": 46.0, - "step": 40759 - }, - { - "epoch": 6.56411288699223, - "grad_norm": 0.016330774873495102, - "learning_rate": 0.000199978749812796, - "loss": 46.0, - "step": 40760 - }, - { - "epoch": 6.564273924071017, - "grad_norm": 0.0022147325798869133, - "learning_rate": 0.00019997874876986617, - "loss": 46.0, - "step": 40761 - }, - { - "epoch": 6.5644349611498045, - "grad_norm": 0.004885328933596611, - "learning_rate": 0.00019997874772691073, - "loss": 46.0, - "step": 40762 - }, - { - "epoch": 6.564595998228592, - "grad_norm": 0.003634854219853878, - "learning_rate": 0.00019997874668392976, - "loss": 46.0, - "step": 40763 - }, - { - "epoch": 6.564757035307379, - "grad_norm": 0.004276583902537823, - "learning_rate": 0.00019997874564092313, - "loss": 46.0, - "step": 40764 - }, - { - "epoch": 6.564918072386167, - "grad_norm": 0.010841818526387215, - "learning_rate": 0.00019997874459789096, - "loss": 46.0, - "step": 40765 - }, - { - "epoch": 6.565079109464954, - "grad_norm": 0.004987781867384911, - "learning_rate": 0.0001999787435548332, - "loss": 46.0, - "step": 40766 - }, - { - "epoch": 6.565240146543742, - "grad_norm": 0.007482773624360561, - "learning_rate": 0.00019997874251174983, - "loss": 46.0, - "step": 40767 - }, - { - "epoch": 6.565401183622529, - "grad_norm": 0.0031461124308407307, - "learning_rate": 0.0001999787414686409, - "loss": 46.0, - "step": 40768 - }, - { - "epoch": 6.565562220701317, - "grad_norm": 0.006653795950114727, - "learning_rate": 0.00019997874042550632, - "loss": 46.0, - "step": 40769 - }, - { - "epoch": 6.565723257780104, - "grad_norm": 0.02060164138674736, - "learning_rate": 0.0001999787393823462, - "loss": 46.0, - "step": 40770 - }, - { - "epoch": 6.565884294858892, - "grad_norm": 0.0013250625925138593, - "learning_rate": 0.00019997873833916047, - "loss": 46.0, - "step": 40771 - }, - { - "epoch": 6.566045331937679, - "grad_norm": 0.0012524082558229566, - "learning_rate": 0.00019997873729594916, - "loss": 46.0, - "step": 40772 - }, - { - "epoch": 6.5662063690164665, - "grad_norm": 0.01475952286273241, - "learning_rate": 0.00019997873625271227, - "loss": 46.0, - "step": 40773 - }, - { - "epoch": 6.566367406095253, - "grad_norm": 0.0017552569042891264, - "learning_rate": 0.00019997873520944976, - "loss": 46.0, - "step": 40774 - }, - { - "epoch": 6.5665284431740405, - "grad_norm": 0.002845402341336012, - "learning_rate": 0.0001999787341661617, - "loss": 46.0, - "step": 40775 - }, - { - "epoch": 6.566689480252828, - "grad_norm": 0.010622103698551655, - "learning_rate": 0.00019997873312284803, - "loss": 46.0, - "step": 40776 - }, - { - "epoch": 6.566850517331615, - "grad_norm": 0.0050599114038050175, - "learning_rate": 0.00019997873207950874, - "loss": 46.0, - "step": 40777 - }, - { - "epoch": 6.567011554410403, - "grad_norm": 0.00894957035779953, - "learning_rate": 0.00019997873103614393, - "loss": 46.0, - "step": 40778 - }, - { - "epoch": 6.56717259148919, - "grad_norm": 0.009361461736261845, - "learning_rate": 0.0001999787299927535, - "loss": 46.0, - "step": 40779 - }, - { - "epoch": 6.567333628567978, - "grad_norm": 0.003837777068838477, - "learning_rate": 0.00019997872894933746, - "loss": 46.0, - "step": 40780 - }, - { - "epoch": 6.567494665646765, - "grad_norm": 0.0020335065200924873, - "learning_rate": 0.0001999787279058958, - "loss": 46.0, - "step": 40781 - }, - { - "epoch": 6.567655702725553, - "grad_norm": 0.010169140063226223, - "learning_rate": 0.00019997872686242863, - "loss": 46.0, - "step": 40782 - }, - { - "epoch": 6.56781673980434, - "grad_norm": 0.004751738626509905, - "learning_rate": 0.00019997872581893584, - "loss": 46.0, - "step": 40783 - }, - { - "epoch": 6.567977776883128, - "grad_norm": 0.003848387161269784, - "learning_rate": 0.00019997872477541746, - "loss": 46.0, - "step": 40784 - }, - { - "epoch": 6.568138813961915, - "grad_norm": 0.001586927566677332, - "learning_rate": 0.00019997872373187346, - "loss": 46.0, - "step": 40785 - }, - { - "epoch": 6.5682998510407025, - "grad_norm": 0.003316056914627552, - "learning_rate": 0.0001999787226883039, - "loss": 46.0, - "step": 40786 - }, - { - "epoch": 6.568460888119489, - "grad_norm": 0.019763285294175148, - "learning_rate": 0.00019997872164470874, - "loss": 46.0, - "step": 40787 - }, - { - "epoch": 6.568621925198277, - "grad_norm": 0.01035550981760025, - "learning_rate": 0.000199978720601088, - "loss": 46.0, - "step": 40788 - }, - { - "epoch": 6.568782962277064, - "grad_norm": 0.0027674254961311817, - "learning_rate": 0.00019997871955744167, - "loss": 46.0, - "step": 40789 - }, - { - "epoch": 6.568943999355851, - "grad_norm": 0.010693095624446869, - "learning_rate": 0.00019997871851376976, - "loss": 46.0, - "step": 40790 - }, - { - "epoch": 6.569105036434639, - "grad_norm": 0.012854852713644505, - "learning_rate": 0.00019997871747007225, - "loss": 46.0, - "step": 40791 - }, - { - "epoch": 6.569266073513426, - "grad_norm": 0.0011087863240391016, - "learning_rate": 0.00019997871642634914, - "loss": 46.0, - "step": 40792 - }, - { - "epoch": 6.569427110592214, - "grad_norm": 0.0018523853505030274, - "learning_rate": 0.00019997871538260045, - "loss": 46.0, - "step": 40793 - }, - { - "epoch": 6.569588147671001, - "grad_norm": 0.002040497027337551, - "learning_rate": 0.00019997871433882617, - "loss": 46.0, - "step": 40794 - }, - { - "epoch": 6.569749184749789, - "grad_norm": 0.0021707494743168354, - "learning_rate": 0.0001999787132950263, - "loss": 46.0, - "step": 40795 - }, - { - "epoch": 6.569910221828576, - "grad_norm": 0.0050607346929609776, - "learning_rate": 0.00019997871225120085, - "loss": 46.0, - "step": 40796 - }, - { - "epoch": 6.5700712589073635, - "grad_norm": 0.003341927193105221, - "learning_rate": 0.0001999787112073498, - "loss": 46.0, - "step": 40797 - }, - { - "epoch": 6.570232295986151, - "grad_norm": 0.007981318980455399, - "learning_rate": 0.00019997871016347318, - "loss": 46.0, - "step": 40798 - }, - { - "epoch": 6.5703933330649384, - "grad_norm": 0.008813790045678616, - "learning_rate": 0.00019997870911957094, - "loss": 46.0, - "step": 40799 - }, - { - "epoch": 6.570554370143726, - "grad_norm": 0.013091310858726501, - "learning_rate": 0.00019997870807564314, - "loss": 46.0, - "step": 40800 - }, - { - "epoch": 6.570715407222513, - "grad_norm": 0.0035188747569918633, - "learning_rate": 0.00019997870703168972, - "loss": 46.0, - "step": 40801 - }, - { - "epoch": 6.5708764443013, - "grad_norm": 0.00641377829015255, - "learning_rate": 0.00019997870598771074, - "loss": 46.0, - "step": 40802 - }, - { - "epoch": 6.571037481380088, - "grad_norm": 0.004212986212223768, - "learning_rate": 0.00019997870494370615, - "loss": 46.0, - "step": 40803 - }, - { - "epoch": 6.571198518458875, - "grad_norm": 0.007577687036246061, - "learning_rate": 0.00019997870389967598, - "loss": 46.0, - "step": 40804 - }, - { - "epoch": 6.571359555537662, - "grad_norm": 0.004961410071700811, - "learning_rate": 0.00019997870285562018, - "loss": 46.0, - "step": 40805 - }, - { - "epoch": 6.57152059261645, - "grad_norm": 0.002276793820783496, - "learning_rate": 0.00019997870181153883, - "loss": 46.0, - "step": 40806 - }, - { - "epoch": 6.571681629695237, - "grad_norm": 0.01013907603919506, - "learning_rate": 0.0001999787007674319, - "loss": 46.0, - "step": 40807 - }, - { - "epoch": 6.571842666774025, - "grad_norm": 0.008978056721389294, - "learning_rate": 0.0001999786997232994, - "loss": 46.0, - "step": 40808 - }, - { - "epoch": 6.572003703852812, - "grad_norm": 0.005078437272459269, - "learning_rate": 0.00019997869867914125, - "loss": 46.0, - "step": 40809 - }, - { - "epoch": 6.5721647409315995, - "grad_norm": 0.0006928339716978371, - "learning_rate": 0.00019997869763495752, - "loss": 46.0, - "step": 40810 - }, - { - "epoch": 6.572325778010387, - "grad_norm": 0.001591209787875414, - "learning_rate": 0.00019997869659074823, - "loss": 46.0, - "step": 40811 - }, - { - "epoch": 6.572486815089174, - "grad_norm": 0.006118203978985548, - "learning_rate": 0.00019997869554651336, - "loss": 46.0, - "step": 40812 - }, - { - "epoch": 6.572647852167962, - "grad_norm": 0.004776508081704378, - "learning_rate": 0.0001999786945022529, - "loss": 46.0, - "step": 40813 - }, - { - "epoch": 6.572808889246749, - "grad_norm": 0.00453460868448019, - "learning_rate": 0.00019997869345796682, - "loss": 46.0, - "step": 40814 - }, - { - "epoch": 6.572969926325537, - "grad_norm": 0.004105367232114077, - "learning_rate": 0.00019997869241365516, - "loss": 46.0, - "step": 40815 - }, - { - "epoch": 6.573130963404324, - "grad_norm": 0.004433273337781429, - "learning_rate": 0.00019997869136931793, - "loss": 46.0, - "step": 40816 - }, - { - "epoch": 6.573292000483111, - "grad_norm": 0.0016228772001340985, - "learning_rate": 0.00019997869032495507, - "loss": 46.0, - "step": 40817 - }, - { - "epoch": 6.573453037561898, - "grad_norm": 0.009247311390936375, - "learning_rate": 0.00019997868928056667, - "loss": 46.0, - "step": 40818 - }, - { - "epoch": 6.573614074640686, - "grad_norm": 0.028416039422154427, - "learning_rate": 0.00019997868823615265, - "loss": 46.0, - "step": 40819 - }, - { - "epoch": 6.573775111719473, - "grad_norm": 0.005283160600811243, - "learning_rate": 0.00019997868719171305, - "loss": 46.0, - "step": 40820 - }, - { - "epoch": 6.573936148798261, - "grad_norm": 0.010821589268743992, - "learning_rate": 0.00019997868614724784, - "loss": 46.0, - "step": 40821 - }, - { - "epoch": 6.574097185877048, - "grad_norm": 0.0017294847639277577, - "learning_rate": 0.00019997868510275707, - "loss": 46.0, - "step": 40822 - }, - { - "epoch": 6.5742582229558355, - "grad_norm": 0.005353229120373726, - "learning_rate": 0.00019997868405824068, - "loss": 46.0, - "step": 40823 - }, - { - "epoch": 6.574419260034623, - "grad_norm": 0.0020743291825056076, - "learning_rate": 0.0001999786830136987, - "loss": 46.0, - "step": 40824 - }, - { - "epoch": 6.57458029711341, - "grad_norm": 0.004140579141676426, - "learning_rate": 0.00019997868196913117, - "loss": 46.0, - "step": 40825 - }, - { - "epoch": 6.574741334192198, - "grad_norm": 0.012167209759354591, - "learning_rate": 0.00019997868092453804, - "loss": 46.0, - "step": 40826 - }, - { - "epoch": 6.574902371270985, - "grad_norm": 0.0013769238721579313, - "learning_rate": 0.0001999786798799193, - "loss": 46.0, - "step": 40827 - }, - { - "epoch": 6.575063408349773, - "grad_norm": 0.007982422597706318, - "learning_rate": 0.00019997867883527498, - "loss": 46.0, - "step": 40828 - }, - { - "epoch": 6.57522444542856, - "grad_norm": 0.010315612889826298, - "learning_rate": 0.00019997867779060507, - "loss": 46.0, - "step": 40829 - }, - { - "epoch": 6.575385482507348, - "grad_norm": 0.001964476192370057, - "learning_rate": 0.00019997867674590957, - "loss": 46.0, - "step": 40830 - }, - { - "epoch": 6.575546519586135, - "grad_norm": 0.007544730789959431, - "learning_rate": 0.00019997867570118849, - "loss": 46.0, - "step": 40831 - }, - { - "epoch": 6.575707556664922, - "grad_norm": 0.0013011571718379855, - "learning_rate": 0.0001999786746564418, - "loss": 46.0, - "step": 40832 - }, - { - "epoch": 6.575868593743709, - "grad_norm": 0.00497397780418396, - "learning_rate": 0.00019997867361166955, - "loss": 46.0, - "step": 40833 - }, - { - "epoch": 6.576029630822497, - "grad_norm": 0.003266011830419302, - "learning_rate": 0.0001999786725668717, - "loss": 46.0, - "step": 40834 - }, - { - "epoch": 6.576190667901284, - "grad_norm": 0.025336232036352158, - "learning_rate": 0.00019997867152204824, - "loss": 46.0, - "step": 40835 - }, - { - "epoch": 6.5763517049800715, - "grad_norm": 0.0018570650136098266, - "learning_rate": 0.00019997867047719922, - "loss": 46.0, - "step": 40836 - }, - { - "epoch": 6.576512742058859, - "grad_norm": 0.00737730972468853, - "learning_rate": 0.00019997866943232458, - "loss": 46.0, - "step": 40837 - }, - { - "epoch": 6.576673779137646, - "grad_norm": 0.0024157322477549314, - "learning_rate": 0.0001999786683874244, - "loss": 46.0, - "step": 40838 - }, - { - "epoch": 6.576834816216434, - "grad_norm": 0.00592298386618495, - "learning_rate": 0.00019997866734249858, - "loss": 46.0, - "step": 40839 - }, - { - "epoch": 6.576995853295221, - "grad_norm": 0.001229961053468287, - "learning_rate": 0.00019997866629754718, - "loss": 46.0, - "step": 40840 - }, - { - "epoch": 6.577156890374009, - "grad_norm": 0.006123790983110666, - "learning_rate": 0.0001999786652525702, - "loss": 46.0, - "step": 40841 - }, - { - "epoch": 6.577317927452796, - "grad_norm": 0.007423703093081713, - "learning_rate": 0.00019997866420756762, - "loss": 46.0, - "step": 40842 - }, - { - "epoch": 6.577478964531584, - "grad_norm": 0.01447257585823536, - "learning_rate": 0.00019997866316253947, - "loss": 46.0, - "step": 40843 - }, - { - "epoch": 6.577640001610371, - "grad_norm": 0.0025015114806592464, - "learning_rate": 0.00019997866211748572, - "loss": 46.0, - "step": 40844 - }, - { - "epoch": 6.5778010386891586, - "grad_norm": 0.007844620384275913, - "learning_rate": 0.00019997866107240636, - "loss": 46.0, - "step": 40845 - }, - { - "epoch": 6.577962075767946, - "grad_norm": 0.007799843791872263, - "learning_rate": 0.00019997866002730144, - "loss": 46.0, - "step": 40846 - }, - { - "epoch": 6.578123112846733, - "grad_norm": 0.003350872313603759, - "learning_rate": 0.00019997865898217093, - "loss": 46.0, - "step": 40847 - }, - { - "epoch": 6.57828414992552, - "grad_norm": 0.0009966054931282997, - "learning_rate": 0.00019997865793701484, - "loss": 46.0, - "step": 40848 - }, - { - "epoch": 6.5784451870043075, - "grad_norm": 0.00975534226745367, - "learning_rate": 0.00019997865689183313, - "loss": 46.0, - "step": 40849 - }, - { - "epoch": 6.578606224083095, - "grad_norm": 0.0023474418558180332, - "learning_rate": 0.00019997865584662586, - "loss": 46.0, - "step": 40850 - }, - { - "epoch": 6.578767261161882, - "grad_norm": 0.004771187901496887, - "learning_rate": 0.00019997865480139297, - "loss": 46.0, - "step": 40851 - }, - { - "epoch": 6.57892829824067, - "grad_norm": 0.0008967381436377764, - "learning_rate": 0.0001999786537561345, - "loss": 46.0, - "step": 40852 - }, - { - "epoch": 6.579089335319457, - "grad_norm": 0.011502484790980816, - "learning_rate": 0.00019997865271085044, - "loss": 46.0, - "step": 40853 - }, - { - "epoch": 6.579250372398245, - "grad_norm": 0.0028328821063041687, - "learning_rate": 0.00019997865166554083, - "loss": 46.0, - "step": 40854 - }, - { - "epoch": 6.579411409477032, - "grad_norm": 0.019505729898810387, - "learning_rate": 0.00019997865062020557, - "loss": 46.0, - "step": 40855 - }, - { - "epoch": 6.57957244655582, - "grad_norm": 0.005499968770891428, - "learning_rate": 0.00019997864957484475, - "loss": 46.0, - "step": 40856 - }, - { - "epoch": 6.579733483634607, - "grad_norm": 0.008117297664284706, - "learning_rate": 0.00019997864852945834, - "loss": 46.0, - "step": 40857 - }, - { - "epoch": 6.5798945207133945, - "grad_norm": 0.0017934462521225214, - "learning_rate": 0.00019997864748404632, - "loss": 46.0, - "step": 40858 - }, - { - "epoch": 6.580055557792182, - "grad_norm": 0.0010216551600024104, - "learning_rate": 0.00019997864643860874, - "loss": 46.0, - "step": 40859 - }, - { - "epoch": 6.5802165948709685, - "grad_norm": 0.008551942184567451, - "learning_rate": 0.00019997864539314554, - "loss": 46.0, - "step": 40860 - }, - { - "epoch": 6.580377631949757, - "grad_norm": 0.003321589669212699, - "learning_rate": 0.0001999786443476568, - "loss": 46.0, - "step": 40861 - }, - { - "epoch": 6.5805386690285435, - "grad_norm": 0.0028386928606778383, - "learning_rate": 0.00019997864330214245, - "loss": 46.0, - "step": 40862 - }, - { - "epoch": 6.580699706107331, - "grad_norm": 0.007586246356368065, - "learning_rate": 0.0001999786422566025, - "loss": 46.0, - "step": 40863 - }, - { - "epoch": 6.580860743186118, - "grad_norm": 0.0038805403746664524, - "learning_rate": 0.00019997864121103692, - "loss": 46.0, - "step": 40864 - }, - { - "epoch": 6.581021780264906, - "grad_norm": 0.002877336461097002, - "learning_rate": 0.00019997864016544582, - "loss": 46.0, - "step": 40865 - }, - { - "epoch": 6.581182817343693, - "grad_norm": 0.01757013238966465, - "learning_rate": 0.0001999786391198291, - "loss": 46.0, - "step": 40866 - }, - { - "epoch": 6.581343854422481, - "grad_norm": 0.0022511135321110487, - "learning_rate": 0.0001999786380741868, - "loss": 46.0, - "step": 40867 - }, - { - "epoch": 6.581504891501268, - "grad_norm": 0.0058492147363722324, - "learning_rate": 0.0001999786370285189, - "loss": 46.0, - "step": 40868 - }, - { - "epoch": 6.581665928580056, - "grad_norm": 0.009627859108150005, - "learning_rate": 0.00019997863598282542, - "loss": 46.0, - "step": 40869 - }, - { - "epoch": 6.581826965658843, - "grad_norm": 0.0015005981549620628, - "learning_rate": 0.00019997863493710633, - "loss": 46.0, - "step": 40870 - }, - { - "epoch": 6.5819880027376305, - "grad_norm": 0.0010363651672378182, - "learning_rate": 0.00019997863389136167, - "loss": 46.0, - "step": 40871 - }, - { - "epoch": 6.582149039816418, - "grad_norm": 0.004977967590093613, - "learning_rate": 0.00019997863284559143, - "loss": 46.0, - "step": 40872 - }, - { - "epoch": 6.582310076895205, - "grad_norm": 0.0015544480411335826, - "learning_rate": 0.0001999786317997956, - "loss": 46.0, - "step": 40873 - }, - { - "epoch": 6.582471113973993, - "grad_norm": 0.01028465386480093, - "learning_rate": 0.00019997863075397416, - "loss": 46.0, - "step": 40874 - }, - { - "epoch": 6.582632151052779, - "grad_norm": 0.0031032762490212917, - "learning_rate": 0.00019997862970812713, - "loss": 46.0, - "step": 40875 - }, - { - "epoch": 6.582793188131568, - "grad_norm": 0.0034985090605914593, - "learning_rate": 0.0001999786286622545, - "loss": 46.0, - "step": 40876 - }, - { - "epoch": 6.582954225210354, - "grad_norm": 0.004831480793654919, - "learning_rate": 0.0001999786276163563, - "loss": 46.0, - "step": 40877 - }, - { - "epoch": 6.583115262289142, - "grad_norm": 0.016321226954460144, - "learning_rate": 0.00019997862657043252, - "loss": 46.0, - "step": 40878 - }, - { - "epoch": 6.583276299367929, - "grad_norm": 0.0049707540310919285, - "learning_rate": 0.00019997862552448314, - "loss": 46.0, - "step": 40879 - }, - { - "epoch": 6.583437336446717, - "grad_norm": 0.013099361211061478, - "learning_rate": 0.0001999786244785082, - "loss": 46.0, - "step": 40880 - }, - { - "epoch": 6.583598373525504, - "grad_norm": 0.0033694191370159388, - "learning_rate": 0.00019997862343250762, - "loss": 46.0, - "step": 40881 - }, - { - "epoch": 6.583759410604292, - "grad_norm": 0.0035773268900811672, - "learning_rate": 0.00019997862238648145, - "loss": 46.0, - "step": 40882 - }, - { - "epoch": 6.583920447683079, - "grad_norm": 0.002976084128022194, - "learning_rate": 0.00019997862134042973, - "loss": 46.0, - "step": 40883 - }, - { - "epoch": 6.5840814847618665, - "grad_norm": 0.017070339992642403, - "learning_rate": 0.0001999786202943524, - "loss": 46.0, - "step": 40884 - }, - { - "epoch": 6.584242521840654, - "grad_norm": 0.0022377576678991318, - "learning_rate": 0.0001999786192482495, - "loss": 46.0, - "step": 40885 - }, - { - "epoch": 6.584403558919441, - "grad_norm": 0.005276334472000599, - "learning_rate": 0.000199978618202121, - "loss": 46.0, - "step": 40886 - }, - { - "epoch": 6.584564595998229, - "grad_norm": 0.0013971786247566342, - "learning_rate": 0.00019997861715596692, - "loss": 46.0, - "step": 40887 - }, - { - "epoch": 6.584725633077016, - "grad_norm": 0.0033143588807433844, - "learning_rate": 0.0001999786161097872, - "loss": 46.0, - "step": 40888 - }, - { - "epoch": 6.584886670155804, - "grad_norm": 0.001610062550753355, - "learning_rate": 0.00019997861506358192, - "loss": 46.0, - "step": 40889 - }, - { - "epoch": 6.58504770723459, - "grad_norm": 0.013162648305296898, - "learning_rate": 0.00019997861401735109, - "loss": 46.0, - "step": 40890 - }, - { - "epoch": 6.585208744313378, - "grad_norm": 0.00695267878472805, - "learning_rate": 0.00019997861297109463, - "loss": 46.0, - "step": 40891 - }, - { - "epoch": 6.585369781392165, - "grad_norm": 0.004841205198317766, - "learning_rate": 0.0001999786119248126, - "loss": 46.0, - "step": 40892 - }, - { - "epoch": 6.585530818470953, - "grad_norm": 0.003625884186476469, - "learning_rate": 0.00019997861087850497, - "loss": 46.0, - "step": 40893 - }, - { - "epoch": 6.58569185554974, - "grad_norm": 0.011560887098312378, - "learning_rate": 0.00019997860983217173, - "loss": 46.0, - "step": 40894 - }, - { - "epoch": 6.585852892628528, - "grad_norm": 0.0012081846361979842, - "learning_rate": 0.00019997860878581293, - "loss": 46.0, - "step": 40895 - }, - { - "epoch": 6.586013929707315, - "grad_norm": 0.013927397318184376, - "learning_rate": 0.0001999786077394285, - "loss": 46.0, - "step": 40896 - }, - { - "epoch": 6.5861749667861025, - "grad_norm": 0.0018666948890313506, - "learning_rate": 0.00019997860669301854, - "loss": 46.0, - "step": 40897 - }, - { - "epoch": 6.58633600386489, - "grad_norm": 0.003324862103909254, - "learning_rate": 0.00019997860564658298, - "loss": 46.0, - "step": 40898 - }, - { - "epoch": 6.586497040943677, - "grad_norm": 0.018707364797592163, - "learning_rate": 0.0001999786046001218, - "loss": 46.0, - "step": 40899 - }, - { - "epoch": 6.586658078022465, - "grad_norm": 0.0038193203508853912, - "learning_rate": 0.00019997860355363504, - "loss": 46.0, - "step": 40900 - }, - { - "epoch": 6.586819115101252, - "grad_norm": 0.002704898826777935, - "learning_rate": 0.0001999786025071227, - "loss": 46.0, - "step": 40901 - }, - { - "epoch": 6.58698015218004, - "grad_norm": 0.0006604773225262761, - "learning_rate": 0.00019997860146058475, - "loss": 46.0, - "step": 40902 - }, - { - "epoch": 6.587141189258827, - "grad_norm": 0.008635904639959335, - "learning_rate": 0.00019997860041402122, - "loss": 46.0, - "step": 40903 - }, - { - "epoch": 6.587302226337615, - "grad_norm": 0.001925706397742033, - "learning_rate": 0.00019997859936743214, - "loss": 46.0, - "step": 40904 - }, - { - "epoch": 6.587463263416401, - "grad_norm": 0.005759078543633223, - "learning_rate": 0.0001999785983208174, - "loss": 46.0, - "step": 40905 - }, - { - "epoch": 6.587624300495189, - "grad_norm": 0.006487691309303045, - "learning_rate": 0.00019997859727417713, - "loss": 46.0, - "step": 40906 - }, - { - "epoch": 6.587785337573976, - "grad_norm": 0.006073189899325371, - "learning_rate": 0.00019997859622751123, - "loss": 46.0, - "step": 40907 - }, - { - "epoch": 6.587946374652764, - "grad_norm": 0.006661845836788416, - "learning_rate": 0.0001999785951808198, - "loss": 46.0, - "step": 40908 - }, - { - "epoch": 6.588107411731551, - "grad_norm": 0.01026956457644701, - "learning_rate": 0.00019997859413410272, - "loss": 46.0, - "step": 40909 - }, - { - "epoch": 6.5882684488103385, - "grad_norm": 0.007520552258938551, - "learning_rate": 0.00019997859308736006, - "loss": 46.0, - "step": 40910 - }, - { - "epoch": 6.588429485889126, - "grad_norm": 0.007870912551879883, - "learning_rate": 0.0001999785920405918, - "loss": 46.0, - "step": 40911 - }, - { - "epoch": 6.588590522967913, - "grad_norm": 0.019782068207859993, - "learning_rate": 0.000199978590993798, - "loss": 46.0, - "step": 40912 - }, - { - "epoch": 6.588751560046701, - "grad_norm": 0.002928605070337653, - "learning_rate": 0.00019997858994697857, - "loss": 46.0, - "step": 40913 - }, - { - "epoch": 6.588912597125488, - "grad_norm": 0.006589285098016262, - "learning_rate": 0.00019997858890013356, - "loss": 46.0, - "step": 40914 - }, - { - "epoch": 6.589073634204276, - "grad_norm": 0.013120392337441444, - "learning_rate": 0.00019997858785326296, - "loss": 46.0, - "step": 40915 - }, - { - "epoch": 6.589234671283063, - "grad_norm": 0.0015728623839095235, - "learning_rate": 0.00019997858680636678, - "loss": 46.0, - "step": 40916 - }, - { - "epoch": 6.589395708361851, - "grad_norm": 0.0145568223670125, - "learning_rate": 0.00019997858575944498, - "loss": 46.0, - "step": 40917 - }, - { - "epoch": 6.589556745440638, - "grad_norm": 0.005859727505594492, - "learning_rate": 0.00019997858471249765, - "loss": 46.0, - "step": 40918 - }, - { - "epoch": 6.5897177825194255, - "grad_norm": 0.0022197493817657232, - "learning_rate": 0.0001999785836655247, - "loss": 46.0, - "step": 40919 - }, - { - "epoch": 6.589878819598212, - "grad_norm": 0.004570066463202238, - "learning_rate": 0.00019997858261852614, - "loss": 46.0, - "step": 40920 - }, - { - "epoch": 6.5900398566769995, - "grad_norm": 0.0077544027008116245, - "learning_rate": 0.00019997858157150202, - "loss": 46.0, - "step": 40921 - }, - { - "epoch": 6.590200893755787, - "grad_norm": 0.005387146957218647, - "learning_rate": 0.00019997858052445228, - "loss": 46.0, - "step": 40922 - }, - { - "epoch": 6.5903619308345744, - "grad_norm": 0.0050185625441372395, - "learning_rate": 0.00019997857947737696, - "loss": 46.0, - "step": 40923 - }, - { - "epoch": 6.590522967913362, - "grad_norm": 0.010602330788969994, - "learning_rate": 0.00019997857843027608, - "loss": 46.0, - "step": 40924 - }, - { - "epoch": 6.590684004992149, - "grad_norm": 0.0017769129481166601, - "learning_rate": 0.00019997857738314958, - "loss": 46.0, - "step": 40925 - }, - { - "epoch": 6.590845042070937, - "grad_norm": 0.0017785043455660343, - "learning_rate": 0.0001999785763359975, - "loss": 46.0, - "step": 40926 - }, - { - "epoch": 6.591006079149724, - "grad_norm": 0.002208475489169359, - "learning_rate": 0.00019997857528881983, - "loss": 46.0, - "step": 40927 - }, - { - "epoch": 6.591167116228512, - "grad_norm": 0.0021965792402625084, - "learning_rate": 0.00019997857424161657, - "loss": 46.0, - "step": 40928 - }, - { - "epoch": 6.591328153307299, - "grad_norm": 0.006657461170107126, - "learning_rate": 0.00019997857319438772, - "loss": 46.0, - "step": 40929 - }, - { - "epoch": 6.591489190386087, - "grad_norm": 0.004748990293592215, - "learning_rate": 0.00019997857214713326, - "loss": 46.0, - "step": 40930 - }, - { - "epoch": 6.591650227464874, - "grad_norm": 0.005602410063147545, - "learning_rate": 0.00019997857109985327, - "loss": 46.0, - "step": 40931 - }, - { - "epoch": 6.5918112645436615, - "grad_norm": 0.010183610953390598, - "learning_rate": 0.00019997857005254766, - "loss": 46.0, - "step": 40932 - }, - { - "epoch": 6.591972301622448, - "grad_norm": 0.005670685321092606, - "learning_rate": 0.00019997856900521644, - "loss": 46.0, - "step": 40933 - }, - { - "epoch": 6.592133338701236, - "grad_norm": 0.0024473739322274923, - "learning_rate": 0.00019997856795785963, - "loss": 46.0, - "step": 40934 - }, - { - "epoch": 6.592294375780023, - "grad_norm": 0.014006580226123333, - "learning_rate": 0.00019997856691047724, - "loss": 46.0, - "step": 40935 - }, - { - "epoch": 6.59245541285881, - "grad_norm": 0.0041218772530555725, - "learning_rate": 0.00019997856586306928, - "loss": 46.0, - "step": 40936 - }, - { - "epoch": 6.592616449937598, - "grad_norm": 0.0031477452721446753, - "learning_rate": 0.00019997856481563574, - "loss": 46.0, - "step": 40937 - }, - { - "epoch": 6.592777487016385, - "grad_norm": 0.026103513315320015, - "learning_rate": 0.00019997856376817655, - "loss": 46.0, - "step": 40938 - }, - { - "epoch": 6.592938524095173, - "grad_norm": 0.00866468995809555, - "learning_rate": 0.0001999785627206918, - "loss": 46.0, - "step": 40939 - }, - { - "epoch": 6.59309956117396, - "grad_norm": 0.0032765506766736507, - "learning_rate": 0.00019997856167318148, - "loss": 46.0, - "step": 40940 - }, - { - "epoch": 6.593260598252748, - "grad_norm": 0.0032905288971960545, - "learning_rate": 0.00019997856062564556, - "loss": 46.0, - "step": 40941 - }, - { - "epoch": 6.593421635331535, - "grad_norm": 0.003337105503305793, - "learning_rate": 0.00019997855957808405, - "loss": 46.0, - "step": 40942 - }, - { - "epoch": 6.593582672410323, - "grad_norm": 0.006013059988617897, - "learning_rate": 0.00019997855853049696, - "loss": 46.0, - "step": 40943 - }, - { - "epoch": 6.59374370948911, - "grad_norm": 0.007998323999345303, - "learning_rate": 0.00019997855748288425, - "loss": 46.0, - "step": 40944 - }, - { - "epoch": 6.5939047465678975, - "grad_norm": 0.005047450307756662, - "learning_rate": 0.00019997855643524599, - "loss": 46.0, - "step": 40945 - }, - { - "epoch": 6.594065783646685, - "grad_norm": 0.013005949556827545, - "learning_rate": 0.00019997855538758213, - "loss": 46.0, - "step": 40946 - }, - { - "epoch": 6.594226820725472, - "grad_norm": 0.008321783505380154, - "learning_rate": 0.00019997855433989266, - "loss": 46.0, - "step": 40947 - }, - { - "epoch": 6.594387857804259, - "grad_norm": 0.003362649353221059, - "learning_rate": 0.00019997855329217763, - "loss": 46.0, - "step": 40948 - }, - { - "epoch": 6.594548894883047, - "grad_norm": 0.006200097035616636, - "learning_rate": 0.000199978552244437, - "loss": 46.0, - "step": 40949 - }, - { - "epoch": 6.594709931961834, - "grad_norm": 0.015620601363480091, - "learning_rate": 0.00019997855119667076, - "loss": 46.0, - "step": 40950 - }, - { - "epoch": 6.594870969040621, - "grad_norm": 0.004222431220114231, - "learning_rate": 0.00019997855014887894, - "loss": 46.0, - "step": 40951 - }, - { - "epoch": 6.595032006119409, - "grad_norm": 0.003103445516899228, - "learning_rate": 0.00019997854910106156, - "loss": 46.0, - "step": 40952 - }, - { - "epoch": 6.595193043198196, - "grad_norm": 0.013094968162477016, - "learning_rate": 0.00019997854805321855, - "loss": 46.0, - "step": 40953 - }, - { - "epoch": 6.595354080276984, - "grad_norm": 0.004147983621805906, - "learning_rate": 0.00019997854700534994, - "loss": 46.0, - "step": 40954 - }, - { - "epoch": 6.595515117355771, - "grad_norm": 0.004095536656677723, - "learning_rate": 0.00019997854595745577, - "loss": 46.0, - "step": 40955 - }, - { - "epoch": 6.595676154434559, - "grad_norm": 0.010508733801543713, - "learning_rate": 0.00019997854490953602, - "loss": 46.0, - "step": 40956 - }, - { - "epoch": 6.595837191513346, - "grad_norm": 0.006186679471284151, - "learning_rate": 0.00019997854386159068, - "loss": 46.0, - "step": 40957 - }, - { - "epoch": 6.5959982285921335, - "grad_norm": 0.016042983159422874, - "learning_rate": 0.00019997854281361975, - "loss": 46.0, - "step": 40958 - }, - { - "epoch": 6.596159265670921, - "grad_norm": 0.0016949169803410769, - "learning_rate": 0.0001999785417656232, - "loss": 46.0, - "step": 40959 - }, - { - "epoch": 6.596320302749708, - "grad_norm": 0.004380188882350922, - "learning_rate": 0.00019997854071760108, - "loss": 46.0, - "step": 40960 - }, - { - "epoch": 6.596481339828496, - "grad_norm": 0.0059052156284451485, - "learning_rate": 0.00019997853966955336, - "loss": 46.0, - "step": 40961 - }, - { - "epoch": 6.596642376907283, - "grad_norm": 0.009181967005133629, - "learning_rate": 0.0001999785386214801, - "loss": 46.0, - "step": 40962 - }, - { - "epoch": 6.59680341398607, - "grad_norm": 0.0032888425048440695, - "learning_rate": 0.0001999785375733812, - "loss": 46.0, - "step": 40963 - }, - { - "epoch": 6.596964451064858, - "grad_norm": 0.006101379171013832, - "learning_rate": 0.00019997853652525672, - "loss": 46.0, - "step": 40964 - }, - { - "epoch": 6.597125488143645, - "grad_norm": 0.014859865419566631, - "learning_rate": 0.00019997853547710665, - "loss": 46.0, - "step": 40965 - }, - { - "epoch": 6.597286525222432, - "grad_norm": 0.005723286885768175, - "learning_rate": 0.000199978534428931, - "loss": 46.0, - "step": 40966 - }, - { - "epoch": 6.59744756230122, - "grad_norm": 0.0016400391468778253, - "learning_rate": 0.00019997853338072976, - "loss": 46.0, - "step": 40967 - }, - { - "epoch": 6.597608599380007, - "grad_norm": 0.003897425951436162, - "learning_rate": 0.0001999785323325029, - "loss": 46.0, - "step": 40968 - }, - { - "epoch": 6.5977696364587946, - "grad_norm": 0.00242862687446177, - "learning_rate": 0.0001999785312842505, - "loss": 46.0, - "step": 40969 - }, - { - "epoch": 6.597930673537582, - "grad_norm": 0.011331310495734215, - "learning_rate": 0.00019997853023597247, - "loss": 46.0, - "step": 40970 - }, - { - "epoch": 6.5980917106163695, - "grad_norm": 0.008717311546206474, - "learning_rate": 0.00019997852918766888, - "loss": 46.0, - "step": 40971 - }, - { - "epoch": 6.598252747695157, - "grad_norm": 0.009111875668168068, - "learning_rate": 0.00019997852813933968, - "loss": 46.0, - "step": 40972 - }, - { - "epoch": 6.598413784773944, - "grad_norm": 0.0011732830898836255, - "learning_rate": 0.00019997852709098492, - "loss": 46.0, - "step": 40973 - }, - { - "epoch": 6.598574821852732, - "grad_norm": 0.0075826495885849, - "learning_rate": 0.00019997852604260454, - "loss": 46.0, - "step": 40974 - }, - { - "epoch": 6.598735858931519, - "grad_norm": 0.013696345500648022, - "learning_rate": 0.00019997852499419855, - "loss": 46.0, - "step": 40975 - }, - { - "epoch": 6.598896896010307, - "grad_norm": 0.0021244550589472055, - "learning_rate": 0.00019997852394576702, - "loss": 46.0, - "step": 40976 - }, - { - "epoch": 6.599057933089094, - "grad_norm": 0.0035381598863750696, - "learning_rate": 0.00019997852289730989, - "loss": 46.0, - "step": 40977 - }, - { - "epoch": 6.599218970167881, - "grad_norm": 0.0014341837959364057, - "learning_rate": 0.00019997852184882716, - "loss": 46.0, - "step": 40978 - }, - { - "epoch": 6.599380007246668, - "grad_norm": 0.00937070045620203, - "learning_rate": 0.00019997852080031882, - "loss": 46.0, - "step": 40979 - }, - { - "epoch": 6.599541044325456, - "grad_norm": 0.0015439860289916396, - "learning_rate": 0.00019997851975178495, - "loss": 46.0, - "step": 40980 - }, - { - "epoch": 6.599702081404243, - "grad_norm": 0.004059721250087023, - "learning_rate": 0.00019997851870322543, - "loss": 46.0, - "step": 40981 - }, - { - "epoch": 6.5998631184830305, - "grad_norm": 0.0023277876898646355, - "learning_rate": 0.00019997851765464036, - "loss": 46.0, - "step": 40982 - }, - { - "epoch": 6.600024155561818, - "grad_norm": 0.006967457942664623, - "learning_rate": 0.0001999785166060297, - "loss": 46.0, - "step": 40983 - }, - { - "epoch": 6.600185192640605, - "grad_norm": 0.0016623716801404953, - "learning_rate": 0.0001999785155573934, - "loss": 46.0, - "step": 40984 - }, - { - "epoch": 6.600346229719393, - "grad_norm": 0.011348284780979156, - "learning_rate": 0.00019997851450873156, - "loss": 46.0, - "step": 40985 - }, - { - "epoch": 6.60050726679818, - "grad_norm": 0.005747534334659576, - "learning_rate": 0.00019997851346004414, - "loss": 46.0, - "step": 40986 - }, - { - "epoch": 6.600668303876968, - "grad_norm": 0.0016448006499558687, - "learning_rate": 0.00019997851241133107, - "loss": 46.0, - "step": 40987 - }, - { - "epoch": 6.600829340955755, - "grad_norm": 0.009472008794546127, - "learning_rate": 0.00019997851136259245, - "loss": 46.0, - "step": 40988 - }, - { - "epoch": 6.600990378034543, - "grad_norm": 0.004862797446548939, - "learning_rate": 0.00019997851031382827, - "loss": 46.0, - "step": 40989 - }, - { - "epoch": 6.60115141511333, - "grad_norm": 0.010619417764246464, - "learning_rate": 0.00019997850926503844, - "loss": 46.0, - "step": 40990 - }, - { - "epoch": 6.601312452192118, - "grad_norm": 0.0033544304315000772, - "learning_rate": 0.00019997850821622305, - "loss": 46.0, - "step": 40991 - }, - { - "epoch": 6.601473489270905, - "grad_norm": 0.01468256488442421, - "learning_rate": 0.00019997850716738208, - "loss": 46.0, - "step": 40992 - }, - { - "epoch": 6.601634526349692, - "grad_norm": 0.009828968904912472, - "learning_rate": 0.0001999785061185155, - "loss": 46.0, - "step": 40993 - }, - { - "epoch": 6.601795563428479, - "grad_norm": 0.00132093601860106, - "learning_rate": 0.00019997850506962335, - "loss": 46.0, - "step": 40994 - }, - { - "epoch": 6.6019566005072665, - "grad_norm": 0.0014363840455189347, - "learning_rate": 0.0001999785040207056, - "loss": 46.0, - "step": 40995 - }, - { - "epoch": 6.602117637586054, - "grad_norm": 0.004275656770914793, - "learning_rate": 0.00019997850297176226, - "loss": 46.0, - "step": 40996 - }, - { - "epoch": 6.602278674664841, - "grad_norm": 0.006909401621669531, - "learning_rate": 0.00019997850192279333, - "loss": 46.0, - "step": 40997 - }, - { - "epoch": 6.602439711743629, - "grad_norm": 0.0018044457538053393, - "learning_rate": 0.00019997850087379883, - "loss": 46.0, - "step": 40998 - }, - { - "epoch": 6.602600748822416, - "grad_norm": 0.012258270755410194, - "learning_rate": 0.00019997849982477872, - "loss": 46.0, - "step": 40999 - }, - { - "epoch": 6.602761785901204, - "grad_norm": 0.01446872390806675, - "learning_rate": 0.00019997849877573302, - "loss": 46.0, - "step": 41000 - }, - { - "epoch": 6.602922822979991, - "grad_norm": 0.0036528734490275383, - "learning_rate": 0.00019997849772666174, - "loss": 46.0, - "step": 41001 - }, - { - "epoch": 6.603083860058779, - "grad_norm": 0.0015007511246949434, - "learning_rate": 0.00019997849667756484, - "loss": 46.0, - "step": 41002 - }, - { - "epoch": 6.603244897137566, - "grad_norm": 0.0034847212955355644, - "learning_rate": 0.0001999784956284424, - "loss": 46.0, - "step": 41003 - }, - { - "epoch": 6.603405934216354, - "grad_norm": 0.003368258010596037, - "learning_rate": 0.00019997849457929436, - "loss": 46.0, - "step": 41004 - }, - { - "epoch": 6.603566971295141, - "grad_norm": 0.0019945099484175444, - "learning_rate": 0.00019997849353012073, - "loss": 46.0, - "step": 41005 - }, - { - "epoch": 6.6037280083739285, - "grad_norm": 0.002101441379636526, - "learning_rate": 0.00019997849248092148, - "loss": 46.0, - "step": 41006 - }, - { - "epoch": 6.603889045452716, - "grad_norm": 0.013742894865572453, - "learning_rate": 0.00019997849143169668, - "loss": 46.0, - "step": 41007 - }, - { - "epoch": 6.6040500825315025, - "grad_norm": 0.022664552554488182, - "learning_rate": 0.00019997849038244625, - "loss": 46.0, - "step": 41008 - }, - { - "epoch": 6.60421111961029, - "grad_norm": 0.006416258867830038, - "learning_rate": 0.00019997848933317025, - "loss": 46.0, - "step": 41009 - }, - { - "epoch": 6.604372156689077, - "grad_norm": 0.0036397199146449566, - "learning_rate": 0.00019997848828386868, - "loss": 46.0, - "step": 41010 - }, - { - "epoch": 6.604533193767865, - "grad_norm": 0.012458142824470997, - "learning_rate": 0.0001999784872345415, - "loss": 46.0, - "step": 41011 - }, - { - "epoch": 6.604694230846652, - "grad_norm": 0.009527890011668205, - "learning_rate": 0.00019997848618518872, - "loss": 46.0, - "step": 41012 - }, - { - "epoch": 6.60485526792544, - "grad_norm": 0.009535655379295349, - "learning_rate": 0.00019997848513581036, - "loss": 46.0, - "step": 41013 - }, - { - "epoch": 6.605016305004227, - "grad_norm": 0.007483928930014372, - "learning_rate": 0.00019997848408640642, - "loss": 46.0, - "step": 41014 - }, - { - "epoch": 6.605177342083015, - "grad_norm": 0.0066192299127578735, - "learning_rate": 0.00019997848303697691, - "loss": 46.0, - "step": 41015 - }, - { - "epoch": 6.605338379161802, - "grad_norm": 0.004171983804553747, - "learning_rate": 0.00019997848198752177, - "loss": 46.0, - "step": 41016 - }, - { - "epoch": 6.60549941624059, - "grad_norm": 0.004527946934103966, - "learning_rate": 0.00019997848093804104, - "loss": 46.0, - "step": 41017 - }, - { - "epoch": 6.605660453319377, - "grad_norm": 0.004235798958688974, - "learning_rate": 0.00019997847988853474, - "loss": 46.0, - "step": 41018 - }, - { - "epoch": 6.6058214903981645, - "grad_norm": 0.00447089271619916, - "learning_rate": 0.00019997847883900283, - "loss": 46.0, - "step": 41019 - }, - { - "epoch": 6.605982527476952, - "grad_norm": 0.002794915111735463, - "learning_rate": 0.0001999784777894454, - "loss": 46.0, - "step": 41020 - }, - { - "epoch": 6.6061435645557385, - "grad_norm": 0.003605864243581891, - "learning_rate": 0.00019997847673986228, - "loss": 46.0, - "step": 41021 - }, - { - "epoch": 6.606304601634527, - "grad_norm": 0.007824013940989971, - "learning_rate": 0.00019997847569025364, - "loss": 46.0, - "step": 41022 - }, - { - "epoch": 6.606465638713313, - "grad_norm": 0.012308187782764435, - "learning_rate": 0.00019997847464061939, - "loss": 46.0, - "step": 41023 - }, - { - "epoch": 6.606626675792101, - "grad_norm": 0.004476560279726982, - "learning_rate": 0.00019997847359095957, - "loss": 46.0, - "step": 41024 - }, - { - "epoch": 6.606787712870888, - "grad_norm": 0.0016742307925596833, - "learning_rate": 0.00019997847254127414, - "loss": 46.0, - "step": 41025 - }, - { - "epoch": 6.606948749949676, - "grad_norm": 0.006550919730216265, - "learning_rate": 0.00019997847149156312, - "loss": 46.0, - "step": 41026 - }, - { - "epoch": 6.607109787028463, - "grad_norm": 0.001879259361885488, - "learning_rate": 0.00019997847044182651, - "loss": 46.0, - "step": 41027 - }, - { - "epoch": 6.607270824107251, - "grad_norm": 0.001983418595045805, - "learning_rate": 0.0001999784693920643, - "loss": 46.0, - "step": 41028 - }, - { - "epoch": 6.607431861186038, - "grad_norm": 0.002833044156432152, - "learning_rate": 0.00019997846834227652, - "loss": 46.0, - "step": 41029 - }, - { - "epoch": 6.6075928982648255, - "grad_norm": 0.0032925051636993885, - "learning_rate": 0.00019997846729246315, - "loss": 46.0, - "step": 41030 - }, - { - "epoch": 6.607753935343613, - "grad_norm": 0.0021400924306362867, - "learning_rate": 0.0001999784662426242, - "loss": 46.0, - "step": 41031 - }, - { - "epoch": 6.6079149724224004, - "grad_norm": 0.00787995383143425, - "learning_rate": 0.00019997846519275963, - "loss": 46.0, - "step": 41032 - }, - { - "epoch": 6.608076009501188, - "grad_norm": 0.004491230007261038, - "learning_rate": 0.00019997846414286947, - "loss": 46.0, - "step": 41033 - }, - { - "epoch": 6.608237046579975, - "grad_norm": 0.0006392892100848258, - "learning_rate": 0.00019997846309295376, - "loss": 46.0, - "step": 41034 - }, - { - "epoch": 6.608398083658763, - "grad_norm": 0.0030184241477400064, - "learning_rate": 0.00019997846204301243, - "loss": 46.0, - "step": 41035 - }, - { - "epoch": 6.608559120737549, - "grad_norm": 0.0017763242358341813, - "learning_rate": 0.0001999784609930455, - "loss": 46.0, - "step": 41036 - }, - { - "epoch": 6.608720157816338, - "grad_norm": 0.021407047286629677, - "learning_rate": 0.00019997845994305303, - "loss": 46.0, - "step": 41037 - }, - { - "epoch": 6.608881194895124, - "grad_norm": 0.006149849388748407, - "learning_rate": 0.00019997845889303492, - "loss": 46.0, - "step": 41038 - }, - { - "epoch": 6.609042231973912, - "grad_norm": 0.011370550841093063, - "learning_rate": 0.00019997845784299126, - "loss": 46.0, - "step": 41039 - }, - { - "epoch": 6.609203269052699, - "grad_norm": 0.0022126734256744385, - "learning_rate": 0.00019997845679292197, - "loss": 46.0, - "step": 41040 - }, - { - "epoch": 6.609364306131487, - "grad_norm": 0.008452162146568298, - "learning_rate": 0.00019997845574282712, - "loss": 46.0, - "step": 41041 - }, - { - "epoch": 6.609525343210274, - "grad_norm": 0.005821246653795242, - "learning_rate": 0.00019997845469270668, - "loss": 46.0, - "step": 41042 - }, - { - "epoch": 6.6096863802890615, - "grad_norm": 0.018352357670664787, - "learning_rate": 0.00019997845364256062, - "loss": 46.0, - "step": 41043 - }, - { - "epoch": 6.609847417367849, - "grad_norm": 0.008488418534398079, - "learning_rate": 0.000199978452592389, - "loss": 46.0, - "step": 41044 - }, - { - "epoch": 6.610008454446636, - "grad_norm": 0.008687576279044151, - "learning_rate": 0.0001999784515421918, - "loss": 46.0, - "step": 41045 - }, - { - "epoch": 6.610169491525424, - "grad_norm": 0.0065894159488379955, - "learning_rate": 0.000199978450491969, - "loss": 46.0, - "step": 41046 - }, - { - "epoch": 6.610330528604211, - "grad_norm": 0.003859002608805895, - "learning_rate": 0.0001999784494417206, - "loss": 46.0, - "step": 41047 - }, - { - "epoch": 6.610491565682999, - "grad_norm": 0.0011809971183538437, - "learning_rate": 0.00019997844839144663, - "loss": 46.0, - "step": 41048 - }, - { - "epoch": 6.610652602761786, - "grad_norm": 0.020778413861989975, - "learning_rate": 0.00019997844734114705, - "loss": 46.0, - "step": 41049 - }, - { - "epoch": 6.610813639840574, - "grad_norm": 0.006073105148971081, - "learning_rate": 0.0001999784462908219, - "loss": 46.0, - "step": 41050 - }, - { - "epoch": 6.61097467691936, - "grad_norm": 0.00774040911346674, - "learning_rate": 0.0001999784452404711, - "loss": 46.0, - "step": 41051 - }, - { - "epoch": 6.611135713998148, - "grad_norm": 0.0050262147560715675, - "learning_rate": 0.0001999784441900948, - "loss": 46.0, - "step": 41052 - }, - { - "epoch": 6.611296751076935, - "grad_norm": 0.006304901093244553, - "learning_rate": 0.00019997844313969284, - "loss": 46.0, - "step": 41053 - }, - { - "epoch": 6.611457788155723, - "grad_norm": 0.00688542565330863, - "learning_rate": 0.00019997844208926533, - "loss": 46.0, - "step": 41054 - }, - { - "epoch": 6.61161882523451, - "grad_norm": 0.0029033017344772816, - "learning_rate": 0.00019997844103881223, - "loss": 46.0, - "step": 41055 - }, - { - "epoch": 6.6117798623132975, - "grad_norm": 0.004210735205560923, - "learning_rate": 0.00019997843998833354, - "loss": 46.0, - "step": 41056 - }, - { - "epoch": 6.611940899392085, - "grad_norm": 0.005546304862946272, - "learning_rate": 0.00019997843893782924, - "loss": 46.0, - "step": 41057 - }, - { - "epoch": 6.612101936470872, - "grad_norm": 0.0018246410181745887, - "learning_rate": 0.00019997843788729935, - "loss": 46.0, - "step": 41058 - }, - { - "epoch": 6.61226297354966, - "grad_norm": 0.018975183367729187, - "learning_rate": 0.00019997843683674388, - "loss": 46.0, - "step": 41059 - }, - { - "epoch": 6.612424010628447, - "grad_norm": 0.01114060077816248, - "learning_rate": 0.00019997843578616284, - "loss": 46.0, - "step": 41060 - }, - { - "epoch": 6.612585047707235, - "grad_norm": 0.00270092417486012, - "learning_rate": 0.0001999784347355562, - "loss": 46.0, - "step": 41061 - }, - { - "epoch": 6.612746084786022, - "grad_norm": 0.004035099409520626, - "learning_rate": 0.00019997843368492398, - "loss": 46.0, - "step": 41062 - }, - { - "epoch": 6.61290712186481, - "grad_norm": 0.005571081768721342, - "learning_rate": 0.00019997843263426616, - "loss": 46.0, - "step": 41063 - }, - { - "epoch": 6.613068158943597, - "grad_norm": 0.02523900754749775, - "learning_rate": 0.00019997843158358274, - "loss": 46.0, - "step": 41064 - }, - { - "epoch": 6.613229196022385, - "grad_norm": 0.01615004613995552, - "learning_rate": 0.00019997843053287372, - "loss": 46.0, - "step": 41065 - }, - { - "epoch": 6.613390233101171, - "grad_norm": 0.008677395060658455, - "learning_rate": 0.00019997842948213913, - "loss": 46.0, - "step": 41066 - }, - { - "epoch": 6.613551270179959, - "grad_norm": 0.02636909857392311, - "learning_rate": 0.00019997842843137896, - "loss": 46.0, - "step": 41067 - }, - { - "epoch": 6.613712307258746, - "grad_norm": 0.0016285745659843087, - "learning_rate": 0.00019997842738059317, - "loss": 46.0, - "step": 41068 - }, - { - "epoch": 6.6138733443375335, - "grad_norm": 0.0035759760066866875, - "learning_rate": 0.00019997842632978182, - "loss": 46.0, - "step": 41069 - }, - { - "epoch": 6.614034381416321, - "grad_norm": 0.001556383678689599, - "learning_rate": 0.00019997842527894486, - "loss": 46.0, - "step": 41070 - }, - { - "epoch": 6.614195418495108, - "grad_norm": 0.006428986322134733, - "learning_rate": 0.00019997842422808234, - "loss": 46.0, - "step": 41071 - }, - { - "epoch": 6.614356455573896, - "grad_norm": 0.002071127062663436, - "learning_rate": 0.00019997842317719423, - "loss": 46.0, - "step": 41072 - }, - { - "epoch": 6.614517492652683, - "grad_norm": 0.0017701896140351892, - "learning_rate": 0.00019997842212628048, - "loss": 46.0, - "step": 41073 - }, - { - "epoch": 6.614678529731471, - "grad_norm": 0.017276708036661148, - "learning_rate": 0.00019997842107534117, - "loss": 46.0, - "step": 41074 - }, - { - "epoch": 6.614839566810258, - "grad_norm": 0.0033349853474646807, - "learning_rate": 0.0001999784200243763, - "loss": 46.0, - "step": 41075 - }, - { - "epoch": 6.615000603889046, - "grad_norm": 0.004386052954941988, - "learning_rate": 0.0001999784189733858, - "loss": 46.0, - "step": 41076 - }, - { - "epoch": 6.615161640967833, - "grad_norm": 0.019192369654774666, - "learning_rate": 0.00019997841792236974, - "loss": 46.0, - "step": 41077 - }, - { - "epoch": 6.6153226780466206, - "grad_norm": 0.011368497274816036, - "learning_rate": 0.00019997841687132808, - "loss": 46.0, - "step": 41078 - }, - { - "epoch": 6.615483715125408, - "grad_norm": 0.0017469103913754225, - "learning_rate": 0.0001999784158202608, - "loss": 46.0, - "step": 41079 - }, - { - "epoch": 6.6156447522041955, - "grad_norm": 0.0031596715562045574, - "learning_rate": 0.00019997841476916797, - "loss": 46.0, - "step": 41080 - }, - { - "epoch": 6.615805789282982, - "grad_norm": 0.0018933404935523868, - "learning_rate": 0.00019997841371804955, - "loss": 46.0, - "step": 41081 - }, - { - "epoch": 6.6159668263617695, - "grad_norm": 0.0034992152359336615, - "learning_rate": 0.00019997841266690552, - "loss": 46.0, - "step": 41082 - }, - { - "epoch": 6.616127863440557, - "grad_norm": 0.0057185981422662735, - "learning_rate": 0.0001999784116157359, - "loss": 46.0, - "step": 41083 - }, - { - "epoch": 6.616288900519344, - "grad_norm": 0.001386741641908884, - "learning_rate": 0.00019997841056454071, - "loss": 46.0, - "step": 41084 - }, - { - "epoch": 6.616449937598132, - "grad_norm": 0.0051393453031778336, - "learning_rate": 0.00019997840951331992, - "loss": 46.0, - "step": 41085 - }, - { - "epoch": 6.616610974676919, - "grad_norm": 0.0008212599204853177, - "learning_rate": 0.00019997840846207353, - "loss": 46.0, - "step": 41086 - }, - { - "epoch": 6.616772011755707, - "grad_norm": 0.01296071894466877, - "learning_rate": 0.0001999784074108016, - "loss": 46.0, - "step": 41087 - }, - { - "epoch": 6.616933048834494, - "grad_norm": 0.002062899060547352, - "learning_rate": 0.000199978406359504, - "loss": 46.0, - "step": 41088 - }, - { - "epoch": 6.617094085913282, - "grad_norm": 0.0179916899651289, - "learning_rate": 0.00019997840530818086, - "loss": 46.0, - "step": 41089 - }, - { - "epoch": 6.617255122992069, - "grad_norm": 0.002371684880927205, - "learning_rate": 0.00019997840425683215, - "loss": 46.0, - "step": 41090 - }, - { - "epoch": 6.6174161600708565, - "grad_norm": 0.0025576360058039427, - "learning_rate": 0.0001999784032054578, - "loss": 46.0, - "step": 41091 - }, - { - "epoch": 6.617577197149644, - "grad_norm": 0.0023558472748845816, - "learning_rate": 0.00019997840215405787, - "loss": 46.0, - "step": 41092 - }, - { - "epoch": 6.617738234228431, - "grad_norm": 0.003779823426157236, - "learning_rate": 0.0001999784011026324, - "loss": 46.0, - "step": 41093 - }, - { - "epoch": 6.617899271307218, - "grad_norm": 0.0016630009049549699, - "learning_rate": 0.0001999784000511813, - "loss": 46.0, - "step": 41094 - }, - { - "epoch": 6.618060308386006, - "grad_norm": 0.0050531914457678795, - "learning_rate": 0.0001999783989997046, - "loss": 46.0, - "step": 41095 - }, - { - "epoch": 6.618221345464793, - "grad_norm": 0.007217828184366226, - "learning_rate": 0.00019997839794820235, - "loss": 46.0, - "step": 41096 - }, - { - "epoch": 6.61838238254358, - "grad_norm": 0.004020967986434698, - "learning_rate": 0.00019997839689667445, - "loss": 46.0, - "step": 41097 - }, - { - "epoch": 6.618543419622368, - "grad_norm": 0.0027252137660980225, - "learning_rate": 0.00019997839584512105, - "loss": 46.0, - "step": 41098 - }, - { - "epoch": 6.618704456701155, - "grad_norm": 0.008718476630747318, - "learning_rate": 0.000199978394793542, - "loss": 46.0, - "step": 41099 - }, - { - "epoch": 6.618865493779943, - "grad_norm": 0.004874077625572681, - "learning_rate": 0.00019997839374193737, - "loss": 46.0, - "step": 41100 - }, - { - "epoch": 6.61902653085873, - "grad_norm": 0.001031780382618308, - "learning_rate": 0.00019997839269030713, - "loss": 46.0, - "step": 41101 - }, - { - "epoch": 6.619187567937518, - "grad_norm": 0.0021287561394274235, - "learning_rate": 0.00019997839163865132, - "loss": 46.0, - "step": 41102 - }, - { - "epoch": 6.619348605016305, - "grad_norm": 0.005838374141603708, - "learning_rate": 0.00019997839058696993, - "loss": 46.0, - "step": 41103 - }, - { - "epoch": 6.6195096420950925, - "grad_norm": 0.01185868214815855, - "learning_rate": 0.00019997838953526292, - "loss": 46.0, - "step": 41104 - }, - { - "epoch": 6.61967067917388, - "grad_norm": 0.002571073593571782, - "learning_rate": 0.00019997838848353035, - "loss": 46.0, - "step": 41105 - }, - { - "epoch": 6.619831716252667, - "grad_norm": 0.008395260199904442, - "learning_rate": 0.0001999783874317722, - "loss": 46.0, - "step": 41106 - }, - { - "epoch": 6.619992753331455, - "grad_norm": 0.0031780200079083443, - "learning_rate": 0.00019997838637998846, - "loss": 46.0, - "step": 41107 - }, - { - "epoch": 6.620153790410242, - "grad_norm": 0.0061030020006000996, - "learning_rate": 0.0001999783853281791, - "loss": 46.0, - "step": 41108 - }, - { - "epoch": 6.620314827489029, - "grad_norm": 0.0018235614988952875, - "learning_rate": 0.00019997838427634416, - "loss": 46.0, - "step": 41109 - }, - { - "epoch": 6.620475864567817, - "grad_norm": 0.003984051290899515, - "learning_rate": 0.00019997838322448366, - "loss": 46.0, - "step": 41110 - }, - { - "epoch": 6.620636901646604, - "grad_norm": 0.0016804715851321816, - "learning_rate": 0.0001999783821725975, - "loss": 46.0, - "step": 41111 - }, - { - "epoch": 6.620797938725391, - "grad_norm": 0.0058973077684640884, - "learning_rate": 0.00019997838112068583, - "loss": 46.0, - "step": 41112 - }, - { - "epoch": 6.620958975804179, - "grad_norm": 0.0025775195099413395, - "learning_rate": 0.00019997838006874854, - "loss": 46.0, - "step": 41113 - }, - { - "epoch": 6.621120012882966, - "grad_norm": 0.0027494679670780897, - "learning_rate": 0.00019997837901678566, - "loss": 46.0, - "step": 41114 - }, - { - "epoch": 6.621281049961754, - "grad_norm": 0.002431021071970463, - "learning_rate": 0.0001999783779647972, - "loss": 46.0, - "step": 41115 - }, - { - "epoch": 6.621442087040541, - "grad_norm": 0.0037709027528762817, - "learning_rate": 0.00019997837691278312, - "loss": 46.0, - "step": 41116 - }, - { - "epoch": 6.6216031241193285, - "grad_norm": 0.0024342546239495277, - "learning_rate": 0.00019997837586074348, - "loss": 46.0, - "step": 41117 - }, - { - "epoch": 6.621764161198116, - "grad_norm": 0.0029722130857408047, - "learning_rate": 0.00019997837480867825, - "loss": 46.0, - "step": 41118 - }, - { - "epoch": 6.621925198276903, - "grad_norm": 0.006364222150295973, - "learning_rate": 0.0001999783737565874, - "loss": 46.0, - "step": 41119 - }, - { - "epoch": 6.622086235355691, - "grad_norm": 0.0027700962964445353, - "learning_rate": 0.000199978372704471, - "loss": 46.0, - "step": 41120 - }, - { - "epoch": 6.622247272434478, - "grad_norm": 0.003449861891567707, - "learning_rate": 0.000199978371652329, - "loss": 46.0, - "step": 41121 - }, - { - "epoch": 6.622408309513266, - "grad_norm": 0.00423266040161252, - "learning_rate": 0.00019997837060016141, - "loss": 46.0, - "step": 41122 - }, - { - "epoch": 6.622569346592053, - "grad_norm": 0.007254303898662329, - "learning_rate": 0.00019997836954796822, - "loss": 46.0, - "step": 41123 - }, - { - "epoch": 6.62273038367084, - "grad_norm": 0.0058316076174378395, - "learning_rate": 0.00019997836849574945, - "loss": 46.0, - "step": 41124 - }, - { - "epoch": 6.622891420749627, - "grad_norm": 0.008839089423418045, - "learning_rate": 0.00019997836744350508, - "loss": 46.0, - "step": 41125 - }, - { - "epoch": 6.623052457828415, - "grad_norm": 0.004016009159386158, - "learning_rate": 0.00019997836639123513, - "loss": 46.0, - "step": 41126 - }, - { - "epoch": 6.623213494907202, - "grad_norm": 0.012918100692331791, - "learning_rate": 0.0001999783653389396, - "loss": 46.0, - "step": 41127 - }, - { - "epoch": 6.62337453198599, - "grad_norm": 0.02669021673500538, - "learning_rate": 0.00019997836428661846, - "loss": 46.0, - "step": 41128 - }, - { - "epoch": 6.623535569064777, - "grad_norm": 0.002213901374489069, - "learning_rate": 0.00019997836323427172, - "loss": 46.0, - "step": 41129 - }, - { - "epoch": 6.6236966061435645, - "grad_norm": 0.0016288220649585128, - "learning_rate": 0.00019997836218189942, - "loss": 46.0, - "step": 41130 - }, - { - "epoch": 6.623857643222352, - "grad_norm": 0.007245820015668869, - "learning_rate": 0.00019997836112950154, - "loss": 46.0, - "step": 41131 - }, - { - "epoch": 6.624018680301139, - "grad_norm": 0.00630686990916729, - "learning_rate": 0.00019997836007707803, - "loss": 46.0, - "step": 41132 - }, - { - "epoch": 6.624179717379927, - "grad_norm": 0.002153727924451232, - "learning_rate": 0.00019997835902462897, - "loss": 46.0, - "step": 41133 - }, - { - "epoch": 6.624340754458714, - "grad_norm": 0.001160130137577653, - "learning_rate": 0.0001999783579721543, - "loss": 46.0, - "step": 41134 - }, - { - "epoch": 6.624501791537502, - "grad_norm": 0.0025597941130399704, - "learning_rate": 0.00019997835691965406, - "loss": 46.0, - "step": 41135 - }, - { - "epoch": 6.624662828616289, - "grad_norm": 0.005943742580711842, - "learning_rate": 0.0001999783558671282, - "loss": 46.0, - "step": 41136 - }, - { - "epoch": 6.624823865695077, - "grad_norm": 0.0011956996750086546, - "learning_rate": 0.00019997835481457677, - "loss": 46.0, - "step": 41137 - }, - { - "epoch": 6.624984902773864, - "grad_norm": 0.008160853758454323, - "learning_rate": 0.00019997835376199974, - "loss": 46.0, - "step": 41138 - }, - { - "epoch": 6.625145939852651, - "grad_norm": 0.0016330907819792628, - "learning_rate": 0.0001999783527093971, - "loss": 46.0, - "step": 41139 - }, - { - "epoch": 6.625306976931438, - "grad_norm": 0.0023520688991993666, - "learning_rate": 0.00019997835165676893, - "loss": 46.0, - "step": 41140 - }, - { - "epoch": 6.625468014010226, - "grad_norm": 0.0037449656520038843, - "learning_rate": 0.00019997835060411512, - "loss": 46.0, - "step": 41141 - }, - { - "epoch": 6.625629051089013, - "grad_norm": 0.013129186816513538, - "learning_rate": 0.00019997834955143575, - "loss": 46.0, - "step": 41142 - }, - { - "epoch": 6.6257900881678005, - "grad_norm": 0.001787198125384748, - "learning_rate": 0.00019997834849873076, - "loss": 46.0, - "step": 41143 - }, - { - "epoch": 6.625951125246588, - "grad_norm": 0.003374287160113454, - "learning_rate": 0.0001999783474460002, - "loss": 46.0, - "step": 41144 - }, - { - "epoch": 6.626112162325375, - "grad_norm": 0.005721678491681814, - "learning_rate": 0.00019997834639324407, - "loss": 46.0, - "step": 41145 - }, - { - "epoch": 6.626273199404163, - "grad_norm": 0.0050403098575770855, - "learning_rate": 0.00019997834534046232, - "loss": 46.0, - "step": 41146 - }, - { - "epoch": 6.62643423648295, - "grad_norm": 0.0048050605691969395, - "learning_rate": 0.00019997834428765501, - "loss": 46.0, - "step": 41147 - }, - { - "epoch": 6.626595273561738, - "grad_norm": 0.012133794836699963, - "learning_rate": 0.00019997834323482206, - "loss": 46.0, - "step": 41148 - }, - { - "epoch": 6.626756310640525, - "grad_norm": 0.007100074551999569, - "learning_rate": 0.00019997834218196355, - "loss": 46.0, - "step": 41149 - }, - { - "epoch": 6.626917347719313, - "grad_norm": 0.006903825327754021, - "learning_rate": 0.00019997834112907948, - "loss": 46.0, - "step": 41150 - }, - { - "epoch": 6.6270783847981, - "grad_norm": 0.006233078893274069, - "learning_rate": 0.0001999783400761698, - "loss": 46.0, - "step": 41151 - }, - { - "epoch": 6.6272394218768875, - "grad_norm": 0.015550930052995682, - "learning_rate": 0.0001999783390232345, - "loss": 46.0, - "step": 41152 - }, - { - "epoch": 6.627400458955675, - "grad_norm": 0.004609704948961735, - "learning_rate": 0.00019997833797027366, - "loss": 46.0, - "step": 41153 - }, - { - "epoch": 6.6275614960344615, - "grad_norm": 0.006760106887668371, - "learning_rate": 0.0001999783369172872, - "loss": 46.0, - "step": 41154 - }, - { - "epoch": 6.627722533113249, - "grad_norm": 0.002083009108901024, - "learning_rate": 0.00019997833586427516, - "loss": 46.0, - "step": 41155 - }, - { - "epoch": 6.6278835701920364, - "grad_norm": 0.0013089803978800774, - "learning_rate": 0.00019997833481123753, - "loss": 46.0, - "step": 41156 - }, - { - "epoch": 6.628044607270824, - "grad_norm": 0.0077623240649700165, - "learning_rate": 0.0001999783337581743, - "loss": 46.0, - "step": 41157 - }, - { - "epoch": 6.628205644349611, - "grad_norm": 0.0020460295490920544, - "learning_rate": 0.00019997833270508548, - "loss": 46.0, - "step": 41158 - }, - { - "epoch": 6.628366681428399, - "grad_norm": 0.0013743790332227945, - "learning_rate": 0.0001999783316519711, - "loss": 46.0, - "step": 41159 - }, - { - "epoch": 6.628527718507186, - "grad_norm": 0.004074026830494404, - "learning_rate": 0.00019997833059883112, - "loss": 46.0, - "step": 41160 - }, - { - "epoch": 6.628688755585974, - "grad_norm": 0.003076382912695408, - "learning_rate": 0.00019997832954566554, - "loss": 46.0, - "step": 41161 - }, - { - "epoch": 6.628849792664761, - "grad_norm": 0.0028957887552678585, - "learning_rate": 0.00019997832849247437, - "loss": 46.0, - "step": 41162 - }, - { - "epoch": 6.629010829743549, - "grad_norm": 0.0035534012131392956, - "learning_rate": 0.0001999783274392576, - "loss": 46.0, - "step": 41163 - }, - { - "epoch": 6.629171866822336, - "grad_norm": 0.008254371583461761, - "learning_rate": 0.00019997832638601526, - "loss": 46.0, - "step": 41164 - }, - { - "epoch": 6.6293329039011235, - "grad_norm": 0.004110126290470362, - "learning_rate": 0.00019997832533274733, - "loss": 46.0, - "step": 41165 - }, - { - "epoch": 6.629493940979911, - "grad_norm": 0.0065147290006279945, - "learning_rate": 0.0001999783242794538, - "loss": 46.0, - "step": 41166 - }, - { - "epoch": 6.6296549780586975, - "grad_norm": 0.007026699371635914, - "learning_rate": 0.00019997832322613468, - "loss": 46.0, - "step": 41167 - }, - { - "epoch": 6.629816015137486, - "grad_norm": 0.010335328988730907, - "learning_rate": 0.00019997832217278998, - "loss": 46.0, - "step": 41168 - }, - { - "epoch": 6.629977052216272, - "grad_norm": 0.0031468693632632494, - "learning_rate": 0.00019997832111941967, - "loss": 46.0, - "step": 41169 - }, - { - "epoch": 6.63013808929506, - "grad_norm": 0.0037579922936856747, - "learning_rate": 0.0001999783200660238, - "loss": 46.0, - "step": 41170 - }, - { - "epoch": 6.630299126373847, - "grad_norm": 0.015171720646321774, - "learning_rate": 0.00019997831901260232, - "loss": 46.0, - "step": 41171 - }, - { - "epoch": 6.630460163452635, - "grad_norm": 0.0034958517644554377, - "learning_rate": 0.00019997831795915528, - "loss": 46.0, - "step": 41172 - }, - { - "epoch": 6.630621200531422, - "grad_norm": 0.0050625279545784, - "learning_rate": 0.00019997831690568262, - "loss": 46.0, - "step": 41173 - }, - { - "epoch": 6.63078223761021, - "grad_norm": 0.0009731936734169722, - "learning_rate": 0.00019997831585218435, - "loss": 46.0, - "step": 41174 - }, - { - "epoch": 6.630943274688997, - "grad_norm": 0.013226950541138649, - "learning_rate": 0.00019997831479866057, - "loss": 46.0, - "step": 41175 - }, - { - "epoch": 6.631104311767785, - "grad_norm": 0.00621444545686245, - "learning_rate": 0.00019997831374511112, - "loss": 46.0, - "step": 41176 - }, - { - "epoch": 6.631265348846572, - "grad_norm": 0.0019267479656264186, - "learning_rate": 0.00019997831269153612, - "loss": 46.0, - "step": 41177 - }, - { - "epoch": 6.6314263859253595, - "grad_norm": 0.0015656901523470879, - "learning_rate": 0.00019997831163793552, - "loss": 46.0, - "step": 41178 - }, - { - "epoch": 6.631587423004147, - "grad_norm": 0.00155361695215106, - "learning_rate": 0.00019997831058430934, - "loss": 46.0, - "step": 41179 - }, - { - "epoch": 6.631748460082934, - "grad_norm": 0.006082166451960802, - "learning_rate": 0.00019997830953065755, - "loss": 46.0, - "step": 41180 - }, - { - "epoch": 6.631909497161722, - "grad_norm": 0.004673150833696127, - "learning_rate": 0.0001999783084769802, - "loss": 46.0, - "step": 41181 - }, - { - "epoch": 6.632070534240508, - "grad_norm": 0.0023002601228654385, - "learning_rate": 0.00019997830742327723, - "loss": 46.0, - "step": 41182 - }, - { - "epoch": 6.632231571319297, - "grad_norm": 0.003658955218270421, - "learning_rate": 0.0001999783063695487, - "loss": 46.0, - "step": 41183 - }, - { - "epoch": 6.632392608398083, - "grad_norm": 0.0015056031988933682, - "learning_rate": 0.00019997830531579455, - "loss": 46.0, - "step": 41184 - }, - { - "epoch": 6.632553645476871, - "grad_norm": 0.004236246459186077, - "learning_rate": 0.00019997830426201485, - "loss": 46.0, - "step": 41185 - }, - { - "epoch": 6.632714682555658, - "grad_norm": 0.0032955333590507507, - "learning_rate": 0.00019997830320820953, - "loss": 46.0, - "step": 41186 - }, - { - "epoch": 6.632875719634446, - "grad_norm": 0.003155897604301572, - "learning_rate": 0.00019997830215437863, - "loss": 46.0, - "step": 41187 - }, - { - "epoch": 6.633036756713233, - "grad_norm": 0.0054049682803452015, - "learning_rate": 0.00019997830110052216, - "loss": 46.0, - "step": 41188 - }, - { - "epoch": 6.633197793792021, - "grad_norm": 0.006927745882421732, - "learning_rate": 0.00019997830004664006, - "loss": 46.0, - "step": 41189 - }, - { - "epoch": 6.633358830870808, - "grad_norm": 0.003276033326983452, - "learning_rate": 0.0001999782989927324, - "loss": 46.0, - "step": 41190 - }, - { - "epoch": 6.6335198679495955, - "grad_norm": 0.003885750425979495, - "learning_rate": 0.00019997829793879914, - "loss": 46.0, - "step": 41191 - }, - { - "epoch": 6.633680905028383, - "grad_norm": 0.026463288813829422, - "learning_rate": 0.0001999782968848403, - "loss": 46.0, - "step": 41192 - }, - { - "epoch": 6.63384194210717, - "grad_norm": 0.004569042939692736, - "learning_rate": 0.00019997829583085587, - "loss": 46.0, - "step": 41193 - }, - { - "epoch": 6.634002979185958, - "grad_norm": 0.003324605990201235, - "learning_rate": 0.00019997829477684582, - "loss": 46.0, - "step": 41194 - }, - { - "epoch": 6.634164016264745, - "grad_norm": 0.002257521264255047, - "learning_rate": 0.0001999782937228102, - "loss": 46.0, - "step": 41195 - }, - { - "epoch": 6.634325053343533, - "grad_norm": 0.0006985857035033405, - "learning_rate": 0.000199978292668749, - "loss": 46.0, - "step": 41196 - }, - { - "epoch": 6.634486090422319, - "grad_norm": 0.004117724485695362, - "learning_rate": 0.0001999782916146622, - "loss": 46.0, - "step": 41197 - }, - { - "epoch": 6.634647127501107, - "grad_norm": 0.005097700282931328, - "learning_rate": 0.00019997829056054984, - "loss": 46.0, - "step": 41198 - }, - { - "epoch": 6.634808164579894, - "grad_norm": 0.02677321434020996, - "learning_rate": 0.00019997828950641186, - "loss": 46.0, - "step": 41199 - }, - { - "epoch": 6.634969201658682, - "grad_norm": 0.0101937772706151, - "learning_rate": 0.0001999782884522483, - "loss": 46.0, - "step": 41200 - }, - { - "epoch": 6.635130238737469, - "grad_norm": 0.0020081985276192427, - "learning_rate": 0.00019997828739805912, - "loss": 46.0, - "step": 41201 - }, - { - "epoch": 6.6352912758162566, - "grad_norm": 0.002573740668594837, - "learning_rate": 0.0001999782863438444, - "loss": 46.0, - "step": 41202 - }, - { - "epoch": 6.635452312895044, - "grad_norm": 0.009583241306245327, - "learning_rate": 0.00019997828528960408, - "loss": 46.0, - "step": 41203 - }, - { - "epoch": 6.6356133499738315, - "grad_norm": 0.007890709675848484, - "learning_rate": 0.00019997828423533816, - "loss": 46.0, - "step": 41204 - }, - { - "epoch": 6.635774387052619, - "grad_norm": 0.0036622032057493925, - "learning_rate": 0.00019997828318104666, - "loss": 46.0, - "step": 41205 - }, - { - "epoch": 6.635935424131406, - "grad_norm": 0.0033045532181859016, - "learning_rate": 0.00019997828212672955, - "loss": 46.0, - "step": 41206 - }, - { - "epoch": 6.636096461210194, - "grad_norm": 0.003874134737998247, - "learning_rate": 0.00019997828107238684, - "loss": 46.0, - "step": 41207 - }, - { - "epoch": 6.636257498288981, - "grad_norm": 0.005551476962864399, - "learning_rate": 0.00019997828001801858, - "loss": 46.0, - "step": 41208 - }, - { - "epoch": 6.636418535367769, - "grad_norm": 0.0020345996599644423, - "learning_rate": 0.00019997827896362473, - "loss": 46.0, - "step": 41209 - }, - { - "epoch": 6.636579572446556, - "grad_norm": 0.006998830940574408, - "learning_rate": 0.00019997827790920524, - "loss": 46.0, - "step": 41210 - }, - { - "epoch": 6.636740609525344, - "grad_norm": 0.005958764813840389, - "learning_rate": 0.00019997827685476022, - "loss": 46.0, - "step": 41211 - }, - { - "epoch": 6.63690164660413, - "grad_norm": 0.005613343324512243, - "learning_rate": 0.00019997827580028958, - "loss": 46.0, - "step": 41212 - }, - { - "epoch": 6.637062683682918, - "grad_norm": 0.0014376823091879487, - "learning_rate": 0.00019997827474579335, - "loss": 46.0, - "step": 41213 - }, - { - "epoch": 6.637223720761705, - "grad_norm": 0.007475843653082848, - "learning_rate": 0.00019997827369127157, - "loss": 46.0, - "step": 41214 - }, - { - "epoch": 6.6373847578404925, - "grad_norm": 0.0028920944314450026, - "learning_rate": 0.00019997827263672414, - "loss": 46.0, - "step": 41215 - }, - { - "epoch": 6.63754579491928, - "grad_norm": 0.003980892710387707, - "learning_rate": 0.00019997827158215118, - "loss": 46.0, - "step": 41216 - }, - { - "epoch": 6.637706831998067, - "grad_norm": 0.018523935228586197, - "learning_rate": 0.00019997827052755258, - "loss": 46.0, - "step": 41217 - }, - { - "epoch": 6.637867869076855, - "grad_norm": 0.003589930944144726, - "learning_rate": 0.00019997826947292842, - "loss": 46.0, - "step": 41218 - }, - { - "epoch": 6.638028906155642, - "grad_norm": 0.0012552345870062709, - "learning_rate": 0.00019997826841827864, - "loss": 46.0, - "step": 41219 - }, - { - "epoch": 6.63818994323443, - "grad_norm": 0.006195634603500366, - "learning_rate": 0.0001999782673636033, - "loss": 46.0, - "step": 41220 - }, - { - "epoch": 6.638350980313217, - "grad_norm": 0.0015473488019779325, - "learning_rate": 0.00019997826630890235, - "loss": 46.0, - "step": 41221 - }, - { - "epoch": 6.638512017392005, - "grad_norm": 0.01875738613307476, - "learning_rate": 0.00019997826525417582, - "loss": 46.0, - "step": 41222 - }, - { - "epoch": 6.638673054470792, - "grad_norm": 0.0023227131459861994, - "learning_rate": 0.00019997826419942372, - "loss": 46.0, - "step": 41223 - }, - { - "epoch": 6.63883409154958, - "grad_norm": 0.0035696225240826607, - "learning_rate": 0.00019997826314464604, - "loss": 46.0, - "step": 41224 - }, - { - "epoch": 6.638995128628367, - "grad_norm": 0.0018136863363906741, - "learning_rate": 0.0001999782620898427, - "loss": 46.0, - "step": 41225 - }, - { - "epoch": 6.6391561657071545, - "grad_norm": 0.0017965954029932618, - "learning_rate": 0.00019997826103501382, - "loss": 46.0, - "step": 41226 - }, - { - "epoch": 6.639317202785941, - "grad_norm": 0.002883186796680093, - "learning_rate": 0.00019997825998015935, - "loss": 46.0, - "step": 41227 - }, - { - "epoch": 6.6394782398647285, - "grad_norm": 0.006738876923918724, - "learning_rate": 0.00019997825892527932, - "loss": 46.0, - "step": 41228 - }, - { - "epoch": 6.639639276943516, - "grad_norm": 0.0017054156633093953, - "learning_rate": 0.00019997825787037364, - "loss": 46.0, - "step": 41229 - }, - { - "epoch": 6.639800314022303, - "grad_norm": 0.015789788216352463, - "learning_rate": 0.0001999782568154424, - "loss": 46.0, - "step": 41230 - }, - { - "epoch": 6.639961351101091, - "grad_norm": 0.0012283873511478305, - "learning_rate": 0.0001999782557604856, - "loss": 46.0, - "step": 41231 - }, - { - "epoch": 6.640122388179878, - "grad_norm": 0.0022991145960986614, - "learning_rate": 0.00019997825470550315, - "loss": 46.0, - "step": 41232 - }, - { - "epoch": 6.640283425258666, - "grad_norm": 0.007703986018896103, - "learning_rate": 0.00019997825365049513, - "loss": 46.0, - "step": 41233 - }, - { - "epoch": 6.640444462337453, - "grad_norm": 0.006665561348199844, - "learning_rate": 0.00019997825259546154, - "loss": 46.0, - "step": 41234 - }, - { - "epoch": 6.640605499416241, - "grad_norm": 0.006929056718945503, - "learning_rate": 0.00019997825154040237, - "loss": 46.0, - "step": 41235 - }, - { - "epoch": 6.640766536495028, - "grad_norm": 0.0023154662922024727, - "learning_rate": 0.00019997825048531756, - "loss": 46.0, - "step": 41236 - }, - { - "epoch": 6.640927573573816, - "grad_norm": 0.011944940313696861, - "learning_rate": 0.0001999782494302072, - "loss": 46.0, - "step": 41237 - }, - { - "epoch": 6.641088610652603, - "grad_norm": 0.006466531660407782, - "learning_rate": 0.00019997824837507126, - "loss": 46.0, - "step": 41238 - }, - { - "epoch": 6.6412496477313905, - "grad_norm": 0.013151127845048904, - "learning_rate": 0.0001999782473199097, - "loss": 46.0, - "step": 41239 - }, - { - "epoch": 6.641410684810177, - "grad_norm": 0.03186663240194321, - "learning_rate": 0.00019997824626472258, - "loss": 46.0, - "step": 41240 - }, - { - "epoch": 6.641571721888965, - "grad_norm": 0.0044538117945194244, - "learning_rate": 0.00019997824520950986, - "loss": 46.0, - "step": 41241 - }, - { - "epoch": 6.641732758967752, - "grad_norm": 0.012548520229756832, - "learning_rate": 0.00019997824415427155, - "loss": 46.0, - "step": 41242 - }, - { - "epoch": 6.641893796046539, - "grad_norm": 0.011111288331449032, - "learning_rate": 0.00019997824309900763, - "loss": 46.0, - "step": 41243 - }, - { - "epoch": 6.642054833125327, - "grad_norm": 0.003127770032733679, - "learning_rate": 0.00019997824204371815, - "loss": 46.0, - "step": 41244 - }, - { - "epoch": 6.642215870204114, - "grad_norm": 0.008705846033990383, - "learning_rate": 0.00019997824098840305, - "loss": 46.0, - "step": 41245 - }, - { - "epoch": 6.642376907282902, - "grad_norm": 0.0007114775362424552, - "learning_rate": 0.0001999782399330624, - "loss": 46.0, - "step": 41246 - }, - { - "epoch": 6.642537944361689, - "grad_norm": 0.0087512182071805, - "learning_rate": 0.00019997823887769615, - "loss": 46.0, - "step": 41247 - }, - { - "epoch": 6.642698981440477, - "grad_norm": 0.0017549317562952638, - "learning_rate": 0.0001999782378223043, - "loss": 46.0, - "step": 41248 - }, - { - "epoch": 6.642860018519264, - "grad_norm": 0.007432641927152872, - "learning_rate": 0.00019997823676688685, - "loss": 46.0, - "step": 41249 - }, - { - "epoch": 6.643021055598052, - "grad_norm": 0.003654295578598976, - "learning_rate": 0.00019997823571144384, - "loss": 46.0, - "step": 41250 - }, - { - "epoch": 6.643182092676839, - "grad_norm": 0.009870448149740696, - "learning_rate": 0.0001999782346559752, - "loss": 46.0, - "step": 41251 - }, - { - "epoch": 6.6433431297556265, - "grad_norm": 0.0028439168818295, - "learning_rate": 0.00019997823360048102, - "loss": 46.0, - "step": 41252 - }, - { - "epoch": 6.643504166834414, - "grad_norm": 0.005359800532460213, - "learning_rate": 0.0001999782325449612, - "loss": 46.0, - "step": 41253 - }, - { - "epoch": 6.643665203913201, - "grad_norm": 0.003559744916856289, - "learning_rate": 0.00019997823148941581, - "loss": 46.0, - "step": 41254 - }, - { - "epoch": 6.643826240991988, - "grad_norm": 0.0015891611110419035, - "learning_rate": 0.00019997823043384485, - "loss": 46.0, - "step": 41255 - }, - { - "epoch": 6.643987278070776, - "grad_norm": 0.001680420245975256, - "learning_rate": 0.0001999782293782483, - "loss": 46.0, - "step": 41256 - }, - { - "epoch": 6.644148315149563, - "grad_norm": 0.003986607771366835, - "learning_rate": 0.00019997822832262615, - "loss": 46.0, - "step": 41257 - }, - { - "epoch": 6.64430935222835, - "grad_norm": 0.005513923708349466, - "learning_rate": 0.0001999782272669784, - "loss": 46.0, - "step": 41258 - }, - { - "epoch": 6.644470389307138, - "grad_norm": 0.0016328264027833939, - "learning_rate": 0.00019997822621130507, - "loss": 46.0, - "step": 41259 - }, - { - "epoch": 6.644631426385925, - "grad_norm": 0.008640449494123459, - "learning_rate": 0.00019997822515560614, - "loss": 46.0, - "step": 41260 - }, - { - "epoch": 6.644792463464713, - "grad_norm": 0.008669750764966011, - "learning_rate": 0.00019997822409988163, - "loss": 46.0, - "step": 41261 - }, - { - "epoch": 6.6449535005435, - "grad_norm": 0.004570953082293272, - "learning_rate": 0.00019997822304413152, - "loss": 46.0, - "step": 41262 - }, - { - "epoch": 6.6451145376222875, - "grad_norm": 0.0025238145608454943, - "learning_rate": 0.00019997822198835583, - "loss": 46.0, - "step": 41263 - }, - { - "epoch": 6.645275574701075, - "grad_norm": 0.00553938327357173, - "learning_rate": 0.00019997822093255455, - "loss": 46.0, - "step": 41264 - }, - { - "epoch": 6.6454366117798624, - "grad_norm": 0.006852875463664532, - "learning_rate": 0.0001999782198767277, - "loss": 46.0, - "step": 41265 - }, - { - "epoch": 6.64559764885865, - "grad_norm": 0.004326727241277695, - "learning_rate": 0.00019997821882087525, - "loss": 46.0, - "step": 41266 - }, - { - "epoch": 6.645758685937437, - "grad_norm": 0.0020337312016636133, - "learning_rate": 0.00019997821776499719, - "loss": 46.0, - "step": 41267 - }, - { - "epoch": 6.645919723016225, - "grad_norm": 0.009487706236541271, - "learning_rate": 0.00019997821670909356, - "loss": 46.0, - "step": 41268 - }, - { - "epoch": 6.646080760095012, - "grad_norm": 0.0014569255290552974, - "learning_rate": 0.00019997821565316434, - "loss": 46.0, - "step": 41269 - }, - { - "epoch": 6.646241797173799, - "grad_norm": 0.005698317661881447, - "learning_rate": 0.0001999782145972095, - "loss": 46.0, - "step": 41270 - }, - { - "epoch": 6.646402834252587, - "grad_norm": 0.019783319905400276, - "learning_rate": 0.00019997821354122912, - "loss": 46.0, - "step": 41271 - }, - { - "epoch": 6.646563871331374, - "grad_norm": 0.007254421710968018, - "learning_rate": 0.00019997821248522314, - "loss": 46.0, - "step": 41272 - }, - { - "epoch": 6.646724908410161, - "grad_norm": 0.0017364928498864174, - "learning_rate": 0.00019997821142919155, - "loss": 46.0, - "step": 41273 - }, - { - "epoch": 6.646885945488949, - "grad_norm": 0.0014317418681457639, - "learning_rate": 0.00019997821037313437, - "loss": 46.0, - "step": 41274 - }, - { - "epoch": 6.647046982567736, - "grad_norm": 0.010185652412474155, - "learning_rate": 0.00019997820931705164, - "loss": 46.0, - "step": 41275 - }, - { - "epoch": 6.6472080196465235, - "grad_norm": 0.002763714175671339, - "learning_rate": 0.00019997820826094328, - "loss": 46.0, - "step": 41276 - }, - { - "epoch": 6.647369056725311, - "grad_norm": 0.0007979915826581419, - "learning_rate": 0.00019997820720480934, - "loss": 46.0, - "step": 41277 - }, - { - "epoch": 6.647530093804098, - "grad_norm": 0.02134522795677185, - "learning_rate": 0.0001999782061486498, - "loss": 46.0, - "step": 41278 - }, - { - "epoch": 6.647691130882886, - "grad_norm": 0.003303972538560629, - "learning_rate": 0.0001999782050924647, - "loss": 46.0, - "step": 41279 - }, - { - "epoch": 6.647852167961673, - "grad_norm": 0.005645588971674442, - "learning_rate": 0.00019997820403625397, - "loss": 46.0, - "step": 41280 - }, - { - "epoch": 6.648013205040461, - "grad_norm": 0.005770730786025524, - "learning_rate": 0.00019997820298001768, - "loss": 46.0, - "step": 41281 - }, - { - "epoch": 6.648174242119248, - "grad_norm": 0.0007078899652697146, - "learning_rate": 0.0001999782019237558, - "loss": 46.0, - "step": 41282 - }, - { - "epoch": 6.648335279198036, - "grad_norm": 0.0071189431473612785, - "learning_rate": 0.00019997820086746834, - "loss": 46.0, - "step": 41283 - }, - { - "epoch": 6.648496316276823, - "grad_norm": 0.0028874436393380165, - "learning_rate": 0.00019997819981115527, - "loss": 46.0, - "step": 41284 - }, - { - "epoch": 6.64865735335561, - "grad_norm": 0.030310943722724915, - "learning_rate": 0.0001999781987548166, - "loss": 46.0, - "step": 41285 - }, - { - "epoch": 6.648818390434397, - "grad_norm": 0.003774383570998907, - "learning_rate": 0.00019997819769845235, - "loss": 46.0, - "step": 41286 - }, - { - "epoch": 6.648979427513185, - "grad_norm": 0.00117953447625041, - "learning_rate": 0.00019997819664206254, - "loss": 46.0, - "step": 41287 - }, - { - "epoch": 6.649140464591972, - "grad_norm": 0.003956024069339037, - "learning_rate": 0.0001999781955856471, - "loss": 46.0, - "step": 41288 - }, - { - "epoch": 6.6493015016707595, - "grad_norm": 0.0027459862176328897, - "learning_rate": 0.00019997819452920613, - "loss": 46.0, - "step": 41289 - }, - { - "epoch": 6.649462538749547, - "grad_norm": 0.0014338220935314894, - "learning_rate": 0.00019997819347273953, - "loss": 46.0, - "step": 41290 - }, - { - "epoch": 6.649623575828334, - "grad_norm": 0.007602814119309187, - "learning_rate": 0.00019997819241624734, - "loss": 46.0, - "step": 41291 - }, - { - "epoch": 6.649784612907122, - "grad_norm": 0.016116492450237274, - "learning_rate": 0.00019997819135972954, - "loss": 46.0, - "step": 41292 - }, - { - "epoch": 6.649945649985909, - "grad_norm": 0.0049855369143188, - "learning_rate": 0.00019997819030318618, - "loss": 46.0, - "step": 41293 - }, - { - "epoch": 6.650106687064697, - "grad_norm": 0.016336875036358833, - "learning_rate": 0.00019997818924661723, - "loss": 46.0, - "step": 41294 - }, - { - "epoch": 6.650267724143484, - "grad_norm": 0.012784531340003014, - "learning_rate": 0.00019997818819002266, - "loss": 46.0, - "step": 41295 - }, - { - "epoch": 6.650428761222272, - "grad_norm": 0.012742854654788971, - "learning_rate": 0.00019997818713340254, - "loss": 46.0, - "step": 41296 - }, - { - "epoch": 6.650589798301059, - "grad_norm": 0.01026222575455904, - "learning_rate": 0.00019997818607675683, - "loss": 46.0, - "step": 41297 - }, - { - "epoch": 6.650750835379847, - "grad_norm": 0.02060762047767639, - "learning_rate": 0.00019997818502008553, - "loss": 46.0, - "step": 41298 - }, - { - "epoch": 6.650911872458634, - "grad_norm": 0.0023718683514744043, - "learning_rate": 0.00019997818396338862, - "loss": 46.0, - "step": 41299 - }, - { - "epoch": 6.651072909537421, - "grad_norm": 0.026104917749762535, - "learning_rate": 0.00019997818290666612, - "loss": 46.0, - "step": 41300 - }, - { - "epoch": 6.651233946616208, - "grad_norm": 0.009435472078621387, - "learning_rate": 0.00019997818184991803, - "loss": 46.0, - "step": 41301 - }, - { - "epoch": 6.6513949836949955, - "grad_norm": 0.006782672833651304, - "learning_rate": 0.00019997818079314436, - "loss": 46.0, - "step": 41302 - }, - { - "epoch": 6.651556020773783, - "grad_norm": 0.0013868138194084167, - "learning_rate": 0.0001999781797363451, - "loss": 46.0, - "step": 41303 - }, - { - "epoch": 6.65171705785257, - "grad_norm": 0.004889416974037886, - "learning_rate": 0.00019997817867952025, - "loss": 46.0, - "step": 41304 - }, - { - "epoch": 6.651878094931358, - "grad_norm": 0.002161378040909767, - "learning_rate": 0.00019997817762266982, - "loss": 46.0, - "step": 41305 - }, - { - "epoch": 6.652039132010145, - "grad_norm": 0.00307559035718441, - "learning_rate": 0.0001999781765657938, - "loss": 46.0, - "step": 41306 - }, - { - "epoch": 6.652200169088933, - "grad_norm": 0.021258361637592316, - "learning_rate": 0.00019997817550889218, - "loss": 46.0, - "step": 41307 - }, - { - "epoch": 6.65236120616772, - "grad_norm": 0.0028325102757662535, - "learning_rate": 0.00019997817445196496, - "loss": 46.0, - "step": 41308 - }, - { - "epoch": 6.652522243246508, - "grad_norm": 0.001658924506045878, - "learning_rate": 0.00019997817339501218, - "loss": 46.0, - "step": 41309 - }, - { - "epoch": 6.652683280325295, - "grad_norm": 0.0026529680471867323, - "learning_rate": 0.0001999781723380338, - "loss": 46.0, - "step": 41310 - }, - { - "epoch": 6.6528443174040826, - "grad_norm": 0.003789222100749612, - "learning_rate": 0.0001999781712810298, - "loss": 46.0, - "step": 41311 - }, - { - "epoch": 6.65300535448287, - "grad_norm": 0.015022702515125275, - "learning_rate": 0.00019997817022400025, - "loss": 46.0, - "step": 41312 - }, - { - "epoch": 6.6531663915616575, - "grad_norm": 0.005480973049998283, - "learning_rate": 0.0001999781691669451, - "loss": 46.0, - "step": 41313 - }, - { - "epoch": 6.653327428640445, - "grad_norm": 0.00444228108972311, - "learning_rate": 0.00019997816810986434, - "loss": 46.0, - "step": 41314 - }, - { - "epoch": 6.6534884657192315, - "grad_norm": 0.002165646757930517, - "learning_rate": 0.00019997816705275804, - "loss": 46.0, - "step": 41315 - }, - { - "epoch": 6.653649502798019, - "grad_norm": 0.002898496575653553, - "learning_rate": 0.0001999781659956261, - "loss": 46.0, - "step": 41316 - }, - { - "epoch": 6.653810539876806, - "grad_norm": 0.005737585015594959, - "learning_rate": 0.0001999781649384686, - "loss": 46.0, - "step": 41317 - }, - { - "epoch": 6.653971576955594, - "grad_norm": 0.0014970627380535007, - "learning_rate": 0.00019997816388128548, - "loss": 46.0, - "step": 41318 - }, - { - "epoch": 6.654132614034381, - "grad_norm": 0.008356812410056591, - "learning_rate": 0.0001999781628240768, - "loss": 46.0, - "step": 41319 - }, - { - "epoch": 6.654293651113169, - "grad_norm": 0.0011939759133383632, - "learning_rate": 0.00019997816176684253, - "loss": 46.0, - "step": 41320 - }, - { - "epoch": 6.654454688191956, - "grad_norm": 0.0013400658499449492, - "learning_rate": 0.00019997816070958267, - "loss": 46.0, - "step": 41321 - }, - { - "epoch": 6.654615725270744, - "grad_norm": 0.003531099995598197, - "learning_rate": 0.0001999781596522972, - "loss": 46.0, - "step": 41322 - }, - { - "epoch": 6.654776762349531, - "grad_norm": 0.0037914314307272434, - "learning_rate": 0.00019997815859498617, - "loss": 46.0, - "step": 41323 - }, - { - "epoch": 6.6549377994283185, - "grad_norm": 0.003836628282442689, - "learning_rate": 0.00019997815753764953, - "loss": 46.0, - "step": 41324 - }, - { - "epoch": 6.655098836507106, - "grad_norm": 0.003639188129454851, - "learning_rate": 0.0001999781564802873, - "loss": 46.0, - "step": 41325 - }, - { - "epoch": 6.655259873585893, - "grad_norm": 0.004865992348641157, - "learning_rate": 0.00019997815542289947, - "loss": 46.0, - "step": 41326 - }, - { - "epoch": 6.655420910664681, - "grad_norm": 0.00623518880456686, - "learning_rate": 0.0001999781543654861, - "loss": 46.0, - "step": 41327 - }, - { - "epoch": 6.6555819477434675, - "grad_norm": 0.018353795632719994, - "learning_rate": 0.0001999781533080471, - "loss": 46.0, - "step": 41328 - }, - { - "epoch": 6.655742984822256, - "grad_norm": 0.001177751924842596, - "learning_rate": 0.0001999781522505825, - "loss": 46.0, - "step": 41329 - }, - { - "epoch": 6.655904021901042, - "grad_norm": 0.003544985083863139, - "learning_rate": 0.00019997815119309235, - "loss": 46.0, - "step": 41330 - }, - { - "epoch": 6.65606505897983, - "grad_norm": 0.006752661429345608, - "learning_rate": 0.0001999781501355766, - "loss": 46.0, - "step": 41331 - }, - { - "epoch": 6.656226096058617, - "grad_norm": 0.008299527689814568, - "learning_rate": 0.00019997814907803522, - "loss": 46.0, - "step": 41332 - }, - { - "epoch": 6.656387133137405, - "grad_norm": 0.006667337380349636, - "learning_rate": 0.0001999781480204683, - "loss": 46.0, - "step": 41333 - }, - { - "epoch": 6.656548170216192, - "grad_norm": 0.0055719828233122826, - "learning_rate": 0.00019997814696287578, - "loss": 46.0, - "step": 41334 - }, - { - "epoch": 6.65670920729498, - "grad_norm": 0.0019611932802945375, - "learning_rate": 0.00019997814590525765, - "loss": 46.0, - "step": 41335 - }, - { - "epoch": 6.656870244373767, - "grad_norm": 0.0023944592103362083, - "learning_rate": 0.00019997814484761395, - "loss": 46.0, - "step": 41336 - }, - { - "epoch": 6.6570312814525545, - "grad_norm": 0.003260157536715269, - "learning_rate": 0.00019997814378994465, - "loss": 46.0, - "step": 41337 - }, - { - "epoch": 6.657192318531342, - "grad_norm": 0.0038181780837476254, - "learning_rate": 0.00019997814273224975, - "loss": 46.0, - "step": 41338 - }, - { - "epoch": 6.657353355610129, - "grad_norm": 0.002535885199904442, - "learning_rate": 0.00019997814167452927, - "loss": 46.0, - "step": 41339 - }, - { - "epoch": 6.657514392688917, - "grad_norm": 0.0018500848673284054, - "learning_rate": 0.00019997814061678323, - "loss": 46.0, - "step": 41340 - }, - { - "epoch": 6.657675429767704, - "grad_norm": 0.010041836649179459, - "learning_rate": 0.00019997813955901158, - "loss": 46.0, - "step": 41341 - }, - { - "epoch": 6.657836466846492, - "grad_norm": 0.0033802934922277927, - "learning_rate": 0.0001999781385012143, - "loss": 46.0, - "step": 41342 - }, - { - "epoch": 6.657997503925278, - "grad_norm": 0.007863650098443031, - "learning_rate": 0.0001999781374433915, - "loss": 46.0, - "step": 41343 - }, - { - "epoch": 6.658158541004067, - "grad_norm": 0.0029994079377502203, - "learning_rate": 0.0001999781363855431, - "loss": 46.0, - "step": 41344 - }, - { - "epoch": 6.658319578082853, - "grad_norm": 0.004304221365600824, - "learning_rate": 0.00019997813532766906, - "loss": 46.0, - "step": 41345 - }, - { - "epoch": 6.658480615161641, - "grad_norm": 0.004520541988313198, - "learning_rate": 0.00019997813426976947, - "loss": 46.0, - "step": 41346 - }, - { - "epoch": 6.658641652240428, - "grad_norm": 0.0008796003530733287, - "learning_rate": 0.0001999781332118443, - "loss": 46.0, - "step": 41347 - }, - { - "epoch": 6.658802689319216, - "grad_norm": 0.008355273865163326, - "learning_rate": 0.0001999781321538935, - "loss": 46.0, - "step": 41348 - }, - { - "epoch": 6.658963726398003, - "grad_norm": 0.0015010188799351454, - "learning_rate": 0.00019997813109591715, - "loss": 46.0, - "step": 41349 - }, - { - "epoch": 6.6591247634767905, - "grad_norm": 0.01182329747825861, - "learning_rate": 0.00019997813003791518, - "loss": 46.0, - "step": 41350 - }, - { - "epoch": 6.659285800555578, - "grad_norm": 0.0036371885798871517, - "learning_rate": 0.00019997812897988763, - "loss": 46.0, - "step": 41351 - }, - { - "epoch": 6.659446837634365, - "grad_norm": 0.003482918255031109, - "learning_rate": 0.0001999781279218345, - "loss": 46.0, - "step": 41352 - }, - { - "epoch": 6.659607874713153, - "grad_norm": 0.00894628930836916, - "learning_rate": 0.00019997812686375576, - "loss": 46.0, - "step": 41353 - }, - { - "epoch": 6.65976891179194, - "grad_norm": 0.0015628489200025797, - "learning_rate": 0.00019997812580565148, - "loss": 46.0, - "step": 41354 - }, - { - "epoch": 6.659929948870728, - "grad_norm": 0.008763380348682404, - "learning_rate": 0.00019997812474752155, - "loss": 46.0, - "step": 41355 - }, - { - "epoch": 6.660090985949515, - "grad_norm": 0.006190703250467777, - "learning_rate": 0.00019997812368936608, - "loss": 46.0, - "step": 41356 - }, - { - "epoch": 6.660252023028303, - "grad_norm": 0.006778019946068525, - "learning_rate": 0.00019997812263118498, - "loss": 46.0, - "step": 41357 - }, - { - "epoch": 6.660413060107089, - "grad_norm": 0.001629995065741241, - "learning_rate": 0.00019997812157297832, - "loss": 46.0, - "step": 41358 - }, - { - "epoch": 6.660574097185877, - "grad_norm": 0.00679272273555398, - "learning_rate": 0.00019997812051474607, - "loss": 46.0, - "step": 41359 - }, - { - "epoch": 6.660735134264664, - "grad_norm": 0.003945086617022753, - "learning_rate": 0.0001999781194564882, - "loss": 46.0, - "step": 41360 - }, - { - "epoch": 6.660896171343452, - "grad_norm": 0.0028573949821293354, - "learning_rate": 0.0001999781183982048, - "loss": 46.0, - "step": 41361 - }, - { - "epoch": 6.661057208422239, - "grad_norm": 0.010052439756691456, - "learning_rate": 0.00019997811733989576, - "loss": 46.0, - "step": 41362 - }, - { - "epoch": 6.6612182455010265, - "grad_norm": 0.0043817260302603245, - "learning_rate": 0.00019997811628156114, - "loss": 46.0, - "step": 41363 - }, - { - "epoch": 6.661379282579814, - "grad_norm": 0.005444617010653019, - "learning_rate": 0.00019997811522320092, - "loss": 46.0, - "step": 41364 - }, - { - "epoch": 6.661540319658601, - "grad_norm": 0.004544877912849188, - "learning_rate": 0.00019997811416481512, - "loss": 46.0, - "step": 41365 - }, - { - "epoch": 6.661701356737389, - "grad_norm": 0.003842040430754423, - "learning_rate": 0.00019997811310640376, - "loss": 46.0, - "step": 41366 - }, - { - "epoch": 6.661862393816176, - "grad_norm": 0.007663982454687357, - "learning_rate": 0.00019997811204796676, - "loss": 46.0, - "step": 41367 - }, - { - "epoch": 6.662023430894964, - "grad_norm": 0.0035739578306674957, - "learning_rate": 0.0001999781109895042, - "loss": 46.0, - "step": 41368 - }, - { - "epoch": 6.662184467973751, - "grad_norm": 0.0126936174929142, - "learning_rate": 0.00019997810993101607, - "loss": 46.0, - "step": 41369 - }, - { - "epoch": 6.662345505052539, - "grad_norm": 0.007733174134045839, - "learning_rate": 0.0001999781088725023, - "loss": 46.0, - "step": 41370 - }, - { - "epoch": 6.662506542131326, - "grad_norm": 0.002514298539608717, - "learning_rate": 0.00019997810781396301, - "loss": 46.0, - "step": 41371 - }, - { - "epoch": 6.6626675792101135, - "grad_norm": 0.0024871560744941235, - "learning_rate": 0.00019997810675539808, - "loss": 46.0, - "step": 41372 - }, - { - "epoch": 6.6628286162889, - "grad_norm": 0.006003184709697962, - "learning_rate": 0.00019997810569680758, - "loss": 46.0, - "step": 41373 - }, - { - "epoch": 6.662989653367688, - "grad_norm": 0.012818343006074429, - "learning_rate": 0.00019997810463819146, - "loss": 46.0, - "step": 41374 - }, - { - "epoch": 6.663150690446475, - "grad_norm": 0.00463420944288373, - "learning_rate": 0.0001999781035795498, - "loss": 46.0, - "step": 41375 - }, - { - "epoch": 6.6633117275252625, - "grad_norm": 0.005430162884294987, - "learning_rate": 0.0001999781025208825, - "loss": 46.0, - "step": 41376 - }, - { - "epoch": 6.66347276460405, - "grad_norm": 0.0032714323606342077, - "learning_rate": 0.00019997810146218963, - "loss": 46.0, - "step": 41377 - }, - { - "epoch": 6.663633801682837, - "grad_norm": 0.003485821420326829, - "learning_rate": 0.00019997810040347117, - "loss": 46.0, - "step": 41378 - }, - { - "epoch": 6.663794838761625, - "grad_norm": 0.0026943886186927557, - "learning_rate": 0.00019997809934472712, - "loss": 46.0, - "step": 41379 - }, - { - "epoch": 6.663955875840412, - "grad_norm": 0.0012250575236976147, - "learning_rate": 0.0001999780982859575, - "loss": 46.0, - "step": 41380 - }, - { - "epoch": 6.6641169129192, - "grad_norm": 0.0010569618316367269, - "learning_rate": 0.00019997809722716226, - "loss": 46.0, - "step": 41381 - }, - { - "epoch": 6.664277949997987, - "grad_norm": 0.006684210617095232, - "learning_rate": 0.00019997809616834145, - "loss": 46.0, - "step": 41382 - }, - { - "epoch": 6.664438987076775, - "grad_norm": 0.0024920450523495674, - "learning_rate": 0.00019997809510949508, - "loss": 46.0, - "step": 41383 - }, - { - "epoch": 6.664600024155562, - "grad_norm": 0.00532752787694335, - "learning_rate": 0.00019997809405062307, - "loss": 46.0, - "step": 41384 - }, - { - "epoch": 6.6647610612343495, - "grad_norm": 0.004439402371644974, - "learning_rate": 0.0001999780929917255, - "loss": 46.0, - "step": 41385 - }, - { - "epoch": 6.664922098313137, - "grad_norm": 0.006457884330302477, - "learning_rate": 0.00019997809193280232, - "loss": 46.0, - "step": 41386 - }, - { - "epoch": 6.665083135391924, - "grad_norm": 0.013806230388581753, - "learning_rate": 0.00019997809087385354, - "loss": 46.0, - "step": 41387 - }, - { - "epoch": 6.665244172470711, - "grad_norm": 0.002769937738776207, - "learning_rate": 0.0001999780898148792, - "loss": 46.0, - "step": 41388 - }, - { - "epoch": 6.6654052095494984, - "grad_norm": 0.0013071835273876786, - "learning_rate": 0.00019997808875587926, - "loss": 46.0, - "step": 41389 - }, - { - "epoch": 6.665566246628286, - "grad_norm": 0.0019575459882616997, - "learning_rate": 0.00019997808769685375, - "loss": 46.0, - "step": 41390 - }, - { - "epoch": 6.665727283707073, - "grad_norm": 0.0015985085628926754, - "learning_rate": 0.00019997808663780263, - "loss": 46.0, - "step": 41391 - }, - { - "epoch": 6.665888320785861, - "grad_norm": 0.009649241343140602, - "learning_rate": 0.00019997808557872592, - "loss": 46.0, - "step": 41392 - }, - { - "epoch": 6.666049357864648, - "grad_norm": 0.0017637225100770593, - "learning_rate": 0.00019997808451962366, - "loss": 46.0, - "step": 41393 - }, - { - "epoch": 6.666210394943436, - "grad_norm": 0.004917894024401903, - "learning_rate": 0.00019997808346049575, - "loss": 46.0, - "step": 41394 - }, - { - "epoch": 6.666371432022223, - "grad_norm": 0.007446544244885445, - "learning_rate": 0.00019997808240134228, - "loss": 46.0, - "step": 41395 - }, - { - "epoch": 6.666532469101011, - "grad_norm": 0.0043029217049479485, - "learning_rate": 0.00019997808134216322, - "loss": 46.0, - "step": 41396 - }, - { - "epoch": 6.666693506179798, - "grad_norm": 0.001443596906028688, - "learning_rate": 0.00019997808028295857, - "loss": 46.0, - "step": 41397 - }, - { - "epoch": 6.6668545432585855, - "grad_norm": 0.0046377237886190414, - "learning_rate": 0.00019997807922372834, - "loss": 46.0, - "step": 41398 - }, - { - "epoch": 6.667015580337373, - "grad_norm": 0.008895192295312881, - "learning_rate": 0.0001999780781644725, - "loss": 46.0, - "step": 41399 - }, - { - "epoch": 6.66717661741616, - "grad_norm": 0.0017852013697847724, - "learning_rate": 0.00019997807710519106, - "loss": 46.0, - "step": 41400 - }, - { - "epoch": 6.667337654494947, - "grad_norm": 0.009846286848187447, - "learning_rate": 0.00019997807604588407, - "loss": 46.0, - "step": 41401 - }, - { - "epoch": 6.667498691573735, - "grad_norm": 0.0038804770447313786, - "learning_rate": 0.0001999780749865515, - "loss": 46.0, - "step": 41402 - }, - { - "epoch": 6.667659728652522, - "grad_norm": 0.003125979797914624, - "learning_rate": 0.0001999780739271933, - "loss": 46.0, - "step": 41403 - }, - { - "epoch": 6.667820765731309, - "grad_norm": 0.01097989734262228, - "learning_rate": 0.0001999780728678095, - "loss": 46.0, - "step": 41404 - }, - { - "epoch": 6.667981802810097, - "grad_norm": 0.003038732334971428, - "learning_rate": 0.00019997807180840017, - "loss": 46.0, - "step": 41405 - }, - { - "epoch": 6.668142839888884, - "grad_norm": 0.00931643694639206, - "learning_rate": 0.0001999780707489652, - "loss": 46.0, - "step": 41406 - }, - { - "epoch": 6.668303876967672, - "grad_norm": 0.00387683417648077, - "learning_rate": 0.00019997806968950464, - "loss": 46.0, - "step": 41407 - }, - { - "epoch": 6.668464914046459, - "grad_norm": 0.0029038600623607635, - "learning_rate": 0.0001999780686300185, - "loss": 46.0, - "step": 41408 - }, - { - "epoch": 6.668625951125247, - "grad_norm": 0.002887092065066099, - "learning_rate": 0.0001999780675705068, - "loss": 46.0, - "step": 41409 - }, - { - "epoch": 6.668786988204034, - "grad_norm": 0.005947868339717388, - "learning_rate": 0.0001999780665109695, - "loss": 46.0, - "step": 41410 - }, - { - "epoch": 6.6689480252828215, - "grad_norm": 0.0013755843974649906, - "learning_rate": 0.00019997806545140657, - "loss": 46.0, - "step": 41411 - }, - { - "epoch": 6.669109062361609, - "grad_norm": 0.007008018903434277, - "learning_rate": 0.0001999780643918181, - "loss": 46.0, - "step": 41412 - }, - { - "epoch": 6.669270099440396, - "grad_norm": 0.0030276132747530937, - "learning_rate": 0.00019997806333220403, - "loss": 46.0, - "step": 41413 - }, - { - "epoch": 6.669431136519184, - "grad_norm": 0.009313974529504776, - "learning_rate": 0.00019997806227256435, - "loss": 46.0, - "step": 41414 - }, - { - "epoch": 6.669592173597971, - "grad_norm": 0.0027604824863374233, - "learning_rate": 0.00019997806121289908, - "loss": 46.0, - "step": 41415 - }, - { - "epoch": 6.669753210676758, - "grad_norm": 0.006833921652287245, - "learning_rate": 0.00019997806015320822, - "loss": 46.0, - "step": 41416 - }, - { - "epoch": 6.669914247755546, - "grad_norm": 0.0011460887035354972, - "learning_rate": 0.00019997805909349178, - "loss": 46.0, - "step": 41417 - }, - { - "epoch": 6.670075284834333, - "grad_norm": 0.002286622766405344, - "learning_rate": 0.00019997805803374975, - "loss": 46.0, - "step": 41418 - }, - { - "epoch": 6.67023632191312, - "grad_norm": 0.002973241964355111, - "learning_rate": 0.00019997805697398216, - "loss": 46.0, - "step": 41419 - }, - { - "epoch": 6.670397358991908, - "grad_norm": 0.010130366310477257, - "learning_rate": 0.00019997805591418896, - "loss": 46.0, - "step": 41420 - }, - { - "epoch": 6.670558396070695, - "grad_norm": 0.0011209291405975819, - "learning_rate": 0.00019997805485437017, - "loss": 46.0, - "step": 41421 - }, - { - "epoch": 6.670719433149483, - "grad_norm": 0.004139076452702284, - "learning_rate": 0.00019997805379452576, - "loss": 46.0, - "step": 41422 - }, - { - "epoch": 6.67088047022827, - "grad_norm": 0.006929545197635889, - "learning_rate": 0.0001999780527346558, - "loss": 46.0, - "step": 41423 - }, - { - "epoch": 6.6710415073070575, - "grad_norm": 0.002197990193963051, - "learning_rate": 0.00019997805167476024, - "loss": 46.0, - "step": 41424 - }, - { - "epoch": 6.671202544385845, - "grad_norm": 0.00770388450473547, - "learning_rate": 0.0001999780506148391, - "loss": 46.0, - "step": 41425 - }, - { - "epoch": 6.671363581464632, - "grad_norm": 0.003448836738243699, - "learning_rate": 0.00019997804955489238, - "loss": 46.0, - "step": 41426 - }, - { - "epoch": 6.67152461854342, - "grad_norm": 0.0026357972528785467, - "learning_rate": 0.00019997804849492004, - "loss": 46.0, - "step": 41427 - }, - { - "epoch": 6.671685655622207, - "grad_norm": 0.0041018882766366005, - "learning_rate": 0.00019997804743492214, - "loss": 46.0, - "step": 41428 - }, - { - "epoch": 6.671846692700995, - "grad_norm": 0.0043464601039886475, - "learning_rate": 0.0001999780463748986, - "loss": 46.0, - "step": 41429 - }, - { - "epoch": 6.672007729779782, - "grad_norm": 0.002962283091619611, - "learning_rate": 0.00019997804531484952, - "loss": 46.0, - "step": 41430 - }, - { - "epoch": 6.672168766858569, - "grad_norm": 0.0074380990117788315, - "learning_rate": 0.00019997804425477483, - "loss": 46.0, - "step": 41431 - }, - { - "epoch": 6.672329803937356, - "grad_norm": 0.002464896999299526, - "learning_rate": 0.00019997804319467455, - "loss": 46.0, - "step": 41432 - }, - { - "epoch": 6.672490841016144, - "grad_norm": 0.003040800103917718, - "learning_rate": 0.0001999780421345487, - "loss": 46.0, - "step": 41433 - }, - { - "epoch": 6.672651878094931, - "grad_norm": 0.009164533577859402, - "learning_rate": 0.0001999780410743972, - "loss": 46.0, - "step": 41434 - }, - { - "epoch": 6.6728129151737186, - "grad_norm": 0.006473027169704437, - "learning_rate": 0.00019997804001422017, - "loss": 46.0, - "step": 41435 - }, - { - "epoch": 6.672973952252506, - "grad_norm": 0.002698664553463459, - "learning_rate": 0.00019997803895401757, - "loss": 46.0, - "step": 41436 - }, - { - "epoch": 6.6731349893312935, - "grad_norm": 0.009651648811995983, - "learning_rate": 0.00019997803789378933, - "loss": 46.0, - "step": 41437 - }, - { - "epoch": 6.673296026410081, - "grad_norm": 0.007370231673121452, - "learning_rate": 0.0001999780368335355, - "loss": 46.0, - "step": 41438 - }, - { - "epoch": 6.673457063488868, - "grad_norm": 0.0070519051514565945, - "learning_rate": 0.00019997803577325614, - "loss": 46.0, - "step": 41439 - }, - { - "epoch": 6.673618100567656, - "grad_norm": 0.002306445501744747, - "learning_rate": 0.00019997803471295114, - "loss": 46.0, - "step": 41440 - }, - { - "epoch": 6.673779137646443, - "grad_norm": 0.009618941694498062, - "learning_rate": 0.00019997803365262055, - "loss": 46.0, - "step": 41441 - }, - { - "epoch": 6.673940174725231, - "grad_norm": 0.008067110553383827, - "learning_rate": 0.0001999780325922644, - "loss": 46.0, - "step": 41442 - }, - { - "epoch": 6.674101211804018, - "grad_norm": 0.01169788371771574, - "learning_rate": 0.00019997803153188262, - "loss": 46.0, - "step": 41443 - }, - { - "epoch": 6.674262248882806, - "grad_norm": 0.009598050266504288, - "learning_rate": 0.0001999780304714753, - "loss": 46.0, - "step": 41444 - }, - { - "epoch": 6.674423285961593, - "grad_norm": 0.01065739430487156, - "learning_rate": 0.00019997802941104236, - "loss": 46.0, - "step": 41445 - }, - { - "epoch": 6.67458432304038, - "grad_norm": 0.006738572847098112, - "learning_rate": 0.00019997802835058383, - "loss": 46.0, - "step": 41446 - }, - { - "epoch": 6.674745360119167, - "grad_norm": 0.0030417251400649548, - "learning_rate": 0.00019997802729009972, - "loss": 46.0, - "step": 41447 - }, - { - "epoch": 6.6749063971979545, - "grad_norm": 0.0015606951201334596, - "learning_rate": 0.00019997802622959002, - "loss": 46.0, - "step": 41448 - }, - { - "epoch": 6.675067434276742, - "grad_norm": 0.012461822479963303, - "learning_rate": 0.0001999780251690547, - "loss": 46.0, - "step": 41449 - }, - { - "epoch": 6.675228471355529, - "grad_norm": 0.01988958939909935, - "learning_rate": 0.00019997802410849384, - "loss": 46.0, - "step": 41450 - }, - { - "epoch": 6.675389508434317, - "grad_norm": 0.005897939670830965, - "learning_rate": 0.00019997802304790735, - "loss": 46.0, - "step": 41451 - }, - { - "epoch": 6.675550545513104, - "grad_norm": 0.0061839912086725235, - "learning_rate": 0.00019997802198729533, - "loss": 46.0, - "step": 41452 - }, - { - "epoch": 6.675711582591892, - "grad_norm": 0.001188946538604796, - "learning_rate": 0.00019997802092665764, - "loss": 46.0, - "step": 41453 - }, - { - "epoch": 6.675872619670679, - "grad_norm": 0.007058894261717796, - "learning_rate": 0.0001999780198659944, - "loss": 46.0, - "step": 41454 - }, - { - "epoch": 6.676033656749467, - "grad_norm": 0.0012896310072392225, - "learning_rate": 0.00019997801880530558, - "loss": 46.0, - "step": 41455 - }, - { - "epoch": 6.676194693828254, - "grad_norm": 0.002240738831460476, - "learning_rate": 0.00019997801774459119, - "loss": 46.0, - "step": 41456 - }, - { - "epoch": 6.676355730907042, - "grad_norm": 0.001775287906639278, - "learning_rate": 0.00019997801668385117, - "loss": 46.0, - "step": 41457 - }, - { - "epoch": 6.676516767985829, - "grad_norm": 0.005676504224538803, - "learning_rate": 0.00019997801562308558, - "loss": 46.0, - "step": 41458 - }, - { - "epoch": 6.6766778050646165, - "grad_norm": 0.004192544613033533, - "learning_rate": 0.0001999780145622944, - "loss": 46.0, - "step": 41459 - }, - { - "epoch": 6.676838842143404, - "grad_norm": 0.021272780373692513, - "learning_rate": 0.00019997801350147762, - "loss": 46.0, - "step": 41460 - }, - { - "epoch": 6.6769998792221905, - "grad_norm": 0.004423180595040321, - "learning_rate": 0.00019997801244063523, - "loss": 46.0, - "step": 41461 - }, - { - "epoch": 6.677160916300978, - "grad_norm": 0.0009819597471505404, - "learning_rate": 0.00019997801137976732, - "loss": 46.0, - "step": 41462 - }, - { - "epoch": 6.677321953379765, - "grad_norm": 0.009840531274676323, - "learning_rate": 0.00019997801031887375, - "loss": 46.0, - "step": 41463 - }, - { - "epoch": 6.677482990458553, - "grad_norm": 0.01274296548217535, - "learning_rate": 0.00019997800925795463, - "loss": 46.0, - "step": 41464 - }, - { - "epoch": 6.67764402753734, - "grad_norm": 0.0029992652125656605, - "learning_rate": 0.00019997800819700993, - "loss": 46.0, - "step": 41465 - }, - { - "epoch": 6.677805064616128, - "grad_norm": 0.0011716948356479406, - "learning_rate": 0.00019997800713603958, - "loss": 46.0, - "step": 41466 - }, - { - "epoch": 6.677966101694915, - "grad_norm": 0.0017167023615911603, - "learning_rate": 0.0001999780060750437, - "loss": 46.0, - "step": 41467 - }, - { - "epoch": 6.678127138773703, - "grad_norm": 0.005630992352962494, - "learning_rate": 0.00019997800501402223, - "loss": 46.0, - "step": 41468 - }, - { - "epoch": 6.67828817585249, - "grad_norm": 0.006417365744709969, - "learning_rate": 0.00019997800395297512, - "loss": 46.0, - "step": 41469 - }, - { - "epoch": 6.678449212931278, - "grad_norm": 0.008727450855076313, - "learning_rate": 0.00019997800289190244, - "loss": 46.0, - "step": 41470 - }, - { - "epoch": 6.678610250010065, - "grad_norm": 0.004421089310199022, - "learning_rate": 0.00019997800183080421, - "loss": 46.0, - "step": 41471 - }, - { - "epoch": 6.6787712870888525, - "grad_norm": 0.0031029530800879, - "learning_rate": 0.00019997800076968037, - "loss": 46.0, - "step": 41472 - }, - { - "epoch": 6.67893232416764, - "grad_norm": 0.006378788035362959, - "learning_rate": 0.00019997799970853094, - "loss": 46.0, - "step": 41473 - }, - { - "epoch": 6.6790933612464265, - "grad_norm": 0.0020963973365724087, - "learning_rate": 0.00019997799864735592, - "loss": 46.0, - "step": 41474 - }, - { - "epoch": 6.679254398325215, - "grad_norm": 0.002083128783851862, - "learning_rate": 0.00019997799758615528, - "loss": 46.0, - "step": 41475 - }, - { - "epoch": 6.679415435404001, - "grad_norm": 0.003655440639704466, - "learning_rate": 0.0001999779965249291, - "loss": 46.0, - "step": 41476 - }, - { - "epoch": 6.679576472482789, - "grad_norm": 0.028048155829310417, - "learning_rate": 0.0001999779954636773, - "loss": 46.0, - "step": 41477 - }, - { - "epoch": 6.679737509561576, - "grad_norm": 0.002458154456689954, - "learning_rate": 0.00019997799440239991, - "loss": 46.0, - "step": 41478 - }, - { - "epoch": 6.679898546640364, - "grad_norm": 0.007947004400193691, - "learning_rate": 0.00019997799334109696, - "loss": 46.0, - "step": 41479 - }, - { - "epoch": 6.680059583719151, - "grad_norm": 0.0083960285410285, - "learning_rate": 0.0001999779922797684, - "loss": 46.0, - "step": 41480 - }, - { - "epoch": 6.680220620797939, - "grad_norm": 0.00619344599545002, - "learning_rate": 0.00019997799121841423, - "loss": 46.0, - "step": 41481 - }, - { - "epoch": 6.680381657876726, - "grad_norm": 0.0034263241104781628, - "learning_rate": 0.0001999779901570345, - "loss": 46.0, - "step": 41482 - }, - { - "epoch": 6.680542694955514, - "grad_norm": 0.0025248266756534576, - "learning_rate": 0.00019997798909562916, - "loss": 46.0, - "step": 41483 - }, - { - "epoch": 6.680703732034301, - "grad_norm": 0.01644541509449482, - "learning_rate": 0.00019997798803419827, - "loss": 46.0, - "step": 41484 - }, - { - "epoch": 6.6808647691130885, - "grad_norm": 0.0025362572632730007, - "learning_rate": 0.00019997798697274176, - "loss": 46.0, - "step": 41485 - }, - { - "epoch": 6.681025806191876, - "grad_norm": 0.0027080951258540154, - "learning_rate": 0.00019997798591125964, - "loss": 46.0, - "step": 41486 - }, - { - "epoch": 6.681186843270663, - "grad_norm": 0.0037105679512023926, - "learning_rate": 0.00019997798484975196, - "loss": 46.0, - "step": 41487 - }, - { - "epoch": 6.681347880349451, - "grad_norm": 0.005108132492750883, - "learning_rate": 0.0001999779837882187, - "loss": 46.0, - "step": 41488 - }, - { - "epoch": 6.681508917428237, - "grad_norm": 0.009482454508543015, - "learning_rate": 0.00019997798272665984, - "loss": 46.0, - "step": 41489 - }, - { - "epoch": 6.681669954507026, - "grad_norm": 0.0033914861269295216, - "learning_rate": 0.0001999779816650754, - "loss": 46.0, - "step": 41490 - }, - { - "epoch": 6.681830991585812, - "grad_norm": 0.0020544622093439102, - "learning_rate": 0.00019997798060346534, - "loss": 46.0, - "step": 41491 - }, - { - "epoch": 6.6819920286646, - "grad_norm": 0.0039254650473594666, - "learning_rate": 0.00019997797954182973, - "loss": 46.0, - "step": 41492 - }, - { - "epoch": 6.682153065743387, - "grad_norm": 0.002754541812464595, - "learning_rate": 0.0001999779784801685, - "loss": 46.0, - "step": 41493 - }, - { - "epoch": 6.682314102822175, - "grad_norm": 0.0027967761270701885, - "learning_rate": 0.0001999779774184817, - "loss": 46.0, - "step": 41494 - }, - { - "epoch": 6.682475139900962, - "grad_norm": 0.007121903821825981, - "learning_rate": 0.0001999779763567693, - "loss": 46.0, - "step": 41495 - }, - { - "epoch": 6.6826361769797495, - "grad_norm": 0.004999418742954731, - "learning_rate": 0.0001999779752950313, - "loss": 46.0, - "step": 41496 - }, - { - "epoch": 6.682797214058537, - "grad_norm": 0.00398630416020751, - "learning_rate": 0.00019997797423326773, - "loss": 46.0, - "step": 41497 - }, - { - "epoch": 6.6829582511373244, - "grad_norm": 0.0035232247319072485, - "learning_rate": 0.00019997797317147857, - "loss": 46.0, - "step": 41498 - }, - { - "epoch": 6.683119288216112, - "grad_norm": 0.008806174620985985, - "learning_rate": 0.00019997797210966382, - "loss": 46.0, - "step": 41499 - }, - { - "epoch": 6.683280325294899, - "grad_norm": 0.0030216407030820847, - "learning_rate": 0.00019997797104782348, - "loss": 46.0, - "step": 41500 - }, - { - "epoch": 6.683441362373687, - "grad_norm": 0.007164289243519306, - "learning_rate": 0.00019997796998595755, - "loss": 46.0, - "step": 41501 - }, - { - "epoch": 6.683602399452474, - "grad_norm": 0.007627043407410383, - "learning_rate": 0.000199977968924066, - "loss": 46.0, - "step": 41502 - }, - { - "epoch": 6.683763436531262, - "grad_norm": 0.006922471337020397, - "learning_rate": 0.0001999779678621489, - "loss": 46.0, - "step": 41503 - }, - { - "epoch": 6.683924473610048, - "grad_norm": 0.0027551406528800726, - "learning_rate": 0.0001999779668002062, - "loss": 46.0, - "step": 41504 - }, - { - "epoch": 6.684085510688836, - "grad_norm": 0.0064051453955471516, - "learning_rate": 0.00019997796573823791, - "loss": 46.0, - "step": 41505 - }, - { - "epoch": 6.684246547767623, - "grad_norm": 0.0018681363435462117, - "learning_rate": 0.00019997796467624402, - "loss": 46.0, - "step": 41506 - }, - { - "epoch": 6.684407584846411, - "grad_norm": 0.0018531293608248234, - "learning_rate": 0.00019997796361422457, - "loss": 46.0, - "step": 41507 - }, - { - "epoch": 6.684568621925198, - "grad_norm": 0.011441242881119251, - "learning_rate": 0.0001999779625521795, - "loss": 46.0, - "step": 41508 - }, - { - "epoch": 6.6847296590039855, - "grad_norm": 0.0034289825707674026, - "learning_rate": 0.00019997796149010886, - "loss": 46.0, - "step": 41509 - }, - { - "epoch": 6.684890696082773, - "grad_norm": 0.0196436308324337, - "learning_rate": 0.00019997796042801262, - "loss": 46.0, - "step": 41510 - }, - { - "epoch": 6.68505173316156, - "grad_norm": 0.00993046909570694, - "learning_rate": 0.00019997795936589082, - "loss": 46.0, - "step": 41511 - }, - { - "epoch": 6.685212770240348, - "grad_norm": 0.0011435869382694364, - "learning_rate": 0.00019997795830374338, - "loss": 46.0, - "step": 41512 - }, - { - "epoch": 6.685373807319135, - "grad_norm": 0.012656190432608128, - "learning_rate": 0.0001999779572415704, - "loss": 46.0, - "step": 41513 - }, - { - "epoch": 6.685534844397923, - "grad_norm": 0.02194136008620262, - "learning_rate": 0.00019997795617937182, - "loss": 46.0, - "step": 41514 - }, - { - "epoch": 6.68569588147671, - "grad_norm": 0.001996238250285387, - "learning_rate": 0.00019997795511714762, - "loss": 46.0, - "step": 41515 - }, - { - "epoch": 6.685856918555498, - "grad_norm": 0.0031546689569950104, - "learning_rate": 0.00019997795405489783, - "loss": 46.0, - "step": 41516 - }, - { - "epoch": 6.686017955634285, - "grad_norm": 0.002523136092349887, - "learning_rate": 0.00019997795299262248, - "loss": 46.0, - "step": 41517 - }, - { - "epoch": 6.686178992713073, - "grad_norm": 0.0032731450628489256, - "learning_rate": 0.00019997795193032154, - "loss": 46.0, - "step": 41518 - }, - { - "epoch": 6.686340029791859, - "grad_norm": 0.003066750941798091, - "learning_rate": 0.000199977950867995, - "loss": 46.0, - "step": 41519 - }, - { - "epoch": 6.686501066870647, - "grad_norm": 0.007604025304317474, - "learning_rate": 0.00019997794980564285, - "loss": 46.0, - "step": 41520 - }, - { - "epoch": 6.686662103949434, - "grad_norm": 0.010633991099894047, - "learning_rate": 0.00019997794874326513, - "loss": 46.0, - "step": 41521 - }, - { - "epoch": 6.6868231410282215, - "grad_norm": 0.0023399614728987217, - "learning_rate": 0.00019997794768086184, - "loss": 46.0, - "step": 41522 - }, - { - "epoch": 6.686984178107009, - "grad_norm": 0.008343911729753017, - "learning_rate": 0.00019997794661843295, - "loss": 46.0, - "step": 41523 - }, - { - "epoch": 6.687145215185796, - "grad_norm": 0.0035168544854968786, - "learning_rate": 0.00019997794555597846, - "loss": 46.0, - "step": 41524 - }, - { - "epoch": 6.687306252264584, - "grad_norm": 0.0018394142389297485, - "learning_rate": 0.00019997794449349839, - "loss": 46.0, - "step": 41525 - }, - { - "epoch": 6.687467289343371, - "grad_norm": 0.003639424219727516, - "learning_rate": 0.00019997794343099272, - "loss": 46.0, - "step": 41526 - }, - { - "epoch": 6.687628326422159, - "grad_norm": 0.00887287687510252, - "learning_rate": 0.00019997794236846148, - "loss": 46.0, - "step": 41527 - }, - { - "epoch": 6.687789363500946, - "grad_norm": 0.0015143896453082561, - "learning_rate": 0.00019997794130590464, - "loss": 46.0, - "step": 41528 - }, - { - "epoch": 6.687950400579734, - "grad_norm": 0.006364369299262762, - "learning_rate": 0.0001999779402433222, - "loss": 46.0, - "step": 41529 - }, - { - "epoch": 6.688111437658521, - "grad_norm": 0.004334803204983473, - "learning_rate": 0.00019997793918071416, - "loss": 46.0, - "step": 41530 - }, - { - "epoch": 6.688272474737309, - "grad_norm": 0.0014573076041415334, - "learning_rate": 0.00019997793811808056, - "loss": 46.0, - "step": 41531 - }, - { - "epoch": 6.688433511816096, - "grad_norm": 0.00799465086311102, - "learning_rate": 0.00019997793705542138, - "loss": 46.0, - "step": 41532 - }, - { - "epoch": 6.6885945488948835, - "grad_norm": 0.001455390825867653, - "learning_rate": 0.00019997793599273658, - "loss": 46.0, - "step": 41533 - }, - { - "epoch": 6.68875558597367, - "grad_norm": 0.010175321251153946, - "learning_rate": 0.0001999779349300262, - "loss": 46.0, - "step": 41534 - }, - { - "epoch": 6.6889166230524575, - "grad_norm": 0.0028319982811808586, - "learning_rate": 0.00019997793386729025, - "loss": 46.0, - "step": 41535 - }, - { - "epoch": 6.689077660131245, - "grad_norm": 0.0052672214806079865, - "learning_rate": 0.0001999779328045287, - "loss": 46.0, - "step": 41536 - }, - { - "epoch": 6.689238697210032, - "grad_norm": 0.017750782892107964, - "learning_rate": 0.00019997793174174154, - "loss": 46.0, - "step": 41537 - }, - { - "epoch": 6.68939973428882, - "grad_norm": 0.010550031438469887, - "learning_rate": 0.0001999779306789288, - "loss": 46.0, - "step": 41538 - }, - { - "epoch": 6.689560771367607, - "grad_norm": 0.00702440133318305, - "learning_rate": 0.00019997792961609049, - "loss": 46.0, - "step": 41539 - }, - { - "epoch": 6.689721808446395, - "grad_norm": 0.00806315429508686, - "learning_rate": 0.00019997792855322658, - "loss": 46.0, - "step": 41540 - }, - { - "epoch": 6.689882845525182, - "grad_norm": 0.004118355456739664, - "learning_rate": 0.00019997792749033706, - "loss": 46.0, - "step": 41541 - }, - { - "epoch": 6.69004388260397, - "grad_norm": 0.00675570871680975, - "learning_rate": 0.00019997792642742197, - "loss": 46.0, - "step": 41542 - }, - { - "epoch": 6.690204919682757, - "grad_norm": 0.003215635195374489, - "learning_rate": 0.0001999779253644813, - "loss": 46.0, - "step": 41543 - }, - { - "epoch": 6.6903659567615446, - "grad_norm": 0.0011074589565396309, - "learning_rate": 0.00019997792430151502, - "loss": 46.0, - "step": 41544 - }, - { - "epoch": 6.690526993840332, - "grad_norm": 0.003002199809998274, - "learning_rate": 0.00019997792323852317, - "loss": 46.0, - "step": 41545 - }, - { - "epoch": 6.6906880309191195, - "grad_norm": 0.003767826361581683, - "learning_rate": 0.00019997792217550572, - "loss": 46.0, - "step": 41546 - }, - { - "epoch": 6.690849067997906, - "grad_norm": 0.008559972047805786, - "learning_rate": 0.0001999779211124627, - "loss": 46.0, - "step": 41547 - }, - { - "epoch": 6.691010105076694, - "grad_norm": 0.01038906816393137, - "learning_rate": 0.00019997792004939404, - "loss": 46.0, - "step": 41548 - }, - { - "epoch": 6.691171142155481, - "grad_norm": 0.00894656777381897, - "learning_rate": 0.00019997791898629985, - "loss": 46.0, - "step": 41549 - }, - { - "epoch": 6.691332179234268, - "grad_norm": 0.002707582665607333, - "learning_rate": 0.00019997791792318004, - "loss": 46.0, - "step": 41550 - } - ], - "logging_steps": 1, - "max_steps": 6209750, - "num_input_tokens_seen": 0, - "num_train_epochs": 1001, - "save_steps": 150, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 829708196511744.0, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -} +version https://git-lfs.github.com/spec/v1 +oid sha256:142d481d5f81d6911ad6da3cb2e30569135f1d5ed220426e31d4e642bba94e24 +size 11039870